"""Defines parsing functions used by isort for parsing import definitions"""
from collections import OrderedDict, defaultdict
from functools import partial
from itertools import chain
from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Set, Tuple
from warnings import warn

from . import place
from .comments import parse as parse_comments
from .exceptions import MissingSection
from .settings import DEFAULT_CONFIG, Config

if TYPE_CHECKING:
    from mypy_extensions import TypedDict

    CommentsAboveDict = TypedDict(
        "CommentsAboveDict", {"straight": Dict[str, Any], "from": Dict[str, Any]}
    )

    CommentsDict = TypedDict(
        "CommentsDict",
        {
            "from": Dict[str, Any],
            "straight": Dict[str, Any],
            "nested": Dict[str, Any],
            "above": CommentsAboveDict,
        },
    )


def _infer_line_separator(contents: str) -> str:
    """Returns the first line separator style found in `contents`.

    Checks for "\\r\\n" first, then a bare "\\r", and falls back to "\\n".
    """
    if "\r\n" in contents:
        return "\r\n"
    if "\r" in contents:
        return "\r"
    return "\n"


def _normalize_line(raw_line: str) -> Tuple[str, str]:
    """Normalizes import related statements in the provided line.

    Inserts the spaces that are missing from compact spellings such as
    `from.import`, `import*` and ` .import `, and replaces tabs with spaces.

    Returns (normalized_line: str, raw_line: str)
    """
    line = raw_line.replace("from.import ", "from . import ")
    line = line.replace("from.cimport ", "from . cimport ")
    line = line.replace("import*", "import *")
    line = line.replace(" .import ", " . import ")
    line = line.replace(" .cimport ", " . cimport ")
    line = line.replace("\t", " ")
    return (line, raw_line)


def import_type(line: str, config: Config = DEFAULT_CONFIG) -> Optional[str]:
    """If the current line is an import line it will return its type (from or straight)

    Returns None when the line is not an import, when it carries an isort
    skip / split marker, or when `config.honor_noqa` is set and the line ends
    with `noqa`.
    """
    if config.honor_noqa and line.lower().rstrip().endswith("noqa"):
        return None
    if "isort:skip" in line or "isort: skip" in line or "isort: split" in line:
        return None
    if line.startswith(("import ", "cimport ")):
        return "straight"
    if line.startswith("from "):
        return "from"
    return None


def _strip_syntax(import_string: str) -> str:
    """Reduces an import statement to its space separated names.

    Backslashes, parentheses and commas become spaces, and the first
    occurrence of each of the `from`, `import` and `cimport` tokens is
    removed. `_import` / `_cimport` substrings are swapped for placeholders
    while this happens and restored afterwards. Finally `{ ` and ` }` are
    rewritten to `{|` and `|}` so that a later whitespace split keeps a braced
    item together.
    """
    import_string = import_string.replace("_import", "[[i]]")
    import_string = import_string.replace("_cimport", "[[ci]]")
    for remove_syntax in ["\\", "(", ")", ","]:
        import_string = import_string.replace(remove_syntax, " ")
    import_list = import_string.split()
    for key in ("from", "import", "cimport"):
        if key in import_list:
            import_list.remove(key)
    import_string = " ".join(import_list)
    import_string = import_string.replace("[[i]]", "_import")
    import_string = import_string.replace("[[ci]]", "_cimport")
    return import_string.replace("{ ", "{|").replace(" }", "|}")


def skip_line(
    line: str,
    in_quote: str,
    index: int,
    section_comments: Tuple[str, ...],
    needs_import: bool = True,
) -> Tuple[bool, str]:
    """Determine if a given line should be skipped.

    `in_quote` is the quote delimiter that is still open when the line starts
    ("" when none is). `index` and `section_comments` are accepted but not
    read by this function. When `needs_import` is true, a line holding several
    `;` separated statements is skipped if any of them is not an import.

    Returns back a tuple containing:

    (skip_line: bool,
     in_quote: str,)
    """
    should_skip = bool(in_quote)
    if '"' in line or "'" in line:
        char_index = 0
        while char_index < len(line):
            if line[char_index] == "\\":
                # Jump over the escaped character
                char_index += 1
            elif in_quote:
                if line[char_index : char_index + len(in_quote)] == in_quote:
                    in_quote = ""
            elif line[char_index] in ("'", '"'):
                long_quote = line[char_index : char_index + 3]
                if long_quote in ('"""', "'''"):
                    in_quote = long_quote
                    char_index += 2
                else:
                    in_quote = line[char_index]
            elif line[char_index] == "#":
                # Everything after an unquoted "#" is a comment
                break
            char_index += 1

    if ";" in line.split("#")[0] and needs_import:
        for part in (part.strip() for part in line.split(";")):
            if part and not part.startswith(("from ", "import ", "cimport ")):
                should_skip = True

    return (bool(should_skip or in_quote), in_quote)


class ParsedContent(NamedTuple):
    """The result of `file_contents`: the imports found plus everything else."""

    # The input split into lines
    in_lines: List[str]
    # Lines that were not consumed as imports (or as comments directly above one)
    lines_without_imports: List[str]
    # Line index recorded for the start of the imports; -1 if none was recorded
    import_index: int
    # Upper-cased section names requested through "isort:imports-" comments
    place_imports: Dict[str, List[str]]
    # Maps each "isort:imports-" comment line to the section it names
    import_placements: Dict[str, str]
    # Aliases ("as" names) per module, split by "straight" / "from"
    as_map: Dict[str, Dict[str, List[str]]]
    # Parsed imports keyed by section, then by "straight" / "from"
    imports: Dict[str, Dict[str, Any]]
    categorized_comments: "CommentsDict"
    # len(lines_without_imports) - original_line_count
    change_count: int
    original_line_count: int
    line_separator: str
    sections: Any
    verbose_output: List[str]
    # Modules of from-imports whose import list ended with a trailing comma
    trailing_commas: Set[str]


def file_contents(contents: str, config: Config = DEFAULT_CONFIG) -> ParsedContent:
    """Parses a python file taking out and categorizing imports.

    Each line of `contents` is either collected as an import (stored in the
    returned `imports`, `as_map` and `categorized_comments`) or passed through
    to `lines_without_imports`.

    A module that the finder places in the "" section triggers a warning and
    is stored under an `imports[""]` entry, for from-imports as well as for
    straight imports.

    Raises MissingSection if a module is placed in a non-empty section that is
    not a key of `imports`.
    """
    line_separator: str = config.line_ending or _infer_line_separator(contents)
    in_lines = contents.splitlines()
    if contents and contents[-1] in ("\n", "\r"):
        # splitlines() drops the empty line that follows a trailing separator
        in_lines.append("")

    out_lines: List[str] = []
    original_line_count = len(in_lines)
    if config.old_finders:
        from .deprecated.finders import FindersManager

        finder = FindersManager(config=config).find
    else:
        finder = partial(place.module, config=config)

    line_count = len(in_lines)

    place_imports: Dict[str, List[str]] = {}
    import_placements: Dict[str, str] = {}
    as_map: Dict[str, Dict[str, List[str]]] = {
        "straight": defaultdict(list),
        "from": defaultdict(list),
    }
    imports: OrderedDict[str, Dict[str, Any]] = OrderedDict()
    verbose_output: List[str] = []

    for section in chain(config.sections, config.forced_separate):
        imports[section] = {"straight": OrderedDict(), "from": OrderedDict()}
    categorized_comments: CommentsDict = {
        "from": {},
        "straight": {},
        "nested": {},
        "above": {"straight": {}, "from": {}},
    }

    trailing_commas: Set[str] = set()

    index = 0
    import_index = -1
    in_quote = ""
    while index < line_count:
        line = in_lines[index]
        index += 1
        # `index` may advance further while a multi-line import is consumed;
        # remember where this statement started.
        statement_index = index
        (skipping_line, in_quote) = skip_line(
            line, in_quote=in_quote, index=index, section_comments=config.section_comments
        )

        if (
            line in config.section_comments or line in config.section_comments_end
        ) and not skipping_line:
            # Section comments are dropped from the output
            if import_index == -1:  # pragma: no branch
                import_index = index - 1
            continue

        if "isort:imports-" in line and line.startswith("#"):
            section = line.split("isort:imports-")[-1].split()[0].upper()
            place_imports[section] = []
            import_placements[line] = section
        elif "isort: imports-" in line and line.startswith("#"):
            section = line.split("isort: imports-")[-1].split()[0].upper()
            place_imports[section] = []
            import_placements[line] = section

        if skipping_line:
            out_lines.append(line)
            continue

        lstripped_line = line.lstrip()
        if (
            config.float_to_top
            and import_index == -1
            and line
            and not in_quote
            and not lstripped_line.startswith("#")
            and not lstripped_line.startswith("'''")
            and not lstripped_line.startswith('"""')
        ):
            if not lstripped_line.startswith("import") and not lstripped_line.startswith("from"):
                # First line of code that is not an import: imports go here,
                # above any blank lines directly preceding it.
                import_index = index - 1
                while import_index and not in_lines[import_index - 1]:
                    import_index -= 1
            else:
                commentless = line.split("#", 1)[0].strip()
                if (
                    ("isort:skip" in line or "isort: skip" in line)
                    and "(" in commentless
                    and ")" not in commentless
                ):
                    import_index = index

                    # Move import_index past consecutive skipped statements,
                    # following open parentheses to their closing line.
                    starting_line = line
                    while "isort:skip" in starting_line or "isort: skip" in starting_line:
                        commentless = starting_line.split("#", 1)[0]
                        if (
                            "(" in commentless
                            and not commentless.rstrip().endswith(")")
                            and import_index < line_count
                        ):
                            while import_index < line_count and not commentless.rstrip().endswith(
                                ")"
                            ):
                                commentless = in_lines[import_index].split("#", 1)[0]
                                import_index += 1
                        else:
                            import_index += 1

                        if import_index >= line_count:
                            break

                        starting_line = in_lines[import_index]

        # Split the code part on ";" and re-attach the comment to the last statement
        line, *end_of_line_comment = line.split("#", 1)
        if ";" in line:
            statements = [line.strip() for line in line.split(";")]
        else:
            statements = [line]
        if end_of_line_comment:
            statements[-1] = f"{statements[-1]}#{end_of_line_comment[0]}"

        for statement in statements:
            line, raw_line = _normalize_line(statement)
            type_of_import = import_type(line, config) or ""
            raw_lines = [raw_line]
            if not type_of_import:
                out_lines.append(raw_line)
                continue

            if import_index == -1:
                import_index = index - 1
            nested_comments: Dict[str, str] = {}
            import_string, comment = parse_comments(line)
            comments = [comment] if comment else []
            line_parts = [part for part in _strip_syntax(import_string).strip().split(" ") if part]
            if type_of_import == "from" and len(line_parts) == 2 and comments:
                nested_comments[line_parts[-1]] = comments[0]

            if "(" in line.split("#", 1)[0] and index < line_count:
                # Parenthesized import: consume lines up to the one ending in ")"
                while not line.split("#")[0].strip().endswith(")") and index < line_count:
                    line, new_comment = parse_comments(in_lines[index])
                    index += 1
                    if new_comment:
                        comments.append(new_comment)
                    stripped_line = _strip_syntax(line).strip()
                    if (
                        type_of_import == "from"
                        and stripped_line
                        and " " not in stripped_line.replace(" as ", "")
                        and new_comment
                    ):
                        nested_comments[stripped_line] = comments[-1]
                    import_string += line_separator + line
                    raw_lines.append(line)
            else:
                # Backslash continued import: consume one line per trailing "\"
                while line.strip().endswith("\\"):
                    line, new_comment = parse_comments(in_lines[index])
                    line = line.lstrip()
                    index += 1
                    if new_comment:
                        comments.append(new_comment)

                    # Still need to check for parentheses after an escaped line
                    if (
                        "(" in line.split("#")[0]
                        and ")" not in line.split("#")[0]
                        and index < line_count
                    ):
                        stripped_line = _strip_syntax(line).strip()
                        if (
                            type_of_import == "from"
                            and stripped_line
                            and " " not in stripped_line.replace(" as ", "")
                            and new_comment
                        ):
                            nested_comments[stripped_line] = comments[-1]
                        import_string += line_separator + line
                        raw_lines.append(line)

                        while not line.split("#")[0].strip().endswith(")") and index < line_count:
                            line, new_comment = parse_comments(in_lines[index])
                            index += 1
                            if new_comment:
                                comments.append(new_comment)
                            stripped_line = _strip_syntax(line).strip()
                            if (
                                type_of_import == "from"
                                and stripped_line
                                and " " not in stripped_line.replace(" as ", "")
                                and new_comment
                            ):
                                nested_comments[stripped_line] = comments[-1]
                            import_string += line_separator + line
                            raw_lines.append(line)

                    stripped_line = _strip_syntax(line).strip()
                    if (
                        type_of_import == "from"
                        and stripped_line
                        and " " not in stripped_line.replace(" as ", "")
                        and new_comment
                    ):
                        nested_comments[stripped_line] = comments[-1]
                    if import_string.strip().endswith(
                        (" import", " cimport")
                    ) or line.strip().startswith(("import ", "cimport ")):
                        import_string += line_separator + line
                    else:
                        import_string = import_string.rstrip().rstrip("\\") + " " + line.lstrip()

            if type_of_import == "from":
                cimports: bool
                import_string = (
                    import_string.replace("import(", "import (")
                    .replace("\\", " ")
                    .replace("\n", " ")
                )
                if "import " not in import_string:
                    # Not an actual import statement: pass the lines through
                    out_lines.extend(raw_lines)
                    continue

                if " cimport " in import_string:
                    parts = import_string.split(" cimport ")
                    cimports = True
                else:
                    parts = import_string.split(" import ")
                    cimports = False

                # Remove any spaces inside the module path (e.g. "from . a import b")
                from_import = parts[0].split(" ")
                import_string = (" cimport " if cimports else " import ").join(
                    [from_import[0] + " " + "".join(from_import[1:])] + parts[1:]
                )

            just_imports = [
                item.replace("{|", "{ ").replace("|}", " }")
                for item in _strip_syntax(import_string).split()
            ]

            attach_comments_to: Optional[List[Any]] = None
            direct_imports = just_imports[1:]
            straight_import = True
            top_level_module = ""
            if "as" in just_imports and (just_imports.index("as") + 1) < len(just_imports):
                straight_import = False
                # Record every "<name> as <alias>" pair, removing "as <alias>"
                # from just_imports as it is handled.
                while "as" in just_imports:
                    nested_module = None
                    as_index = just_imports.index("as")
                    if type_of_import == "from":
                        nested_module = just_imports[as_index - 1]
                        top_level_module = just_imports[0]
                        module = top_level_module + "." + nested_module
                        as_name = just_imports[as_index + 1]
                        direct_imports.remove(nested_module)
                        direct_imports.remove(as_name)
                        direct_imports.remove("as")
                        if nested_module == as_name and config.remove_redundant_aliases:
                            pass
                        elif as_name not in as_map["from"][module]:  # pragma: no branch
                            as_map["from"][module].append(as_name)

                        full_name = f"{nested_module} as {as_name}"
                        associated_comment = nested_comments.get(full_name)
                        if associated_comment:
                            categorized_comments["nested"].setdefault(top_level_module, {})[
                                full_name
                            ] = associated_comment
                            if associated_comment in comments:  # pragma: no branch
                                comments.pop(comments.index(associated_comment))
                    else:
                        module = just_imports[as_index - 1]
                        as_name = just_imports[as_index + 1]
                        if module == as_name and config.remove_redundant_aliases:
                            pass
                        elif as_name not in as_map["straight"][module]:
                            as_map["straight"][module].append(as_name)

                    if comments and attach_comments_to is None:
                        if nested_module and config.combine_as_imports:
                            attach_comments_to = categorized_comments["from"].setdefault(
                                f"{top_level_module}.__combined_as__", []
                            )
                        else:
                            if type_of_import == "from" or (
                                config.remove_redundant_aliases
                                and as_name == module.split(".")[-1]
                            ):
                                attach_comments_to = categorized_comments["straight"].setdefault(
                                    module, []
                                )
                            else:
                                attach_comments_to = categorized_comments["straight"].setdefault(
                                    f"{module} as {as_name}", []
                                )
                    del just_imports[as_index : as_index + 2]

            if type_of_import == "from":
                import_from = just_imports.pop(0)
                placed_module = finder(import_from)
                if config.verbose and not config.only_modified:
                    print(f"from-type place_module for {import_from} returned {placed_module}")
                elif config.verbose:
                    verbose_output.append(
                        f"from-type place_module for {import_from} returned {placed_module}"
                    )
                if placed_module == "":
                    warn(
                        f"could not place module {import_from} of line {line} --"
                        " Do you need to define a default section?"
                    )
                    # Make sure the "" bucket exists before it is indexed below,
                    # exactly as the straight import branch does.
                    imports.setdefault("", {"straight": OrderedDict(), "from": OrderedDict()})

                if placed_module and placed_module not in imports:
                    raise MissingSection(import_module=import_from, section=placed_module)

                root = imports[placed_module][type_of_import]  # type: ignore
                for import_name in just_imports:
                    associated_comment = nested_comments.get(import_name)
                    if associated_comment:
                        categorized_comments["nested"].setdefault(import_from, {})[
                            import_name
                        ] = associated_comment
                        if associated_comment in comments:  # pragma: no branch
                            comments.pop(comments.index(associated_comment))
                if (
                    config.force_single_line
                    and comments
                    and attach_comments_to is None
                    and len(just_imports) == 1
                ):
                    nested_from_comments = categorized_comments["nested"].setdefault(
                        import_from, {}
                    )
                    existing_comment = nested_from_comments.get(just_imports[0], "")
                    nested_from_comments[
                        just_imports[0]
                    ] = f"{existing_comment}{'; ' if existing_comment else ''}{'; '.join(comments)}"
                    comments = []

                if comments and attach_comments_to is None:
                    attach_comments_to = categorized_comments["from"].setdefault(import_from, [])

                if len(out_lines) > max(import_index, 1) - 1:
                    # Move the comment lines directly above the import out of
                    # the output and store them with the import.
                    last = out_lines[-1].rstrip() if out_lines else ""
                    while (
                        last.startswith("#")
                        and not last.endswith('"""')
                        and not last.endswith("'''")
                        and "isort:imports-" not in last
                        and "isort: imports-" not in last
                        and not config.treat_all_comments_as_code
                        and last.strip() not in config.treat_comments_as_code
                    ):
                        categorized_comments["above"]["from"].setdefault(import_from, []).insert(
                            0, out_lines.pop(-1)
                        )
                        if out_lines:
                            last = out_lines[-1].rstrip()
                        else:
                            last = ""
                    if statement_index - 1 == import_index:  # pragma: no cover
                        import_index -= len(
                            categorized_comments["above"]["from"].get(import_from, [])
                        )

                if import_from not in root:
                    root[import_from] = OrderedDict(
                        (module, module in direct_imports) for module in just_imports
                    )
                else:
                    root[import_from].update(
                        (module, root[import_from].get(module, False) or module in direct_imports)
                        for module in just_imports
                    )

                if comments and attach_comments_to is not None:
                    attach_comments_to.extend(comments)

                if "," in import_string.split(just_imports[-1])[-1]:
                    trailing_commas.add(import_from)
            else:
                if comments and attach_comments_to is not None:
                    attach_comments_to.extend(comments)
                    comments = []

                for module in just_imports:
                    if comments:
                        categorized_comments["straight"][module] = comments
                        comments = []

                    if len(out_lines) > max(import_index, 1) - 1:
                        # Move the comment lines directly above the import out
                        # of the output and store them with the import.
                        last = out_lines[-1].rstrip() if out_lines else ""
                        while (
                            last.startswith("#")
                            and not last.endswith('"""')
                            and not last.endswith("'''")
                            and "isort:imports-" not in last
                            and "isort: imports-" not in last
                            and not config.treat_all_comments_as_code
                            and last.strip() not in config.treat_comments_as_code
                        ):
                            categorized_comments["above"]["straight"].setdefault(module, []).insert(
                                0, out_lines.pop(-1)
                            )
                            if out_lines:
                                last = out_lines[-1].rstrip()
                            else:
                                last = ""
                        if index - 1 == import_index:
                            import_index -= len(
                                categorized_comments["above"]["straight"].get(module, [])
                            )
                    placed_module = finder(module)
                    if config.verbose and not config.only_modified:
                        print(f"else-type place_module for {module} returned {placed_module}")
                    elif config.verbose:
                        verbose_output.append(
                            f"else-type place_module for {module} returned {placed_module}"
                        )
                    if placed_module == "":
                        warn(
                            f"could not place module {module} of line {line} --"
                            " Do you need to define a default section?"
                        )
                        imports.setdefault("", {"straight": OrderedDict(), "from": OrderedDict()})

                    if placed_module and placed_module not in imports:
                        raise MissingSection(import_module=module, section=placed_module)

                    straight_import |= imports[placed_module][type_of_import].get(  # type: ignore
                        module, False
                    )
                    imports[placed_module][type_of_import][module] = straight_import  # type: ignore

    change_count = len(out_lines) - original_line_count

    return ParsedContent(
        in_lines=in_lines,
        lines_without_imports=out_lines,
        import_index=import_index,
        place_imports=place_imports,
        import_placements=import_placements,
        as_map=as_map,
        imports=imports,
        categorized_comments=categorized_comments,
        change_count=change_count,
        original_line_count=original_line_count,
        line_separator=line_separator,
        sections=config.sections,
        verbose_output=verbose_output,
        trailing_commas=trailing_commas,
    )