From a81e3630005d982624aed35f1ed109cac04fbf32 Mon Sep 17 00:00:00 2001 From: Sourcery AI <> Date: Sun, 2 Jul 2023 15:21:25 +0000 Subject: [PATCH] 'Refactored by Sourcery' --- archivebox/config.py | 437 +++++++++++++--------- archivebox/parsers/__init__.py | 14 +- archivebox/parsers/readwise_reader_api.py | 8 +- 3 files changed, 258 insertions(+), 201 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index 62c79ad1..44312fc4 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -19,6 +19,7 @@ Documentation: """ + __package__ = 'archivebox' import os @@ -65,169 +66,250 @@ except ModuleNotFoundError: CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'SHELL_CONFIG': { - 'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()}, - 'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']}, - 'SHOW_PROGRESS': {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')}, # progress bars are buggy on mac, disable for now - 'IN_DOCKER': {'type': bool, 'default': False}, - 'PUID': {'type': int, 'default': os.getuid()}, - 'PGID': {'type': int, 'default': os.getgid()}, + 'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()}, + 'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']}, + 'SHOW_PROGRESS': { + 'type': bool, + 'default': lambda c: ( + c['IS_TTY'] and platform.system() != 'Darwin' + ), + }, # progress bars are buggy on mac, disable for now + 'IN_DOCKER': {'type': bool, 'default': False}, + 'PUID': {'type': int, 'default': os.getuid()}, + 'PGID': {'type': int, 'default': os.getgid()}, # TODO: 'SHOW_HINTS': {'type: bool, 'default': True}, }, - 'GENERAL_CONFIG': { - 'OUTPUT_DIR': {'type': str, 'default': None}, - 'CONFIG_FILE': {'type': str, 'default': None}, - 'ONLY_NEW': {'type': bool, 'default': True}, - 'TIMEOUT': {'type': int, 'default': 60}, - 'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, - 'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'}, - 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, - 'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages - 'URL_WHITELIST': {'type': str, 'default': None}, - 'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, - 'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'}, + 'OUTPUT_DIR': {'type': str, 'default': None}, + 'CONFIG_FILE': {'type': str, 'default': None}, + 'ONLY_NEW': {'type': bool, 'default': True}, + 'TIMEOUT': {'type': int, 'default': 60}, + 'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, + 'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'}, + 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, + 'URL_BLACKLIST': { + 'type': str, + 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', + }, # to avoid downloading code assets as their own pages + 'URL_WHITELIST': {'type': str, 'default': None}, + 'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, + 'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'}, }, - 'SERVER_CONFIG': { - 'SECRET_KEY': {'type': str, 'default': None}, - 'BIND_ADDR': {'type': str, 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]}, - 'ALLOWED_HOSTS': {'type': str, 'default': '*'}, - 'DEBUG': {'type': bool, 'default': False}, - 'PUBLIC_INDEX': {'type': bool, 'default': True}, - 'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True}, - 'PUBLIC_ADD_VIEW': {'type': bool, 'default': False}, - 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'}, - 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40}, - 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None}, - 'TIME_ZONE': {'type': str, 'default': 'UTC'}, - 'TIMEZONE': {'type': str, 'default': 'UTC'}, - 'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'}, - 'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''}, - 'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'}, - 'PREVIEW_ORIGINALS': {'type': bool, 'default': True}, - 'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'}, + 'SECRET_KEY': {'type': str, 'default': None}, + 'BIND_ADDR': { + 'type': str, + 'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][ + c['IN_DOCKER'] + ], + }, + 'ALLOWED_HOSTS': {'type': str, 'default': '*'}, + 'DEBUG': {'type': bool, 'default': False}, + 'PUBLIC_INDEX': {'type': bool, 'default': True}, + 'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True}, + 'PUBLIC_ADD_VIEW': {'type': bool, 'default': False}, + 'FOOTER_INFO': { + 'type': str, + 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.', + }, + 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40}, + 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None}, + 'TIME_ZONE': {'type': str, 'default': 'UTC'}, + 'TIMEZONE': {'type': str, 'default': 'UTC'}, + 'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'}, + 'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''}, + 'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'}, + 'PREVIEW_ORIGINALS': {'type': bool, 'default': True}, + 'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'}, }, - 'ARCHIVE_METHOD_TOGGLES': { - 'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)}, - 'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)}, - 'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)}, - 'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)}, - 'SAVE_SINGLEFILE': {'type': bool, 'default': True, 'aliases': ('FETCH_SINGLEFILE',)}, - 'SAVE_READABILITY': {'type': bool, 'default': True, 'aliases': ('FETCH_READABILITY',)}, - 'SAVE_MERCURY': {'type': bool, 'default': True, 'aliases': ('FETCH_MERCURY',)}, - 'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)}, - 'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)}, - 'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)}, - 'SAVE_HEADERS': {'type': bool, 'default': True, 'aliases': ('FETCH_HEADERS',)}, - 'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)}, - 'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)}, - 'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)}, - 'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)}, + 'SAVE_TITLE': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_TITLE',), + }, + 'SAVE_FAVICON': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_FAVICON',), + }, + 'SAVE_WGET': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_WGET',), + }, + 'SAVE_WGET_REQUISITES': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_WGET_REQUISITES',), + }, + 'SAVE_SINGLEFILE': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_SINGLEFILE',), + }, + 'SAVE_READABILITY': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_READABILITY',), + }, + 'SAVE_MERCURY': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_MERCURY',), + }, + 'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)}, + 'SAVE_SCREENSHOT': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_SCREENSHOT',), + }, + 'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)}, + 'SAVE_HEADERS': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_HEADERS',), + }, + 'SAVE_WARC': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_WARC',), + }, + 'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)}, + 'SAVE_MEDIA': { + 'type': bool, + 'default': True, + 'aliases': ('FETCH_MEDIA',), + }, + 'SAVE_ARCHIVE_DOT_ORG': { + 'type': bool, + 'default': True, + 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',), + }, }, - 'ARCHIVE_METHOD_OPTIONS': { - 'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)}, - 'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com'}, - 'CHECK_SSL_VALIDITY': {'type': bool, 'default': True}, - 'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'}, - - 'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'}, - 'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'}, - 'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'}, - - 'COOKIES_FILE': {'type': str, 'default': None}, - 'CHROME_USER_DATA_DIR': {'type': str, 'default': None}, - - 'CHROME_TIMEOUT': {'type': int, 'default': 0}, - 'CHROME_HEADLESS': {'type': bool, 'default': True}, - 'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']}, - 'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [ - '--write-description', - '--write-info-json', - '--write-annotations', - '--write-thumbnail', - '--no-call-home', - '--write-sub', - '--all-subs', - # There are too many of these and youtube - # throttles you with HTTP error 429 - #'--write-auto-subs', - '--convert-subs=srt', - '--yes-playlist', - '--continue', - # This flag doesn't exist in youtube-dl - # only in yt-dlp - '--no-abort-on-error', - # --ignore-errors must come AFTER - # --no-abort-on-error - # https://github.com/yt-dlp/yt-dlp/issues/4914 - '--ignore-errors', - '--geo-bypass', - '--add-metadata', - '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']), - ]}, - - - 'WGET_ARGS': {'type': list, 'default': ['--no-verbose', - '--adjust-extension', - '--convert-links', - '--force-directories', - '--backup-converted', - '--span-hosts', - '--no-parent', - '-e', 'robots=off', - ]}, - 'CURL_ARGS': {'type': list, 'default': ['--silent', - '--location', - '--compressed' - ]}, - 'GIT_ARGS': {'type': list, 'default': ['--recursive']}, - 'SINGLEFILE_ARGS': {'type': list, 'default' : None}, - 'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'}, + 'RESOLUTION': { + 'type': str, + 'default': '1440,2000', + 'aliases': ('SCREENSHOT_RESOLUTION',), + }, + 'GIT_DOMAINS': { + 'type': str, + 'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com', + }, + 'CHECK_SSL_VALIDITY': {'type': bool, 'default': True}, + 'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'}, + 'CURL_USER_AGENT': { + 'type': str, + 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}', + }, + 'WGET_USER_AGENT': { + 'type': str, + 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}', + }, + 'CHROME_USER_AGENT': { + 'type': str, + 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/605.1.15 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)', + }, + 'COOKIES_FILE': {'type': str, 'default': None}, + 'CHROME_USER_DATA_DIR': {'type': str, 'default': None}, + 'CHROME_TIMEOUT': {'type': int, 'default': 0}, + 'CHROME_HEADLESS': {'type': bool, 'default': True}, + 'CHROME_SANDBOX': { + 'type': bool, + 'default': lambda c: not c['IN_DOCKER'], + }, + 'YOUTUBEDL_ARGS': { + 'type': list, + 'default': lambda c: [ + '--write-description', + '--write-info-json', + '--write-annotations', + '--write-thumbnail', + '--no-call-home', + '--write-sub', + '--all-subs', + '--convert-subs=srt', + '--yes-playlist', + '--continue', + '--no-abort-on-error', + '--ignore-errors', + '--geo-bypass', + '--add-metadata', + f"--max-filesize={c['MEDIA_MAX_SIZE']}", + ], + }, + 'WGET_ARGS': { + 'type': list, + 'default': [ + '--no-verbose', + '--adjust-extension', + '--convert-links', + '--force-directories', + '--backup-converted', + '--span-hosts', + '--no-parent', + '-e', + 'robots=off', + ], + }, + 'CURL_ARGS': { + 'type': list, + 'default': ['--silent', '--location', '--compressed'], + }, + 'GIT_ARGS': {'type': list, 'default': ['--recursive']}, + 'SINGLEFILE_ARGS': {'type': list, 'default': None}, + 'FAVICON_PROVIDER': { + 'type': str, + 'default': 'https://www.google.com/s2/favicons?domain={}', + }, }, - - 'SEARCH_BACKEND_CONFIG' : { - 'USE_INDEXING_BACKEND': {'type': bool, 'default': True}, - 'USE_SEARCHING_BACKEND': {'type': bool, 'default': True}, - 'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'}, - 'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'}, - 'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491}, - 'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'}, + 'SEARCH_BACKEND_CONFIG': { + 'USE_INDEXING_BACKEND': {'type': bool, 'default': True}, + 'USE_SEARCHING_BACKEND': {'type': bool, 'default': True}, + 'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'ripgrep'}, + 'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'}, + 'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491}, + 'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'}, # SONIC - 'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'}, - 'SONIC_BUCKET': {'type': str, 'default': 'snapshots'}, - 'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90}, + 'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'}, + 'SONIC_BUCKET': {'type': str, 'default': 'snapshots'}, + 'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90}, }, - 'DEPENDENCY_CONFIG': { - 'USE_CURL': {'type': bool, 'default': True}, - 'USE_WGET': {'type': bool, 'default': True}, - 'USE_SINGLEFILE': {'type': bool, 'default': True}, - 'USE_READABILITY': {'type': bool, 'default': True}, - 'USE_MERCURY': {'type': bool, 'default': True}, - 'USE_GIT': {'type': bool, 'default': True}, - 'USE_CHROME': {'type': bool, 'default': True}, - 'USE_NODE': {'type': bool, 'default': True}, - 'USE_YOUTUBEDL': {'type': bool, 'default': True}, - 'USE_RIPGREP': {'type': bool, 'default': True}, - - 'CURL_BINARY': {'type': str, 'default': 'curl'}, - 'GIT_BINARY': {'type': str, 'default': 'git'}, - 'WGET_BINARY': {'type': str, 'default': 'wget'}, - 'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')}, - 'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')}, - 'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('mercury-parser')}, + 'USE_CURL': {'type': bool, 'default': True}, + 'USE_WGET': {'type': bool, 'default': True}, + 'USE_SINGLEFILE': {'type': bool, 'default': True}, + 'USE_READABILITY': {'type': bool, 'default': True}, + 'USE_MERCURY': {'type': bool, 'default': True}, + 'USE_GIT': {'type': bool, 'default': True}, + 'USE_CHROME': {'type': bool, 'default': True}, + 'USE_NODE': {'type': bool, 'default': True}, + 'USE_YOUTUBEDL': {'type': bool, 'default': True}, + 'USE_RIPGREP': {'type': bool, 'default': True}, + 'CURL_BINARY': {'type': str, 'default': 'curl'}, + 'GIT_BINARY': {'type': str, 'default': 'git'}, + 'WGET_BINARY': {'type': str, 'default': 'wget'}, + 'SINGLEFILE_BINARY': { + 'type': str, + 'default': lambda c: bin_path('single-file'), + }, + 'READABILITY_BINARY': { + 'type': str, + 'default': lambda c: bin_path('readability-extractor'), + }, + 'MERCURY_BINARY': { + 'type': str, + 'default': lambda c: bin_path('mercury-parser'), + }, #'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'}, - 'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'}, - 'NODE_BINARY': {'type': str, 'default': 'node'}, - 'RIPGREP_BINARY': {'type': str, 'default': 'rg'}, - 'CHROME_BINARY': {'type': str, 'default': None}, - - 'POCKET_CONSUMER_KEY': {'type': str, 'default': None}, - 'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}}, - - 'READWISE_READER_TOKENS': {'type': dict, 'default': {}}, + 'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'}, + 'NODE_BINARY': {'type': str, 'default': 'node'}, + 'RIPGREP_BINARY': {'type': str, 'default': 'rg'}, + 'CHROME_BINARY': {'type': str, 'default': None}, + 'POCKET_CONSUMER_KEY': {'type': str, 'default': None}, + 'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}}, + 'READWISE_READER_TOKENS': {'type': dict, 'default': {}}, }, } @@ -276,7 +358,7 @@ DEFAULT_CLI_COLORS = { 'white': '\033[01;37m', 'black': '\033[01;30m', } -ANSI = {k: '' for k in DEFAULT_CLI_COLORS.keys()} +ANSI = {k: '' for k in DEFAULT_CLI_COLORS} COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], { '00': [(0, 0, 0), (0, 0, 0)], @@ -516,15 +598,11 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]: config_file = ConfigParser() config_file.optionxform = str config_file.read(config_path) - # flatten into one namespace - config_file_vars = { + return { key.upper(): val for section, options in config_file.items() - for key, val in options.items() + for key, val in options.items() } - # print('[i] Loaded config file', os.path.abspath(config_path)) - # print(config_file_vars) - return config_file_vars return None @@ -533,8 +611,12 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: from .system import atomic_write - CONFIG_HEADER = ( - """# This is the config file for your ArchiveBox collection. + out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve() + config_path = Path(out_dir) / CONFIG_FILENAME + + if not config_path.exists(): + CONFIG_HEADER = ( + """# This is the config file for your ArchiveBox collection. # # You can add options here manually in INI format, or automatically by running: # archivebox config --set KEY=VALUE @@ -547,10 +629,6 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: """) - out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve() - config_path = Path(out_dir) / CONFIG_FILENAME - - if not config_path.exists(): atomic_write(config_path, CONFIG_HEADER) config_file = ConfigParser() @@ -565,10 +643,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: # Set up sections in empty config file for key, val in config.items(): section = find_section(key) - if section in config_file: - existing_config = dict(config_file[section]) - else: - existing_config = {} + existing_config = dict(config_file[section]) if section in config_file else {} config_file[section] = {**existing_config, key: val} # always make sure there's a SECRET_KEY defined for Django @@ -601,10 +676,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: if Path(f'{config_path}.bak').exists(): os.remove(f'{config_path}.bak') - return { - key.upper(): CONFIG.get(key.upper()) - for key in config.keys() - } + return {key.upper(): CONFIG.get(key.upper()) for key in config} @@ -634,7 +706,7 @@ def load_config(defaults: ConfigDefaultDict, except Exception as e: stderr() stderr(f'[X] Error while loading configuration value: {key}', color='red', config=extended_config) - stderr(' {}: {}'.format(e.__class__.__name__, e)) + stderr(f' {e.__class__.__name__}: {e}') stderr() stderr(' Check your config for mistakes and try again (your archive data is unaffected).') stderr() @@ -680,7 +752,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op else: stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text[0], **ansi)) for line in text[1:]: - stderr('{} {}'.format(prefix, line)) + stderr(f'{prefix} {line}') # Dependency Metadata Helpers @@ -750,8 +822,7 @@ def find_chrome_binary() -> Optional[str]: 'google-chrome-dev', ) for name in default_executable_paths: - full_path_exists = shutil.which(name) - if full_path_exists: + if full_path_exists := shutil.which(name): return name return None @@ -789,7 +860,7 @@ def wget_supports_compression(config): "--help", ] return not run(cmd, stdout=DEVNULL, stderr=DEVNULL).returncode - except (FileNotFoundError, OSError): + except OSError: return False def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict: @@ -1100,7 +1171,9 @@ def check_system_config(config: ConfigDict=CONFIG) -> None: if '/Default' in str(config['CHROME_USER_DATA_DIR']): stderr() stderr(' Try removing /Default from the end e.g.:') - stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0])) + stderr( + f""" CHROME_USER_DATA_DIR="{config['CHROME_USER_DATA_DIR'].split('/Default')[0]}\"""" + ) raise SystemExit(2) @@ -1113,11 +1186,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None: stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow') for dependency, info in invalid_dependencies: stderr( - ' ! {}: {} ({})'.format( - dependency, - info['path'] or 'unable to find binary', - info['version'] or 'unable to detect version', - ) + f" ! {dependency}: {info['path'] or 'unable to find binary'} ({info['version'] or 'unable to detect version'})" ) if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'): hint(('To install all packages automatically run: archivebox setup', @@ -1174,9 +1243,9 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO output_dir = out_dir or config['OUTPUT_DIR'] from .index.sql import list_migrations - pending_migrations = [name for status, name in list_migrations() if not status] - - if pending_migrations: + if pending_migrations := [ + name for status, name in list_migrations() if not status + ]: stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow') stderr(f' {output_dir}') stderr() diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index 893179c9..3edca9e2 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -87,9 +87,7 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None): links, parser = run_parser_functions(file, timer, root_url=root_url) timer.end() - if parser is None: - return [], 'Failed to parse' - return links, parser + return ([], 'Failed to parse') if parser is None else (links, parser) @enforce_types @@ -103,9 +101,7 @@ def parse_links(source_file: str, root_url: Optional[str]=None, parser: str="aut links, parser = run_parser_functions(file, timer, root_url=root_url, parser=parser) timer.end() - if parser is None: - return [], 'Failed to parse' - return links, parser + return ([], 'Failed to parse') if parser is None else (links, parser) def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None, parser: str="auto") -> Tuple[List[Link], Optional[str]]: @@ -182,11 +178,7 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba timer.end() except Exception as e: timer.end() - print('{}[!] Failed to download {}{}\n'.format( - ANSI['red'], - path, - ANSI['reset'], - )) + print(f"{ANSI['red']}[!] Failed to download {path}{ANSI['reset']}\n") print(' ', e) raise e diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py index a2a0c29a..35965538 100644 --- a/archivebox/parsers/readwise_reader_api.py +++ b/archivebox/parsers/readwise_reader_api.py @@ -44,11 +44,7 @@ class ReadwiseReaderAPI: def get_readwise_reader_articles(api: ReadwiseReaderAPI): response = api.get_archive() body = response.json() - articles = body["results"] - - yield from articles - - + yield from body["results"] if body['nextPageCursor']: api.cursor = body["nextPageCursor"] yield from get_readwise_reader_articles(api) @@ -108,7 +104,7 @@ def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterab pattern = re.compile(r"^readwise-reader:\/\/(\w+)") for line in input_buffer: if should_parse_as_readwise_reader_api(line): - username = pattern.search(line).group(1) + username = pattern.search(line)[1] api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username)) for article in get_readwise_reader_articles(api):