From eef8ca29f0c359263af9f988001ed127ae25432c Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 30 Mar 2019 20:31:28 -0400 Subject: [PATCH 001/333] hide compression detection failure during config setup --- archivebox/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/config.py b/archivebox/config.py index 0d49a5d2..23a92ebf 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -74,7 +74,7 @@ TEMPLATES_DIR = os.path.join(PYTHON_PATH, 'templates') CHROME_SANDBOX = os.getenv('CHROME_SANDBOX', 'True').lower() == 'true' USE_CHROME = FETCH_PDF or FETCH_SCREENSHOT or FETCH_DOM USE_WGET = FETCH_WGET or FETCH_WGET_REQUISITES or FETCH_WARC -WGET_AUTO_COMPRESSION = USE_WGET and WGET_BINARY and (not run([WGET_BINARY, "--compression=auto", "--help"], stdout=DEVNULL).returncode) +WGET_AUTO_COMPRESSION = USE_WGET and WGET_BINARY and (not run([WGET_BINARY, "--compression=auto", "--help"], stdout=DEVNULL, stderr=DEVNULL).returncode) URL_BLACKLIST = URL_BLACKLIST and re.compile(URL_BLACKLIST, re.IGNORECASE) From 924de7f68c315cd55fcfb77257b01f28365f855c Mon Sep 17 00:00:00 2001 From: luoliyan Date: Tue, 2 Apr 2019 13:13:07 +0930 Subject: [PATCH 002/333] Update purge script to match codebase cleanup --- archivebox/purge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/archivebox/purge.py b/archivebox/purge.py index 26b18817..e2e4e97c 100755 --- a/archivebox/purge.py +++ b/archivebox/purge.py @@ -6,9 +6,9 @@ from os.path import exists, join from shutil import rmtree from typing import List -from archive import parse_json_link_index from config import ARCHIVE_DIR, OUTPUT_DIR -from index import write_html_links_index, write_json_links_index +from index import (parse_json_links_index, write_html_links_index, + write_json_links_index) def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None: @@ -16,7 +16,7 @@ def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None: exit('index.json is missing; nothing to do') compiled = [re.compile(r) for r in regexes] - links = parse_json_link_index(OUTPUT_DIR)['links'] + links = parse_json_links_index(OUTPUT_DIR) filtered = [] remaining = [] From 0d2bf610b2ed82c87c78c3655a1f6512551f2ddb Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 3 Apr 2019 03:27:15 -0400 Subject: [PATCH 003/333] typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 15358d5d..5c698868 100644 --- a/README.md +++ b/README.md @@ -164,7 +164,7 @@ I don't think everything should be preserved in an automated fashion, making all #### User Interface & Intended Purpose -ArchiveBox differentiates itself from [similar projects](https://github.com/pirate/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects) by being a simple, one-shot CLI inferface for users to ingest built feeds of URLs over extended periods, as opposed to being a backend service that ingests individual, manually-submitted URLs from a web UI. +ArchiveBox differentiates itself from [similar projects](https://github.com/pirate/ArchiveBox/wiki/Web-Archiving-Community#Web-Archiving-Projects) by being a simple, one-shot CLI inferface for users to ingest bulk feeds of URLs over extended periods, as opposed to being a backend service that ingests individual, manually-submitted URLs from a web UI. 
An alternative tool [pywb](https://github.com/webrecorder/pywb) allows you to run a browser through an always-running archiving proxy which records the traffic to WARC files. ArchiveBox intends to support this style of live proxy-archiving using `pywb` in the future, but for now it only ingests lists of links at a time via browser history, bookmarks, RSS, etc. From 585a28e7c919c980a3d0e53cdac835ed3748b630 Mon Sep 17 00:00:00 2001 From: Anton Rieder <1301152+aried3r@users.noreply.github.com> Date: Wed, 3 Apr 2019 12:49:32 +0200 Subject: [PATCH 004/333] Small typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5c698868..0fc21154 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ echo 'https://example.com' | ./archive # pass URLs to archive v ./archive https://getpocket.com/users/example/feed/all # or import an RSS/JSON/XML/TXT feed ``` -One you've added your first links, open `output/index.html` in a browser to view the archive. [DEMO: archive.sweeting.me](https://archive.sweeting.me) +Once you've added your first links, open `output/index.html` in a browser to view the archive. [DEMO: archive.sweeting.me](https://archive.sweeting.me) For more information, see the [full Quickstart guide](https://github.com/pirate/ArchiveBox/wiki/Quickstart), [Usage](https://github.com/pirate/ArchiveBox/wiki/Usage), and [Configuration](https://github.com/pirate/ArchiveBox/wiki/Configuration) docs. *(`pip install archivebox` will be available in the near future, follow our [Roadmap](https://github.com/pirate/ArchiveBox/wiki/Roadmap) for progress)* From 403025a73b1d96ebcd2dba8c681c63529a5a4980 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 10 Apr 2019 17:09:54 -0400 Subject: [PATCH 005/333] Update bug_report.md --- .github/ISSUE_TEMPLATE/bug_report.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index b350fb28..66a2d21b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -7,24 +7,24 @@ assignees: '' --- -(please fill out the following information, feel free to delete sections if they're not applicable) +(please fill out the following information, feel free to delete sections if they're not applicable or if long issue templates annoy you) -## Describe the bug +#### Describe the bug A description of what the bug is, what you expected to happen, and any relevant context about issue. -## Steps to reproduce +#### Steps to reproduce 1. Ran ArchiveBox with the following config '...' 2. Saw this output during archiving '....' 3. UI didn't show the thing I was expecting '....' -## Screenshots or log output +#### Screenshots or log output If applicable, post any relevant screenshots or copy/pasted terminal output from ArchiveBox. If you're reporting a parsing / importing error, **you must paste a copy of your redacted import file here**. -## Software versions +#### Software versions - OS: ([e.g. macOS 10.14] the operating system you're running ArchiveBox on) - ArchiveBox version: (`git rev-parse HEAD | head -c7` [e.g. 
d798117] commit ID of the version you're running) From 4f599c0b0b07c842b1a2d0ec31f229d8fa0d6294 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 10 Apr 2019 22:46:20 -0400 Subject: [PATCH 006/333] escape all non-windows-friendly filenames --- archivebox/archive_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 56009cd1..b2f04f33 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -226,7 +226,7 @@ def fetch_wget(link_dir, link, timeout=TIMEOUT): '--span-hosts', '--no-parent', '-e', 'robots=off', - '--restrict-file-names=unix', + '--restrict-file-names=windows', '--timeout={}'.format(timeout), *(('--compression=auto',) if WGET_AUTO_COMPRESSION else ()), *(() if FETCH_WARC else ('--timestamping',)), From e9f9c1ec5da2433ef95b23d7526d69458e01ad3c Mon Sep 17 00:00:00 2001 From: Bruno Tavares Date: Thu, 11 Apr 2019 22:43:52 -0300 Subject: [PATCH 007/333] Copy project into image instead of cloning Docker `RUN` statements cache based on the text of the command executed, not the content of what it does to the image. Since the command was cloning the project, and the text didn't change, building the image would not update the code if the image was already cached. This lead to a stale Docker image distributed on Docker Hub. This could also cause some confusion, as modified code would not show up on the image during the build process. This commit changes the build process to copy the content of the project into the image. Whenever a file changes it will trigger a new updated image. --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d5683cad..c53e5c7a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,8 +45,8 @@ RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \ && chown -R pptruser:pptruser /node_modules # Install the ArchiveBox repository and pip requirements -RUN git clone https://github.com/pirate/ArchiveBox /home/pptruser/app \ - && mkdir -p /data \ +COPY . /home/pptruser/app +RUN mkdir -p /data \ && chown -R pptruser:pptruser /data \ && ln -s /data /home/pptruser/app/archivebox/output \ && ln -s /home/pptruser/app/bin/* /bin/ \ From 6401158f7f30f04a15bd070d9a94416a1c621e77 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 12 Apr 2019 13:59:22 -0400 Subject: [PATCH 008/333] comment out IRC links until we find a better chat solution --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0fc21154..435e8b82 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ "Your own personal internet archive" (网站存档 / 爬虫) - + @@ -178,7 +178,7 @@ Because ArchiveBox is designed to ingest a firehose of browser history and bookm ## Learn more -▶ **Join out our [community chat](http://webchat.freenode.net?channels=ArchiveBox&uio=d4) hosted on IRC freenode.net:`#ArchiveBox`!** + Whether you want learn which organizations are the big players in the web archiving space, want to find a specific open source tool for your web archiving need, or just want to see where archivists hang out online, our Community Wiki page serves as an index of the broader web archiving community. Check it out to learn about some of the coolest web archiving projects and communities on the web! @@ -268,7 +268,7 @@ Contributor Spotlight:


- +

From adfcb1517a086d77441ff6b4d9d766a5c8d94d84 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 16 Apr 2019 21:03:51 -0400 Subject: [PATCH 009/333] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 435e8b82..6b36c859 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,9 @@
+ +*Attention: Big API changes are coming soon! See [v0.4.0](https://github.com/pirate/ArchiveBox/pull/207) for more info...* + **ArchiveBox takes a list of website URLs you want to archive, and creates a local, static, browsable HTML clone of the content from those websites (it saves HTML, JS, media files, PDFs, images and more).** From 24e8eb95ddf1af7040e539503a56c1dc55774bcc Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 16 Apr 2019 21:04:18 -0400 Subject: [PATCH 010/333] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b36c859..63fa7f32 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@
-*Attention: Big API changes are coming soon! See [v0.4.0](https://github.com/pirate/ArchiveBox/pull/207) for more info...* +*💥 Attention: Big API changes are coming soon! See [v0.4.0](https://github.com/pirate/ArchiveBox/pull/207) for more info... 💥* From 59da48206ad7f64ea9b5a7e869d47a87e5534c3a Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 16 Apr 2019 21:21:40 -0400 Subject: [PATCH 011/333] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 63fa7f32..1622c393 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@
-*💥 Attention: Big API changes are coming soon! See [v0.4.0](https://github.com/pirate/ArchiveBox/pull/207) for more info... 💥* +*💥 Attention: Big API changes are coming soon! Check out [v0.4.0](https://github.com/pirate/ArchiveBox/pull/207) and help us test it! 💥* From 332a32f4f9b6f548d9a61495ec9008667ca1f5f6 Mon Sep 17 00:00:00 2001 From: Drewry Pope Date: Sat, 20 Apr 2019 02:59:44 -0500 Subject: [PATCH 012/333] Resolve 3 typos in util.py --- archivebox/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/archivebox/util.py b/archivebox/util.py index cec23035..3c08c9bb 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -66,7 +66,7 @@ HTML_TITLE_REGEX = re.compile( re.IGNORECASE | re.MULTILINE | re.DOTALL | re.UNICODE, ) STATICFILE_EXTENSIONS = { - # 99.999% of the time, URLs ending in these extentions are static files + # 99.999% of the time, URLs ending in these extensions are static files # that can be downloaded as-is, not html pages that need to be rendered 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp', 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai', @@ -82,7 +82,7 @@ STATICFILE_EXTENSIONS = { # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf, # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml - # Thse are always treated as pages, not as static files, never add them: + # These are always treated as pages, not as static files, never add them: # html, htm, shtml, xhtml, xml, aspx, php, cgi } @@ -293,7 +293,7 @@ def str_between(string, start, end=None): ### Link Helpers def merge_links(a, b): - """deterministially merge two links, favoring longer field values over shorter, + """deterministically merge two links, favoring longer field values over shorter, and "cleaner" values over worse ones. """ longer = lambda key: (a[key] if len(a[key]) > len(b[key]) else b[key]) if (a[key] and b[key]) else (a[key] or b[key]) From 34270b2b1239b948d9598b7bb6ea8b31131066b8 Mon Sep 17 00:00:00 2001 From: Pig Monkey Date: Tue, 30 Apr 2019 17:25:41 -0700 Subject: [PATCH 013/333] only use stdin if it has a value Closes #228 --- archivebox/archive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archivebox/archive.py b/archivebox/archive.py index 5c0d195d..3e553e6e 100755 --- a/archivebox/archive.py +++ b/archivebox/archive.py @@ -86,8 +86,8 @@ def main(*args): ) print_help() raise SystemExit(1) - - import_path = save_stdin_source(stdin_raw_text) + if stdin_raw_text: + import_path = save_stdin_source(stdin_raw_text) ### Handle ingesting urls from a remote file/feed # (e.g. 
if an RSS feed URL is used as the import path) From 500534f4be87e94f05d9cf6063babd4faa5145cc Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 15:17:16 -0400 Subject: [PATCH 014/333] fix missing comma in staticfile extensions list --- archivebox/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/util.py b/archivebox/util.py index 3c08c9bb..6f63b53f 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -70,7 +70,7 @@ STATICFILE_EXTENSIONS = { # that can be downloaded as-is, not html pages that need to be rendered 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp', 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai', - 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8' + 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v', 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8', 'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'atom', 'rss', 'css', 'js', 'json', 'dmg', 'iso', 'img', From 050cd9c8616cae31e388ecb4a312e107decc1f57 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 15:28:55 -0400 Subject: [PATCH 015/333] add license to manifest --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index ddb780e6..9100b772 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ +include LICENSE include archivebox/VERSION graft archivebox/themes graft archivebox/themes/static From 2440c1c1bf5901dc058baee0a9aeac78f2babcc8 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 15:30:42 -0400 Subject: [PATCH 016/333] just use simple version instead of git hash --- setup.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/setup.py b/setup.py index adca4887..310c9691 100644 --- a/setup.py +++ b/setup.py @@ -8,19 +8,10 @@ with open("README.md", "r") as fh: script_dir = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) VERSION = open(os.path.join(script_dir, 'archivebox', 'VERSION'), 'r').read().strip() -try: - GIT_HEAD = open(os.path.join(script_dir, '.git', 'HEAD'), 'r').read().strip().split(': ')[1] - GIT_SHA = open(os.path.join(script_dir, '.git', GIT_HEAD), 'r').read().strip()[:9] - PYPI_VERSION = "{}+{}".format(VERSION, GIT_SHA) -except: - PYPI_VERSION = VERSION - -with open(os.path.join(script_dir, 'archivebox', 'VERSION'), 'w+') as f: - f.write(PYPI_VERSION) setuptools.setup( name="archivebox", - version=PYPI_VERSION, + version=VERSION, author="Nick Sweeting", author_email="git@nicksweeting.com", description="The self-hosted internet archive.", From 7ee837c12784e196865a06ce09b84a91f1c1274d Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 15:32:18 -0400 Subject: [PATCH 017/333] add twine to packages --- Pipfile | 1 + Pipfile.lock | 117 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/Pipfile b/Pipfile index 194f81db..7c7e05ce 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ setuptools = "*" sphinx = "*" recommonmark = "*" sphinx-rtd-theme = "*" +twine = "*" [packages] dataclasses = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 663654b1..64a9bae2 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "8ac4f9e5cd266406a861a283b321b9eee0ca469638f838e93467403ef2f0594d" + "sha256": "5a1618caef76ff53b66c5e8674d8e639d25f75068f7026ad799e217d307628fc" }, "pipfile-spec": 6, "requires": { @@ -64,11 +64,11 @@ }, "django": { 
"hashes": [ - "sha256:7c3543e4fb070d14e10926189a7fcf42ba919263b7473dceaefce34d54e8a119", - "sha256:a2814bffd1f007805b19194eb0b9a331933b82bd5da1c3ba3d7b7ba16e06dc4b" + "sha256:6fcc3cbd55b16f9a01f37de8bcbe286e0ea22e87096557f1511051780338eaea", + "sha256:bb407d0bb46395ca1241f829f5bd03f7e482f97f7d1936e26e98dacb201ed4ec" ], "index": "pypi", - "version": "==2.2" + "version": "==2.2.1" }, "django-extensions": { "hashes": [ @@ -203,11 +203,11 @@ }, "youtube-dl": { "hashes": [ - "sha256:46f6e30c673ba71de84748dad4c264d1b6fb30beebf1ef834846a651b4524a78", - "sha256:b20d110e1bed8d16f5771bb938ab6e5da67f08af62b599af65301cca290f2e15" + "sha256:31844229a4f4d7003e03ab309ff2caff1b16ce0acbd3cfb7a13276058af13056", + "sha256:a751bd293e2d7ee963910de14b3eb95b88837021899be488fade0b8abe815650" ], "index": "pypi", - "version": "==2019.4.24" + "version": "==2019.4.30" } }, "develop": { @@ -240,6 +240,13 @@ ], "version": "==0.1.0" }, + "bleach": { + "hashes": [ + "sha256:213336e49e102af26d9cde77dd2d0397afabc5a6bf2fed985dc35b5d1e285a16", + "sha256:3fdf7f77adcf649c9911387df51254b813185e32b2c6619f690b593a617e19fa" + ], + "version": "==3.1.0" + }, "certifi": { "hashes": [ "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", @@ -256,10 +263,10 @@ }, "commonmark": { "hashes": [ - "sha256:9f6dda7876b2bb88dd784440166f4bc8e56cb2b2551264051123bacb0b6c1d8a", - "sha256:abcbc854e0eae5deaf52ae5e328501b78b4a0758bf98ac8bb792fce993006084" + "sha256:14c3df31e8c9c463377e287b2a1eefaa6019ab97b22dad36e2f32be59d61d68d", + "sha256:867fc5db078ede373ab811e16b6789e9d033b15ccd7296f370ca52d1ee792ce0" ], - "version": "==0.8.1" + "version": "==0.9.0" }, "decorator": { "hashes": [ @@ -449,6 +456,13 @@ ], "version": "==0.7.5" }, + "pkginfo": { + "hashes": [ + "sha256:7424f2c8511c186cd5424bbf31045b77435b37a8d604990b79d4e70d741148bb", + "sha256:a6d9e40ca61ad3ebd0b72fbadd4fba16e4c0e4df0428c041e01e06eb6ee71f32" + ], + "version": "==1.5.0.1" + }, "prompt-toolkit": { "hashes": [ "sha256:11adf3389a996a6d45cc277580d0d53e8a5afd281d0c9ec71b28e6f121463780", @@ -499,6 +513,13 @@ ], "version": "==2019.1" }, + "readme-renderer": { + "hashes": [ + "sha256:bb16f55b259f27f75f640acf5e00cf897845a8b3e4731b5c1a436e4b8529202f", + "sha256:c8532b79afc0375a85f10433eca157d6b50f7d6990f337fa498c96cd4bfc203d" + ], + "version": "==24.0" + }, "recommonmark": { "hashes": [ "sha256:a520b8d25071a51ae23a27cf6252f2fe387f51bdc913390d83b2b50617f5bb48", @@ -514,6 +535,13 @@ ], "version": "==2.21.0" }, + "requests-toolbelt": { + "hashes": [ + "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f", + "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0" + ], + "version": "==0.9.1" + }, "six": { "hashes": [ "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", @@ -586,6 +614,13 @@ ], "version": "==1.1.3" }, + "tqdm": { + "hashes": [ + "sha256:d385c95361699e5cf7622485d9b9eae2d4864b21cd5a2374a9c381ffed701021", + "sha256:e22977e3ebe961f72362f6ddfb9197cc531c9737aaf5f607ef09740c849ecd05" + ], + "version": "==4.31.1" + }, "traitlets": { "hashes": [ "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835", @@ -593,30 +628,37 @@ ], "version": "==4.3.2" }, + "twine": { + "hashes": [ + "sha256:0fb0bfa3df4f62076cab5def36b1a71a2e4acb4d1fa5c97475b048117b1a6446", + "sha256:d6c29c933ecfc74e9b1d9fa13aa1f87c5d5770e119f5a4ce032092f0ff5b14dc" + ], + "index": "pypi", + "version": "==1.13.0" + }, "typed-ast": { "hashes": [ - "sha256:04894d268ba6eab7e093d43107869ad49e7b5ef40d1a94243ea49b352061b200", - 
"sha256:16616ece19daddc586e499a3d2f560302c11f122b9c692bc216e821ae32aa0d0", - "sha256:252fdae740964b2d3cdfb3f84dcb4d6247a48a6abe2579e8029ab3be3cdc026c", - "sha256:2af80a373af123d0b9f44941a46df67ef0ff7a60f95872412a145f4500a7fc99", - "sha256:2c88d0a913229a06282b285f42a31e063c3bf9071ff65c5ea4c12acb6977c6a7", - "sha256:2ea99c029ebd4b5a308d915cc7fb95b8e1201d60b065450d5d26deb65d3f2bc1", - "sha256:3d2e3ab175fc097d2a51c7a0d3fda442f35ebcc93bb1d7bd9b95ad893e44c04d", - "sha256:4766dd695548a15ee766927bf883fb90c6ac8321be5a60c141f18628fb7f8da8", - "sha256:56b6978798502ef66625a2e0f80cf923da64e328da8bbe16c1ff928c70c873de", - "sha256:5cddb6f8bce14325b2863f9d5ac5c51e07b71b462361fd815d1d7706d3a9d682", - "sha256:644ee788222d81555af543b70a1098f2025db38eaa99226f3a75a6854924d4db", - "sha256:64cf762049fc4775efe6b27161467e76d0ba145862802a65eefc8879086fc6f8", - "sha256:68c362848d9fb71d3c3e5f43c09974a0ae319144634e7a47db62f0f2a54a7fa7", - "sha256:6c1f3c6f6635e611d58e467bf4371883568f0de9ccc4606f17048142dec14a1f", - "sha256:b213d4a02eec4ddf622f4d2fbc539f062af3788d1f332f028a2e19c42da53f15", - "sha256:bb27d4e7805a7de0e35bd0cb1411bc85f807968b2b0539597a49a23b00a622ae", - "sha256:c9d414512eaa417aadae7758bc118868cd2396b0e6138c1dd4fda96679c079d3", - "sha256:f0937165d1e25477b01081c4763d2d9cdc3b18af69cb259dd4f640c9b900fe5e", - "sha256:fb96a6e2c11059ecf84e6741a319f93f683e440e341d4489c9b161eca251cf2a", - "sha256:fc71d2d6ae56a091a8d94f33ec9d0f2001d1cb1db423d8b4355debfe9ce689b7" + "sha256:132eae51d6ef3ff4a8c47c393a4ef5ebf0d1aecc96880eb5d6c8ceab7017cc9b", + "sha256:18141c1484ab8784006c839be8b985cfc82a2e9725837b0ecfa0203f71c4e39d", + "sha256:2baf617f5bbbfe73fd8846463f5aeafc912b5ee247f410700245d68525ec584a", + "sha256:3d90063f2cbbe39177e9b4d888e45777012652d6110156845b828908c51ae462", + "sha256:4304b2218b842d610aa1a1d87e1dc9559597969acc62ce717ee4dfeaa44d7eee", + "sha256:4983ede548ffc3541bae49a82675996497348e55bafd1554dc4e4a5d6eda541a", + "sha256:5315f4509c1476718a4825f45a203b82d7fdf2a6f5f0c8f166435975b1c9f7d4", + "sha256:6cdfb1b49d5345f7c2b90d638822d16ba62dc82f7616e9b4caa10b72f3f16649", + "sha256:7b325f12635598c604690efd7a0197d0b94b7d7778498e76e0710cd582fd1c7a", + "sha256:8d3b0e3b8626615826f9a626548057c5275a9733512b137984a68ba1598d3d2f", + "sha256:8f8631160c79f53081bd23446525db0bc4c5616f78d04021e6e434b286493fd7", + "sha256:912de10965f3dc89da23936f1cc4ed60764f712e5fa603a09dd904f88c996760", + "sha256:b010c07b975fe853c65d7bbe9d4ac62f1c69086750a574f6292597763781ba18", + "sha256:c908c10505904c48081a5415a1e295d8403e353e0c14c42b6d67f8f97fae6616", + "sha256:c94dd3807c0c0610f7c76f078119f4ea48235a953512752b9175f9f98f5ae2bd", + "sha256:ce65dee7594a84c466e79d7fb7d3303e7295d16a83c22c7c4037071b059e2c21", + "sha256:eaa9cfcb221a8a4c2889be6f93da141ac777eb8819f077e1d09fb12d00a09a93", + "sha256:f3376bc31bad66d46d44b4e6522c5c21976bf9bca4ef5987bb2bf727f4506cbb", + "sha256:f9202fa138544e13a4ec1a6792c35834250a85958fde1251b6a22e07d1260ae7" ], - "version": "==1.3.4" + "version": "==1.3.5" }, "typing-extensions": { "hashes": [ @@ -628,10 +670,10 @@ }, "urllib3": { "hashes": [ - "sha256:4c291ca23bbb55c76518905869ef34bdd5f0e46af7afe6861e8375643ffee1a0", - "sha256:9a247273df709c4fedb38c711e44292304f73f39ab01beda9f6b9fc375669ac3" + "sha256:2393a695cd12afedd0dcb26fe5d50d0cf248e5a66f75dbd89a3d4eb333a61af4", + "sha256:a637e5fae88995b256e3409dc4d52c2e2e0ba32c42a6365fee8bbd2238de3cfb" ], - "version": "==1.24.2" + "version": "==1.24.3" }, "wcwidth": { "hashes": [ @@ -639,6 +681,13 @@ "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" ], "version": 
"==0.1.7" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" } } } From ef77a6d43f69a60d4d29cc74e61833a3fd7b39c4 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 15:39:55 -0400 Subject: [PATCH 018/333] add some missing fields to setup.py --- setup.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 310c9691..32809c76 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,15 @@ import os import setuptools -with open("README.md", "r") as fh: - long_description = fh.read() +BASE_DIR = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) +PYTHON_DIR = os.path.join(BASE_DIR, 'archivebox') +with open('README.md', "r") as f: + README = f.read() -script_dir = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) +with open(os.path.join(PYTHON_DIR, 'VERSION'), 'r') as f: + VERSION = f.read().strip() -VERSION = open(os.path.join(script_dir, 'archivebox', 'VERSION'), 'r').read().strip() setuptools.setup( name="archivebox", @@ -15,9 +17,10 @@ setuptools.setup( author="Nick Sweeting", author_email="git@nicksweeting.com", description="The self-hosted internet archive.", - long_description=long_description, + long_description=README, long_description_content_type="text/markdown", url="https://github.com/pirate/ArchiveBox", + license='MIT', project_urls={ 'Documentation': 'https://github.com/pirate/ArchiveBox/Wiki', 'Community': 'https://github.com/pirate/ArchiveBox/wiki/Web-Archiving-Community', @@ -27,7 +30,7 @@ setuptools.setup( 'Changelog': 'https://github.com/pirate/ArchiveBox/wiki/Changelog', 'Patreon': 'https://github.com/pirate/ArchiveBox/wiki/Donations', }, - packages=setuptools.find_packages(), + packages=['archivebox',], python_requires='>=3.6', install_requires=[ "dataclasses==0.6", From ba21ff46f3b65809f47a5b37920cc8dbe402355d Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 16:10:34 -0400 Subject: [PATCH 019/333] reverse the url order --- setup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 32809c76..723aeaa1 100644 --- a/setup.py +++ b/setup.py @@ -22,15 +22,15 @@ setuptools.setup( url="https://github.com/pirate/ArchiveBox", license='MIT', project_urls={ - 'Documentation': 'https://github.com/pirate/ArchiveBox/Wiki', - 'Community': 'https://github.com/pirate/ArchiveBox/wiki/Web-Archiving-Community', - 'Source': 'https://github.com/pirate/ArchiveBox', - 'Bug Tracker': 'https://github.com/pirate/ArchiveBox/issues', - 'Roadmap': 'https://github.com/pirate/ArchiveBox/wiki/Roadmap', + 'Donate': 'https://github.com/pirate/ArchiveBox/wiki/Donations', 'Changelog': 'https://github.com/pirate/ArchiveBox/wiki/Changelog', - 'Patreon': 'https://github.com/pirate/ArchiveBox/wiki/Donations', + 'Roadmap': 'https://github.com/pirate/ArchiveBox/wiki/Roadmap', + 'Bug Tracker': 'https://github.com/pirate/ArchiveBox/issues', + 'Source': 'https://github.com/pirate/ArchiveBox', + 'Community': 'https://github.com/pirate/ArchiveBox/wiki/Web-Archiving-Community', + 'Documentation': 'https://github.com/pirate/ArchiveBox/Wiki', }, - packages=['archivebox',], + packages=setuptools.find_packages(), python_requires='>=3.6', install_requires=[ "dataclasses==0.6", From e0489d77e71a60f2a66d2ede9f774d8fa0eea632 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 17:39:45 -0400 
Subject: [PATCH 020/333] bump the version --- archivebox/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/VERSION b/archivebox/VERSION index 1d0ba9ea..267577d4 100644 --- a/archivebox/VERSION +++ b/archivebox/VERSION @@ -1 +1 @@ -0.4.0 +0.4.1 From d398bd59b017a28fd3911e32550c5d20ec1f09c2 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 17:51:21 -0400 Subject: [PATCH 021/333] switch to pure Manifest-based package includes --- MANIFEST.in | 9 ++------- setup.py | 14 +------------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9100b772..a73ef711 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,4 @@ include LICENSE +include README.md include archivebox/VERSION -graft archivebox/themes -graft archivebox/themes/static -graft archivebox/themes/admin -graft archivebox/themes/default -graft archivebox/themes/default/static -graft archivebox/themes/legacy -graft archivebox/themes/legacy/static +recursive-include archivebox/themes * diff --git a/setup.py b/setup.py index 723aeaa1..f23ae7b5 100644 --- a/setup.py +++ b/setup.py @@ -55,19 +55,7 @@ setuptools.setup( 'archivebox = archivebox.__main__:main', ], }, - package_data={ - 'archivebox': [ - # Manifest.ini must correspond 1:1 with this list - 'VERSION', - 'themes/*', - 'themes/static/*', - 'themes/admin/*' - 'themes/default/*' - 'themes/default/static/*' - 'themes/legacy/*', - 'themes/legacy/static/*', - ], - }, + include_package_data=True, classifiers=[ "Development Status :: 4 - Beta", From d016f1efb50a6ba13b84a05754fd92e631e90346 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 17:52:43 -0400 Subject: [PATCH 022/333] bump version --- archivebox/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/VERSION b/archivebox/VERSION index 267577d4..2b7c5ae0 100644 --- a/archivebox/VERSION +++ b/archivebox/VERSION @@ -1 +1 @@ -0.4.1 +0.4.2 From 3c3b2ee62167c499f7f2a047b9d635a28a58544a Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 May 2019 19:15:16 -0400 Subject: [PATCH 023/333] expose more django server config options --- archivebox/config/__init__.py | 29 ++++++++++++--- archivebox/config/stubs.py | 9 ++++- archivebox/core/settings.py | 43 ++++++++++------------- archivebox/core/urls.py | 6 ++++ archivebox/core/views.py | 18 +++++++++- archivebox/main.py | 6 ++-- archivebox/themes/default/main_index.html | 2 +- etc/ArchiveBox.conf.default | 13 +++++-- 8 files changed, 89 insertions(+), 37 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 72baec64..04b8515c 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -44,10 +44,19 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = { 'TIMEOUT': {'type': int, 'default': 60}, 'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, 'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'}, - 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'}, 'URL_BLACKLIST': {'type': str, 'default': None}, }, + 'SERVER_CONFIG': { + 'SECRET_KEY': {'type': str, 'default': None}, + 'ALLOWED_HOSTS': {'type': str, 'default': '*'}, + 'DEBUG': {'type': bool, 'default': False}, + 'PUBLIC_INDEX': {'type': bool, 'default': True}, + 'PUBLIC_SNAPSHOTS': {'type': bool, 'default': True}, + 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. 
Contact server owner for any takedown requests.'}, + 'ACTIVE_THEME': {'type': str, 'default': 'default'}, + }, + 'ARCHIVE_METHOD_TOGGLES': { 'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)}, 'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)}, @@ -313,9 +322,6 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: with open(config_path, 'w+') as f: f.write(CONFIG_HEADER) - if not config: - return {} - config_file = ConfigParser() config_file.optionxform = str config_file.read(config_path) @@ -336,6 +342,21 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: config_file[section] = {**existing_config, key: val} + # always make sure there's a SECRET_KEY defined for Django + existing_secret_key = None + if 'SERVER_CONFIG' in config_file and 'SECRET_KEY' in config_file['SERVER_CONFIG']: + existing_secret_key = config_file['SERVER_CONFIG']['SECRET_KEY'] + + if (not existing_secret_key) or ('not a valid secret' in existing_secret_key): + from django.utils.crypto import get_random_string + chars = 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.' + random_secret_key = get_random_string(50, chars) + if 'SERVER_CONFIG' in config_file: + config_file['SERVER_CONFIG']['SECRET_KEY'] = random_secret_key + else: + config_file['SERVER_CONFIG'] = {'SECRET_KEY': random_secret_key} + + f.write(CONFIG_HEADER) config_file.write(f) try: diff --git a/archivebox/config/stubs.py b/archivebox/config/stubs.py index f7d5059a..7d3925dd 100644 --- a/archivebox/config/stubs.py +++ b/archivebox/config/stubs.py @@ -22,9 +22,16 @@ class ConfigDict(BaseConfig, total=False): TIMEOUT: int MEDIA_TIMEOUT: int OUTPUT_PERMISSIONS: str - FOOTER_INFO: str URL_BLACKLIST: Optional[str] + SECRET_KEY: str + ALLOWED_HOSTS: str + DEBUG: bool + PUBLIC_INDEX: bool + PUBLIC_SNAPSHOTS: bool + FOOTER_INFO: str + ACTIVE_THEME: str + SAVE_TITLE: bool SAVE_FAVICON: bool SAVE_WGET: bool diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index e128f8d0..463a7079 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -3,26 +3,25 @@ __package__ = 'archivebox.core' import os import sys -SECRET_KEY = '---------------- not a valid secret key ! 
----------------' -DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' -ALLOWED_HOSTS = ['*'] -REPO_DIR = os.path.abspath(os.path.join(os.path.abspath(__file__), os.path.pardir, os.path.pardir)) -OUTPUT_DIR = os.path.abspath(os.getenv('OUTPUT_DIR', os.curdir)) -ARCHIVE_DIR = os.path.join(OUTPUT_DIR, 'archive') -DATABASE_FILE = os.path.join(OUTPUT_DIR, 'index.sqlite3') +from ..config import ( + OUTPUT_DIR, + SECRET_KEY, + DEBUG, + ALLOWED_HOSTS, + PYTHON_DIR, + ACTIVE_THEME, + SQL_INDEX_FILENAME, +) -ACTIVE_THEME = 'default' +ALLOWED_HOSTS = ALLOWED_HOSTS.split(',') IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] -APPEND_SLASH = True - INSTALLED_APPS = [ 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', - # 'django.contrib.sites', 'django.contrib.messages', 'django.contrib.admin', 'django.contrib.staticfiles', @@ -40,17 +39,17 @@ MIDDLEWARE = [ 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', - # 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] ROOT_URLCONF = 'core.urls' +APPEND_SLASH = True TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [ - os.path.join(REPO_DIR, 'themes', ACTIVE_THEME), - os.path.join(REPO_DIR, 'themes', 'default'), - os.path.join(REPO_DIR, 'themes'), + os.path.join(PYTHON_DIR, 'themes', ACTIVE_THEME), + os.path.join(PYTHON_DIR, 'themes', 'default'), + os.path.join(PYTHON_DIR, 'themes'), ], 'APP_DIRS': True, 'OPTIONS': { @@ -69,7 +68,7 @@ WSGI_APPLICATION = 'core.wsgi.application' DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': DATABASE_FILE, + 'NAME': os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME), } } @@ -104,7 +103,7 @@ SHELL_PLUS_PRINT_SQL = False IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner'] IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell' if IS_SHELL: - os.environ['PYTHONSTARTUP'] = os.path.join(REPO_DIR, 'core', 'welcome_message.py') + os.environ['PYTHONSTARTUP'] = os.path.join(PYTHON_DIR, 'core', 'welcome_message.py') LANGUAGE_CODE = 'en-us' @@ -118,11 +117,7 @@ EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' STATIC_URL = '/static/' STATICFILES_DIRS = [ - os.path.join(REPO_DIR, 'themes', ACTIVE_THEME, 'static'), - os.path.join(REPO_DIR, 'themes', 'default', 'static'), - os.path.join(REPO_DIR, 'themes', 'static'), + os.path.join(PYTHON_DIR, 'themes', ACTIVE_THEME, 'static'), + os.path.join(PYTHON_DIR, 'themes', 'default', 'static'), + os.path.join(PYTHON_DIR, 'themes', 'static'), ] - -SERVE_STATIC = True - - diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index 2a001f6b..9b4af5a5 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -22,8 +22,14 @@ urlpatterns = [ path('add/', AddLinks.as_view(), name='AddLinks'), path('static/', views.serve), + + path('accounts/login/', RedirectView.as_view(url='/admin/login/')), + path('accounts/logout/', RedirectView.as_view(url='/admin/logout/')), + path('accounts/', include('django.contrib.auth.urls')), path('admin/', admin.site.urls), + + path('', MainIndex.as_view(), name='Home'), ] diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 2c140d58..7fee7408 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -4,11 +4,18 @@ from django.shortcuts import render, redirect from django.http import HttpResponse from django.views import View, static +from django.conf import settings from core.models 
import Snapshot from ..index import load_main_index, load_main_index_meta -from ..config import OUTPUT_DIR, VERSION, FOOTER_INFO +from ..config import ( + OUTPUT_DIR, + VERSION, + FOOTER_INFO, + PUBLIC_INDEX, + PUBLIC_SNAPSHOTS, +) from ..util import base_url @@ -16,6 +23,9 @@ class MainIndex(View): template = 'main_index.html' def get(self, request): + if not request.user.is_authenticated and not PUBLIC_INDEX: + return redirect(f'/admin/login/?next={request.path}') + all_links = load_main_index(out_dir=OUTPUT_DIR) meta_info = load_main_index_meta(out_dir=OUTPUT_DIR) @@ -34,6 +44,9 @@ class AddLinks(View): template = 'add_links.html' def get(self, request): + if not request.user.is_authenticated and not PUBLIC_INDEX: + return redirect(f'/admin/login/?next={request.path}') + context = {} return render(template_name=self.template, request=request, context=context) @@ -54,6 +67,9 @@ class LinkDetails(View): if '/' not in path: return redirect(f'{path}/index.html') + if not request.user.is_authenticated and not PUBLIC_SNAPSHOTS: + return redirect(f'/admin/login/?next={request.path}') + try: slug, archivefile = path.split('/', 1) except (IndexError, ValueError): diff --git a/archivebox/main.py b/archivebox/main.py index 00529743..80e4b77b 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -292,14 +292,14 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None: setup_django(out_dir, check_db=False) from django.conf import settings - assert settings.DATABASE_FILE == os.path.join(out_dir, SQL_INDEX_FILENAME) - print(f' √ {settings.DATABASE_FILE}') + DATABASE_FILE = os.path.join(out_dir, SQL_INDEX_FILENAME) + print(f' √ {DATABASE_FILE}') print() for migration_line in apply_migrations(out_dir): print(f' {migration_line}') - assert os.path.exists(settings.DATABASE_FILE) + assert os.path.exists(DATABASE_FILE) # from django.contrib.auth.models import User # if IS_TTY and not User.objects.filter(is_superuser=True).exists(): diff --git a/archivebox/themes/default/main_index.html b/archivebox/themes/default/main_index.html index 4ad00be7..925c4fa4 100644 --- a/archivebox/themes/default/main_index.html +++ b/archivebox/themes/default/main_index.html @@ -190,7 +190,7 @@
Add Links   |   - Admin   |   + Admin   |   Docs
diff --git a/etc/ArchiveBox.conf.default b/etc/ArchiveBox.conf.default index 31017ce4..df9abe22 100644 --- a/etc/ArchiveBox.conf.default +++ b/etc/ArchiveBox.conf.default @@ -1,6 +1,6 @@ # This is the example default configiration file for ArchiveBox. # -# Copy example config from here into your project's ArchiveBox.conf file, +# Copy lines from here into your project's ArchiveBox.conf file and uncomment, # DO NOT EDIT THIS FILE DIRECTLY! # # See the list of all the possible options. documentation, and examples here: @@ -11,10 +11,17 @@ # ONLY_NEW = False # TIMEOUT = 60 # MEDIA_TIMEOUT = 3600 -# ACTIVE_THEME = default -# FOOTER_INFO = Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests. # URL_BLACKLIST = (://(.*\.)?facebook\.com)|(://(.*\.)?ebay\.com)|(.*\.exe$) +[SERVER_CONFIG] +# SECRET_KEY = ---------------- not a valid secret key ! ---------------- +# DEBUG = False +# PUBLIC_INDEX = True +# PUBLIC_SNAPSHOTS = True +# FOOTER_INFO = Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests. +# ACTIVE_THEME = default + + [ARCHIVE_METHOD_TOGGLES] # SAVE_TITLE = True # SAVE_FAVICON = True From ca9c9ef956e9ecc7e4f3b07ed6ea74cd434ebb68 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 6 May 2019 17:16:20 -0400 Subject: [PATCH 024/333] add warning about running manage.py directly --- archivebox/manage.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/archivebox/manage.py b/archivebox/manage.py index 52c21895..3976c2c2 100755 --- a/archivebox/manage.py +++ b/archivebox/manage.py @@ -3,6 +3,20 @@ import os import sys if __name__ == '__main__': + # if you're a developer working on archivebox, still prefer the archivebox + # versions of ./manage.py commands whenever possible. When that's not possible + # (e.g. makemigrations), you can comment out this check temporarily + + print("[X] Don't run ./manage.py directly, use the archivebox CLI instead e.g.:") + print(' archivebox manage createsuperuser') + print() + print(' Hint: Use these archivebox commands instead of the ./manage.py equivalents:') + print(' archivebox init (migrates the databse to latest version)') + print(' archivebox server (runs the Django web server)') + print(' archivebox shell (opens an iPython Django shell with all models imported)') + print(' archivebox manage [cmd] (any other management commands)') + raise SystemExit(2) + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') try: from django.core.management import execute_from_command_line From 3b0236b087defc2e73e8f0301c016ce6efbd0b01 Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Tue, 14 May 2019 23:54:17 +0100 Subject: [PATCH 025/333] Add prefers-color-scheme: dark support --- archivebox/templates/index.html | 46 +++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/archivebox/templates/index.html b/archivebox/templates/index.html index 264deb4d..dd2e16cd 100644 --- a/archivebox/templates/index.html +++ b/archivebox/templates/index.html @@ -3,6 +3,34 @@ Archived Sites - - - - - - - -
{% csrf_token %} - Add new links...
- - + tr td a.favicon img { + padding-left: 6px; + padding-right: 12px; + vertical-align: -4px; + } + tr td a.title { + font-size: 1.4em; + text-decoration: none; + color: black; + } + tr td a.title small { + background-color: #efefef; + border-radius: 4px; + float: right; + } + input[type="search"]::-webkit-search-cancel-button { + -webkit-appearance: searchfield-cancel-button; + } + .title-col { + text-align: left; + } + .title-col a { + color: black; + } + + + + + + + + +
+ {{ stdout | safe }} +

+
+ {% csrf_token %} Add new links...
+
+ +
+ + Go back to Snapshot list +
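The `{{ stdout | safe }}` block in the template above renders terminal output captured while the `archivebox add` code path runs; the `ansi_to_html()` helper added in the util.py diff just below rewrites the ANSI color escapes in that output into HTML styling first. A minimal sketch of the capture half, using the same `StringIO` plus `redirect_stdout` approach that the `AddLinks` view wraps around `add()` (the `run_and_capture` wrapper is an illustrative name, not part of the patch):

    from io import StringIO
    from contextlib import redirect_stdout

    def run_and_capture(func, *args, **kwargs) -> str:
        """Call a print-heavy CLI function and return everything it wrote to stdout."""
        buffer = StringIO()
        with redirect_stdout(buffer):
            func(*args, **kwargs)
        return buffer.getvalue()

    # e.g. output = run_and_capture(add, import_str=url, update_all=False, out_dir=OUTPUT_DIR)
    # the view then passes ansi_to_html(output) to the template as the `stdout` variable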
+ diff --git a/archivebox/util.py b/archivebox/util.py index 87c98263..50511313 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -20,6 +20,7 @@ from .config import ( CHECK_SSL_VALIDITY, WGET_USER_AGENT, CHROME_OPTIONS, + COLOR_DICT ) try: @@ -69,6 +70,8 @@ URL_REGEX = re.compile( re.IGNORECASE, ) +COLOR_REGEX = re.compile(r'\[(?P\d+)(;(?P\d+)(;(?P\d+))?)?m') + def enforce_types(func): """ @@ -195,6 +198,27 @@ def chrome_args(**options) -> List[str]: return cmd_args +def ansi_to_html(text): + """ + Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html + """ + TEMPLATE = '
' + text = text.replace('[m', '
') + + def single_sub(match): + argsdict = match.groupdict() + if argsdict['arg_3'] is None: + if argsdict['arg_2'] is None: + bold, color = 0, argsdict['arg_1'] + else: + bold, color = argsdict['arg_1'], argsdict['arg_2'] + else: + bold, color = argsdict['arg_3'], argsdict['arg_2'] + + return TEMPLATE.format(COLOR_DICT[color][0]) + + return COLOR_REGEX.sub(single_sub, text) + class ExtendedEncoder(pyjson.JSONEncoder): """ From 364c5752d827c87a927bed00e89e4e3d7c5b6e4a Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 1 Jul 2020 12:29:56 -0500 Subject: [PATCH 117/333] feat: Handle empty URL case --- archivebox/core/views.py | 27 +- archivebox/themes/default/add_links.html | 426 +++++++++++------------ 2 files changed, 218 insertions(+), 235 deletions(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index b7911674..5efa79cd 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -57,19 +57,22 @@ class AddLinks(View): def post(self, request): url = request.POST['url'] - print(f'[+] Adding URL: {url}') - add_stdout = StringIO() - with redirect_stdout(add_stdout): - extracted_links = add( - import_str=url, - update_all=False, - out_dir=OUTPUT_DIR, - ) - print(add_stdout.getvalue()) + if url: + print(f'[+] Adding URL: {url}') + add_stdout = StringIO() + with redirect_stdout(add_stdout): + extracted_links = add( + import_str=url, + update_all=False, + out_dir=OUTPUT_DIR, + ) + print(add_stdout.getvalue()) - context = { - "stdout": ansi_to_html(add_stdout.getvalue()) - } + context = { + "stdout": ansi_to_html(add_stdout.getvalue()) + } + else: + context = {"stdout": "Please enter a URL"} return render(template_name=self.template, request=request, context=context) diff --git a/archivebox/themes/default/add_links.html b/archivebox/themes/default/add_links.html index db09322f..6c625594 100644 --- a/archivebox/themes/default/add_links.html +++ b/archivebox/themes/default/add_links.html @@ -2,231 +2,211 @@ - - Archived Sites - - - - - - - - - -
-
- {% csrf_token %} Add new links...
+ tr td a.favicon img { + padding-left: 6px; + padding-right: 12px; + vertical-align: -4px; + } + tr td a.title { + font-size: 1.4em; + text-decoration:none; + color:black; + } + tr td a.title small { + background-color: #efefef; + border-radius: 4px; + float:right + } + input[type=search]::-webkit-search-cancel-button { + -webkit-appearance: searchfield-cancel-button; + } + .title-col { + text-align: left; + } + .title-col a { + color: black; + } + + + + + + + + +
+ {{ stdout | safe }} +

+
{% csrf_token %} + Add new links...
+
+ +
+
- Go back to Snapshot list -
- + Go back to Snapshot list + + From 8840ad72bbc2006c9e02690b814b6524679ef79f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 03:12:30 -0400 Subject: [PATCH 118/333] remove circular import possibilities --- archivebox/config/__init__.py | 8 ++++++++ archivebox/core/admin.py | 2 +- archivebox/util.py | 25 ++++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index fa979211..f06b0f3d 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -21,6 +21,14 @@ from .stubs import ( ConfigDefaultDict, ) +# precedence order for config: +# 1. cli args +# 2. shell environment vars +# 3. config file +# 4. defaults + +# env USE_COLO=false archivebox add '...' +# env SHOW_PROGRESS=1 archivebox add '...' # ****************************************************************************** # Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 5cf71796..7942c6c2 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -1,7 +1,7 @@ from django.contrib import admin from django.utils.html import format_html -from archivebox.util import htmldecode, urldecode +from util import htmldecode, urldecode from core.models import Snapshot from cli.logging import printable_filesize diff --git a/archivebox/util.py b/archivebox/util.py index 50511313..717e1185 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -14,15 +14,6 @@ from dateutil import parser as dateparser import requests from base32_crockford import encode as base32_encode # type: ignore -from .config import ( - TIMEOUT, - STATICFILE_EXTENSIONS, - CHECK_SSL_VALIDITY, - WGET_USER_AGENT, - CHROME_OPTIONS, - COLOR_DICT -) - try: import chardet detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"] @@ -49,7 +40,6 @@ base_url = lambda url: without_scheme(url) # uniq base url used to dedupe links without_www = lambda url: url.replace('://www.', '://', 1) without_trailing_slash = lambda url: url[:-1] if url[-1] == '/' else url.replace('/?', '?') hashurl = lambda url: base32_encode(int(sha256(base_url(url).encode('utf-8')).hexdigest(), 16))[:20] -is_static_file = lambda url: extension(url).lower() in STATICFILE_EXTENSIONS # TODO: the proper way is with MIME type detection, not using extension urlencode = lambda s: s and quote(s, encoding='utf-8', errors='replace') urldecode = lambda s: s and unquote(s) @@ -70,7 +60,14 @@ URL_REGEX = re.compile( re.IGNORECASE, ) +<<<<<<< HEAD COLOR_REGEX = re.compile(r'\[(?P\d+)(;(?P\d+)(;(?P\d+))?)?m') +======= +def is_static_file(url: str): + # TODO: the proper way is with MIME type detection + ext, not only extension + from .config import STATICFILE_EXTENSIONS + return extension(url).lower() in STATICFILE_EXTENSIONS +>>>>>>> c1fe068... 
remove circular import possibilities def enforce_types(func): @@ -155,8 +152,10 @@ def parse_date(date: Any) -> Optional[datetime]: @enforce_types -def download_url(url: str, timeout: int=TIMEOUT) -> str: +def download_url(url: str, timeout: int=None) -> str: """Download the contents of a remote url and return the text""" + from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT + timeout = timeout or TIMEOUT response = requests.get( url, headers={'User-Agent': WGET_USER_AGENT}, @@ -170,6 +169,8 @@ def download_url(url: str, timeout: int=TIMEOUT) -> str: def chrome_args(**options) -> List[str]: """helper to build up a chrome shell command with arguments""" + from .config import CHROME_OPTIONS + options = {**CHROME_OPTIONS, **options} cmd_args = [options['CHROME_BINARY']] @@ -202,6 +203,8 @@ def ansi_to_html(text): """ Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html """ + from .config import COLOR_DICT + TEMPLATE = '
' text = text.replace('[m', '
') From 2ece5c20cfb11eff27078faa316aa4af075e5ad9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 03:14:07 -0400 Subject: [PATCH 119/333] bump docs --- docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs b/docs index d6d43042..2061184e 160000 --- a/docs +++ b/docs @@ -1 +1 @@ -Subproject commit d6d43042893a017e0d43723da0b9890422102554 +Subproject commit 2061184e3ea6a35d8e32cb4ca6d24a1afc06706f From 3ec97e55283ed88be6ea3df89266378dda5fe09f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 03:22:37 -0400 Subject: [PATCH 120/333] fix git conflict commited by accident --- archivebox/util.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/archivebox/util.py b/archivebox/util.py index 717e1185..4ba1e3dd 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -60,14 +60,12 @@ URL_REGEX = re.compile( re.IGNORECASE, ) -<<<<<<< HEAD COLOR_REGEX = re.compile(r'\[(?P\d+)(;(?P\d+)(;(?P\d+))?)?m') -======= + def is_static_file(url: str): # TODO: the proper way is with MIME type detection + ext, not only extension from .config import STATICFILE_EXTENSIONS return extension(url).lower() in STATICFILE_EXTENSIONS ->>>>>>> c1fe068... remove circular import possibilities def enforce_types(func): @@ -204,7 +202,7 @@ def ansi_to_html(text): Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html """ from .config import COLOR_DICT - + TEMPLATE = '
' text = text.replace('[m', '
') From 322be6b29233eee1b77626aab78d9e43b76261b0 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 03:53:39 -0400 Subject: [PATCH 121/333] move main into cli init and remove circular import layer --- archivebox/__init__.py | 6 ---- archivebox/__main__.py | 9 ++---- archivebox/cli/__init__.py | 55 ++++++++++++++++++++++++++++++- archivebox/cli/archivebox.py | 63 ------------------------------------ setup.py | 11 +++---- 5 files changed, 61 insertions(+), 83 deletions(-) delete mode 100755 archivebox/cli/archivebox.py diff --git a/archivebox/__init__.py b/archivebox/__init__.py index 56b6f16e..b0c00b61 100644 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -1,7 +1 @@ __package__ = 'archivebox' - -from . import core -from . import cli - -# The main CLI source code, is in 'archivebox/main.py' -from .main import * diff --git a/archivebox/__main__.py b/archivebox/__main__.py index 3386d46d..55e94415 100755 --- a/archivebox/__main__.py +++ b/archivebox/__main__.py @@ -3,13 +3,8 @@ __package__ = 'archivebox' import sys -from .cli import archivebox - - -def main(): - archivebox.main(args=sys.argv[1:], stdin=sys.stdin) +from .cli import main if __name__ == '__main__': - archivebox.main(args=sys.argv[1:], stdin=sys.stdin) - + main(args=sys.argv[1:], stdin=sys.stdin) diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index 7972c02e..ece64f8b 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -1,8 +1,13 @@ __package__ = 'archivebox.cli' +__command__ = 'archivebox' import os +import argparse + +from typing import Optional, Dict, List, IO + +from ..config import OUTPUT_DIR -from typing import Dict, List, Optional, IO from importlib import import_module CLI_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -24,6 +29,7 @@ is_valid_cli_module = lambda module, subcommand: ( and module.__command__.split(' ')[-1] == subcommand ) + def list_subcommands() -> Dict[str, str]: """find and import all valid archivebox_.py files in CLI_DIR""" @@ -57,6 +63,53 @@ def run_subcommand(subcommand: str, SUBCOMMANDS = list_subcommands() + +def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: + subcommands = list_subcommands() + parser = argparse.ArgumentParser( + prog=__command__, + description='ArchiveBox: The self-hosted internet archive', + add_help=False, + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + '--help', '-h', + action='store_true', + help=subcommands['help'], + ) + group.add_argument( + '--version', + action='store_true', + help=subcommands['version'], + ) + group.add_argument( + "subcommand", + type=str, + help= "The name of the subcommand to run", + nargs='?', + choices=subcommands.keys(), + default=None, + ) + parser.add_argument( + "subcommand_args", + help="Arguments for the subcommand", + nargs=argparse.REMAINDER, + ) + command = parser.parse_args(args or ()) + + if command.help or command.subcommand is None: + command.subcommand = 'help' + if command.version: + command.subcommand = 'version' + + run_subcommand( + subcommand=command.subcommand, + subcommand_args=command.subcommand_args, + stdin=stdin, + pwd=pwd or OUTPUT_DIR, + ) + + __all__ = ( 'SUBCOMMANDS', 'list_subcommands', diff --git a/archivebox/cli/archivebox.py b/archivebox/cli/archivebox.py deleted file mode 100755 index c8281937..00000000 --- a/archivebox/cli/archivebox.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -# archivebox [command] - -__package__ = 'archivebox.cli' -__command__ 
= 'archivebox' - -import sys -import argparse - -from typing import Optional, List, IO - -from . import list_subcommands, run_subcommand -from ..config import OUTPUT_DIR - - -def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: - subcommands = list_subcommands() - parser = argparse.ArgumentParser( - prog=__command__, - description='ArchiveBox: The self-hosted internet archive', - add_help=False, - ) - group = parser.add_mutually_exclusive_group() - group.add_argument( - '--help', '-h', - action='store_true', - help=subcommands['help'], - ) - group.add_argument( - '--version', - action='store_true', - help=subcommands['version'], - ) - group.add_argument( - "subcommand", - type=str, - help= "The name of the subcommand to run", - nargs='?', - choices=subcommands.keys(), - default=None, - ) - parser.add_argument( - "subcommand_args", - help="Arguments for the subcommand", - nargs=argparse.REMAINDER, - ) - command = parser.parse_args(args or ()) - - if command.help or command.subcommand is None: - command.subcommand = 'help' - if command.version: - command.subcommand = 'version' - - run_subcommand( - subcommand=command.subcommand, - subcommand_args=command.subcommand_args, - stdin=stdin, - pwd=pwd or OUTPUT_DIR, - ) - - -if __name__ == '__main__': - main(args=sys.argv[1:], stdin=sys.stdin) diff --git a/setup.py b/setup.py index 8ac00c44..049528fb 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -import os import setuptools from pathlib import Path @@ -10,9 +9,9 @@ README = (BASE_DIR / "README.md").read_text() VERSION = (SOURCE_DIR / "VERSION").read_text().strip() # To see when setup.py gets called (uncomment for debugging) -import sys -print(SOURCE_DIR, f" (v{VERSION})") -print('>', sys.executable, *sys.argv) +# import sys +# print(SOURCE_DIR, f" (v{VERSION})") +# print('>', sys.executable, *sys.argv) # raise SystemExit(0) setuptools.setup( @@ -69,10 +68,10 @@ setuptools.setup( # 'redis': ['redis', 'django-redis'], # 'pywb': ['pywb', 'redis'], }, - packages=[PKG_NAME], + packages=setuptools.find_packages(), entry_points={ "console_scripts": [ - f"{PKG_NAME} = {PKG_NAME}.__main__:main", + f"{PKG_NAME} = {PKG_NAME}.cli:main", ], }, include_package_data=True, From 0c48449aa64c58fc350a40d39c3062e90e457a2d Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 04:00:51 -0400 Subject: [PATCH 122/333] fix subcommand and args not being passed --- archivebox/cli/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index ece64f8b..8d06855a 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -2,6 +2,7 @@ __package__ = 'archivebox.cli' __command__ = 'archivebox' import os +import sys import argparse from typing import Optional, Dict, List, IO @@ -65,6 +66,7 @@ SUBCOMMANDS = list_subcommands() def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: + args = sys.argv[1:] if args is None else args subcommands = list_subcommands() parser = argparse.ArgumentParser( prog=__command__, From 528fc8f1f64bae28e54b416be5bb578dc2e38ccb Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 2 Jul 2020 12:11:23 -0500 Subject: [PATCH 123/333] fix: Improve encoding detection for rss+xml content types --- archivebox/util.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/archivebox/util.py b/archivebox/util.py index 4ba1e3dd..8fdda389 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -160,6 +160,15 @@ def 
download_url(url: str, timeout: int=None) -> str: verify=CHECK_SSL_VALIDITY, timeout=timeout, ) + if response.headers.get('Content-Type') == 'application/rss+xml': + # Based on https://github.com/scrapy/w3lib/blob/master/w3lib/encoding.py + _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?''' + _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P[\w-]+)') + _BODY_ENCODING_PATTERN = r'<\s*(\?xml\s[^>]+%s)' % (_XML_ENCODING_RE) + _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE) + match = _BODY_ENCODING_STR_RE.search(response.text[:1024]) + if match: + response.encoding = match.group('xmlcharset') return response.text From f373df7bd43ebe2c557f16c9e0c139975b63396c Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 13:23:40 -0400 Subject: [PATCH 124/333] update helptext to clarify adding links --- archivebox/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/archivebox/main.py b/archivebox/main.py index a1aba118..f1fb98ce 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -377,11 +377,11 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None: else: print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links), **ANSI)) print() - print(' {lightred}Hint:{reset}To view your archive index, open:'.format(**ANSI)) - print(' {}'.format(os.path.join(out_dir, HTML_INDEX_FILENAME))) + print(' {lightred}Hint:{reset} To view your archive index, run:'.format(**ANSI)) + print(' archivebox server # then visit http://127.0.0.1:8000') print() print(' To add new links, you can run:') - print(" archivebox add 'https://example.com'") + print(" archivebox add ~/some/path/or/url/to/list_of_links.txt") print() print(' For more usage and examples, run:') print(' archivebox help') From 7c428f40c8b74df85c6088ad7fcd5b62c4e10556 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 2 Jul 2020 13:31:05 -0400 Subject: [PATCH 125/333] fix stdin link importing --- archivebox/cli/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index 8d06855a..087f11b5 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -64,9 +64,14 @@ def run_subcommand(subcommand: str, SUBCOMMANDS = list_subcommands() +class NotProvided: + pass + + +def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, pwd: Optional[str]=None) -> None: + args = sys.argv[1:] if args is NotProvided else args + stdin = sys.stdin if stdin is NotProvided else stdin -def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: - args = sys.argv[1:] if args is None else args subcommands = list_subcommands() parser = argparse.ArgumentParser( prog=__command__, From 8bdfa18a3f8eb10dfd05337f7c488d20bda31bcc Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 2 Jul 2020 15:54:25 -0500 Subject: [PATCH 126/333] feat: Allow feed loading from the add links view --- archivebox/core/forms.py | 7 +++++ archivebox/core/views.py | 33 +++++++++++++++++------- archivebox/themes/default/add_links.html | 10 +++++-- 3 files changed, 38 insertions(+), 12 deletions(-) create mode 100644 archivebox/core/forms.py diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py new file mode 100644 index 00000000..5f67e2c6 --- /dev/null +++ b/archivebox/core/forms.py @@ -0,0 +1,7 @@ +from django import forms + +CHOICES = (('url', 'URL'), ('feed', 'Feed')) + +class AddLinkForm(forms.Form): + url 
= forms.URLField() + source = forms.ChoiceField(choices=CHOICES, widget=forms.RadioSelect, initial='url') diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 5efa79cd..0c5efff2 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -22,6 +22,8 @@ from ..config import ( from ..util import base_url, ansi_to_html from .. main import add +from .forms import AddLinkForm + class MainIndex(View): template = 'main_index.html' @@ -51,28 +53,39 @@ class AddLinks(View): if not request.user.is_authenticated and not PUBLIC_INDEX: return redirect(f'/admin/login/?next={request.path}') - context = {} + context = { + "form": AddLinkForm() + } return render(template_name=self.template, request=request, context=context) def post(self, request): - url = request.POST['url'] - if url: + #url = request.POST['url'] + #if url: + form = AddLinkForm(request.POST) + if form.is_valid(): + url = form.cleaned_data["url"] print(f'[+] Adding URL: {url}') + if form.cleaned_data["source"] == "url": + key = "import_str" + else: + key = "import_path" + input_kwargs = { + key: url, + "update_all": False, + "out_dir": OUTPUT_DIR, + } add_stdout = StringIO() with redirect_stdout(add_stdout): - extracted_links = add( - import_str=url, - update_all=False, - out_dir=OUTPUT_DIR, - ) + extracted_links = add(**input_kwargs) print(add_stdout.getvalue()) context = { - "stdout": ansi_to_html(add_stdout.getvalue()) + "stdout": ansi_to_html(add_stdout.getvalue()), + "form": AddLinkForm() } else: - context = {"stdout": "Please enter a URL"} + context = {"form": form} return render(template_name=self.template, request=request, context=context) diff --git a/archivebox/themes/default/add_links.html b/archivebox/themes/default/add_links.html index 6c625594..7143c576 100644 --- a/archivebox/themes/default/add_links.html +++ b/archivebox/themes/default/add_links.html @@ -159,6 +159,12 @@ .title-col a { color: black; } + .ul-form { + list-style: none; + } + .ul-form li { + list-style: none; + } @@ -199,9 +205,9 @@
{{ stdout | safe }}

-
{% csrf_token %} + {% csrf_token %} Add new links...
-
+ {{ form.as_ul }}
From 63fe19e2c2d236cabae36ef441aff9fd46dd6014 Mon Sep 17 00:00:00 2001 From: Cristian Date: Fri, 3 Jul 2020 11:52:57 -0500 Subject: [PATCH 127/333] feat: Add pytest and initial tests --- setup.py | 3 +++ tests/test_init.py | 40 ++++++++++++++++++++++++++++++++++++++++ tests/test_util.py | 21 +++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 tests/test_init.py create mode 100644 tests/test_util.py diff --git a/setup.py b/setup.py index 049528fb..12002580 100755 --- a/setup.py +++ b/setup.py @@ -65,6 +65,9 @@ setuptools.setup( "sphinx-rtd-theme", "recommonmark", ], + "test": [ + "pytest" + ] # 'redis': ['redis', 'django-redis'], # 'pywb': ['pywb', 'redis'], }, diff --git a/tests/test_init.py b/tests/test_init.py new file mode 100644 index 00000000..b870a599 --- /dev/null +++ b/tests/test_init.py @@ -0,0 +1,40 @@ +# archivebox init +# archivebox add + +import os +import subprocess +from pathlib import Path +import json + +import pytest + +@pytest.fixture +def process(tmp_path): + os.chdir(tmp_path) + process = subprocess.run(['archivebox', 'init'], capture_output=True) + return process + + +def test_init(tmp_path, process): + assert "Initializing a new ArchiveBox collection in this folder..." in process.stdout.decode("utf-8") + +def test_update(tmp_path, process): + os.chdir(tmp_path) + update_process = subprocess.run(['archivebox', 'init'], capture_output=True) + assert "Updating existing ArchiveBox collection in this folder" in update_process.stdout.decode("utf-8") + +def test_add_link(tmp_path, process): + os.chdir(tmp_path) + add_process = subprocess.run(['archivebox', 'add', 'http://example.com'], capture_output=True) + archived_item_path = list(tmp_path.glob('archive/**/*'))[0] + + assert "index.json" in [x.name for x in archived_item_path.iterdir()] + + with open(archived_item_path / "index.json", "r") as f: + output_json = json.load(f) + assert "IANA — IANA-managed Reserved Domains" == output_json['history']['title'][0]['output'] + + with open(tmp_path / "index.html", "r") as f: + output_html = f.read() + assert "IANA — IANA-managed Reserved Domains" in output_html + diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 00000000..19ed31c0 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,21 @@ +#@enforce_types +#def download_url(url: str, timeout: int=None) -> str: +# """Download the contents of a remote url and return the text""" +# from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT +# timeout = timeout or TIMEOUT +# response = requests.get( +# url, +# headers={'User-Agent': WGET_USER_AGENT}, +# verify=CHECK_SSL_VALIDITY, +# timeout=timeout, +# ) +# if response.headers.get('Content-Type') == 'application/rss+xml': +# # Based on https://github.com/scrapy/w3lib/blob/master/w3lib/encoding.py +# _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?''' +# _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P[\w-]+)') +# _BODY_ENCODING_PATTERN = r'<\s*(\?xml\s[^>]+%s)' % (_XML_ENCODING_RE) +# _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE) +# match = _BODY_ENCODING_STR_RE.search(response.text[:1024]) +# if match: +# response.encoding = match.group('xmlcharset') +# return response.text \ No newline at end of file From 438203f4cec49e92c49976d57788be6b188f173e Mon Sep 17 00:00:00 2001 From: Cristian Date: Fri, 3 Jul 2020 12:54:21 -0500 Subject: [PATCH 128/333] test: add basic download_url test --- tests/test_util.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git 
a/tests/test_util.py b/tests/test_util.py index 19ed31c0..1497de5a 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,21 +1,5 @@ -#@enforce_types -#def download_url(url: str, timeout: int=None) -> str: -# """Download the contents of a remote url and return the text""" -# from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT -# timeout = timeout or TIMEOUT -# response = requests.get( -# url, -# headers={'User-Agent': WGET_USER_AGENT}, -# verify=CHECK_SSL_VALIDITY, -# timeout=timeout, -# ) -# if response.headers.get('Content-Type') == 'application/rss+xml': -# # Based on https://github.com/scrapy/w3lib/blob/master/w3lib/encoding.py -# _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?''' -# _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P[\w-]+)') -# _BODY_ENCODING_PATTERN = r'<\s*(\?xml\s[^>]+%s)' % (_XML_ENCODING_RE) -# _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE) -# match = _BODY_ENCODING_STR_RE.search(response.text[:1024]) -# if match: -# response.encoding = match.group('xmlcharset') -# return response.text \ No newline at end of file +from archivebox import util + +def test_download_url_downloads_content(): + text = util.download_url("https://example.com") + assert "Example Domain" in text \ No newline at end of file From 4302ae4caa4fccbe40e67084d4b3edd315e9eb1f Mon Sep 17 00:00:00 2001 From: Cristian Date: Fri, 3 Jul 2020 13:13:59 -0500 Subject: [PATCH 129/333] fix: Remove test section in setup.py --- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 12002580..9ca39608 100755 --- a/setup.py +++ b/setup.py @@ -64,10 +64,8 @@ setuptools.setup( "sphinx", "sphinx-rtd-theme", "recommonmark", + "pytest", ], - "test": [ - "pytest" - ] # 'redis': ['redis', 'django-redis'], # 'pywb': ['pywb', 'redis'], }, From ffaae510779b49b44450c58c3c631a29f065ae32 Mon Sep 17 00:00:00 2001 From: apkallum Date: Fri, 3 Jul 2020 16:52:28 -0400 Subject: [PATCH 130/333] test github actions --- .github/workflows/test.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..311236c0 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,25 @@ +name: Test workflow +on: [push] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + architecture: x64 + + - name: Install dependencies + run: | + pip install -e .[dev] + + - name: Test with pytest + run: | + pytest -s \ No newline at end of file From d5fc13b34e0f29c67b52c05a3ba098f049830e60 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 08:36:58 -0500 Subject: [PATCH 131/333] refactor: Move pytest fixtures to its own file --- tests/__init__.py | 0 tests/fixtures.py | 10 ++++++++++ tests/test_args.py | 0 tests/test_init.py | 9 +-------- 4 files changed, 11 insertions(+), 8 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/fixtures.py create mode 100644 tests/test_args.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 00000000..9bf2640a --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,10 @@ +import os +import subprocess + +import pytest + +@pytest.fixture +def process(tmp_path): + os.chdir(tmp_path) + 
process = subprocess.run(['archivebox', 'init'], capture_output=True) + return process \ No newline at end of file diff --git a/tests/test_args.py b/tests/test_args.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_init.py b/tests/test_init.py index b870a599..1b80bb1b 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -6,14 +6,7 @@ import subprocess from pathlib import Path import json -import pytest - -@pytest.fixture -def process(tmp_path): - os.chdir(tmp_path) - process = subprocess.run(['archivebox', 'init'], capture_output=True) - return process - +from .fixtures import * def test_init(tmp_path, process): assert "Initializing a new ArchiveBox collection in this folder..." in process.stdout.decode("utf-8") From 8b22a2a7dd2507e164f0780fa38d73ba36912144 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 09:10:36 -0500 Subject: [PATCH 132/333] feat: Enable --depth flag (still does nothing) --- archivebox/cli/archivebox_add.py | 13 +++++++------ tests/test_args.py | 7 +++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 272fe5cf..77a11bd0 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -45,6 +45,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional ' ~/Desktop/sites_list.csv\n' ) ) + parser.add_argument( + "--depth", + action="store", + default=0, + type=int, + help="Recursively archive all linked pages up to this many hops away" + ) command = parser.parse_args(args or ()) import_str = accept_stdin(stdin) add( @@ -63,12 +70,6 @@ if __name__ == '__main__': # TODO: Implement these # # parser.add_argument( -# '--depth', #'-d', -# type=int, -# help='Recursively archive all linked pages up to this many hops away', -# default=0, -# ) -# parser.add_argument( # '--mirror', #'-m', # action='store_true', # help='Archive an entire site (finding all linked pages below it on the same domain)', diff --git a/tests/test_args.py b/tests/test_args.py index e69de29b..b8df1941 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -0,0 +1,7 @@ +import subprocess + +from .fixtures import * + +def test_depth_flag_is_accepted(tmp_path, process): + arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8') \ No newline at end of file From 2db03245398f0a6c7fcda77a3ebc5688e3836396 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 09:49:28 -0500 Subject: [PATCH 133/333] feat: depth=0 crawls the current page only --- archivebox/cli/archivebox_add.py | 14 +++++++++++--- tests/test_args.py | 12 ++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 77a11bd0..5bbccb19 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -53,14 +53,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional help="Recursively archive all linked pages up to this many hops away" ) command = parser.parse_args(args or ()) - import_str = accept_stdin(stdin) + #import_str = accept_stdin(stdin) add( - import_str=import_str, - import_path=command.import_path, + import_str=command.import_path, + import_path=None, update_all=command.update_all, index_only=command.index_only, out_dir=pwd or OUTPUT_DIR, ) + #if command.depth == 1: + # add( + # 
import_str=None, + # import_path=command.import_path, + # update_all=command.update_all, + # index_only=command.index_only, + # out_dir=pwd or OUTPUT_DIR, + # ) if __name__ == '__main__': diff --git a/tests/test_args.py b/tests/test_args.py index b8df1941..59d43fee 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -1,7 +1,15 @@ import subprocess +import json from .fixtures import * -def test_depth_flag_is_accepted(tmp_path, process): +def test_depth_flag_is_accepted(process): arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) - assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8') \ No newline at end of file + assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8') + +def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): + arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + archived_item_path = list(tmp_path.glob('archive/**/*'))[0] + with open(archived_item_path / "index.json", "r") as f: + output_json = json.load(f) + assert output_json["base_url"] == "example.com" \ No newline at end of file From 32e790979e2f37c3615b52e0ed858603abd429a5 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 10:07:44 -0500 Subject: [PATCH 134/333] feat: Enable depth=1 functionality --- archivebox/cli/archivebox_add.py | 16 ++++++++-------- tests/test_args.py | 9 ++++++++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 5bbccb19..65335679 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -61,14 +61,14 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional index_only=command.index_only, out_dir=pwd or OUTPUT_DIR, ) - #if command.depth == 1: - # add( - # import_str=None, - # import_path=command.import_path, - # update_all=command.update_all, - # index_only=command.index_only, - # out_dir=pwd or OUTPUT_DIR, - # ) + if command.depth == 1: + add( + import_str=None, + import_path=command.import_path, + update_all=command.update_all, + index_only=command.index_only, + out_dir=pwd or OUTPUT_DIR, + ) if __name__ == '__main__': diff --git a/tests/test_args.py b/tests/test_args.py index 59d43fee..e0c6020e 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -12,4 +12,11 @@ def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): archived_item_path = list(tmp_path.glob('archive/**/*'))[0] with open(archived_item_path / "index.json", "r") as f: output_json = json.load(f) - assert output_json["base_url"] == "example.com" \ No newline at end of file + assert output_json["base_url"] == "example.com" + +def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process): + arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=1"], capture_output=True) + with open(tmp_path / "index.json", "r") as f: + archive_file = f.read() + assert "https://example.com" in archive_file + assert "https://www.iana.org/domains/example" in archive_file \ No newline at end of file From a6940092bbf37123e68e2c22418584fa9b4a2d88 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 10:25:02 -0500 Subject: [PATCH 135/333] feat: Make sure that depth can only be either 1 or 0 --- archivebox/cli/archivebox_add.py | 2 +- tests/test_args.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git 
a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 65335679..2f77f754 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -49,11 +49,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional "--depth", action="store", default=0, + choices=[0,1], type=int, help="Recursively archive all linked pages up to this many hops away" ) command = parser.parse_args(args or ()) - #import_str = accept_stdin(stdin) add( import_str=command.import_path, import_path=None, diff --git a/tests/test_args.py b/tests/test_args.py index e0c6020e..91264ef2 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -5,7 +5,13 @@ from .fixtures import * def test_depth_flag_is_accepted(process): arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) - assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8') + assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8") + +def test_depth_flag_fails_if_it_is_not_0_or_1(process): + arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=5"], capture_output=True) + assert 'invalid choice' in arg_process.stderr.decode("utf-8") + arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=-1"], capture_output=True) + assert 'invalid choice' in arg_process.stderr.decode("utf-8") def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) @@ -19,4 +25,4 @@ def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process): with open(tmp_path / "index.json", "r") as f: archive_file = f.read() assert "https://example.com" in archive_file - assert "https://www.iana.org/domains/example" in archive_file \ No newline at end of file + assert "https://www.iana.org/domains/example" in archive_file From bca6a06f6035e7a10c9726ef40e7aed4b4b7ee34 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 11:53:02 -0500 Subject: [PATCH 136/333] test: Fix test to reflect new API changes --- tests/test_init.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_init.py b/tests/test_init.py index 1b80bb1b..c5627a2f 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -25,9 +25,9 @@ def test_add_link(tmp_path, process): with open(archived_item_path / "index.json", "r") as f: output_json = json.load(f) - assert "IANA — IANA-managed Reserved Domains" == output_json['history']['title'][0]['output'] + assert "Example Domain" == output_json['history']['title'][0]['output'] with open(tmp_path / "index.html", "r") as f: output_html = f.read() - assert "IANA — IANA-managed Reserved Domains" in output_html + assert "Example Domain" in output_html From b68c13918f28246e8521080a03486dcbb7ff8537 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 12:39:36 -0500 Subject: [PATCH 137/333] feat: Disable stdin from archivebox add --- archivebox/cli/archivebox_add.py | 6 ++++-- archivebox/main.py | 3 +-- tests/test_init.py | 6 ++++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 2f77f754..c729e9fb 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -10,7 +10,7 @@ from typing import List, Optional, IO from ..main import add, docstring from ..config import OUTPUT_DIR, ONLY_NEW -from 
.logging import SmartFormatter, accept_stdin +from .logging import SmartFormatter, reject_stdin @docstring(add.__doc__) @@ -38,9 +38,10 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional type=str, default=None, help=( - 'URL or path to local file containing a list of links to import. e.g.:\n' + 'URL or path to local file containing a page or list of links to import. e.g.:\n' ' https://getpocket.com/users/USERNAME/feed/all\n' ' https://example.com/some/rss/feed.xml\n' + ' https://example.com\n' ' ~/Downloads/firefox_bookmarks_export.html\n' ' ~/Desktop/sites_list.csv\n' ) @@ -54,6 +55,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional help="Recursively archive all linked pages up to this many hops away" ) command = parser.parse_args(args or ()) + reject_stdin(__command__, stdin) add( import_str=command.import_path, import_path=None, diff --git a/archivebox/main.py b/archivebox/main.py index f1fb98ce..3f05a385 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -507,8 +507,7 @@ def add(import_str: Optional[str]=None, if (import_str and import_path) or (not import_str and not import_path): stderr( - '[X] You should pass either an import path as an argument, ' - 'or pass a list of links via stdin, but not both.\n', + '[X] You should pass an import path or a page url as an argument\n', color='red', ) raise SystemExit(2) diff --git a/tests/test_init.py b/tests/test_init.py index c5627a2f..d592b0a1 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -31,3 +31,9 @@ def test_add_link(tmp_path, process): output_html = f.read() assert "Example Domain" in output_html +def test_add_link_does_not_support_stdin(tmp_path, process): + os.chdir(tmp_path) + stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + output = stdin_process.communicate(input="example.com".encode())[0] + assert "does not accept stdin" in output.decode("utf-8") + From c1d8a74e4f2673047e31b96aa303fbd300dccc50 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 7 Jul 2020 15:46:45 -0500 Subject: [PATCH 138/333] feat: Make input sent via stdin behave the same as using args --- archivebox/cli/archivebox_add.py | 19 +++++++++++++++---- tests/test_init.py | 12 +++++++++--- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index c729e9fb..c692750b 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -10,7 +10,7 @@ from typing import List, Optional, IO from ..main import add, docstring from ..config import OUTPUT_DIR, ONLY_NEW -from .logging import SmartFormatter, reject_stdin +from .logging import SmartFormatter, accept_stdin @docstring(add.__doc__) @@ -55,9 +55,20 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional help="Recursively archive all linked pages up to this many hops away" ) command = parser.parse_args(args or ()) - reject_stdin(__command__, stdin) + import_string = accept_stdin(stdin) + if import_string and command.import_path: + stderr( + '[X] You should pass an import path or a page url as an argument or in stdin but not both\n', + color='red', + ) + raise SystemExit(2) + elif import_string: + import_path = import_string + else: + import_path = command.import_path + add( - import_str=command.import_path, + import_str=import_path, import_path=None, update_all=command.update_all, index_only=command.index_only, @@ -66,7 +77,7 
@@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional if command.depth == 1: add( import_str=None, - import_path=command.import_path, + import_path=import_path, update_all=command.update_all, index_only=command.index_only, out_dir=pwd or OUTPUT_DIR, diff --git a/tests/test_init.py b/tests/test_init.py index d592b0a1..97870459 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -31,9 +31,15 @@ def test_add_link(tmp_path, process): output_html = f.read() assert "Example Domain" in output_html -def test_add_link_does_not_support_stdin(tmp_path, process): +def test_add_link_support_stdin(tmp_path, process): os.chdir(tmp_path) stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - output = stdin_process.communicate(input="example.com".encode())[0] - assert "does not accept stdin" in output.decode("utf-8") + stdin_process.communicate(input="http://example.com".encode()) + archived_item_path = list(tmp_path.glob('archive/**/*'))[0] + + assert "index.json" in [x.name for x in archived_item_path.iterdir()] + + with open(archived_item_path / "index.json", "r") as f: + output_json = json.load(f) + assert "Example Domain" == output_json['history']['title'][0]['output'] From f12bfeb3229345b2d4cd7c1670ba050ca1111e7c Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 8 Jul 2020 08:17:47 -0500 Subject: [PATCH 139/333] refactor: Change add() to receive url and depth instead of import_str and import_path --- archivebox/cli/archivebox_add.py | 12 ++---------- archivebox/core/views.py | 8 +++----- archivebox/main.py | 25 ++++++++++--------------- 3 files changed, 15 insertions(+), 30 deletions(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index c692750b..8f491d42 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -68,20 +68,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional import_path = command.import_path add( - import_str=import_path, - import_path=None, + url=import_path, + depth=command.depth, update_all=command.update_all, index_only=command.index_only, out_dir=pwd or OUTPUT_DIR, ) - if command.depth == 1: - add( - import_str=None, - import_path=import_path, - update_all=command.update_all, - index_only=command.index_only, - out_dir=pwd or OUTPUT_DIR, - ) if __name__ == '__main__': diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 0c5efff2..a721b992 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -66,12 +66,10 @@ class AddLinks(View): if form.is_valid(): url = form.cleaned_data["url"] print(f'[+] Adding URL: {url}') - if form.cleaned_data["source"] == "url": - key = "import_str" - else: - key = "import_path" + depth = 0 if form.cleaned_data["source"] == "url" else 1 input_kwargs = { - key: url, + "url": url, + "depth": depth, "update_all": False, "out_dir": OUTPUT_DIR, } diff --git a/archivebox/main.py b/archivebox/main.py index 3f05a385..a96c4250 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -496,8 +496,8 @@ def status(out_dir: str=OUTPUT_DIR) -> None: @enforce_types -def add(import_str: Optional[str]=None, - import_path: Optional[str]=None, +def add(url: str, + depth: int=0, update_all: bool=not ONLY_NEW, index_only: bool=False, out_dir: str=OUTPUT_DIR) -> List[Link]: @@ -505,17 +505,9 @@ def add(import_str: Optional[str]=None, check_data_folder(out_dir=out_dir) - if (import_str and import_path) or (not import_str and not 
import_path): - stderr( - '[X] You should pass an import path or a page url as an argument\n', - color='red', - ) - raise SystemExit(2) - elif import_str: - import_path = save_stdin_to_sources(import_str, out_dir=out_dir) - elif import_path: - import_path = save_file_to_sources(import_path, out_dir=out_dir) - + base_path = save_stdin_to_sources(url, out_dir=out_dir) + if depth == 1: + depth_path = save_file_to_sources(url, out_dir=out_dir) check_dependencies() # Step 1: Load list of links from the existing index @@ -523,8 +515,11 @@ def add(import_str: Optional[str]=None, all_links: List[Link] = [] new_links: List[Link] = [] all_links = load_main_index(out_dir=out_dir) - if import_path: - all_links, new_links = import_new_links(all_links, import_path, out_dir=out_dir) + all_links, new_links = import_new_links(all_links, base_path, out_dir=out_dir) + if depth == 1: + all_links, new_links_depth = import_new_links(all_links, depth_path, out_dir=out_dir) + new_links = new_links + new_links_depth + # Step 2: Write updated index with deduped old and new links back to disk write_main_index(links=all_links, out_dir=out_dir) From 4ebf929606b50afcce94f2440a7ac363cc96a887 Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 8 Jul 2020 08:30:07 -0500 Subject: [PATCH 140/333] refactor: Change wording on CLI help --- archivebox/cli/archivebox_add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index 8f491d42..c4c78399 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -38,7 +38,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional type=str, default=None, help=( - 'URL or path to local file containing a page or list of links to import. e.g.:\n' + 'URL or path to local file to start the archiving process from. e.g.:\n' ' https://getpocket.com/users/USERNAME/feed/all\n' ' https://example.com/some/rss/feed.xml\n' ' https://example.com\n' From d476b130074a18e0a903743bdd3e61b5f7f397b0 Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 8 Jul 2020 14:46:31 -0500 Subject: [PATCH 141/333] fix: Add missing permission to add view (post) --- archivebox/core/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 0c5efff2..57941264 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -60,8 +60,8 @@ class AddLinks(View): return render(template_name=self.template, request=request, context=context) def post(self, request): - #url = request.POST['url'] - #if url: + if not request.user.is_authenticated and not PUBLIC_INDEX: + return redirect(f'/admin/login/?next={request.path}') form = AddLinkForm(request.POST) if form.is_valid(): url = form.cleaned_data["url"] From 09b4438c9f5ad89c9cc46bdc3c4df131420a8b37 Mon Sep 17 00:00:00 2001 From: Apkallum Date: Wed, 8 Jul 2020 17:54:01 -0400 Subject: [PATCH 142/333] fix legacy index.html --- archivebox/themes/legacy/main_index.html | 73 +----------------------- 1 file changed, 2 insertions(+), 71 deletions(-) diff --git a/archivebox/themes/legacy/main_index.html b/archivebox/themes/legacy/main_index.html index 1b366300..e246b0d9 100644 --- a/archivebox/themes/legacy/main_index.html +++ b/archivebox/themes/legacy/main_index.html @@ -4,34 +4,6 @@ Archived Sites + + + +
+

Example Domain

+

This domain is for use in illustrative examples in documents. You may use this + domain in literature without prior coordination or asking for permission.

+

+ More information... +

+
+ + diff --git a/tests/mock_server/templates/iana.org.html b/tests/mock_server/templates/iana.org.html new file mode 100644 index 00000000..c1e60a2e --- /dev/null +++ b/tests/mock_server/templates/iana.org.html @@ -0,0 +1,390 @@ + + + + IANA — IANA-managed Reserved Domains + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + +
+ + +

IANA-managed Reserved Domains

+ +

Certain domains are set aside, and nominally registered to “IANA”, for specific + policy or technical purposes.

+ +

Example domains

+ +

As described in + RFC 2606 + and + RFC 6761, + a number of domains such as + example.com + and + example.org + are maintained for documentation purposes. These domains may be used as illustrative + examples in documents without prior coordination with us. They are + not available for registration or transfer.

+ +

Test IDN top-level domains

+ +

These domains were temporarily delegated by IANA for the + IDN Evaluation + being conducted by + ICANN.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DomainDomain (A-label)LanguageScript
إختبار + + XN--KGBECHTV + + ArabicArabic
آزمایشی + + XN--HGBK6AJ7F53BBA + + PersianArabic
测试 + + XN--0ZWM56D + + ChineseHan (Simplified variant)
測試 + + XN--G6W251D + + ChineseHan (Traditional variant)
испытание + + XN--80AKHBYKNJ4F + + RussianCyrillic
परीक्षा + + XN--11B5BS3A9AJ6G + + HindiDevanagari (Nagari)
δοκιμή + + XN--JXALPDLP + + Greek, Modern (1453-)Greek
테스트 + + XN--9T4B11YI5A + + KoreanHangul (Hangŭl, Hangeul)
טעסט + + XN--DEBA0AD + + YiddishHebrew
テスト + + XN--ZCKZAH + + JapaneseKatakana
பரிட்சை + + XN--HLCJ6AYA9ESC7A + + TamilTamil
+
+ +

Policy-reserved domains

+ +

We act as both the registrant and registrar for a select number of domains + which have been reserved under policy grounds. These exclusions are + typically indicated in either technical standards (RFC documents), + or + contractual limitations.

+ +

Domains which are described as registered to IANA or ICANN on policy + grounds are not available for registration or transfer, with the exception + of + + country-name.info + domains. These domains are available for release + by the ICANN Governmental Advisory Committee Secretariat.

+ +

Other Special-Use Domains

+ +

There is additionally a + Special-Use Domain Names + registry documenting special-use domains designated by technical standards. For further information, see + Special-Use Domain Names + (RFC 6761).

+ + +
+ + + + +
+ + diff --git a/tests/test_args.py b/tests/test_args.py index 91264ef2..f52626fb 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -4,25 +4,25 @@ import json from .fixtures import * def test_depth_flag_is_accepted(process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=0"], capture_output=True) assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8") def test_depth_flag_fails_if_it_is_not_0_or_1(process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=5"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=5"], capture_output=True) assert 'invalid choice' in arg_process.stderr.decode("utf-8") - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=-1"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=-1"], capture_output=True) assert 'invalid choice' in arg_process.stderr.decode("utf-8") def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=0"], capture_output=True) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] with open(archived_item_path / "index.json", "r") as f: output_json = json.load(f) - assert output_json["base_url"] == "example.com" + assert output_json["base_url"] == "localhost:8080/static/example.com.html" def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process): - arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=1"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://localhost:8080/static/example.com.html", "--depth=1"], capture_output=True) with open(tmp_path / "index.json", "r") as f: archive_file = f.read() - assert "https://example.com" in archive_file - assert "https://www.iana.org/domains/example" in archive_file + assert "http://localhost:8080/static/example.com.html" in archive_file + assert "http://localhost:8080/static/iana.org.html" in archive_file From fe80a93a0380a11a3196f194c13bf9ae13531e4e Mon Sep 17 00:00:00 2001 From: Cristian Date: Mon, 13 Jul 2020 09:43:36 -0500 Subject: [PATCH 146/333] test: Refactor init tests to use local webserver --- tests/test_init.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_init.py b/tests/test_init.py index 97870459..24d3ed52 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -18,7 +18,7 @@ def test_update(tmp_path, process): def test_add_link(tmp_path, process): os.chdir(tmp_path) - add_process = subprocess.run(['archivebox', 'add', 'http://example.com'], capture_output=True) + add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/example.com.html'], capture_output=True) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] assert "index.json" in [x.name for x in archived_item_path.iterdir()] @@ -34,7 +34,7 @@ def test_add_link(tmp_path, process): def test_add_link_support_stdin(tmp_path, process): os.chdir(tmp_path) stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, 
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - stdin_process.communicate(input="http://example.com".encode()) + stdin_process.communicate(input="http://localhost:8080/static/example.com.html".encode()) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] assert "index.json" in [x.name for x in archived_item_path.iterdir()] From 322997e229457bf43ee2281993ccdc30c8455244 Mon Sep 17 00:00:00 2001 From: Cristian Date: Mon, 13 Jul 2020 09:44:50 -0500 Subject: [PATCH 147/333] test: Refactor util tests to use local webserver --- tests/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index 1497de5a..0a076344 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,5 @@ from archivebox import util def test_download_url_downloads_content(): - text = util.download_url("https://example.com") + text = util.download_url("http://localhost:8080/static/example.com.html") assert "Example Domain" in text \ No newline at end of file From 7cbd068c95e5a40851a40e9ed272b62c49a885e9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 13 Jul 2020 11:22:07 -0400 Subject: [PATCH 148/333] add flake8 --- .flake8 | 6 ++++++ archivebox/.flake8 | 8 +++++--- archivebox/__main__.py | 1 + archivebox/config/__init__.py | 4 +++- archivebox/core/models.py | 1 - archivebox/index/schema.py | 1 + archivebox/main.py | 4 ++-- 7 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..01af646d --- /dev/null +++ b/.flake8 @@ -0,0 +1,6 @@ +[flake8] +ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E272,E701,E731,W293,W503,W291,W391 +select = F,E9,W +max-line-length = 130 +max-complexity = 10 +exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv diff --git a/archivebox/.flake8 b/archivebox/.flake8 index 46da144b..dd6ba8e4 100644 --- a/archivebox/.flake8 +++ b/archivebox/.flake8 @@ -1,4 +1,6 @@ [flake8] -ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E127,E131,E241,E252,E266,E272,E701,E731,W293,W503 -select = F,E9 -exclude = migrations,util_scripts,node_modules,venv +ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E272,E701,E731,W293,W503,W291,W391 +select = F,E9,W +max-line-length = 130 +max-complexity = 10 +exclude = migrations,tests,node_modules,vendor,static,venv,.venv,.venv2,.docker-venv diff --git a/archivebox/__main__.py b/archivebox/__main__.py index 55e94415..8afaa27a 100755 --- a/archivebox/__main__.py +++ b/archivebox/__main__.py @@ -6,5 +6,6 @@ import sys from .cli import main + if __name__ == '__main__': main(args=sys.argv[1:], stdin=sys.stdin) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index f06b0f3d..14b66e92 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -279,6 +279,8 @@ def load_config_val(key: str, config: Optional[ConfigDict]=None, env_vars: Optional[os._Environ]=None, config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue: + """parse bool, int, and str key=value pairs from env""" + config_keys_to_check = (key, *(aliases or ())) for key in config_keys_to_check: @@ -777,7 +779,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None: stderr() stderr(f'[!] Warning: TIMEOUT is set too low! 
(currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red') stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.') - stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)') + stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)') stderr() stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:') stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration#archive-method-toggles') diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 2cbfc1b1..42929e5a 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -24,7 +24,6 @@ class Snapshot(models.Model): keys = ('url', 'timestamp', 'title', 'tags', 'updated') - def __repr__(self) -> str: title = self.title or '-' return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})' diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 637e0589..db17c269 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -98,6 +98,7 @@ class Link: updated: Optional[datetime] = None schema: str = 'Link' + def __str__(self) -> str: return f'[{self.timestamp}] {self.base_url} "{self.title}"' diff --git a/archivebox/main.py b/archivebox/main.py index a96c4250..a6e04dd3 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -641,8 +641,8 @@ def update(resume: Optional[float]=None, out_dir: str=OUTPUT_DIR) -> List[Link]: """Import any new links from subscriptions and retry any previously failed/skipped links""" - check_dependencies() check_data_folder(out_dir=out_dir) + check_dependencies() # Step 1: Load list of links from the existing index # merge in and dedupe new links from import_path @@ -990,7 +990,7 @@ def schedule(add: bool=False, if total_runs > 60 and not quiet: stderr() stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **ANSI)) - stderr(f' Congrats on being an enthusiastic internet archiver! 👌') + stderr(' Congrats on being an enthusiastic internet archiver! 
👌') stderr() stderr(' Make sure you have enough storage space available to hold all the data.') stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.') From 96b1e4a8ec1eb64c979c185b912ef6d60b25074f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 13 Jul 2020 11:22:58 -0400 Subject: [PATCH 149/333] accept local paths as valid link URLs when parsing --- archivebox/parsers/generic_txt.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py index cc3653a0..61d1973f 100644 --- a/archivebox/parsers/generic_txt.py +++ b/archivebox/parsers/generic_txt.py @@ -5,6 +5,7 @@ import re from typing import IO, Iterable from datetime import datetime +from pathlib import Path from ..index.schema import Link from ..util import ( @@ -13,14 +14,28 @@ from ..util import ( URL_REGEX ) + @enforce_types def parse_generic_txt_export(text_file: IO[str]) -> Iterable[Link]: """Parse raw links from each line in a text file""" text_file.seek(0) for line in text_file.readlines(): - urls = re.findall(URL_REGEX, line) if line.strip() else () - for url in urls: # type: ignore + if not line.strip(): + continue + + # if the line is a local file path that resolves, then we can archive it + if Path(line).exists(): + yield Link( + url=line, + timestamp=str(datetime.now().timestamp()), + title=None, + tags=None, + sources=[text_file.name], + ) + + # otherwise look for anything that looks like a URL in the line + for url in re.findall(URL_REGEX, line): yield Link( url=htmldecode(url), timestamp=str(datetime.now().timestamp()), From 16f3746712e3767ea3ab1ef0aec3cc38108b331b Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 13 Jul 2020 11:24:36 -0400 Subject: [PATCH 150/333] check source dir at the end of checking data dir --- archivebox/config/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 14b66e92..3638bade 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -838,6 +838,10 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> stderr(' archivebox init') raise SystemExit(3) + sources_dir = os.path.join(output_dir, SOURCES_DIR_NAME) + if not os.path.exists(sources_dir): + os.makedirs(sources_dir) + def setup_django(out_dir: str=None, check_db=False, config: ConfigDict=CONFIG) -> None: From dfb83b4f2728f2f0a389650836d6164a2f80e809 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 13 Jul 2020 11:24:49 -0400 Subject: [PATCH 151/333] add AttributeDict --- archivebox/util.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/archivebox/util.py b/archivebox/util.py index 8fdda389..0e7ebd31 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -230,6 +230,23 @@ def ansi_to_html(text): return COLOR_REGEX.sub(single_sub, text) +class AttributeDict(dict): + """Helper to allow accessing dict values via Example.key or Example['key']""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Recursively convert nested dicts to AttributeDicts (optional): + # for key, val in self.items(): + # if isinstance(val, dict) and type(val) is not AttributeDict: + # self[key] = AttributeDict(val) + + def __getattr__(self, attr: str) -> Any: + return dict.__getitem__(self, attr) + + def __setattr__(self, attr: str, value: Any) -> None: + return dict.__setitem__(self, attr, value) + + class 
ExtendedEncoder(pyjson.JSONEncoder): """ Extended json serializer that supports serializing several model From 354a63ccd4f021c68747c8a16d30cd54f67167b8 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 13 Jul 2020 11:25:43 -0400 Subject: [PATCH 152/333] dont dedupe snapshots in sqlite on every run --- archivebox/index/sql.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index 0ad68de0..80203980 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -26,23 +26,8 @@ def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None: from core.models import Snapshot from django.db import transaction - all_urls = {link.url: link for link in links} - all_ts = {link.timestamp: link for link in links} - with transaction.atomic(): - for snapshot in Snapshot.objects.all(): - if snapshot.timestamp in all_ts: - info = {k: v for k, v in all_urls.pop(snapshot.url)._asdict().items() if k in Snapshot.keys} - snapshot.delete() - Snapshot.objects.create(**info) - elif snapshot.url in all_urls: - info = {k: v for k, v in all_urls.pop(snapshot.url)._asdict().items() if k in Snapshot.keys} - snapshot.delete() - Snapshot.objects.create(**info) - else: - snapshot.delete() - - for url, link in all_urls.items(): + for link in links: info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys} Snapshot.objects.update_or_create(url=url, defaults=info) From d3bfa98a912fe4a360835b1e32258244ffa12262 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 13 Jul 2020 11:26:30 -0400 Subject: [PATCH 153/333] fix depth flag and tweak logging --- archivebox/cli/__init__.py | 12 +++- archivebox/cli/archivebox_add.py | 24 +++---- archivebox/cli/logging.py | 61 ++++++++++++------ archivebox/extractors/__init__.py | 27 +++++++- archivebox/index/__init__.py | 29 +++++---- archivebox/main.py | 102 ++++++++++++------------------ archivebox/parsers/__init__.py | 28 ++------ 7 files changed, 156 insertions(+), 127 deletions(-) diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index 087f11b5..b7575c4a 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -106,8 +106,18 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided, if command.help or command.subcommand is None: command.subcommand = 'help' - if command.version: + elif command.version: command.subcommand = 'version' + + if command.subcommand not in ('help', 'version', 'status'): + from ..cli.logging import log_cli_command + + log_cli_command( + subcommand=command.subcommand, + subcommand_args=command.subcommand_args, + stdin=stdin, + pwd=pwd or OUTPUT_DIR + ) run_subcommand( subcommand=command.subcommand, diff --git a/archivebox/cli/archivebox_add.py b/archivebox/cli/archivebox_add.py index c4c78399..55832346 100644 --- a/archivebox/cli/archivebox_add.py +++ b/archivebox/cli/archivebox_add.py @@ -10,7 +10,7 @@ from typing import List, Optional, IO from ..main import add, docstring from ..config import OUTPUT_DIR, ONLY_NEW -from .logging import SmartFormatter, accept_stdin +from .logging import SmartFormatter, accept_stdin, stderr @docstring(add.__doc__) @@ -33,12 +33,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional help="Add the links to the main index without archiving them", ) parser.add_argument( - 'import_path', - nargs='?', + 'urls', + nargs='*', type=str, default=None, help=( - 'URL or path to local file to start the archiving process 
from. e.g.:\n' + 'URLs or paths to archive e.g.:\n' ' https://getpocket.com/users/USERNAME/feed/all\n' ' https://example.com/some/rss/feed.xml\n' ' https://example.com\n' @@ -50,25 +50,21 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional "--depth", action="store", default=0, - choices=[0,1], + choices=[0, 1], type=int, help="Recursively archive all linked pages up to this many hops away" ) command = parser.parse_args(args or ()) - import_string = accept_stdin(stdin) - if import_string and command.import_path: + urls = command.urls + stdin_urls = accept_stdin(stdin) + if (stdin_urls and urls) or (not stdin and not urls): stderr( - '[X] You should pass an import path or a page url as an argument or in stdin but not both\n', + '[X] You must pass URLs/paths to add via stdin or CLI arguments.\n', color='red', ) raise SystemExit(2) - elif import_string: - import_path = import_string - else: - import_path = command.import_path - add( - url=import_path, + urls=stdin_urls or urls, depth=command.depth, update_all=command.update_all, index_only=command.index_only, diff --git a/archivebox/cli/logging.py b/archivebox/cli/logging.py index 6de78d8f..a12c4e98 100644 --- a/archivebox/cli/logging.py +++ b/archivebox/cli/logging.py @@ -5,10 +5,12 @@ import os import sys import time import argparse +import logging +import signal +from multiprocessing import Process from datetime import datetime from dataclasses import dataclass -from multiprocessing import Process from typing import Optional, List, Dict, Union, IO from ..index.schema import Link, ArchiveResult @@ -23,11 +25,11 @@ from ..config import ( SHOW_PROGRESS, TERM_WIDTH, OUTPUT_DIR, + SOURCES_DIR_NAME, HTML_INDEX_FILENAME, stderr, ) - @dataclass class RuntimeStats: """mutable stats counter for logging archiving timing info to CLI output""" @@ -98,9 +100,9 @@ class TimedProgress: if SHOW_PROGRESS: # terminate if we havent already terminated - if self.p is not None: - self.p.terminate() - self.p = None + self.p.terminate() + self.p.join() + self.p.close() # clear whole terminal line try: @@ -145,28 +147,51 @@ def progress_bar(seconds: int, prefix: str='') -> None: seconds, )) sys.stdout.flush() - except KeyboardInterrupt: + except (KeyboardInterrupt, BrokenPipeError): print() pass +def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str): + from ..config import VERSION, ANSI + cmd = ' '.join(('archivebox', subcommand, *subcommand_args)) + stdin_hint = ' < /dev/stdin' if not stdin.isatty() else '' + print('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{stdin_hint}{reset}'.format( + now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + VERSION=VERSION, + cmd=cmd, + stdin_hint=stdin_hint, + **ANSI, + )) + print('{black} > {pwd}{reset}'.format(pwd=pwd, **ANSI)) + print() + ### Parsing Stage -def log_parsing_started(source_file: str): - start_ts = datetime.now() - _LAST_RUN_STATS.parse_start_ts = start_ts - print('\n{green}[*] [{}] Parsing new links from output/sources/{}...{reset}'.format( - start_ts.strftime('%Y-%m-%d %H:%M:%S'), - source_file.rsplit('/', 1)[-1], + +def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: bool): + _LAST_RUN_STATS.parse_start_ts = datetime.now() + print('{green}[+] [{}] Adding {} links to index (crawl depth={}){}...{reset}'.format( + _LAST_RUN_STATS.parse_start_ts.strftime('%Y-%m-%d %H:%M:%S'), + len(urls) if isinstance(urls, list) else len(urls.split('\n')), + depth, + ' (index only)' if index_only else '', **ANSI, )) +def 
log_source_saved(source_file: str): + print(' > Saved verbatim input to {}/{}'.format(SOURCES_DIR_NAME, source_file.rsplit('/', 1)[-1])) -def log_parsing_finished(num_parsed: int, num_new_links: int, parser_name: str): - end_ts = datetime.now() - _LAST_RUN_STATS.parse_end_ts = end_ts - print(' > Parsed {} links as {} ({} new links added)'.format(num_parsed, parser_name, num_new_links)) +def log_parsing_finished(num_parsed: int, parser_name: str): + _LAST_RUN_STATS.parse_end_ts = datetime.now() + print(' > Parsed {} URLs from input ({})'.format(num_parsed, parser_name)) +def log_deduping_finished(num_new_links: int): + print(' > Found {} new URLs not already in index'.format(num_new_links)) + + +def log_crawl_started(new_links): + print('{lightblue}[*] Starting crawl of {} sites 1 hop out from starting point{reset}'.format(len(new_links), **ANSI)) ### Indexing Stage @@ -174,7 +199,7 @@ def log_indexing_process_started(num_links: int): start_ts = datetime.now() _LAST_RUN_STATS.index_start_ts = start_ts print() - print('{green}[*] [{}] Writing {} links to main index...{reset}'.format( + print('{black}[*] [{}] Writing {} links to main index...{reset}'.format( start_ts.strftime('%Y-%m-%d %H:%M:%S'), num_links, **ANSI, @@ -209,7 +234,7 @@ def log_archiving_started(num_links: int, resume: Optional[float]=None): **ANSI, )) else: - print('{green}[▶] [{}] Updating content for {} matching pages in archive...{reset}'.format( + print('{green}[▶] [{}] Collecting content for {} Snapshots in archive...{reset}'.format( start_ts.strftime('%Y-%m-%d %H:%M:%S'), num_links, **ANSI, diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index c6a4f33c..c08e7c0c 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.extractors' import os -from typing import Optional +from typing import Optional, List from datetime import datetime from ..index.schema import Link @@ -13,6 +13,9 @@ from ..index import ( ) from ..util import enforce_types from ..cli.logging import ( + log_archiving_started, + log_archiving_paused, + log_archiving_finished, log_link_archiving_started, log_link_archiving_finished, log_archive_method_started, @@ -103,3 +106,25 @@ def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None) raise return link + + +@enforce_types +def archive_links(links: List[Link], out_dir: Optional[str]=None) -> List[Link]: + if not links: + return [] + + log_archiving_started(len(links)) + idx: int = 0 + link: Link = links[0] + try: + for idx, link in enumerate(links): + archive_link(link, out_dir=link.link_dir) + except KeyboardInterrupt: + log_archiving_paused(len(links), idx, link.timestamp) + raise SystemExit(0) + except BaseException: + print() + raise + + log_archiving_finished(len(links)) + return links diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index e82cfefa..7ea473d7 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -33,8 +33,8 @@ from ..cli.logging import ( log_indexing_process_finished, log_indexing_started, log_indexing_finished, - log_parsing_started, log_parsing_finished, + log_deduping_finished, ) from .schema import Link, ArchiveResult @@ -268,20 +268,31 @@ def load_main_index_meta(out_dir: str=OUTPUT_DIR) -> Optional[dict]: return None + @enforce_types -def import_new_links(existing_links: List[Link], - import_path: str, - out_dir: str=OUTPUT_DIR) -> Tuple[List[Link], List[Link]]: +def parse_links_from_source(source_path: 
diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index e82cfefa..7ea473d7 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -33,8 +33,8 @@ from ..cli.logging import (
     log_indexing_process_finished,
     log_indexing_started,
     log_indexing_finished,
-    log_parsing_started,
     log_parsing_finished,
+    log_deduping_finished,
 )
 from .schema import Link, ArchiveResult
@@ -268,20 +268,31 @@ def load_main_index_meta(out_dir: str=OUTPUT_DIR) -> Optional[dict]:
     return None
 
+
 @enforce_types
-def import_new_links(existing_links: List[Link],
-                     import_path: str,
-                     out_dir: str=OUTPUT_DIR) -> Tuple[List[Link], List[Link]]:
+def parse_links_from_source(source_path: str) -> Tuple[List[Link], List[Link]]:
 
     from ..parsers import parse_links
 
     new_links: List[Link] = []
 
     # parse and validate the import file
-    log_parsing_started(import_path)
-    raw_links, parser_name = parse_links(import_path)
+    raw_links, parser_name = parse_links(source_path)
     new_links = validate_links(raw_links)
+    if parser_name:
+        num_parsed = len(raw_links)
+        log_parsing_finished(num_parsed, parser_name)
+
+    return new_links
+
+
+@enforce_types
+def dedupe_links(existing_links: List[Link],
+                 new_links: List[Link]) -> Tuple[List[Link], List[Link]]:
+
+    from ..parsers import parse_links
+
     # merge existing links in out_dir and new links
     all_links = validate_links(existing_links + new_links)
     all_link_urls = {link.url for link in existing_links}
@@ -290,11 +301,7 @@ def import_new_links(existing_links: List[Link],
         link for link in new_links
         if link.url not in all_link_urls
     ]
-
-    if parser_name:
-        num_parsed = len(raw_links)
-        num_new_links = len(all_links) - len(existing_links)
-        log_parsing_finished(num_parsed, num_new_links, parser_name)
+    log_deduping_finished(len(new_links))
 
     return all_links, new_links
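import_new_links() is split above into parse_links_from_source() and dedupe_links(), so parsing and deduplication can be invoked separately (for example once per crawled page when depth=1). A simplified sketch of how they compose, using the names from the diff (hypothetical caller code):

    existing_links = load_main_index(out_dir=out_dir)
    new_links = parse_links_from_source(source_path)                 # parse + validate only
    all_links, new_links = dedupe_links(existing_links, new_links)   # drop URLs already in the index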
diff --git a/archivebox/main.py b/archivebox/main.py
index a6e04dd3..54b71acc 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -4,8 +4,7 @@ import os
 import sys
 import shutil
 
-from typing import Dict, List, Optional, Iterable, IO
-
+from typing import Dict, List, Optional, Iterable, IO, Union
 from crontab import CronTab, CronSlices
 
 from .cli import (
@@ -17,16 +16,17 @@ from .cli import (
     archive_cmds,
 )
 from .parsers import (
-    save_stdin_to_sources,
-    save_file_to_sources,
+    save_text_as_source,
+    save_file_as_source,
 )
 from .index.schema import Link
-from .util import enforce_types, docstring
+from .util import enforce_types, docstring  # type: ignore
 from .system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
 from .index import (
     links_after_timestamp,
     load_main_index,
-    import_new_links,
+    parse_links_from_source,
+    dedupe_links,
     write_main_index,
     link_matches_filter,
     get_indexed_folders,
@@ -51,7 +51,7 @@ from .index.sql import (
     apply_migrations,
 )
 from .index.html import parse_html_main_index
-from .extractors import archive_link
+from .extractors import archive_links
 from .config import (
     stderr,
     ConfigDict,
@@ -91,9 +91,8 @@ from .config import (
 from .cli.logging import (
     TERM_WIDTH,
     TimedProgress,
-    log_archiving_started,
-    log_archiving_paused,
-    log_archiving_finished,
+    log_importing_started,
+    log_crawl_started,
     log_removal_started,
     log_removal_finished,
     log_list_started,
@@ -496,59 +495,55 @@ def status(out_dir: str=OUTPUT_DIR) -> None:
 
 
 @enforce_types
-def add(url: str,
+def add(urls: Union[str, List[str]],
         depth: int=0,
         update_all: bool=not ONLY_NEW,
         index_only: bool=False,
         out_dir: str=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
 
+    assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
+
+    # Load list of links from the existing index
     check_data_folder(out_dir=out_dir)
-
-    base_path = save_stdin_to_sources(url, out_dir=out_dir)
-    if depth == 1:
-        depth_path = save_file_to_sources(url, out_dir=out_dir)
     check_dependencies()
-
-    # Step 1: Load list of links from the existing index
-    #         merge in and dedupe new links from import_path
     all_links: List[Link] = []
     new_links: List[Link] = []
     all_links = load_main_index(out_dir=out_dir)
-    all_links, new_links = import_new_links(all_links, base_path, out_dir=out_dir)
-    if depth == 1:
-        all_links, new_links_depth = import_new_links(all_links, depth_path, out_dir=out_dir)
-        new_links = new_links + new_links_depth
+
+    log_importing_started(urls=urls, depth=depth, index_only=index_only)
+    if isinstance(urls, str):
+        # save verbatim stdin to sources
+        write_ahead_log = save_text_as_source(urls, filename='{ts}-import.txt', out_dir=out_dir)
+    elif isinstance(urls, list):
+        # save verbatim args to sources
+        write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
+
+    new_links += parse_links_from_source(write_ahead_log)
+    all_links, new_links = dedupe_links(all_links, new_links)
+    write_main_index(links=all_links, out_dir=out_dir, finished=not new_links)
 
-    # Step 2: Write updated index with deduped old and new links back to disk
-    write_main_index(links=all_links, out_dir=out_dir)
+    # If we're going one level deeper, download each link and look for more links
+    if new_links and depth == 1:
+        log_crawl_started(new_links)
+        for new_link in new_links:
+            downloaded_file = save_file_as_source(new_link.url, filename='{ts}-crawl-{basename}.txt', out_dir=out_dir)
+            new_links += parse_links_from_source(downloaded_file)
+            all_links, new_links = dedupe_links(all_links, new_links)
+            write_main_index(links=all_links, out_dir=out_dir, finished=not new_links)
 
     if index_only:
         return all_links
-
-    # Step 3: Run the archive methods for each link
-    links = all_links if update_all else new_links
-    log_archiving_started(len(links))
-    idx: int = 0
-    link: Link = None  # type: ignore
-    try:
-        for idx, link in enumerate(links):
-            archive_link(link, out_dir=link.link_dir)
-    except KeyboardInterrupt:
-        log_archiving_paused(len(links), idx, link.timestamp if link else '0')
-        raise SystemExit(0)
-
-    except:
-        print()
-        raise
-
-    log_archiving_finished(len(links))
+    # Run the archive methods for each link
+    to_archive = all_links if update_all else new_links
+    archive_links(to_archive, out_dir=out_dir)
 
     # Step 4: Re-write links index with updated titles, icons, and resources
-    all_links = load_main_index(out_dir=out_dir)
-    write_main_index(links=list(all_links), out_dir=out_dir, finished=True)
+    if to_archive:
+        all_links = load_main_index(out_dir=out_dir)
+        write_main_index(links=list(all_links), out_dir=out_dir, finished=True)
     return all_links
 
 
 @enforce_types
@@ -671,23 +666,8 @@ def update(resume: Optional[float]=None,
         return all_links
 
     # Step 3: Run the archive methods for each link
-    links = new_links if only_new else all_links
-    log_archiving_started(len(links), resume)
-    idx: int = 0
-    link: Link = None  # type: ignore
-    try:
-        for idx, link in enumerate(links_after_timestamp(links, resume)):
-            archive_link(link, overwrite=overwrite, out_dir=link.link_dir)
-
-    except KeyboardInterrupt:
-        log_archiving_paused(len(links), idx, link.timestamp if link else '0')
-        raise SystemExit(0)
-
-    except:
-        print()
-        raise
-
-    log_archiving_finished(len(links))
+    to_archive = new_links if only_new else all_links
+    archive_links(to_archive, out_dir=out_dir)
 
     # Step 4: Re-write links index with updated titles, icons, and resources
     all_links = load_main_index(out_dir=out_dir)
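After this change add() accepts either a newline-separated string of URLs or a list of URLs, plus an optional one-hop crawl depth. Hypothetical calls showing the new signature (not taken from the patch):

    from archivebox.main import add

    add('https://example.com')                                    # single URL, depth=0
    add(['https://example.com', 'https://example.org'], depth=1)  # also crawl pages one hop out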
diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 479d4e2c..eabaece2 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -29,7 +29,7 @@ from ..util import (
     URL_REGEX,
 )
 from ..index.schema import Link
-from ..cli.logging import pretty_path, TimedProgress
+from ..cli.logging import pretty_path, TimedProgress, log_source_saved
 from .pocket_html import parse_pocket_html_export
 from .pinboard_rss import parse_pinboard_rss_export
 from .shaarli_rss import parse_shaarli_rss_export
@@ -83,36 +83,22 @@ def parse_links(source_file: str) -> Tuple[List[Link], str]:
 
 
 @enforce_types
-def save_stdin_to_sources(raw_text: str, out_dir: str=OUTPUT_DIR) -> str:
-    check_data_folder(out_dir=out_dir)
-
-    sources_dir = os.path.join(out_dir, SOURCES_DIR_NAME)
-    if not os.path.exists(sources_dir):
-        os.makedirs(sources_dir)
-
+def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: str=OUTPUT_DIR) -> str:
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
-
-    source_path = os.path.join(sources_dir, '{}-{}.txt'.format('stdin', ts))
+    source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(ts=ts))
     atomic_write(source_path, raw_text)
+    log_source_saved(source_file=source_path)
     return source_path
 
 
 @enforce_types
-def save_file_to_sources(path: str, timeout: int=TIMEOUT, out_dir: str=OUTPUT_DIR) -> str:
+def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: str=OUTPUT_DIR) -> str:
     """download a given url's content into output/sources/domain-<timestamp>.txt"""
-    check_data_folder(out_dir=out_dir)
-
-    sources_dir = os.path.join(out_dir, SOURCES_DIR_NAME)
-    if not os.path.exists(sources_dir):
-        os.makedirs(sources_dir)
-
     ts = str(datetime.now().timestamp()).split('.', 1)[0]
-
-    source_path = os.path.join(sources_dir, '{}-{}.txt'.format(basename(path), ts))
+    source_path = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME, filename.format(basename=basename(path), ts=ts))
 
     if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         # Source is a URL that needs to be downloaded
-        source_path = os.path.join(sources_dir, '{}-{}.txt'.format(domain(path), ts))
         print('{}[*] [{}] Downloading {}{}'.format(
             ANSI['green'],
             datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
@@ -140,7 +126,7 @@ def save_file_to_sources(path: str, timeout: int=TIMEOUT, out_dir: str=OUTPUT_DI
 
     atomic_write(source_path, raw_source_text)
 
-    print(' > {}'.format(pretty_path(source_path)))
+    log_source_saved(source_file=source_path)
 
     return source_path
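The renamed helpers above now take a filename template, so each file written to output/sources/ records what produced it. A sketch of how the templates expand, with made-up timestamp and URL values:

    save_text_as_source(text, filename='{ts}-import.txt')
    # -> output/sources/1594650000-import.txt

    save_file_as_source('https://example.com/feed.xml', filename='{ts}-crawl-{basename}.txt')
    # -> output/sources/1594650000-crawl-feed.xml.txt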
From 4c4b1e6a4bde5edb9e11942245a21437e73fe6df Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 13 Jul 2020 11:33:35 -0400
Subject: [PATCH 154/333] fix link creation

---
 archivebox/index/sql.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py
index 80203980..b120738c 100644
--- a/archivebox/index/sql.py
+++ b/archivebox/index/sql.py
@@ -29,7 +29,7 @@ def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
     with transaction.atomic():
         for link in links:
             info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
-            Snapshot.objects.update_or_create(url=url, defaults=info)
+            Snapshot.objects.update_or_create(url=link.url, defaults=info)
 
 @enforce_types
 def write_sql_link_details(link: Link, out_dir: str=OUTPUT_DIR) -> None:

From d159e674e1fb7005f1732f78adbd5cf5aa49436a Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 13 Jul 2020 11:41:18 -0400
Subject: [PATCH 155/333] write stderr instead of stdout for version info

---
 archivebox/cli/logging.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/archivebox/cli/logging.py b/archivebox/cli/logging.py
index a12c4e98..d11ffd9e 100644
--- a/archivebox/cli/logging.py
+++ b/archivebox/cli/logging.py
@@ -156,15 +156,15 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional
     from ..config import VERSION, ANSI
     cmd = ' '.join(('archivebox', subcommand, *subcommand_args))
     stdin_hint = ' < /dev/stdin' if not stdin.isatty() else ''
-    print('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{stdin_hint}{reset}'.format(
+    stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{stdin_hint}{reset}'.format(
         now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
         VERSION=VERSION,
         cmd=cmd,
         stdin_hint=stdin_hint,
         **ANSI,
     ))
-    print('{black} > {pwd}{reset}'.format(pwd=pwd, **ANSI))
-    print()
+    stderr('{black} > {pwd}{reset}'.format(pwd=pwd, **ANSI))
+    stderr()
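Routing the version banner to stderr keeps stdout reserved for the command's actual output, so piping archivebox into another program is not polluted by log chatter. A minimal sketch of the pattern, assuming the stderr() helper used above is roughly equivalent to printing to sys.stderr:

    import sys

    def stderr(*args):
        # human-facing status lines go to stderr; machine-readable results stay on stdout
        print(*args, file=sys.stderr)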
From b4ce20cbe5b3d41676a43a337e0e12a869e53aac Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 13 Jul 2020 11:41:27 -0400
Subject: [PATCH 156/333] write link details json before and after archiving

---
 archivebox/extractors/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index c08e7c0c..c9685a80 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -56,6 +56,7 @@ def archive_link(link: Link, overwrite: bool=False, out_dir: Optional[str]=None)
         os.makedirs(out_dir)
 
     link = load_link_details(link, out_dir=out_dir)
+    write_link_details(link, out_dir=link.link_dir)
     log_link_archiving_started(link, out_dir, is_new)
     link = link.overwrite(updated=datetime.now())
     stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}

From 215d5eae324d9da3ffb758bf5e47f7b31d942e9a Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 13 Jul 2020 11:41:37 -0400
Subject: [PATCH 157/333] normal git clone instead of mirror

---
 archivebox/extractors/git.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py
index 1534ce34..dcb1df3c 100644
--- a/archivebox/extractors/git.py
+++ b/archivebox/extractors/git.py
@@ -56,7 +56,7 @@ def save_git(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> A
     cmd = [
         GIT_BINARY,
         'clone',
-        '--mirror',
+        # '--mirror',
         '--recursive',
         *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
         without_query(without_fragment(link.url)),

From ae208435c9c979720fad8f7782d6c74247b6c069 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 13 Jul 2020 12:21:37 -0400
Subject: [PATCH 158/333] fix the add links form

---
 archivebox/cli/logging.py                | 2 +-
 archivebox/core/admin.py                 | 2 +-
 archivebox/core/forms.py                 | 7 +++++--
 archivebox/core/views.py                 | 4 ++--
 archivebox/extractors/git.py             | 1 -
 archivebox/themes/default/add_links.html | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/archivebox/cli/logging.py b/archivebox/cli/logging.py
index d11ffd9e..f002e922 100644
--- a/archivebox/cli/logging.py
+++ b/archivebox/cli/logging.py
@@ -191,7 +191,7 @@ def log_deduping_finished(num_new_links: int):
 
 
 def log_crawl_started(new_links):
-    print('{lightblue}[*] Starting crawl of {} sites 1 hop out from starting point{reset}'.format(len(new_links), **ANSI))
+    print('{lightred}[*] Starting crawl of {} sites 1 hop out from starting point{reset}'.format(len(new_links), **ANSI))
 
 
 ### Indexing Stage

diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index 7942c6c2..1b05c580 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -49,7 +49,7 @@ class SnapshotAdmin(admin.ModelAdmin):
             '📼 ' '📦 ' '🏛 ' '' '{}',
             obj.archive_path, canon['wget_path'] or '',
             obj.archive_path, canon['pdf_path'],

diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py
index 5f67e2c6..8bf0cbd0 100644
--- a/archivebox/core/forms.py
+++ b/archivebox/core/forms.py
@@ -1,7 +1,10 @@
 from django import forms
 
-CHOICES = (('url', 'URL'), ('feed', 'Feed'))
+CHOICES = (
+    ('0', 'depth=0 (archive just this url)'),
+    ('1', 'depth=1 (archive this url and all sites one link away)'),
+)
 
 class AddLinkForm(forms.Form):
     url = forms.URLField()
-    source = forms.ChoiceField(choices=CHOICES, widget=forms.RadioSelect, initial='url')
+    depth = forms.ChoiceField(choices=CHOICES, widget=forms.RadioSelect, initial='0')

diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index d9c51700..5fb43119 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -66,9 +66,9 @@ class AddLinks(View):
         if form.is_valid():
             url = form.cleaned_data["url"]
             print(f'[+] Adding URL: {url}')
-            depth = 0 if form.cleaned_data["source"] == "url" else 1
+            depth = 0 if form.cleaned_data["depth"] == "0" else 1
             input_kwargs = {
-                "url": url,
+                "urls": url,
                 "depth": depth,
                 "update_all": False,
                 "out_dir": OUTPUT_DIR,

diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py
index dcb1df3c..c8a5eeaf 100644
--- a/archivebox/extractors/git.py
+++ b/archivebox/extractors/git.py
@@ -56,7 +56,6 @@ def save_git(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT) -> A
     cmd = [
         GIT_BINARY,
         'clone',
-        # '--mirror',
         '--recursive',
         *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
         without_query(without_fragment(link.url)),

diff --git a/archivebox/themes/default/add_links.html b/archivebox/themes/default/add_links.html
index 7143c576..6e35f38c 100644
--- a/archivebox/themes/default/add_links.html
+++ b/archivebox/themes/default/add_links.html
@@ -212,7 +212,7 @@
-            Go back to Snapshot list
+            Go back to Main Index

From a79dd4685a2bea2f6d9b94a79215d28eb72ba722 Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Mon, 13 Jul 2020 12:21:52 -0400
Subject: [PATCH 159/333] make snapshots unique again

---
 .../migrations/0004_auto_20200713_1552.py | 19 +++++++++++++++++++
 archivebox/core/models.py                 |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 archivebox/core/migrations/0004_auto_20200713_1552.py

diff --git a/archivebox/core/migrations/0004_auto_20200713_1552.py b/archivebox/core/migrations/0004_auto_20200713_1552.py
new file mode 100644
index 00000000..69836623
--- /dev/null
+++ b/archivebox/core/migrations/0004_auto_20200713_1552.py
@@ -0,0 +1,19 @@
+# Generated by Django 3.0.7 on 2020-07-13 15:52
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0003_auto_20200630_1034'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='snapshot',
+            name='timestamp',
+            field=models.CharField(db_index=True, default=None, max_length=32, unique=True),
+            preserve_default=False,
+        ),
+    ]

diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 42929e5a..7ac9427b 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -13,7 +13,7 @@ class Snapshot(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
 
     url = models.URLField(unique=True)
-    timestamp = models.CharField(max_length=32, null=True, default=None, db_index=True)
+    timestamp = models.CharField(max_length=32, unique=True, db_index=True)
     title = models.CharField(max_length=128, null=True, default=None, db_index=True)
     tags = models.CharField(max_length=256, null=True, default=None, db_index=True)
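Because the migration above adds unique=True to an existing column, it will fail on a database that already contains duplicate Snapshot timestamps. A hypothetical pre-flight check (not part of the patch) for spotting collisions before migrating:

    from django.db.models import Count
    from core.models import Snapshot

    # timestamps that appear more than once would violate the new unique constraint
    dupes = Snapshot.objects.values('timestamp').annotate(n=Count('id')).filter(n__gt=1)
    for row in dupes:
        print(row['timestamp'], row['n'])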
From 5e2bf73f047f2a647f1497a98aedc4cf76f12832 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Mon, 13 Jul 2020 14:48:25 -0500
Subject: [PATCH 160/333] fix: Bugs related to add() refactor

---
 archivebox/index/__init__.py |  6 +++++-
 archivebox/main.py           | 10 ++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index 7ea473d7..cd50a185 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -292,7 +292,6 @@ def dedupe_links(existing_links: List[Link],
                  new_links: List[Link]) -> Tuple[List[Link], List[Link]]:
 
     from ..parsers import parse_links
-
     # merge existing links in out_dir and new links
     all_links = validate_links(existing_links + new_links)
     all_link_urls = {link.url for link in existing_links}
@@ -301,6 +300,11 @@ def dedupe_links(existing_links: List[Link],
         link for link in new_links
         if link.url not in all_link_urls
     ]
+
+    all_links_deduped = {link.url: link for link in all_links}
+    for i in range(len(new_links)):
+        if new_links[i].url in all_links_deduped.keys():
+            new_links[i] = all_links_deduped[new_links[i].url]
     log_deduping_finished(len(new_links))
 
     return all_links, new_links

diff --git a/archivebox/main.py b/archivebox/main.py
index 54b71acc..999e4650 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -520,18 +520,16 @@ def add(urls: Union[str, List[str]],
         write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
 
     new_links += parse_links_from_source(write_ahead_log)
-    all_links, new_links = dedupe_links(all_links, new_links)
-    write_main_index(links=all_links, out_dir=out_dir, finished=not new_links)
-
     # If we're going one level deeper, download each link and look for more links
+    new_links_depth = []
     if new_links and depth == 1:
         log_crawl_started(new_links)
         for new_link in new_links:
             downloaded_file = save_file_as_source(new_link.url, filename='{ts}-crawl-{basename}.txt', out_dir=out_dir)
-            new_links += parse_links_from_source(downloaded_file)
-            all_links, new_links = dedupe_links(all_links, new_links)
-            write_main_index(links=all_links, out_dir=out_dir, finished=not new_links)
+            new_links_depth += parse_links_from_source(downloaded_file)
 
+    all_links, new_links = dedupe_links(all_links, new_links + new_links_depth)
+    write_main_index(links=all_links, out_dir=out_dir, finished=not new_links)
     if index_only:
         return all_links

From 98dda688970c8993a7a79847ea74ff5e30964b4f Mon Sep 17 00:00:00 2001
From: apkallum
Date: Tue, 14 Jul 2020 10:26:33 -0400
Subject: [PATCH 161/333] fix: timestamp comparison in to_json function

---
 archivebox/index/schema.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index db17c269..eb6ef894 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -190,7 +190,10 @@ class Link:
             for key, val in json_info.items()
             if key in cls.field_names()
         }
-        info['updated'] = parse_date(info.get('updated'))
+        try:
+            info['updated'] = int(parse_date(info.get('updated')))  # Cast to int which comes with rounding down
+        except (ValueError, TypeError):
+            info['updated'] = None
         info['sources'] = info.get('sources') or []
 
         json_history = info.get('history') or {}
From f845224d6f60e59ee53981885c400eb83a03fb12 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Thu, 16 Jul 2020 09:20:33 -0500
Subject: [PATCH 162/333] fix: htmlencode titles before rendering the static html index and detail

---
 archivebox/index/html.py                |   4 +-
 .../templates/title_with_html.com.html  | 699 ++++++++++++++++++
 tests/test_title.py                     |  14 +
 3 files changed, 715 insertions(+), 2 deletions(-)
 create mode 100644 tests/mock_server/templates/title_with_html.com.html
 create mode 100644 tests/test_title.py

diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index 60d41049..e21ae576 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -90,7 +90,7 @@ def main_index_row_template(link: Link) -> str:
         **link._asdict(extended=True),
 
         # before pages are finished archiving, show loading msg instead of title
-        'title': (
+        'title': htmlencode(
             link.title
             or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
         ),
@@ -129,7 +129,7 @@ def link_details_template(link: Link) -> str:
     return render_legacy_template(LINK_DETAILS_TEMPLATE, {
         **link_info,
         **link_info['canonical'],
-        'title': (
+        'title': htmlencode(
             link.title
             or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
         ),
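Escaping the title before it is interpolated into the static index and detail templates keeps markup from a page's title tag from being rendered (or executed) in the archive UI. A rough illustration, assuming htmlencode() behaves like Python's html.escape:

    from html import escape as htmlencode

    title = 'It All Starts with a Humble <textarea>'
    print(htmlencode(title))
    # -> It All Starts with a Humble &lt;textarea&gt;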
diff --git a/tests/mock_server/templates/title_with_html.com.html b/tests/mock_server/templates/title_with_html.com.html
new file mode 100644
index 00000000..e84dcaa0
--- /dev/null
+++ b/tests/mock_server/templates/title_with_html.com.html
@@ -0,0 +1,699 @@
+ [699-line HTML fixture: a saved copy of the 24 ways article "It All Starts with a Humble <textarea>" — page navigation, article body, inline service-worker JavaScript, comments, and footer — served by the mock server as a page whose title contains HTML markup; full markup omitted here]
diff --git a/tests/test_title.py b/tests/test_title.py
new file mode 100644
index 00000000..b5090844
--- /dev/null
+++ b/tests/test_title.py
@@ -0,0 +1,14 @@
+from .fixtures import *
+
+def test_title_is_htmlencoded_in_index_html(tmp_path, process):
+    """
+    https://github.com/pirate/ArchiveBox/issues/330
+    Unencoded content should not be rendered as it facilitates xss injections
+    and breaks the layout.
+    """
+    add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/title_with_html.com.html'], capture_output=True)
+
+    with open(tmp_path / "index.html", "r") as f:
+        output_html = f.read()
+
+    assert "