Merge pull request #721 from ArchiveBox/dev

commit 6fd7d1f0bd
Author: Nick Sweeting (committed via GitHub)
Date: 2023-11-08 00:15:50 -08:00
Signed with GPG Key ID 4AEE18F83AFDEB23 (no known key found for this signature in database)
91 changed files with 5619 additions and 2659 deletions


@ -5,16 +5,21 @@ __pycache__/
.mypy_cache/
.pytest_cache/
.github/
.git/
.pdm-build/
.pdm-python/
.eggs/
venv/
.venv/
.docker-venv/
node_modules/
build/
dist/
pip_dist/
!pip_dist/archivebox.egg-info/requires.txt
brew_dist/
deb_dist/
pip_dist/
assets/
data/


@ -1,6 +0,0 @@
[flake8]
ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E272,E701,E731,W293,W503,W291,W391
select = F,E9,W
max-line-length = 130
max-complexity = 10
exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv


@ -9,10 +9,10 @@ env:
jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
@ -38,17 +38,17 @@ jobs:
run: |
cd deb_dist/
sudo apt-get install ./archivebox*.deb
cd ..
python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > ./requirements.txt
python3 -m pip install -r ./requirements.txt
- name: Check ArchiveBox version
run: |
# must create dir needed for snaps to run as non-root on github actions
sudo mkdir -p /run/user/1001 && sudo chmod -R 777 /run/user/1001
mkdir "${{ github.workspace }}/data" && cd "${{ github.workspace }}/data"
archivebox init
archivebox config --set SAVE_READABILITY=False
archivebox config --set SAVE_MERCURY=False
archivebox config --set SAVE_SINGLEFILE=False
archivebox --version
archivebox init --setup
- name: Add some links to test
run: |


@ -18,20 +18,21 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3
with:
version: latest
install: true
platforms: linux/amd64,linux/arm64,linux/arm/v7
- name: Builder instance name
run: echo ${{ steps.buildx.outputs.name }}
@ -40,7 +41,7 @@ jobs:
run: echo ${{ steps.buildx.outputs.platforms }}
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
@ -48,7 +49,7 @@ jobs:
${{ runner.os }}-buildx-
- name: Docker Login
uses: docker/login-action@v1
uses: docker/login-action@v3
if: github.event_name != 'pull_request'
with:
username: ${{ secrets.DOCKER_USERNAME }}
@ -56,7 +57,7 @@ jobs:
- name: Collect Docker tags
id: docker_meta
uses: crazy-max/ghaction-docker-meta@v2
uses: docker/metadata-action@v5
with:
images: archivebox/archivebox,nikisweeting/archivebox
flavor: |
@ -69,7 +70,7 @@ jobs:
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
uses: docker/build-push-action@v5
with:
context: ./
file: ./Dockerfile


@ -23,11 +23,12 @@ jobs:
cd brew_dist/
brew install --build-bottle ./archivebox.rb
# brew bottle archivebox
archivebox version
- name: Add some links to test
run: |
mkdir data && cd data
archivebox init
archivebox init --setup
archivebox add 'https://example.com'
archivebox version
archivebox status


@ -28,7 +28,8 @@ jobs:
- name: Lint with flake8
run: |
cd archivebox
# one pass for show-stopper syntax errors or undefined names
flake8 archivebox --count --show-source --statistics
flake8 . --count --show-source --statistics
# one pass for small stylistic things
flake8 archivebox --count --max-line-length="$MAX_LINE_LENGTH" --statistics
flake8 . --count --max-line-length="$MAX_LINE_LENGTH" --statistics


@ -7,29 +7,35 @@ on:
jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
permissions:
id-token: write
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Set up Python
uses: actions/setup-python@v1
uses: actions/setup-python@v4
with:
python-version: 3.9
python-version: 3.11
architecture: x64
- name: Build Python Package
- uses: pdm-project/setup-pdm@v3
- name: Install dependencies
run: pdm install --fail-fast --no-lock --group :all --no-self
- name: Build package
run: |
pip3 install --upgrade pip setuptools wheel
rm -Rf pip_dist/*.whl
python3 setup.py \
sdist --dist-dir=./pip_dist \
bdist_wheel --dist-dir=./pip_dist \
egg_info --egg-base=./pip_dist
pip install pip_dist/archivebox-*.whl
rm ./dist/archivebox-*.whl
pdm build
- name: Install from build
run: pip install ./dist/archivebox-*.whl
- name: Add some links to test
run: |
@ -39,6 +45,9 @@ jobs:
archivebox version
archivebox status
- name: Publish package distributions to PyPI
run: pdm publish --no-build
# - name: Push build to PyPI
# run: |
# cd pip_dist/


@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-20.04, macos-latest, windows-latest]
python: [3.7]
python: [3.9]
steps:
- uses: actions/checkout@v2
@ -24,15 +24,18 @@ jobs:
### Setup Python & JS Languages
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v1
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Set up Node JS 14.7.0
uses: actions/setup-node@v1
uses: actions/setup-node@v3
with:
node-version: 14.7.0
node-version: 18.12.0
- name: Setup PDM
uses: pdm-project/setup-pdm@v3
### Install Python & JS Dependencies
- name: Get pip cache dir
@ -51,9 +54,9 @@ jobs:
- name: Install pip dependencies
run: |
python -m pip install --upgrade pip setuptools wheel pytest bottle
python -m pip install --upgrade pip setuptools wheel pytest bottle build
./bin/build_pip.sh
python -m pip install .
pdm install
- name: Get npm cache dir
id: npm-cache

.gitignore

@ -13,6 +13,8 @@ venv/
node_modules/
# Packaging artifacts
.pdm-python
.pdm-build
archivebox.egg-info
archivebox-*.tar.gz
build/
@ -24,3 +26,6 @@ data1/
data2/
data3/
output/
# vim
*.sw?


@ -1,125 +1,297 @@
# This is the Dockerfile for ArchiveBox, it bundles the following dependencies:
# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, single-file
# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
# Usage:
# git submodule update --init --recursive
# git pull --recurse-submodules
# docker build . -t archivebox --no-cache
# docker run -v "$PWD/data":/data archivebox init
# docker run -v "$PWD/data":/data archivebox add 'https://example.com'
# docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
# Multi-arch build:
# docker buildx create --use
# docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
#
# Read more about [developing ArchiveBox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
FROM python:3.9-slim-buster
# Use Debian 12 w/ faster package updates: https://packages.debian.org/bookworm-backports/
FROM python:3.11-slim-bookworm
LABEL name="archivebox" \
maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
description="All-in-one personal internet archiving container" \
homepage="https://github.com/ArchiveBox/ArchiveBox" \
documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
# System-level base config
ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH
ARG TARGETVARIANT
######### Environment Variables #################################
# Global system-level config
ENV TZ=UTC \
LANGUAGE=en_US:en \
LC_ALL=C.UTF-8 \
LANG=C.UTF-8 \
DEBIAN_FRONTEND=noninteractive \
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
PYTHONIOENCODING=UTF-8 \
PYTHONUNBUFFERED=1 \
DEBIAN_FRONTEND=noninteractive \
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
PIP_DISABLE_PIP_VERSION_CHECK=1 \
npm_config_loglevel=error
# Application-level base config
# Version config
ENV PYTHON_VERSION=3.11 \
NODE_VERSION=21
# User config
ENV ARCHIVEBOX_USER="archivebox" \
DEFAULT_PUID=911 \
DEFAULT_PGID=911
# Global paths
ENV CODE_DIR=/app \
VENV_PATH=/venv \
DATA_DIR=/data \
NODE_DIR=/node \
ARCHIVEBOX_USER="archivebox"
GLOBAL_VENV=/venv \
PLAYWRIGHT_BROWSERS_PATH=/browsers
# Application-level paths
ENV APP_VENV=/app/.venv \
NODE_MODULES=/app/node_modules
# Build shell config
ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "errtrace", "-o", "nounset", "-c"]
######### System Environment ####################################
# Detect ArchiveBox version number by reading package.json
COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
&& echo "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
&& echo "BUILD_START_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ} LANG=${LANG}" \
&& echo \
&& echo "GLOBAL_VENV=${GLOBAL_VENV} APP_VENV=${APP_VENV} NODE_MODULES=${NODE_MODULES}" \
&& echo "PYTHON=${PYTHON_VERSION} NODE=${NODE_VERSION} PATH=${PATH}" \
&& echo "CODE_DIR=${CODE_DIR} DATA_DIR=${DATA_DIR}" \
&& echo \
&& uname -a \
&& cat /etc/os-release | head -n7 \
&& which bash && bash --version | head -n1 \
&& which dpkg && dpkg --version | head -n1 \
&& echo -e '\n\n' && env && echo -e '\n\n' \
) | tee -a /VERSION.txt
# Create non-privileged user for archivebox and chrome
RUN groupadd --system $ARCHIVEBOX_USER \
&& useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER
RUN echo "[*] Setting up $ARCHIVEBOX_USER user uid=${DEFAULT_PUID}..." \
&& groupadd --system $ARCHIVEBOX_USER \
&& useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER \
&& usermod -u "$DEFAULT_PUID" "$ARCHIVEBOX_USER" \
&& groupmod -g "$DEFAULT_PGID" "$ARCHIVEBOX_USER" \
&& echo -e "\nARCHIVEBOX_USER=$ARCHIVEBOX_USER PUID=$(id -u $ARCHIVEBOX_USER) PGID=$(id -g $ARCHIVEBOX_USER)\n\n" \
| tee -a /VERSION.txt
# DEFAULT_PUID and DEFAULT_PGID are overridden by PUID and PGID in /bin/docker_entrypoint.sh at runtime
# https://docs.linuxserver.io/general/understanding-puid-and-pgid
# Install system dependencies
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
apt-transport-https ca-certificates gnupg2 zlib1g-dev \
dumb-init gosu cron unzip curl \
# Install system apt dependencies (adding backports to access more recent apt updates)
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing APT base system dependencies for $TARGETPLATFORM..." \
&& echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
&& mkdir -p /etc/apt/keyrings \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
# 1. packaging dependencies
apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
# 2. docker and init system dependencies
zlib1g-dev dumb-init gosu cron unzip grep \
# 3. frivolous CLI helpers to make debugging failed archiving easier
# nano iputils-ping dnsutils htop procps jq yq
&& rm -rf /var/lib/apt/lists/*
# Install apt dependencies
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
wget curl chromium git ffmpeg youtube-dl ripgrep \
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/*
######### Language Environments ####################################
# Install Node environment
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
&& echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
nodejs \
# && npm install -g npm \
&& rm -rf /var/lib/apt/lists/*
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
nodejs libatomic1 python3-minimal \
&& rm -rf /var/lib/apt/lists/* \
# Update NPM to latest version
&& npm i -g npm --cache /root/.npm \
# Save version info
&& ( \
which node && node --version \
&& which npm && npm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install Python environment
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[+] Setting up Python $PYTHON_VERSION runtime..." \
# tell PDM to allow using global system python site packages
# && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
# create global virtual environment GLOBAL_VENV to use (better than using pip install --global)
# && python3 -m venv --system-site-packages --symlinks $GLOBAL_VENV \
# && python3 -m venv --system-site-packages $GLOBAL_VENV \
# && python3 -m venv $GLOBAL_VENV \
# install global dependencies / python build dependencies in GLOBAL_VENV
# && pip install --upgrade pip setuptools wheel \
# Save version info
&& ( \
which python3 && python3 --version | grep " $PYTHON_VERSION" \
&& which pip && pip --version \
# && which pdm && pdm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
######### Extractor Dependencies ##################################
# Install apt dependencies
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing APT extractor dependencies globally using apt..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
curl wget git yt-dlp ffmpeg ripgrep \
# Packages we have also needed in the past:
# youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
# fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/* \
# Save version info
&& ( \
which curl && curl --version | head -n1 \
&& which wget && wget --version | head -n1 \
&& which yt-dlp && yt-dlp --version | head -n1 \
&& which git && git --version | head -n1 \
&& which rg && rg --version | head -n1 \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install chromium browser using playwright
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
&& apt-get update -qq \
&& if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
# install Chromium using playwright
pip install playwright \
&& cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
&& playwright install --with-deps chromium \
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
else \
# fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
apt-get install -qq -y -t bookworm-backports --no-install-recommends \
chromium fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& export CHROME_BINARY="$(which chromium)"; \
fi \
&& rm -rf /var/lib/apt/lists/* \
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
&& mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
&& chown -R $ARCHIVEBOX_USER "/home/${ARCHIVEBOX_USER}/.config" \
&& chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
# Save version info
&& ( \
which chromium-browser && /usr/bin/chromium-browser --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install Node dependencies
WORKDIR "$NODE_DIR"
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
npm_config_loglevel=error
ADD ./package.json ./package.json
ADD ./package-lock.json ./package-lock.json
RUN npm ci
# Install Python dependencies
WORKDIR "$CODE_DIR"
ENV PATH="${PATH}:$VENV_PATH/bin"
RUN python -m venv --clear --symlinks "$VENV_PATH" \
&& pip install --upgrade --quiet pip setuptools
ADD "./setup.py" "$CODE_DIR/"
ADD "./package.json" "$CODE_DIR/archivebox/"
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
build-essential python-dev python3-dev \
&& echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
&& python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
&& pip install --quiet -r /tmp/requirements.txt \
&& apt-get purge -y build-essential python-dev python3-dev \
COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/"
RUN --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing NPM extractor dependencies from package.json into $NODE_MODULES..." \
&& npm ci --prefer-offline --no-audit --cache /root/.npm \
&& ( \
which node && node --version \
&& which npm && npm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
######### Build Dependencies ####################################
# Install ArchiveBox Python dependencies
WORKDIR "$CODE_DIR"
COPY --chown=root:root --chmod=755 "./pyproject.toml" "requirements.txt" "$CODE_DIR/"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing PIP ArchiveBox dependencies from requirements.txt for ${TARGETPLATFORM}..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
build-essential \
libssl-dev libldap2-dev libsasl2-dev \
python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps \
# && ln -s "$GLOBAL_VENV" "$APP_VENV" \
# && pdm use --venv in-project \
# && pdm run python -m ensurepip \
# && pdm sync --fail-fast --no-editable --group :all --no-self \
# && pdm export -o requirements.txt --without-hashes \
# && source $GLOBAL_VENV/bin/activate \
&& pip install -r requirements.txt \
&& apt-get purge -y \
build-essential \
# these are only needed to build CPython libs, we discard after build phase to shrink layer size
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
# Install apt development dependencies
# RUN apt-get install -qq \
# && apt-get install -qq -y --no-install-recommends \
# python3 python3-dev python3-pip python3-venv python3-all \
# dh-python debhelper devscripts dput software-properties-common \
# python3-distutils python3-setuptools python3-wheel python3-stdeb
# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
# && pip install --quiet -r /tmp/dev_requirements.txt
# Install ArchiveBox Python package from source
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
&& apt-get update -qq \
# install C compiler to build deps on platforms that don't have 32-bit wheels available on pypi
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
build-essential \
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
&& pip install -e "$CODE_DIR"[sonic,ldap] \
# save docker image size and always remove compilers / build tools after building is complete
&& apt-get purge -y build-essential \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
# Install ArchiveBox Python package and its dependencies
WORKDIR "$CODE_DIR"
ADD . "$CODE_DIR"
RUN pip install -e .
####################################################
# Setup ArchiveBox runtime config
WORKDIR "$DATA_DIR"
ENV IN_DOCKER=True \
CHROME_SANDBOX=False \
CHROME_BINARY="chromium" \
USE_SINGLEFILE=True \
SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
USE_READABILITY=True \
READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
USE_MERCURY=True \
MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
ENV IN_DOCKER=True
## No need to set explicitly, these values will be autodetected by archivebox in docker:
# CHROME_SANDBOX=False \
# WGET_BINARY="wget" \
# YOUTUBEDL_BINARY="yt-dlp" \
# CHROME_BINARY="/usr/bin/chromium-browser" \
# USE_SINGLEFILE=True \
# SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
# USE_READABILITY=True \
# READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
# USE_MERCURY=True \
# MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
# Print version for nice docker finish summary
# RUN archivebox version
RUN /app/bin/docker_entrypoint.sh archivebox version
RUN (echo -e "\n\n[√] Finished Docker build successfully. Saving build summary in: /VERSION.txt" \
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
&& "$CODE_DIR/bin/docker_entrypoint.sh" \
archivebox version 2>&1 \
) | tee -a /VERSION.txt
####################################################
# Open up the interfaces to the outside world
WORKDIR "$DATA_DIR"
VOLUME "$DATA_DIR"
EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
# Optional:
# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
# CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]


@ -1,6 +0,0 @@
graft archivebox
global-exclude .DS_Store
global-exclude __pycache__
global-exclude *.pyc
prune tests/

Pipfile

@ -1,12 +0,0 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[packages]
# see setup.py for package dependency list
"e1839a8" = {path = ".", editable = true}
[dev-packages]
# see setup.py for dev package dependency list
"e1839a8" = {path = ".", extras = ["dev"], editable = true}

README.md

(diff suppressed: file too large)

SECURITY.md (new file)

@ -0,0 +1,34 @@
# Security Policy
---
## Security Information
Please see this wiki page for important notices about ArchiveBox security, publishing your archives securely, and the dangers of executing archived JS:
https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview
Also see this section of the README about important caveats when running ArchiveBox:
https://github.com/ArchiveBox/ArchiveBox?tab=readme-ov-file#caveats
You can also read these pages for more information about ArchiveBox's internals, development environment, DB schema, and more:
- https://github.com/ArchiveBox/ArchiveBox#archive-layout
- https://github.com/ArchiveBox/ArchiveBox#archivebox-development
- https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives
- https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting
---
## Reporting a Vulnerability
We use GitHub's built-in [Private Reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing-information-about-vulnerabilities/privately-reporting-a-security-vulnerability) feature to accept vulnerability reports.
1. Go to the Security tab on our GitHub repo: https://github.com/ArchiveBox/ArchiveBox/security
2. Click the ["Report a Vulnerability"](https://github.com/ArchiveBox/ArchiveBox/security/advisories/new) button
3. Fill out the form to submit the details of the report, and it will be sent securely to the maintainers.
You can also contact the maintainers via our public [Zulip Chat Server zulip.archivebox.io](https://zulip.archivebox.io) or [Twitter DMs @ArchiveBoxApp](https://twitter.com/ArchiveBoxApp).


@ -1 +1,3 @@
production_url: https://archivebox.io
theme: jekyll-theme-merlot
# Github Pages static site settings for https://archivebox.io


@ -3,4 +3,4 @@ ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E
select = F,E9,W
max-line-length = 130
max-complexity = 10
exclude = migrations,tests,node_modules,vendor,static,venv,.venv,.venv2,.docker-venv
exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv

archivebox/__init__.py (mode changed: normal file → executable file)


@ -30,11 +30,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
)
parser.add_argument(
'--update-all', #'-n',
'--update', #'-u',
action='store_true',
default=not ONLY_NEW, # when ONLY_NEW=True we skip updating old links
help="Also retry previously skipped/failed links when adding new links",
)
parser.add_argument(
'--update-all', #'-n',
action='store_true',
default=False,
help="Also update ALL links in index when finished adding new links",
)
parser.add_argument(
'--index-only', #'-o',
action='store_true',
@ -104,6 +110,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
urls=stdin_urls or urls,
depth=command.depth,
tag=command.tag,
update=command.update,
update_all=command.update_all,
index_only=command.index_only,
overwrite=command.overwrite,
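
A hedged sketch of the difference between the two flags when invoking the add command programmatically (the module path archivebox.cli.archivebox_add is assumed from ArchiveBox's one-module-per-subcommand CLI layout):

    from archivebox.cli.archivebox_add import main

    # --update: also retry previously skipped/failed links while adding these URLs
    main(args=['--update', 'https://example.com'])

    # --update-all: additionally update ALL links in the index when finished adding
    main(args=['--update-all', 'https://example.com'])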


@ -51,6 +51,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
action='store_true',
help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots',
)
parser.add_argument(
'--update',
action='store_true',
help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults',
)
group.add_argument(
'--clear', # '-c'
action='store_true',
@ -94,6 +99,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
every=command.every,
depth=command.depth,
overwrite=command.overwrite,
update=command.update,
import_path=command.import_path,
out_dir=pwd or OUTPUT_DIR,
)


@ -26,11 +26,12 @@ import io
import re
import sys
import json
import inspect
import getpass
import platform
import shutil
import sqlite3
import django
from sqlite3 import dbapi2 as sqlite3
from hashlib import md5
from pathlib import Path
@ -39,6 +40,7 @@ from typing import Optional, Type, Tuple, Dict, Union, List
from subprocess import run, PIPE, DEVNULL
from configparser import ConfigParser
from collections import defaultdict
import importlib.metadata
from .config_stubs import (
SimpleConfigValueDict,
@ -48,6 +50,26 @@ from .config_stubs import (
ConfigDefaultDict,
)
### Pre-Fetch Minimal System Config
SYSTEM_USER = getpass.getuser() or os.getlogin()
try:
import pwd
SYSTEM_USER = pwd.getpwuid(os.geteuid()).pw_name or SYSTEM_USER
except KeyError:
# Process' UID might not map to a user in cases such as running the Docker image
# (where `archivebox` is 999) as a different UID.
pass
except ModuleNotFoundError:
# pwd is only needed for some linux systems, doesn't exist on windows
pass
except Exception:
# this should never happen, uncomment to debug
# raise
pass
############################### Config Schema ##################################
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
@ -56,6 +78,9 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')}, # progress bars are buggy on mac, disable for now
'IN_DOCKER': {'type': bool, 'default': False},
'IN_QEMU': {'type': bool, 'default': False},
'PUID': {'type': int, 'default': os.getuid()},
'PGID': {'type': int, 'default': os.getgid()},
# TODO: 'SHOW_HINTS': {'type: bool, 'default': True},
},
@ -65,9 +90,17 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'ONLY_NEW': {'type': bool, 'default': True},
'TIMEOUT': {'type': int, 'default': 60},
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)}, # to avoid downloading code assets as their own pages
'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)},
'ADMIN_USERNAME': {'type': str, 'default': None},
'ADMIN_PASSWORD': {'type': str, 'default': None},
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
},
'SERVER_CONFIG': {
@ -82,6 +115,22 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40},
'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None},
'TIME_ZONE': {'type': str, 'default': 'UTC'},
'TIMEZONE': {'type': str, 'default': 'UTC'},
'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
'REVERSE_PROXY_WHITELIST': {'type': str, 'default': ''},
'LOGOUT_REDIRECT_URL': {'type': str, 'default': '/'},
'PREVIEW_ORIGINALS': {'type': bool, 'default': True},
'LDAP': {'type': bool, 'default': False},
'LDAP_SERVER_URI': {'type': str, 'default': None},
'LDAP_BIND_DN': {'type': str, 'default': None},
'LDAP_BIND_PASSWORD': {'type': str, 'default': None},
'LDAP_USER_BASE': {'type': str, 'default': None},
'LDAP_USER_FILTER': {'type': str, 'default': None},
'LDAP_USERNAME_ATTR': {'type': str, 'default': None},
'LDAP_FIRSTNAME_ATTR': {'type': str, 'default': None},
'LDAP_LASTNAME_ATTR': {'type': str, 'default': None},
'LDAP_EMAIL_ATTR': {'type': str, 'default': None},
},
'ARCHIVE_METHOD_TOGGLES': {
@ -100,6 +149,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
'SAVE_ALLOWLIST': {'type': dict, 'default': {},},
'SAVE_DENYLIST': {'type': dict, 'default': {},},
},
'ARCHIVE_METHOD_OPTIONS': {
@ -108,13 +159,14 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
'MEDIA_MAX_SIZE': {'type': str, 'default': '750m'},
'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
'CURL_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
'WGET_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
'COOKIES_FILE': {'type': str, 'default': None},
'CHROME_USER_DATA_DIR': {'type': str, 'default': None},
'CHROME_TIMEOUT': {'type': int, 'default': 0},
'CHROME_HEADLESS': {'type': bool, 'default': True},
'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']},
'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [
@ -124,15 +176,20 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'--write-thumbnail',
'--no-call-home',
'--write-sub',
'--all-subs',
'--write-auto-sub',
'--write-auto-subs',
'--convert-subs=srt',
'--yes-playlist',
'--continue',
# This flag doesn't exist in youtube-dl
# only in yt-dlp
'--no-abort-on-error',
# --ignore-errors must come AFTER
# --no-abort-on-error
# https://github.com/yt-dlp/yt-dlp/issues/4914
'--ignore-errors',
'--geo-bypass',
'--add-metadata',
'--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
'--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
]},
@ -150,6 +207,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'--compressed'
]},
'GIT_ARGS': {'type': list, 'default': ['--recursive']},
'SINGLEFILE_ARGS': {'type': list, 'default' : None},
'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'},
},
'SEARCH_BACKEND_CONFIG' : {
@ -163,6 +222,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90},
# SQLite3 FTS5
'FTS_SEPARATE_DATABASE': {'type': bool, 'default': True},
'FTS_TOKENIZERS': {'type': str, 'default': 'porter unicode61 remove_diacritics 2'},
# Default from https://www.sqlite.org/limits.html#max_length
'FTS_SQLITE_MAX_LENGTH': {'type': int, 'default': int(1e9)},
},
'DEPENDENCY_CONFIG': {
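
The FTS_TOKENIZERS default above is a standard SQLite FTS5 tokenizer spec. A minimal sketch of a search table built with it (table and column names here are hypothetical, not ArchiveBox's actual schema):

    import sqlite3

    db = sqlite3.connect('search.sqlite3')
    db.execute(
        "CREATE VIRTUAL TABLE IF NOT EXISTS snapshot_fts USING fts5("
        "snapshot_id UNINDEXED, texts, "
        "tokenize='porter unicode61 remove_diacritics 2')"
    )
    db.execute("INSERT INTO snapshot_fts(snapshot_id, texts) VALUES (?, ?)",
               ('abc123', 'Some archived page text'))
    # porter stemming + unicode61 case folding let 'archive' match 'archived'
    print(db.execute("SELECT snapshot_id FROM snapshot_fts "
                     "WHERE snapshot_fts MATCH 'archive'").fetchall())  # [('abc123',)]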
@ -179,17 +243,19 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'},
'WGET_BINARY': {'type': str, 'default': 'wget'},
'WGET_BINARY': {'type': str, 'default': 'wget'}, # also can accept wget2
'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('mercury-parser')},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('postlight-parser')},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'}, # also can accept youtube-dl
'NODE_BINARY': {'type': str, 'default': 'node'},
'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
'CHROME_BINARY': {'type': str, 'default': None},
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
'READWISE_READER_TOKENS': {'type': dict, 'default': {}},
},
}
@ -292,6 +358,7 @@ ALLOWED_IN_OUTPUT_DIR = {
'yarn.lock',
'static',
'sonic',
'search.sqlite3',
ARCHIVE_DIR_NAME,
SOURCES_DIR_NAME,
LOGS_DIR_NAME,
@ -307,12 +374,23 @@ ALLOWED_IN_OUTPUT_DIR = {
'static_index.json',
}
def get_version(config):
return importlib.metadata.version(__package__ or 'archivebox')
def get_commit_hash(config):
try:
return list((config['PACKAGE_DIR'] / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
except Exception:
return None
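
get_version() above replaces the old package.json lookup with the stdlib packaging-metadata API:

    import importlib.metadata

    print(importlib.metadata.version('archivebox'))  # e.g. '0.7.0', whatever version is installed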
############################## Derived Config ##################################
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
'USER': {'default': lambda c: SYSTEM_USER},
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
'PACKAGE_DIR': {'default': lambda c: Path(__file__).resolve().parent},
@ -326,18 +404,26 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
'VERSION': {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text(encoding='utf-8').strip())['version']},
'VERSION': {'default': lambda c: get_version(c)},
'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},
'PYTHON_BINARY': {'default': lambda c: sys.executable},
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
'PYTHON_VERSION': {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},
'DJANGO_BINARY': {'default': lambda c: django.__file__.replace('__init__.py', 'bin/django-admin.py')},
'DJANGO_BINARY': {'default': lambda c: inspect.getfile(django)},
'DJANGO_VERSION': {'default': lambda c: '{}.{}.{} {} ({})'.format(*django.VERSION)},
'SQLITE_BINARY': {'default': lambda c: inspect.getfile(sqlite3)},
'SQLITE_VERSION': {'default': lambda c: sqlite3.version},
#'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'}, # set at runtime below, interesting but unused for now
#'SQLITE_OPTIONS': {'default': lambda c: ['JSON1']}, # set at runtime below
'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
'CURL_USER_AGENT': {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
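
To illustrate the URL_DENYLIST default defined earlier in the schema (the aliases entry means a legacy URL_BLACKLIST value still populates the same setting), compiled exactly as URL_DENYLIST_PTN is above:

    import re

    ALLOWDENYLIST_REGEX_FLAGS = re.IGNORECASE | re.UNICODE | re.MULTILINE
    URL_DENYLIST = r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'
    URL_DENYLIST_PTN = re.compile(URL_DENYLIST, ALLOWDENYLIST_REGEX_FLAGS)

    print(bool(URL_DENYLIST_PTN.search('https://example.com/static/app.js?v=2')))  # True: code asset, skipped
    print(bool(URL_DENYLIST_PTN.search('https://example.com/blog/post')))          # False: archived normally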
@ -357,12 +443,13 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
'SINGLEFILE_ARGS': {'default': lambda c: c['SINGLEFILE_ARGS'] or []},
'USE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']},
'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury is unversioned
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesn't expose version info until this is merged https://github.com/postlight/parser/pull/750
'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
@ -392,10 +479,11 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)},
'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)},
'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
}
################################### Helpers ####################################
@ -636,6 +724,9 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
return None
try:
bin_env = os.environ | {'LANG': 'C'}
version_str = run([abspath, "--version"], stdout=PIPE, env=bin_env).stdout.strip().decode()
if not version_str:
version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
# take first 3 columns of first line of version info
return ' '.join(version_str.split('\n')[0].strip().split()[:3])
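
The bin_env override above uses PEP 584 dict union on os.environ (Python 3.9+), equivalent to:

    import os

    bin_env = {**os.environ, 'LANG': 'C'}  # force untranslated --version output so parsing stays stable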
@ -779,6 +870,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'path': config['OUTPUT_DIR'].resolve(),
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
'is_mount': os.path.ismount(config['OUTPUT_DIR'].resolve()),
},
'SOURCES_DIR': {
'path': config['SOURCES_DIR'].resolve(),
@ -794,6 +886,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'path': config['ARCHIVE_DIR'].resolve(),
'enabled': True,
'is_valid': config['ARCHIVE_DIR'].exists(),
'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
},
'CONFIG_FILE': {
'path': config['CONFIG_FILE'].resolve(),
@ -804,18 +897,12 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
'path': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve(),
'enabled': True,
'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
'is_mount': os.path.ismount((config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve()),
},
}
def get_dependency_info(config: ConfigDict) -> ConfigValue:
return {
'ARCHIVEBOX_BINARY': {
'path': bin_path(config['ARCHIVEBOX_BINARY']),
'version': config['VERSION'],
'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
'enabled': True,
'is_valid': True,
},
'PYTHON_BINARY': {
'path': bin_path(config['PYTHON_BINARY']),
'version': config['PYTHON_VERSION'],
@ -823,6 +910,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': True,
'is_valid': bool(config['PYTHON_VERSION']),
},
'SQLITE_BINARY': {
'path': bin_path(config['SQLITE_BINARY']),
'version': config['SQLITE_VERSION'],
'hash': bin_hash(config['SQLITE_BINARY']),
'enabled': True,
'is_valid': bool(config['SQLITE_VERSION']),
},
'DJANGO_BINARY': {
'path': bin_path(config['DJANGO_BINARY']),
'version': config['DJANGO_VERSION'],
@ -830,6 +924,14 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': True,
'is_valid': bool(config['DJANGO_VERSION']),
},
'ARCHIVEBOX_BINARY': {
'path': bin_path(config['ARCHIVEBOX_BINARY']),
'version': config['VERSION'],
'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
'enabled': True,
'is_valid': True,
},
'CURL_BINARY': {
'path': bin_path(config['CURL_BINARY']),
'version': config['CURL_VERSION'],
@ -915,7 +1017,8 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
'TIMEOUT': config['TIMEOUT'],
'RESOLUTION': config['RESOLUTION'],
'CHECK_SSL_VALIDITY': config['CHECK_SSL_VALIDITY'],
'CHROME_BINARY': config['CHROME_BINARY'],
'CHROME_BINARY': bin_path(config['CHROME_BINARY']),
'CHROME_TIMEOUT': config['CHROME_TIMEOUT'],
'CHROME_HEADLESS': config['CHROME_HEADLESS'],
'CHROME_SANDBOX': config['CHROME_SANDBOX'],
'CHROME_USER_AGENT': config['CHROME_USER_AGENT'],
@ -956,13 +1059,22 @@ globals().update(CONFIG)
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
os.environ["TZ"] = 'UTC'
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
assert TIMEZONE == 'UTC', 'The server timezone should always be set to UTC' # we may allow this to change later
os.environ["TZ"] = TIMEZONE
os.umask(0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
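
Worked example of the umask derivation above, assuming the new OUTPUT_PERMISSIONS default of '644':

    DIR_OUTPUT_PERMISSIONS = '644'.replace('6', '7').replace('4', '5')  # '755' (dirs need +x to be traversable)
    umask = 0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8)                 # 0o022
    # with umask 0o022, new directories are created 0o755 and new files 0o644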
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
sys.path.append(NODE_BIN_PATH)
# OPTIONAL: also look around the host system for node modules to use
# avoid enabling this unless absolutely needed,
# having overlapping potential sources of libs is a big source of bugs and confusion for users
# DEV_NODE_BIN_PATH = str((Path(CONFIG["PACKAGE_DIR"]).absolute() / '..' / 'node_modules' / '.bin'))
# sys.path.append(DEV_NODE_BIN_PATH)
# USER_NODE_BIN_PATH = str(Path('~/.node_modules/.bin').resolve())
# sys.path.append(USER_NODE_BIN_PATH)
# disable stderr "you really shouldnt disable ssl" warnings with library config
if not CONFIG['CHECK_SSL_VALIDITY']:
import urllib3
@ -970,6 +1082,13 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# get SQLite database version, compile options, and runtime options
# TODO: replace this hack with a proper assertion-checker helper function somewhere like setup_django
#cursor = sqlite3.connect(':memory:').cursor()
#DYNAMIC_CONFIG_SCHEMA['SQLITE_VERSION'] = lambda c: cursor.execute("SELECT sqlite_version();").fetchone()[0]
#DYNAMIC_CONFIG_SCHEMA['SQLITE_JOURNAL_MODE'] = lambda c: cursor.execute('PRAGMA journal_mode;').fetchone()[0]
#DYNAMIC_CONFIG_SCHEMA['SQLITE_OPTIONS'] = lambda c: [option[0] for option in cursor.execute('PRAGMA compile_options;').fetchall()]
#cursor.close()
########################### Config Validity Checkers ###########################
@ -988,6 +1107,11 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
raise SystemExit(2)
if int(CONFIG['DJANGO_VERSION'].split('.')[0]) < 3:
stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
raise SystemExit(2)
if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
@ -1061,6 +1185,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
stderr()
def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
output_dir = out_dir or config['OUTPUT_DIR']
assert isinstance(output_dir, (str, Path))
@ -1135,11 +1260,10 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
# without running migrations automatically (user runs them manually by calling init)
django.setup()
from django.conf import settings
# log startup message to the error log
with open(settings.ERROR_LOG, "a+", encoding='utf-8') as f:
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
@ -1149,10 +1273,17 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
# Enable WAL mode in sqlite3
from django.db import connection
with connection.cursor() as cursor:
# Set Journal mode to WAL to allow for multiple writers
cursor.execute("PRAGMA journal_mode")
current_mode = cursor.fetchone()[0]
if current_mode != 'wal':
cursor.execute("PRAGMA journal_mode=wal;")
# Set max blocking delay for concurrent writes and write sync mode
# https://litestream.io/tips/#busy-timeout
cursor.execute("PRAGMA busy_timeout = 5000;")
cursor.execute("PRAGMA synchronous = NORMAL;")
# Create cache table in DB if needed
try:
from django.core.cache import cache
@ -1160,7 +1291,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
except django.db.utils.OperationalError:
call_command("createcachetable", verbosity=0)
# if archivebox gets imported multiple times, we have to close
# the sqlite3 whenever we init from scratch to avoid multiple threads
# sharing the same connection by accident
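
Of the PRAGMAs set above, journal_mode=wal persists in the database file, while busy_timeout and synchronous are per-connection and must be re-set on every connect. A quick check that WAL mode stuck (path assumes a default ./data collection):

    import sqlite3

    conn = sqlite3.connect('data/index.sqlite3')
    print(conn.execute('PRAGMA journal_mode').fetchone())  # ('wal',)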


@ -41,7 +41,7 @@ class ConfigDict(BaseConfig, total=False):
MEDIA_TIMEOUT: int
OUTPUT_PERMISSIONS: str
RESTRICT_FILE_NAMES: str
URL_BLACKLIST: str
URL_DENYLIST: str
SECRET_KEY: Optional[str]
BIND_ADDR: str
@ -74,6 +74,7 @@ class ConfigDict(BaseConfig, total=False):
CHROME_USER_AGENT: str
COOKIES_FILE: Union[str, Path, None]
CHROME_USER_DATA_DIR: Union[str, Path, None]
CHROME_TIMEOUT: int
CHROME_HEADLESS: bool
CHROME_SANDBOX: bool
@ -98,6 +99,7 @@ class ConfigDict(BaseConfig, total=False):
WGET_ARGS: List[str]
CURL_ARGS: List[str]
GIT_ARGS: List[str]
TAG_SEPARATOR_PATTERN: str
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]


@ -41,7 +41,7 @@ class AddLinkForm(forms.Form):
# label="Exclude patterns",
# min_length='1',
# required=False,
# initial=URL_BLACKLIST,
# initial=URL_DENYLIST,
# )
# timeout = forms.IntegerField(
# initial=TIMEOUT,


@ -1,8 +1,11 @@
__package__ = 'archivebox.core'
import ipaddress
from django.utils import timezone
from django.contrib.auth.middleware import RemoteUserMiddleware
from django.core.exceptions import ImproperlyConfigured
from ..config import PUBLIC_SNAPSHOTS
from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST
def detect_timezone(request, activate: bool=True):
@ -35,3 +38,23 @@ def CacheControlMiddleware(get_response):
return response
return middleware
class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
header = 'HTTP_{normalized}'.format(normalized=REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())
def process_request(self, request):
if REVERSE_PROXY_WHITELIST == '':
return
ip = request.META.get('REMOTE_ADDR')
for cidr in REVERSE_PROXY_WHITELIST.split(','):
try:
network = ipaddress.ip_network(cidr)
except ValueError:
raise ImproperlyConfigured(
"The REVERSE_PROXY_WHITELIST config paramater is in invalid format, or "
"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.")
if ipaddress.ip_address(ip) in network:
return super().process_request(request)
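
A standalone sketch of the two pieces of ReverseProxyAuthMiddleware above, stdlib only (header and CIDR values here are examples, not defaults):

    import ipaddress

    # header normalization: 'Remote-User' becomes the WSGI META key 'HTTP_REMOTE_USER'
    print('HTTP_{}'.format('Remote-User'.replace('-', '_').upper()))  # HTTP_REMOTE_USER

    # allowlist check: only trust the header when the request IP falls in a listed CIDR
    REVERSE_PROXY_WHITELIST = '10.0.0.0/8,192.168.0.0/16'
    ip = '192.168.1.20'
    print(any(ipaddress.ip_address(ip) in ipaddress.ip_network(cidr)
              for cidr in REVERSE_PROXY_WHITELIST.split(',')))  # True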


@ -0,0 +1,18 @@
# Generated by Django 3.1.14 on 2022-09-14 09:34
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0020_auto_20210410_1031'),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
),
]


@ -19,7 +19,18 @@ from ..config import (
SQL_INDEX_FILENAME,
OUTPUT_DIR,
LOGS_DIR,
TIME_ZONE,
TIMEZONE,
LDAP,
LDAP_SERVER_URI,
LDAP_BIND_DN,
LDAP_BIND_PASSWORD,
LDAP_USER_BASE,
LDAP_USER_FILTER,
LDAP_USERNAME_ATTR,
LDAP_FIRSTNAME_ATTR,
LDAP_LASTNAME_ATTR,
LDAP_EMAIL_ATTR,
)
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
@ -34,7 +45,8 @@ WSGI_APPLICATION = 'core.wsgi.application'
ROOT_URLCONF = 'core.urls'
LOGIN_URL = '/accounts/login/'
LOGOUT_REDIRECT_URL = '/'
LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
PASSWORD_RESET_URL = '/accounts/password_reset/'
APPEND_SLASH = True
@ -54,6 +66,12 @@ INSTALLED_APPS = [
]
# For usage with https://www.jetadmin.io/integrations/django
# INSTALLED_APPS += ['jet_django']
# JET_PROJECT = 'archivebox'
# JET_TOKEN = 'some-api-token-here'
MIDDLEWARE = [
'core.middleware.TimezoneMiddleware',
'django.middleware.security.SecurityMiddleware',
@ -61,14 +79,63 @@ MIDDLEWARE = [
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware',
]
################################################################################
### Authentication Settings
################################################################################
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend',
]
if LDAP:
try:
import ldap
from django_auth_ldap.config import LDAPSearch
global AUTH_LDAP_SERVER_URI
global AUTH_LDAP_BIND_DN
global AUTH_LDAP_BIND_PASSWORD
global AUTH_LDAP_USER_SEARCH
global AUTH_LDAP_USER_ATTR_MAP
AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
AUTH_LDAP_BIND_DN = LDAP_BIND_DN
AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
AUTH_LDAP_USER_SEARCH = LDAPSearch(
LDAP_USER_BASE,
ldap.SCOPE_SUBTREE,
'(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
)
AUTH_LDAP_USER_ATTR_MAP = {
'username': LDAP_USERNAME_ATTR,
'first_name': LDAP_FIRSTNAME_ATTR,
'last_name': LDAP_LASTNAME_ATTR,
'email': LDAP_EMAIL_ATTR,
}
AUTHENTICATION_BACKENDS = [
'django_auth_ldap.backend.LDAPBackend',
]
except ModuleNotFoundError:
sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
# don't hard-exit here: if the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap
# sys.exit(1)
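
For reference, the LDAP search filter assembled by the settings above, with hypothetical attribute values filled in:

    LDAP_USERNAME_ATTR = 'uid'                        # hypothetical
    LDAP_USER_FILTER = '(objectClass=inetOrgPerson)'  # hypothetical

    search_filter = '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')'
    print(search_filter)  # (&(uid=%(user)s)(objectClass=inetOrgPerson))
    # django-auth-ldap substitutes %(user)s with the login username at authentication time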
################################################################################
### Debug Settings
################################################################################
# only enable debug toolbar when in DEBUG mode with --nothreading (it doesn't work in multithreaded mode)
DEBUG_TOOLBAR = DEBUG and ('--nothreading' in sys.argv) and ('--reload' not in sys.argv)
if DEBUG_TOOLBAR:
@ -154,7 +221,7 @@ DATABASES = {
'timeout': 60,
'check_same_thread': False,
},
'TIME_ZONE': 'UTC',
'TIME_ZONE': TIMEZONE,
# DB setup is sometimes modified at runtime by setup_django() in config.py
}
}
@ -224,7 +291,8 @@ USE_L10N = True
USE_TZ = True
DATETIME_FORMAT = 'Y-m-d g:iA'
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
TIME_ZONE = TIME_ZONE # noqa
TIME_ZONE = TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
from django.conf.locale.en import formats as en_formats
@ -263,8 +331,8 @@ class NoisyRequestsFilter(logging.Filter):
if LOGS_DIR.exists():
ERROR_LOG = (LOGS_DIR / 'errors.log')
else:
# meh too many edge cases here around creating log dir w/ correct permissions
# cant be bothered, just trash the log and let them figure it out via stdout/stderr
# historically too many edge cases here around creating log dir w/ correct permissions early on
# if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
ERROR_LOG = tempfile.NamedTemporaryFile().name
LOGGING = {


@ -6,7 +6,7 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
from django.conf import settings
from django.views.generic.base import RedirectView
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView
from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
# print('DEBUG', settings.DEBUG)
@ -32,6 +32,11 @@ urlpatterns = [
path('accounts/', include('django.contrib.auth.urls')),
path('admin/', admin.site.urls),
path('health/', HealthCheckView.as_view(), name='healthcheck'),
path('error/', lambda _: 1/0),
# path('jet_api/', include('jet_django.urls')),  # enable this to use https://www.jetadmin.io/integrations/django
path('index.html', RedirectView.as_view(url='/')),
path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}),
path('', HomepageView.as_view(), name='Home'),

View File

@ -11,6 +11,8 @@ from django.views.generic.list import ListView
from django.views.generic import FormView
from django.db.models import Q
from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator
from core.models import Snapshot
from core.forms import AddLinkForm
@ -236,7 +238,7 @@ class PublicIndexView(ListView):
else:
return redirect(f'/admin/login/?next={self.request.path}')
@method_decorator(csrf_exempt, name='dispatch')
class AddView(UserPassesTestMixin, FormView):
template_name = "add.html"
form_class = AddLinkForm
@ -293,3 +295,18 @@ class AddView(UserPassesTestMixin, FormView):
"form": AddLinkForm()
})
return render(template_name=self.template_name, request=self.request, context=context)
class HealthCheckView(View):
"""
A Django view that renders plain text "OK" for service discovery tools
"""
def get(self, request):
"""
Handle a GET request
"""
return HttpResponse(
'OK',
content_type='text/plain',
status=200
)
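A quick way to exercise this endpoint from a service-discovery probe, as a rough sketch (the host, port, and use of the requests library here are hypothetical):

import requests

resp = requests.get('http://127.0.0.1:8000/health/')
assert resp.status_code == 200 and resp.text == 'OK'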

View File

@ -1,13 +1,19 @@
__package__ = 'archivebox.extractors'
import os
import sys
from pathlib import Path
from typing import Optional, List, Iterable, Union
from typing import Callable, Optional, List, Iterable, Union
from datetime import datetime, timezone
from django.db.models import QuerySet
from ..index.schema import Link
from ..config import (
SAVE_ALLOWLIST_PTN,
SAVE_DENYLIST_PTN,
)
from ..core.settings import ERROR_LOG
from ..index.schema import ArchiveResult, Link
from ..index.sql import write_link_to_sql_index
from ..index import (
load_link_details,
@ -40,9 +46,12 @@ from .archive_org import should_save_archive_dot_org, save_archive_dot_org
from .headers import should_save_headers, save_headers
def get_default_archive_methods():
ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]
def get_default_archive_methods() -> List[ArchiveMethodEntry]:
return [
('title', should_save_title, save_title),
('favicon', should_save_favicon, save_favicon),
('headers', should_save_headers, save_headers),
('singlefile', should_save_singlefile, save_singlefile),
@ -50,21 +59,39 @@ def get_default_archive_methods():
('screenshot', should_save_screenshot, save_screenshot),
('dom', should_save_dom, save_dom),
('wget', should_save_wget, save_wget),
('readability', should_save_readability, save_readability), # keep readability below wget and singlefile, as it depends on them
('title', should_save_title, save_title), # keep title and readability below wget and singlefile, as they depend on them
('readability', should_save_readability, save_readability),
('mercury', should_save_mercury, save_mercury),
('git', should_save_git, save_git),
('media', should_save_media, save_media),
('archive_org', should_save_archive_dot_org, save_archive_dot_org),
]
@enforce_types
def get_archive_methods_for_link(link: Link) -> Iterable[ArchiveMethodEntry]:
DEFAULT_METHODS = get_default_archive_methods()
allowed_methods = {
m for pat, methods in
SAVE_ALLOWLIST_PTN.items()
if pat.search(link.url)
for m in methods
} or { m[0] for m in DEFAULT_METHODS }
denied_methods = {
m for pat, methods in
SAVE_DENYLIST_PTN.items()
if pat.search(link.url)
for m in methods
}
allowed_methods -= denied_methods
return (m for m in DEFAULT_METHODS if m[0] in allowed_methods)
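A minimal sketch of how the filtering above behaves, assuming SAVE_ALLOWLIST_PTN / SAVE_DENYLIST_PTN are dicts mapping compiled regexes to lists of method names (the example patterns and URL below are hypothetical):

import re

SAVE_ALLOWLIST_PTN = {re.compile(r'youtube\.com'): ['media', 'title']}
SAVE_DENYLIST_PTN = {re.compile(r'\.pdf$'): ['media']}

# for link.url = 'https://youtube.com/watch?v=abc':
#   allowed = {'media', 'title'}  (an allowlist pattern matched)
#   denied  = set()               (no denylist pattern matched)
#   -> only the ('media', ...) and ('title', ...) method entries run
# for a URL matching no allowlist pattern, allowed falls back to all default methods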
ARCHIVE_METHODS_INDEXING_PRECEDENCE = [('readability', 1), ('singlefile', 2), ('dom', 3), ('wget', 4)]
@enforce_types
def ignore_methods(to_ignore: List[str]):
def ignore_methods(to_ignore: List[str]) -> Iterable[str]:
ARCHIVE_METHODS = get_default_archive_methods()
methods = filter(lambda x: x[0] not in to_ignore, ARCHIVE_METHODS)
methods = map(lambda x: x[0], methods)
return list(methods)
return [x[0] for x in ARCHIVE_METHODS if x[0] not in to_ignore]
@enforce_types
def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
@ -77,11 +104,11 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
except Snapshot.DoesNotExist:
snapshot = write_link_to_sql_index(link)
ARCHIVE_METHODS = get_default_archive_methods()
active_methods = get_archive_methods_for_link(link)
if methods:
ARCHIVE_METHODS = [
method for method in ARCHIVE_METHODS
active_methods = [
method for method in active_methods
if method[0] in methods
]
@ -98,7 +125,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
start_ts = datetime.now(timezone.utc)
for method_name, should_run, method_function in ARCHIVE_METHODS:
for method_name, should_run, method_function in active_methods:
try:
if method_name not in link.history:
link.history[method_name] = []
@ -127,10 +154,27 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
# print('{black} X {}{reset}'.format(method_name, **ANSI))
stats['skipped'] += 1
except Exception as e:
# Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
# and https://github.com/ArchiveBox/ArchiveBox/issues/1014
# are fixed.
"""
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
method_name,
link.url,
)) from e
"""
# Instead, use the kludgy workaround from
# https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627
with open(ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
f.write(("\n" + 'Exception in archive_methods.save_{}(Link(url={})) command={}; ts={}'.format(
method_name,
link.url,
command,
ts
) + "\n"))
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
# print(' ', stats)
@ -143,7 +187,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
log_link_archiving_finished(link, link.link_dir, is_new, stats, start_ts)
log_link_archiving_finished(link, out_dir, is_new, stats, start_ts)
except KeyboardInterrupt:
try:

View File

@ -9,6 +9,7 @@ from ..util import (
enforce_types,
is_static_file,
chrome_args,
chrome_cleanup,
)
from ..config import (
TIMEOUT,
@ -26,6 +27,7 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
out_dir = out_dir or Path(link.link_dir)
if not overwrite and (out_dir / 'output.html').exists():
if (out_dir / 'output.html').stat().st_size > 1:
return False
return SAVE_DOM
@ -38,7 +40,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
output: ArchiveOutput = 'output.html'
output_path = out_dir / output
cmd = [
*chrome_args(TIMEOUT=timeout),
*chrome_args(),
'--dump-dom',
link.url
]
@ -56,6 +58,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
except Exception as err:
status = 'failed'
output = err
chrome_cleanup()
finally:
timer.end()

View File

@ -10,6 +10,7 @@ from ..util import enforce_types, domain
from ..config import (
TIMEOUT,
SAVE_FAVICON,
FAVICON_PROVIDER,
CURL_BINARY,
CURL_ARGS,
CURL_VERSION,
@ -40,7 +41,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
'--output', str(output),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']),
'https://www.google.com/s2/favicons?domain={}'.format(domain(link.url)),
FAVICON_PROVIDER.format(domain(link.url)),
]
status = 'failed'
timer = TimedProgress(timeout, prefix=' ')

View File

@ -33,7 +33,7 @@ def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optio
@enforce_types
def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult:
"""Download playlists or individual video, audio, and subtitles using youtube-dl"""
"""Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = 'media'
@ -43,6 +43,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
YOUTUBEDL_BINARY,
*YOUTUBEDL_ARGS,
*([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
# TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
link.url,
]
status = 'succeeded'
@ -60,7 +61,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
pass
else:
hints = (
'Got youtube-dl response code: {}.'.format(result.returncode),
'Got youtube-dl (or yt-dlp) response code: {}.'.format(result.returncode),
*result.stderr.decode().split('\n'),
)
raise ArchiveError('Failed to save media', hints)
@ -71,8 +72,18 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
timer.end()
# add video description and subtitles to full-text index
# Let's try a few different strategies for handling encoding errors:
index_texts = [
text_file.read_text(encoding='utf-8').strip()
# errors:
# * 'strict' to raise a ValueError exception if there is an
# encoding error. The default value of None has the same effect.
# * 'ignore' ignores errors. Note that ignoring encoding errors
# can lead to data loss.
# * 'xmlcharrefreplace' is only supported when writing to a
# file. Characters not supported by the encoding are replaced with
# the appropriate XML character reference &#nnn;.
# There are a few more options described in https://docs.python.org/3/library/functions.html#open
text_file.read_text(encoding='utf-8', errors='xmlcharrefreplace').strip()
for text_file in (
*output_path.glob('*.description'),
*output_path.glob('*.srt'),

View File

@ -9,6 +9,7 @@ from ..util import (
enforce_types,
is_static_file,
chrome_args,
chrome_cleanup,
)
from ..config import (
TIMEOUT,
@ -37,7 +38,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = 'output.pdf'
cmd = [
*chrome_args(TIMEOUT=timeout),
*chrome_args(),
'--print-to-pdf',
link.url,
]
@ -54,6 +55,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
except Exception as err:
status = 'failed'
output = err
chrome_cleanup()
finally:
timer.end()

View File

@ -10,9 +10,7 @@ from ..index.schema import Link, ArchiveResult, ArchiveError
from ..system import run, atomic_write
from ..util import (
enforce_types,
download_url,
is_static_file,
)
from ..config import (
TIMEOUT,
@ -22,28 +20,8 @@ from ..config import (
READABILITY_VERSION,
)
from ..logging_util import TimedProgress
from .title import get_html
@enforce_types
def get_html(link: Link, path: Path) -> str:
"""
Try to find wget, singlefile and then dom files.
If none is found, download the url again.
"""
canonical = link.canonical_outputs()
abs_path = path.absolute()
sources = [canonical["singlefile_path"], canonical["wget_path"], canonical["dom_path"]]
document = None
for source in sources:
try:
with open(abs_path / source, "r", encoding="utf-8") as f:
document = f.read()
break
except (FileNotFoundError, TypeError):
continue
if document is None:
return download_url(link.url)
else:
return document
@enforce_types
def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
@ -87,12 +65,13 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
cmd = [
DEPENDENCIES['READABILITY_BINARY']['path'],
temp_doc.name,
link.url,
]
result = run(cmd, cwd=out_dir, timeout=timeout)
try:
result_json = json.loads(result.stdout)
assert result_json and 'content' in result_json
assert result_json and 'content' in result_json, 'Readability output is missing the expected "content" field'
except json.JSONDecodeError:
raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr)
@ -106,7 +85,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
# "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
output_tail = [
line.strip()
for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
if line.strip()
]
hints = (

View File

@ -9,6 +9,7 @@ from ..util import (
enforce_types,
is_static_file,
chrome_args,
chrome_cleanup,
)
from ..config import (
TIMEOUT,
@ -37,7 +38,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
out_dir = out_dir or Path(link.link_dir)
output: ArchiveOutput = 'screenshot.png'
cmd = [
*chrome_args(TIMEOUT=timeout),
*chrome_args(),
'--screenshot',
link.url,
]
@ -54,6 +55,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
except Exception as err:
status = 'failed'
output = err
chrome_cleanup()
finally:
timer.end()

View File

@ -17,6 +17,7 @@ from ..config import (
SAVE_SINGLEFILE,
DEPENDENCIES,
SINGLEFILE_VERSION,
SINGLEFILE_ARGS,
CHROME_BINARY,
)
from ..logging_util import TimedProgress
@ -41,14 +42,35 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
out_dir = out_dir or Path(link.link_dir)
output = "singlefile.html"
browser_args = chrome_args(TIMEOUT=0)
browser_args = chrome_args(CHROME_TIMEOUT=0)
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:]))
cmd = [
DEPENDENCIES['SINGLEFILE_BINARY']['path'],
options = [
*SINGLEFILE_ARGS,
'--browser-executable-path={}'.format(CHROME_BINARY),
browser_args,
]
# Deduplicate options (single-file doesn't like when you use the same option two times)
#
# NOTE: Option names that come first clobber conflicting names that come later
# My logic is that SINGLEFILE_ARGS is the option source that affects the singlefile command with the most
# specificity, therefore the user set it with a lot of intent, therefore it should take precedence,
# kind of like the ergonomic principle of lexical scope in programming languages.
seen_option_names = []
def test_seen(argument):
option_name = argument.split("=")[0]
if option_name in seen_option_names:
return False
else:
seen_option_names.append(option_name)
return True
deduped_options = list(filter(test_seen, options))
cmd = [
DEPENDENCIES['SINGLEFILE_BINARY']['path'],
*deduped_options,
link.url,
output,
]
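A small demonstration of the precedence rule described above (the paths are hypothetical): test_seen() keeps only the first occurrence of each option name, so a user-supplied flag from SINGLEFILE_ARGS wins over a conflicting default appended later:

options = [
    '--browser-executable-path=/usr/bin/chromium',       # from SINGLEFILE_ARGS
    '--browser-executable-path=/usr/bin/google-chrome',  # built-in default, added later
]
# list(filter(test_seen, options))
# -> ['--browser-executable-path=/usr/bin/chromium']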

View File

@ -26,7 +26,7 @@ from ..logging_util import TimedProgress
HTML_TITLE_REGEX = re.compile(
r'<title.*?>' # start matching text after <title> tag
r'(.[^<>]+)', # get everything up to these symbols
r'([^<>]+)', # get everything up to these symbols
re.IGNORECASE | re.MULTILINE | re.DOTALL | re.UNICODE,
)
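A sketch of what the corrected pattern captures (the sample HTML is hypothetical):

import re

HTML_TITLE_REGEX = re.compile(
    r'<title.*?>'   # start matching text after the <title> tag
    r'([^<>]+)',    # capture everything up to the closing angle bracket
    re.IGNORECASE | re.MULTILINE | re.DOTALL | re.UNICODE,
)
HTML_TITLE_REGEX.search('<title lang="en">Example Domain</title>').group(1)
# -> 'Example Domain'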
@ -58,6 +58,27 @@ class TitleParser(HTMLParser):
if tag.lower() == "title":
self.inside_title_tag = False
@enforce_types
def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
"""
Try to find wget, singlefile and then dom files.
If none is found, download the url again.
"""
canonical = link.canonical_outputs()
abs_path = path.absolute()
sources = [canonical["singlefile_path"], canonical["wget_path"], canonical["dom_path"]]
document = None
for source in sources:
try:
with open(abs_path / source, "r", encoding="utf-8") as f:
document = f.read()
break
except (FileNotFoundError, TypeError):
continue
if document is None:
return download_url(link.url, timeout=timeout)
else:
return document
@enforce_types
def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
@ -90,7 +111,7 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
status = 'succeeded'
timer = TimedProgress(timeout, prefix=' ')
try:
html = download_url(link.url, timeout=timeout)
html = get_html(link, out_dir, timeout=timeout)
try:
# try using relatively strict html parser first
parser = TitleParser()

View File

@ -22,7 +22,8 @@ from ..config import (
JSON_INDEX_FILENAME,
OUTPUT_DIR,
TIMEOUT,
URL_BLACKLIST_PTN,
URL_DENYLIST_PTN,
URL_ALLOWLIST_PTN,
stderr,
OUTPUT_PERMISSIONS
)
@ -141,7 +142,9 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
continue
if scheme(link.url) not in ('http', 'https', 'ftp'):
continue
if URL_BLACKLIST_PTN and URL_BLACKLIST_PTN.search(link.url):
if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(link.url):
continue
if URL_ALLOWLIST_PTN and (not URL_ALLOWLIST_PTN.search(link.url)):
continue
yield link

View File

@ -24,6 +24,7 @@ from ..config import (
FOOTER_INFO,
HTML_INDEX_FILENAME,
SAVE_ARCHIVE_DOT_ORG,
PREVIEW_ORIGINALS,
)
MAIN_INDEX_TEMPLATE = 'static_index.html'
@ -105,6 +106,7 @@ def link_details_template(link: Link) -> str:
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
})
@enforce_types
@ -175,7 +177,7 @@ def snapshot_icons(snapshot) -> str:
# The check for archive_org is different, so it has to be handled separately
# get from db (faster)
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
exists = extractor in extractor_outputs and extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
# get from filesystem (slower)
# target_path = Path(path) / "archive.org.txt"
# exists = target_path.exists()

View File

@ -20,7 +20,7 @@ from django.utils.functional import cached_property
from ..system import get_dir_size
from ..util import ts_to_date_str, parse_date
from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME
from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
class ArchiveError(Exception):
def __init__(self, message, hints=None):
@ -423,7 +423,7 @@ class Link:
canonical = {
'index_path': 'index.html',
'favicon_path': 'favicon.ico',
'google_favicon_path': 'https://www.google.com/s2/favicons?domain={}'.format(self.domain),
'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
'wget_path': wget_output_path(self),
'warc_path': 'warc/',
'singlefile_path': 'singlefile.html',

View File

@ -1,5 +1,7 @@
__package__ = 'archivebox.index'
import re
from io import StringIO
from pathlib import Path
from typing import List, Tuple, Iterator
@ -8,7 +10,10 @@ from django.db import transaction
from .schema import Link
from ..util import enforce_types, parse_date
from ..config import OUTPUT_DIR
from ..config import (
OUTPUT_DIR,
TAG_SEPARATOR_PATTERN,
)
### Main Links Index
@ -33,9 +38,11 @@ def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir:
def write_link_to_sql_index(link: Link):
from core.models import Snapshot, ArchiveResult
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
tags = info.pop("tags")
if tags is None:
tags = []
tag_list = list(dict.fromkeys(
tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
))
info.pop('tags')
try:
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
@ -44,7 +51,7 @@ def write_link_to_sql_index(link: Link):
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
snapshot.save_tags(tags)
snapshot.save_tags(tag_list)
for extractor, entries in link.history.items():
for entry in entries:
@ -104,10 +111,9 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
snap = write_link_to_sql_index(link)
snap.title = link.title
tag_set = (
set(tag.strip() for tag in (link.tags or '').split(','))
)
tag_list = list(tag_set) or []
tag_list = list(dict.fromkeys(
tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
))
snap.save()
snap.save_tags(tag_list)

View File

@ -108,12 +108,12 @@ def reject_stdin(caller: str, stdin: Optional[IO]=sys.stdin) -> None:
if not stdin.isatty():
# stderr('READING STDIN TO REJECT...')
stdin_raw_text = stdin.read()
if stdin_raw_text:
if stdin_raw_text.strip():
# stderr('GOT STDIN!', len(stdin_str))
stderr(f'[X] The "{caller}" command does not accept stdin.', color='red')
stderr(f'[!] The "{caller}" command does not accept stdin (ignoring).', color='red')
stderr(f' Run archivebox "{caller} --help" to see usage and examples.')
stderr()
raise SystemExit(1)
# raise SystemExit(1)
return None
@ -432,10 +432,16 @@ def log_archive_method_finished(result: "ArchiveResult"):
# Prettify error output hints string and limit to five lines
hints = getattr(result.output, 'hints', None) or ()
if hints:
hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n')
if isinstance(hints, (list, tuple, type(_ for _ in ()))):
hints = [hint.decode() if isinstance(hint, bytes) else hint for hint in hints]
else:
if isinstance(hints, bytes):
hints = hints.decode()
hints = hints.split('\n')
hints = (
' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
for line in hints[:5] if line.strip()
for line in list(hints)[:5] if line.strip()
)
@ -527,11 +533,27 @@ def log_shell_welcome_msg():
### Helpers
@enforce_types
def pretty_path(path: Union[Path, str]) -> str:
def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=OUTPUT_DIR) -> str:
"""convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
pwd = Path('.').resolve()
# parent = os.path.abspath(os.path.join(pwd, os.path.pardir))
return str(path).replace(str(pwd) + '/', './')
pwd = str(Path(pwd)) # .resolve()
path = str(path)
if not path:
return path
# replace long absolute paths with ./ relative ones to save on terminal output width
if path.startswith(pwd) and (pwd != '/'):
path = path.replace(pwd, '.', 1)
# quote paths containing spaces
if ' ' in path:
path = f'"{path}"'
# if path is just a plain dot, replace it back with the absolute path for clarity
if path == '.':
path = pwd
return path
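For example (paths are hypothetical), assuming OUTPUT_DIR='/data':

pretty_path('/data/archive/1699000000')   # -> './archive/1699000000'
pretty_path('/data/my dir/notes.txt')     # -> '"./my dir/notes.txt"'
pretty_path('/data')                      # -> '/data' (a bare '.' is expanded back)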
@enforce_types
@ -566,12 +588,13 @@ def printable_config(config: ConfigDict, prefix: str='') -> str:
def printable_folder_status(name: str, folder: Dict) -> str:
if folder['enabled']:
if folder['is_valid']:
color, symbol, note = 'green', '', 'valid'
color, symbol, note, num_files = 'green', '', 'valid', ''
else:
color, symbol, note, num_files = 'red', 'X', 'invalid', '?'
else:
color, symbol, note, num_files = 'lightyellow', '-', 'disabled', '-'
if folder['path']:
if Path(folder['path']).exists():
num_files = (
@ -582,13 +605,11 @@ def printable_folder_status(name: str, folder: Dict) -> str:
else:
num_files = 'missing'
path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
if path and ' ' in path:
path = f'"{path}"'
if folder.get('is_mount'):
# add symbol @ next to filecount if path is a remote filesystem mount
num_files = f'{num_files} @' if num_files else '@'
# if path is just a plain dot, replace it back with the full path for clarity
if path == '.':
path = str(OUTPUT_DIR)
path = pretty_path(folder['path'])
return ' '.join((
ANSI[color],
@ -619,9 +640,7 @@ def printable_dependency_version(name: str, dependency: Dict) -> str:
else:
color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
path = str(dependency["path"]).replace(str(OUTPUT_DIR), '.') if dependency["path"] else ''
if path and ' ' in path:
path = f'"{path}"'
path = pretty_path(dependency['path'])
return ' '.join((
ANSI[color],

archivebox/main.py Normal file → Executable file
View File

@ -4,8 +4,9 @@ import os
import sys
import shutil
import platform
from django.utils import timezone
from pathlib import Path
from datetime import date
from datetime import date, datetime
from typing import Dict, List, Optional, Iterable, IO, Union
from crontab import CronTab, CronSlices
@ -70,7 +71,13 @@ from .config import (
IS_TTY,
DEBUG,
IN_DOCKER,
IN_QEMU,
PUID,
PGID,
USER,
TIMEZONE,
ENFORCE_ATOMIC_WRITES,
OUTPUT_PERMISSIONS,
PYTHON_BINARY,
ARCHIVEBOX_BINARY,
ONLY_NEW,
@ -90,11 +97,11 @@ from .config import (
check_data_folder,
write_config_file,
VERSION,
COMMIT_HASH,
CODE_LOCATIONS,
EXTERNAL_LOCATIONS,
DATA_LOCATIONS,
DEPENDENCIES,
USE_CHROME,
CHROME_BINARY,
CHROME_VERSION,
YOUTUBEDL_BINARY,
@ -102,12 +109,12 @@ from .config import (
SINGLEFILE_VERSION,
READABILITY_VERSION,
MERCURY_VERSION,
USE_YOUTUBEDL,
USE_NODE,
NODE_VERSION,
load_all_config,
CONFIG,
USER_CONFIG,
ADMIN_USERNAME,
ADMIN_PASSWORD,
get_real_name,
setup_django,
)
@ -207,25 +214,35 @@ def version(quiet: bool=False,
out_dir: Path=OUTPUT_DIR) -> None:
"""Print the ArchiveBox version and dependency information"""
if quiet:
print(VERSION)
else:
# ArchiveBox v0.5.6
# Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
print('ArchiveBox v{}'.format(VERSION))
if not quiet:
# 0.6.3
# ArchiveBox v0.6.3 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
# DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
p = platform.uname()
print(
'ArchiveBox v{}'.format(VERSION),
*((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
sys.implementation.name.title(),
p.system,
platform.platform(),
p.machine,
)
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
print(
f'IN_DOCKER={IN_DOCKER}',
f'DEBUG={DEBUG}',
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={IN_QEMU}',
f'IS_TTY={IS_TTY}',
f'TZ={os.environ.get("TZ", "UTC")}',
f'SEARCH_BACKEND_ENGINE={SEARCH_BACKEND_ENGINE}',
f'TZ={TIMEZONE}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
f'FS_USER={PUID}:{PGID}',
f'FS_PERMS={OUTPUT_PERMISSIONS}',
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
)
print()
@ -233,21 +250,25 @@ def version(quiet: bool=False,
for name, dependency in DEPENDENCIES.items():
print(printable_dependency_version(name, dependency))
# add a newline between core dependencies and extractor dependencies for easier reading
if name == 'ARCHIVEBOX_BINARY':
print()
print()
print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
for name, folder in CODE_LOCATIONS.items():
print(printable_folder_status(name, folder))
for name, path in CODE_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
for name, folder in EXTERNAL_LOCATIONS.items():
print(printable_folder_status(name, folder))
for name, path in EXTERNAL_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
print('{white}[i] Data locations:{reset}'.format(**ANSI))
for name, folder in DATA_LOCATIONS.items():
print(printable_folder_status(name, folder))
for name, path in DATA_LOCATIONS.items():
print(printable_folder_status(name, path))
else:
print()
print('{white}[i] Data locations:{reset}'.format(**ANSI))
@ -403,6 +424,13 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
write_main_index(list(pending_links.values()), out_dir=out_dir)
print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
from django.contrib.auth.models import User
if (ADMIN_USERNAME and ADMIN_PASSWORD) and not User.objects.filter(username=ADMIN_USERNAME).exists():
print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI))
User.objects.create_superuser(username=ADMIN_USERNAME, password=ADMIN_PASSWORD)
if existing_index:
print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
else:
@ -425,7 +453,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
print(' archivebox server # then visit http://127.0.0.1:8000')
print()
print(' To add new links, you can run:')
print(" archivebox add ~/some/path/or/url/to/list_of_links.txt")
print(" archivebox add < ~/some/path/to/list_of_links.txt")
print()
print(' For more usage and examples, run:')
print(' archivebox help')
@ -552,7 +580,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
def add(urls: Union[str, List[str]],
tag: str='',
depth: int=0,
update_all: bool=not ONLY_NEW,
update: bool=not ONLY_NEW,
update_all: bool=False,
index_only: bool=False,
overwrite: bool=False,
# duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically
@ -585,6 +614,7 @@ def add(urls: Union[str, List[str]],
# save verbatim args to sources
write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)
new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser)
# If we're going one level deeper, download each link and look for more links
@ -592,8 +622,11 @@ def add(urls: Union[str, List[str]],
if new_links and depth == 1:
log_crawl_started(new_links)
for new_link in new_links:
try:
downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
except Exception as err:
stderr(f'[!] Failed to get contents of URL {new_link.url}', err, color='red')
imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
@ -616,11 +649,21 @@ def add(urls: Union[str, List[str]],
if extractors:
archive_kwargs["methods"] = extractors
if update_all:
stderr()
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
if update:
stderr(f'[*] [{ts}] Archiving + updating {len(imported_links)}/{len(all_links)} URLs from added set...', color='green')
archive_links(imported_links, overwrite=overwrite, **archive_kwargs)
elif update_all:
stderr(f'[*] [{ts}] Archiving + updating {len(all_links)}/{len(all_links)} URLs from entire library...', color='green')
archive_links(all_links, overwrite=overwrite, **archive_kwargs)
elif overwrite:
stderr(f'[*] [{ts}] Archiving + overwriting {len(imported_links)}/{len(all_links)} URLs from added set...', color='green')
archive_links(imported_links, overwrite=True, **archive_kwargs)
elif new_links:
stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
archive_links(new_links, overwrite=False, **archive_kwargs)
@ -910,7 +953,6 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
stderr('\n[+] Installing enabled ArchiveBox dependencies automatically...', color='green')
stderr('\n Installing YOUTUBEDL_BINARY automatically using pip...')
if USE_YOUTUBEDL:
if YOUTUBEDL_VERSION:
print(f'{YOUTUBEDL_VERSION} is already installed', YOUTUBEDL_BINARY)
else:
@ -927,17 +969,19 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
PYTHON_BINARY, '-m', 'pip',
'show',
'youtube_dl',
], capture_output=True, text=True, cwd=out_dir).stdout.split('Location: ')[-1].split('\n', 1)[0]
], capture_output=True, text=True, cwd=out_dir).stdout.decode().split('Location: ')[-1].split('\n', 1)[0]
NEW_YOUTUBEDL_BINARY = Path(pkg_path) / 'youtube_dl' / '__main__.py'
os.chmod(NEW_YOUTUBEDL_BINARY, 0o777)
assert NEW_YOUTUBEDL_BINARY.exists(), f'youtube_dl must exist inside {pkg_path}'
config(f'YOUTUBEDL_BINARY={NEW_YOUTUBEDL_BINARY}', set=True, out_dir=out_dir)
except BaseException as e:
except BaseException as e: # lgtm [py/catch-base-exception]
stderr(f'[X] Failed to install python packages: {e}', color='red')
raise SystemExit(1)
if platform.machine() == 'armv7l':
stderr('\n Skip the automatic installation of CHROME_BINARY because playwright is not available on armv7.')
else:
stderr('\n Installing CHROME_BINARY automatically using playwright...')
if USE_CHROME:
if CHROME_VERSION:
print(f'{CHROME_VERSION} is already installed', CHROME_BINARY)
else:
@ -955,12 +999,11 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
NEW_CHROME_BINARY = proc.stdout.decode().strip() if isinstance(proc.stdout, bytes) else proc.stdout.strip()
assert NEW_CHROME_BINARY and len(NEW_CHROME_BINARY), 'CHROME_BINARY must contain a path'
config(f'CHROME_BINARY={NEW_CHROME_BINARY}', set=True, out_dir=out_dir)
except BaseException as e:
except BaseException as e: # lgtm [py/catch-base-exception]
stderr(f'[X] Failed to install chromium using playwright: {e.__class__.__name__} {e}', color='red')
raise SystemExit(1)
stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
if USE_NODE:
if not NODE_VERSION:
stderr('[X] You must first install node using your system package manager', color='red')
hint([
@ -985,26 +1028,28 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
elif path.is_file():
os.remove(path)
shutil.copyfile(PACKAGE_DIR / 'package.json', out_dir / 'package.json')
shutil.copyfile(PACKAGE_DIR / 'package.json', out_dir / 'package.json') # copy the js requirements list from the source install into the data dir
# let's blindly assume that calling out to npm via shell works reliably cross-platform 🤡 (until proven otherwise via support tickets)
run_shell([
'npm',
'install',
'--prefix', str(out_dir),
'--force',
'--no-save',
'--no-audit',
'--no-fund',
'--loglevel', 'error',
'--prefix', str(out_dir), # force it to put the node_modules dir in this folder
'--force', # overwrite any existing node_modules
'--no-save', # don't bother saving updates to the package.json or package-lock.json file
'--no-audit', # don't bother checking for newer versions with security vuln fixes
'--no-fund', # hide "please fund our project" messages
'--loglevel', 'error', # only show errors (hide warn/info/debug) during installation
# these args are written in blood, change with caution
], capture_output=False, cwd=out_dir)
os.remove(out_dir / 'package.json')
except BaseException as e:
except BaseException as e: # lgtm [py/catch-base-exception]
stderr(f'[X] Failed to install npm packages: {e}', color='red')
hint(f'Try deleting {out_dir}/node_modules and running it again')
raise SystemExit(1)
stderr('\n[√] Set up ArchiveBox and its dependencies successfully.', color='green')
run_shell([ARCHIVEBOX_BINARY, '--version'], capture_output=False, cwd=out_dir)
run_shell([PYTHON_BINARY, ARCHIVEBOX_BINARY, '--version'], capture_output=False, cwd=out_dir)
@enforce_types
def config(config_options_str: Optional[str]=None,
@ -1112,6 +1157,7 @@ def schedule(add: bool=False,
every: Optional[str]=None,
depth: int=0,
overwrite: bool=False,
update: bool=not ONLY_NEW,
import_path: Optional[str]=None,
out_dir: Path=OUTPUT_DIR):
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
@ -1141,6 +1187,7 @@ def schedule(add: bool=False,
*([
'add',
*(['--overwrite'] if overwrite else []),
*(['--update'] if update else []),
f'--depth={depth}',
f'"{import_path}"',
] if import_path else ['update']),

View File

@ -1 +0,0 @@
../package.json

archivebox/package.json Normal file
View File

@ -0,0 +1,13 @@
{
"name": "archivebox",
"version": "0.7.0",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"repository": "github:ArchiveBox/ArchiveBox",
"license": "MIT",
"dependencies": {
"@postlight/parser": "^2.2.3",
"readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
"single-file-cli": "^1.1.12"
}
}

View File

@ -34,6 +34,7 @@ from ..index.schema import Link
from ..logging_util import TimedProgress, log_source_saved
from . import pocket_api
from . import readwise_reader_api
from . import wallabag_atom
from . import pocket_html
from . import pinboard_rss
@ -51,6 +52,7 @@ from . import url_list
PARSERS = {
# Specialized parsers
pocket_api.KEY: (pocket_api.NAME, pocket_api.PARSER),
readwise_reader_api.KEY: (readwise_reader_api.NAME, readwise_reader_api.PARSER),
wallabag_atom.KEY: (wallabag_atom.NAME, wallabag_atom.PARSER),
pocket_html.KEY: (pocket_html.NAME, pocket_html.PARSER),
pinboard_rss.KEY: (pinboard_rss.NAME, pinboard_rss.PARSER),
@ -149,7 +151,17 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
atomic_write(source_path, raw_text)
referenced_texts = ''
for entry in raw_text.split():
try:
if Path(entry).exists():
referenced_texts += Path(entry).read_text()
except Exception as err:
print(err)
atomic_write(source_path, raw_text + '\n' + referenced_texts)
log_source_saved(source_file=source_path)
return source_path
@ -176,7 +188,7 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
ANSI['reset'],
))
print(' ', e)
raise SystemExit(1)
raise e
else:
# Source is a path to a local file on the filesystem
@ -223,6 +235,10 @@ _test_url_strs = {
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
'<test>http://example7.com</test>': 1,
'https://<test>': 0,
'https://[test]': 0,
'http://"test"': 0,
'http://\'test\'': 0,
'[https://example8.com/what/is/this.php?what=1]': 1,
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
'<what>https://example10.com#and-thing=2 "</about>': 1,

View File

@ -17,7 +17,10 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)"""
json_file.seek(0)
links = json.load(json_file)
# sometimes the first line is a comment or filepath, so we get everything after the first {
json_file_json_str = '{' + json_file.read().split('{', 1)[-1]
links = json.loads(json_file_json_str)
json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z')
for link in links:
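For instance (the sample input below is hypothetical), an export file where a stray first line precedes the JSON object still parses:

import json

raw = '/home/user/Downloads/pinboard_export.json\n{"href": "https://example.com"}'
cleaned = '{' + raw.split('{', 1)[-1]
json.loads(cleaned)   # -> {'href': 'https://example.com'}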

View File

@ -21,13 +21,18 @@ def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
root = ElementTree.parse(rss_file).getroot()
items = root.findall("{http://purl.org/rss/1.0/}item")
for item in items:
find = lambda p: item.find(p).text.strip() if item.find(p) else None # type: ignore
find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None # type: ignore
url = find("{http://purl.org/rss/1.0/}link")
tags = find("{http://purl.org/dc/elements/1.1/}subject")
title = find("{http://purl.org/rss/1.0/}title")
ts_str = find("{http://purl.org/dc/elements/1.1/}date")
if url is None:
# Yielding a Link with no URL will
# crash on a URL validation assertion
continue
# Pinboard includes a colon in its date stamp timezone offsets, which
# Python can't parse. Remove it:
if ts_str and ts_str[-3:-2] == ":":
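A rough sketch of the offset fix (the hunk is truncated here, so the body of the if statement and the timestamp value below are assumptions):

from datetime import datetime

ts_str = '2023-11-08T00:15:50+01:00'
if ts_str and ts_str[-3:-2] == ':':
    ts_str = ts_str[:-3] + ts_str[-2:]   # -> '2023-11-08T00:15:50+0100'
datetime.strptime(ts_str, '%Y-%m-%dT%H:%M:%S%z')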

View File

@ -47,11 +47,11 @@ def get_pocket_articles(api: Pocket, since=None, page=0):
def link_from_article(article: dict, sources: list):
url: str = article['resolved_url'] or article['given_url']
url: str = article.get('resolved_url') or article['given_url']
broken_protocol = _BROKEN_PROTOCOL_RE.match(url)
if broken_protocol:
url = url.replace(f'{broken_protocol.group(1)}:/', f'{broken_protocol.group(1)}://')
title = article['resolved_title'] or article['given_title'] or url
title = article.get('resolved_title') or article.get('given_title') or url
return Link(
url=url,

View File

@ -0,0 +1,123 @@
__package__ = "archivebox.parsers"
import re
import requests
from datetime import datetime
from typing import IO, Iterable, Optional
from configparser import ConfigParser
from pathlib import Path
from ..index.schema import Link
from ..util import enforce_types
from ..system import atomic_write
from ..config import (
SOURCES_DIR,
READWISE_READER_TOKENS,
)
API_DB_PATH = Path(SOURCES_DIR) / "readwise_reader_api.db"
class ReadwiseReaderAPI:
cursor: Optional[str]
def __init__(self, api_token, cursor=None) -> None:
self.api_token = api_token
self.cursor = cursor
def get_archive(self):
response = requests.get(
url="https://readwise.io/api/v3/list/",
headers={"Authorization": f"Token {self.api_token}"},
params={
"location": "archive",
"pageCursor": self.cursor,
}
)
response.raise_for_status()
return response
def get_readwise_reader_articles(api: ReadwiseReaderAPI):
response = api.get_archive()
body = response.json()
articles = body["results"]
yield from articles
if body['nextPageCursor']:
api.cursor = body["nextPageCursor"]
yield from get_readwise_reader_articles(api)
def link_from_article(article: dict, sources: list):
url: str = article['source_url']
title = article["title"] or url
timestamp = datetime.fromisoformat(article['updated_at']).timestamp()
return Link(
url=url,
timestamp=str(timestamp),
title=title,
tags="",
sources=sources,
)
def write_cursor(username: str, since: str):
if not API_DB_PATH.exists():
atomic_write(API_DB_PATH, "")
since_file = ConfigParser()
since_file.optionxform = str
since_file.read(API_DB_PATH)
since_file[username] = {"since": since}
with open(API_DB_PATH, "w+") as new:
since_file.write(new)
def read_cursor(username: str) -> Optional[str]:
if not API_DB_PATH.exists():
atomic_write(API_DB_PATH, "")
config_file = ConfigParser()
config_file.optionxform = str
config_file.read(API_DB_PATH)
return config_file.get(username, "since", fallback=None)
@enforce_types
def should_parse_as_readwise_reader_api(text: str) -> bool:
return text.startswith("readwise-reader://")
@enforce_types
def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse bookmarks from the Readwise Reader API"""
input_buffer.seek(0)
pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
for line in input_buffer:
if should_parse_as_readwise_reader_api(line):
username = pattern.search(line).group(1)
api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
for article in get_readwise_reader_articles(api):
yield link_from_article(article, sources=[line])
if api.cursor:
write_cursor(username, api.cursor)
KEY = "readwise_reader_api"
NAME = "Readwise Reader API"
PARSER = parse_readwise_reader_api_export
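Usage sketch: an import line selects which configured token to use (the username, token value, and config syntax shown are hypothetical):

# in config: READWISE_READER_TOKENS maps usernames to API tokens,
# e.g. {"alice": "XXXXXXXXXXXXXXXX"}
# then adding the line below imports alice's archived Reader articles:
#   readwise-reader://alice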

View File

@ -34,13 +34,19 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
trailing_removed = entry.split('</entry>', 1)[0]
leading_removed = trailing_removed.strip()
rows = leading_removed.split('\n')
splits_fixed = leading_removed.replace('"\n href="', '" href="')
rows = splits_fixed.split('\n')
def get_row(key):
return [r.strip() for r in rows if r.strip().startswith('<{}'.format(key))][0]
def get_row(prefix):
return [
row.strip()
for row in rows
if row.strip().startswith('<{}'.format(prefix))
][0]
title = str_between(get_row('title'), '<title><![CDATA[', ']]></title>').strip()
url = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
url_inside_link = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
url_inside_attr = str_between(get_row('link rel="via"'), 'href="', '"/>')
ts_str = str_between(get_row('published'), '<published>', '</published>')
time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
try:
@ -49,7 +55,7 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
tags = None
yield Link(
url=htmldecode(url),
url=htmldecode(url_inside_attr or url_inside_link),
timestamp=str(time.timestamp()),
title=htmldecode(title) or None,
tags=tags or '',

View File

@ -0,0 +1,195 @@
import codecs
from typing import List, Generator
import sqlite3
from archivebox.util import enforce_types
from archivebox.config import (
FTS_SEPARATE_DATABASE,
FTS_TOKENIZERS,
FTS_SQLITE_MAX_LENGTH
)
FTS_TABLE = "snapshot_fts"
FTS_ID_TABLE = "snapshot_id_fts"
FTS_COLUMN = "texts"
if FTS_SEPARATE_DATABASE:
database = sqlite3.connect("search.sqlite3")
# Make get_connection callable, because `django.db.connection.cursor()`
# has to be called to get a context manager, but sqlite3.Connection
# is a context manager without being called.
def get_connection():
return database
SQLITE_BIND = "?"
else:
from django.db import connection as database # type: ignore[no-redef, assignment]
get_connection = database.cursor
SQLITE_BIND = "%s"
# Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
# so fall back to the default if the API to get the real value isn't present
try:
limit_id = sqlite3.SQLITE_LIMIT_LENGTH
try:
with database.temporary_connection() as cursor: # type: ignore[attr-defined]
SQLITE_LIMIT_LENGTH = cursor.connection.getlimit(limit_id)
except AttributeError:
SQLITE_LIMIT_LENGTH = database.getlimit(limit_id)
except AttributeError:
SQLITE_LIMIT_LENGTH = FTS_SQLITE_MAX_LENGTH
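A minimal check of this fallback logic in isolation (illustrative only):

import sqlite3

conn = sqlite3.connect(':memory:')
try:
    print(conn.getlimit(sqlite3.SQLITE_LIMIT_LENGTH))  # Python >= 3.11, typically 1000000000
except AttributeError:
    print('getlimit() unavailable, falling back to FTS_SQLITE_MAX_LENGTH')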
def _escape_sqlite3(value: str, *, quote: str, errors='strict') -> str:
assert isinstance(quote, str), "quote is not a str"
assert len(quote) == 1, "quote must be a single character"
encodable = value.encode('utf-8', errors).decode('utf-8')
nul_index = encodable.find("\x00")
if nul_index >= 0:
error = UnicodeEncodeError("NUL-terminated utf-8", encodable,
nul_index, nul_index + 1, "NUL not allowed")
error_handler = codecs.lookup_error(errors)
replacement, _ = error_handler(error)
assert isinstance(replacement, str), "handling a UnicodeEncodeError should return a str replacement"
encodable = encodable.replace("\x00", replacement)
return quote + encodable.replace(quote, quote * 2) + quote
def _escape_sqlite3_value(value: str, errors='strict') -> str:
return _escape_sqlite3(value, quote="'", errors=errors)
def _escape_sqlite3_identifier(value: str) -> str:
return _escape_sqlite3(value, quote='"', errors='strict')
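For example, quote characters inside the input are doubled per SQLite's escaping rules:

_escape_sqlite3_value("O'Brien")           # -> "'O''Brien'"
_escape_sqlite3_identifier('some"table')   # -> '"some""table"'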
@enforce_types
def _create_tables():
table = _escape_sqlite3_identifier(FTS_TABLE)
# Escape as value, because fts5() expects
# string literal column names
column = _escape_sqlite3_value(FTS_COLUMN)
id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
tokenizers = _escape_sqlite3_value(FTS_TOKENIZERS)
trigger_name = _escape_sqlite3_identifier(f"{FTS_ID_TABLE}_ad")
with get_connection() as cursor:
# Create a contentless-delete FTS5 table that indexes
# but does not store the texts of snapshots
try:
cursor.execute(
f"CREATE VIRTUAL TABLE {table}"
f" USING fts5({column},"
f" tokenize={tokenizers},"
" content='', contentless_delete=1);"
)
except Exception as e:
msg = str(e)
if 'unrecognized option: "contentless_delete"' in msg:
sqlite_version = getattr(sqlite3, "sqlite_version", "Unknown")
raise RuntimeError(
"SQLite full-text search requires SQLite >= 3.43.0;"
f" the running version is {sqlite_version}"
) from e
else:
raise
# Create a one-to-one mapping between ArchiveBox snapshot_id
# and FTS5 rowid, because the column type of rowid can't be
# customized.
cursor.execute(
f"CREATE TABLE {id_table}("
" rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
" snapshot_id char(32) NOT NULL UNIQUE"
");"
)
# Create a trigger to delete items from the FTS5 index when
# the snapshot_id is deleted from the mapping, to maintain
# consistency and make the `flush()` query simpler.
cursor.execute(
f"CREATE TRIGGER {trigger_name}"
f" AFTER DELETE ON {id_table} BEGIN"
f" DELETE FROM {table} WHERE rowid=old.rowid;"
" END;"
)
def _handle_query_exception(exc: Exception):
message = str(exc)
if message.startswith("no such table:"):
raise RuntimeError(
"SQLite full-text search index has not yet"
" been created; run `archivebox update --index-only`."
)
else:
raise exc
@enforce_types
def index(snapshot_id: str, texts: List[str]):
text = ' '.join(texts)[:SQLITE_LIMIT_LENGTH]
table = _escape_sqlite3_identifier(FTS_TABLE)
column = _escape_sqlite3_identifier(FTS_COLUMN)
id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
with get_connection() as cursor:
retries = 2
while retries > 0:
retries -= 1
try:
# If there is already an FTS index rowid to snapshot_id mapping,
# then don't insert a new one, silently ignoring the operation.
# {id_table}.rowid is AUTOINCREMENT, so will generate an unused
# rowid for the index if it is an unindexed snapshot_id.
cursor.execute(
f"INSERT OR IGNORE INTO {id_table}(snapshot_id) VALUES({SQLITE_BIND})",
[snapshot_id])
# Fetch the FTS index rowid for the given snapshot_id
id_res = cursor.execute(
f"SELECT rowid FROM {id_table} WHERE snapshot_id = {SQLITE_BIND}",
[snapshot_id])
rowid = id_res.fetchone()[0]
# (Re-)index the content
cursor.execute(
"INSERT OR REPLACE INTO"
f" {table}(rowid, {column}) VALUES ({SQLITE_BIND}, {SQLITE_BIND})",
[rowid, text])
# All statements succeeded; return
return
except Exception as e:
if str(e).startswith("no such table:") and retries > 0:
_create_tables()
else:
raise
raise RuntimeError("Failed to create tables for SQLite FTS5 search")
@enforce_types
def search(text: str) -> List[str]:
table = _escape_sqlite3_identifier(FTS_TABLE)
id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
with get_connection() as cursor:
try:
res = cursor.execute(
f"SELECT snapshot_id FROM {table}"
f" INNER JOIN {id_table}"
f" ON {id_table}.rowid = {table}.rowid"
f" WHERE {table} MATCH {SQLITE_BIND}",
[text])
except Exception as e:
_handle_query_exception(e)
snap_ids = [row[0] for row in res.fetchall()]
return snap_ids
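A usage sketch (the query string is hypothetical):

# snap_ids = search('django sqlite')
# Snapshot.objects.filter(id__in=snap_ids)   # resolve FTS hits back to Snapshot rows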
@enforce_types
def flush(snapshot_ids: Generator[str, None, None]):
snapshot_ids = list(snapshot_ids) # type: ignore[assignment]
id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
with get_connection() as cursor:
try:
cursor.executemany(
f"DELETE FROM {id_table} WHERE snapshot_id={SQLITE_BIND}",
[(snapshot_id,) for snapshot_id in snapshot_ids])
except Exception as e:
_handle_query_exception(e)

View File

@ -14,11 +14,11 @@ from crontab import CronTab
from .vendor.atomicwrites import atomic_write as lib_atomic_write
from .util import enforce_types, ExtendedEncoder
from .config import OUTPUT_PERMISSIONS
from .config import PYTHON_BINARY, OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
def run(*args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):
def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):
"""Patched of subprocess.run to kill forked child subprocesses and fix blocking io making timeout=innefective
Mostly copied from https://github.com/python/cpython/blob/master/Lib/subprocess.py
"""
@ -37,7 +37,10 @@ def run(*args, input=None, capture_output=True, timeout=None, check=False, text=
pgid = None
try:
with Popen(*args, start_new_session=start_new_session, **kwargs) as process:
if isinstance(cmd, (list, tuple)) and cmd[0].endswith('.py'):
cmd = (PYTHON_BINARY, *cmd)
with Popen(cmd, *args, start_new_session=start_new_session, **kwargs) as process:
pgid = os.getpgid(process.pid)
try:
stdout, stderr = process.communicate(input, timeout=timeout)
@ -89,14 +92,24 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
elif isinstance(contents, (bytes, str)):
f.write(contents)
except OSError as e:
if ENFORCE_ATOMIC_WRITES:
print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})")
print(" You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,")
print(" but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.")
raise SystemExit(1)
# retry the write without forcing FSYNC (aka atomic mode)
with open(path, mode=mode, encoding=encoding) as f:
if isinstance(contents, dict):
dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
elif isinstance(contents, (bytes, str)):
f.write(contents)
# set file permissions
os.chmod(path, int(OUTPUT_PERMISSIONS, base=8))
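Note the octal-string conversion used here and in chmod_file() below, e.g. (assuming the usual '644'/'755' style defaults):

int('644', base=8)   # -> 420 == 0o644 (rw-r--r--), typical OUTPUT_PERMISSIONS
int('755', base=8)   # -> 493 == 0o755 (rwxr-xr-x), typical DIR_OUTPUT_PERMISSIONS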
@enforce_types
def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) -> None:
def chmod_file(path: str, cwd: str='.') -> None:
"""chmod -R <permissions> <cwd>/<path>"""
root = Path(cwd) / path
@ -104,9 +117,14 @@ def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) ->
raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))
if not root.is_dir():
# path is just a plain file
os.chmod(root, int(OUTPUT_PERMISSIONS, base=8))
else:
for subpath in Path(path).glob('**/*'):
if subpath.is_dir():
# directories need execute permissions to be able to list contents
os.chmod(subpath, int(DIR_OUTPUT_PERMISSIONS, base=8))
else:
os.chmod(subpath, int(OUTPUT_PERMISSIONS, base=8))

View File

@ -197,7 +197,7 @@
// select the action button from the dropdown
container.find('select[name=action]')
.find('option:selected').removeAttr('selected').end()
.find('[selected]').removeAttr('selected').end()
.find('[value=' + action_type + ']').attr('selected', 'selected').click()
// click submit & replace the archivebox logo with a spinner

View File

@ -1,62 +1,3 @@
{% extends "base.html" %}
{% load static %}
{% block body %}
<div id="toolbar">
<form id="changelist-search" action="{% url 'public-index' %}" method="get">
<div>
<label for="searchbar"><img src="/static/admin/img/search.svg" alt="Search"></label>
<input type="text" size="40" name="q" value="" id="searchbar" autofocus placeholder="Title, URL, tags, timestamp, or content...".>
<input type="submit" value="Search" style="height: 36px; padding-top: 6px; margin: 8px"/>
<input type="button"
value="♺"
title="Refresh..."
onclick="location.href='{% url 'public-index' %}'"
style="background-color: rgba(121, 174, 200, 0.8); height: 30px; font-size: 0.8em; margin-top: 12px; padding-top: 6px; float:right">
</input>
</div>
</form>
</div>
<table id="table-bookmarks">
<thead>
<tr>
<th style="width: 100px;">Bookmarked</th>
<th style="width: 26vw;">Snapshot ({{object_list|length}})</th>
<th style="width: 140px">Files</th>
<th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
</tr>
</thead>
<tbody>
{% for link in object_list %}
{% include 'main_index_row.html' with link=link %}
{% endfor %}
</tbody>
</table>
<center>
<span class="step-links">
{% if page_obj.has_previous %}
<a href="{% url 'public-index' %}?page=1">&laquo; first</a>
<a href="{% url 'public-index' %}?page={{ page_obj.previous_page_number }}">previous</a>
{% endif %}
<span class="current">
Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}.
</span>
{% if page_obj.has_next %}
<a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last &raquo;</a>
{% endif %}
</span>
{% if page_obj.has_next %}
<a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last &raquo;</a>
{% endif %}
</span>
<br>
</center>
{% endblock %}
{% extends "admin/base_site.html" %}
{% load i18n admin_urls static admin_list %}
{% load core_tags %}

View File

@ -28,6 +28,14 @@
<a href="/add" id="submit">&nbsp; Add more URLs </a>
</center>
{% else %}
<div id="in-progress" style="display: none;">
<center><h3>Adding URLs to index and running archive methods...</h3>
<br/>
<div class="loader"></div>
<br/>
Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for detailed progress...
</center>
</div>
<form id="add-form" method="POST" class="p-form">{% csrf_token %}
<h1>Add new URLs to your archive</h1>
<br/>
@ -38,20 +46,22 @@
</form>
<br/><br/><br/>
<center id="delay-warning" style="display: none">
<small>(it's safe to leave this page, adding will continue in the background)</small>
<small>(you will be redirected to your <a href="/">Snapshot list</a> momentarily, it's safe to close this page at any time)</small>
</center>
{% if absolute_add_path %}
<center id="bookmarklet">
<!-- <center id="bookmarklet">
<p>Bookmark this link to quickly add to your archive:
<a href="javascript:void(window.open('{{ absolute_add_path }}?url='+document.location.href));">Add to ArchiveBox</a></p>
</center>
<a href="javascript:void(window.open('{{ absolute_add_path }}?url='+encodeURIComponent(document.location.href)));">Add to ArchiveBox</a></p>
</center> -->
{% endif %}
<script>
document.getElementById('add-form').addEventListener('submit', function(event) {
setTimeout(function() {
document.getElementById('add-form').innerHTML = '<center><h3>Adding URLs to index and running archive methods...<h3><br/><div class="loader"></div><br/>Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for progress...</center>'
document.getElementById('in-progress').style.display = 'block'
document.getElementById('add-form').style.display = 'none'
document.getElementById('delay-warning').style.display = 'block'
}, 200)
setTimeout(function() {
window.location = '/'
}, 2000)
return true
})
</script>

View File

@@ -36,7 +36,7 @@
{% endif %}
</span>
</td>
<td style="text-align:left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; title="{{link.url}}">
<td style="text-align:left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;" title="{{link.url}}">
<a href="{{link.url}}">
{{link.url}}
</a>

View File

@@ -414,17 +414,21 @@
</div>
</div>
{% endif %}
{% if PREVIEW_ORIGINALS %}
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
<iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy" referrerpolicy="no-referrer"></iframe>
<div class="card-body">
<a href="{{url}}" title="Open in new tab..." target="_blank" rel="noopener">
<a href="{{url}}" title="Open in new tab..." target="_blank" rel="noopener" referrerpolicy="no-referrer">
<p class="card-text"><code>🌐 {{domain}}</code></p>
</a>
<a href="{{url}}" target="preview" id="original-btn"><h4 class="card-title">Original</h4></a>
<a href="{{url}}" target="preview" id="original-btn" referrerpolicy="no-referrer">
<h4 class="card-title">Original</h4>
</a>
</div>
</div>
</div>
{% endif %}
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="{{headers_path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>

View File

@@ -17,6 +17,8 @@ from requests.exceptions import RequestException, ReadTimeout
from .vendor.base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
from os.path import lexists
from os import remove as remove_file
try:
import chardet
@@ -59,7 +61,7 @@ URL_REGEX = re.compile(
r'(?=('
r'http[s]?://' # start matching from allowed schemes
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols
r'|[-_$@.&+!*\(\),]' # or allowed symbols (keep hyphen first to match literal hyphen)
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
r'[^\]\[\(\)<>"\'\s]+' # stop parsing at these symbols
r'))',
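The symbol-class reordering above matters because a bare hyphen between two characters inside a regex character class denotes a range, not a literal. A minimal sketch (simplified classes, not the full URL_REGEX):

import re

# In the old class [$-_@.&+], the '$-_' span is a range covering every ASCII
# character from '$' (0x24) through '_' (0x5F), silently matching digits,
# uppercase letters, '=' and more.
range_class = re.compile(r'[$-_]')
# Moving the hyphen to the front, as in the fixed class, makes it literal.
literal_class = re.compile(r'[-_$@.&+!*(),]')

print(bool(range_class.fullmatch('=')))     # True: '=' falls inside the accidental range
print(bool(literal_class.fullmatch('=')))   # False: only the listed symbols match
print(bool(literal_class.fullmatch('-')))   # True: hyphen now matches literally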
@@ -219,7 +221,7 @@ def get_headers(url: str, timeout: int=None) -> str:
def chrome_args(**options) -> List[str]:
"""helper to build up a chrome shell command with arguments"""
from .config import CHROME_OPTIONS
from .config import CHROME_OPTIONS, CHROME_VERSION
options = {**CHROME_OPTIONS, **options}
@@ -229,6 +231,10 @@ def chrome_args(**options) -> List[str]:
cmd_args = [options['CHROME_BINARY']]
if options['CHROME_HEADLESS']:
chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
if chrome_major_version >= 111:
cmd_args += ("--headless=new",)
else:
cmd_args += ('--headless',)
if not options['CHROME_SANDBOX']:
@@ -236,12 +242,18 @@ def chrome_args(**options) -> List[str]:
# in docker, GPU support is limited, sandboxing is unnecessary,
# and SHM is limited to 64MB by default (which is too low to be usable).
cmd_args += (
'--no-sandbox',
'--disable-gpu',
'--disable-dev-shm-usage',
'--disable-software-rasterizer',
'--run-all-compositor-stages-before-draw',
'--hide-scrollbars',
"--no-sandbox",
"--no-zygote",
"--disable-dev-shm-usage",
"--disable-software-rasterizer",
"--run-all-compositor-stages-before-draw",
"--hide-scrollbars",
"--window-size=1440,2000",
"--autoplay-policy=no-user-gesture-required",
"--no-first-run",
"--use-fake-ui-for-media-stream",
"--use-fake-device-for-media-stream",
"--disable-sync",
)
@@ -254,14 +266,24 @@ def chrome_args(**options) -> List[str]:
if options['RESOLUTION']:
cmd_args += ('--window-size={}'.format(options['RESOLUTION']),)
if options['TIMEOUT']:
cmd_args += ('--timeout={}'.format(options['TIMEOUT'] * 1000),)
if options['CHROME_TIMEOUT']:
cmd_args += ('--timeout={}'.format(options['CHROME_TIMEOUT'] * 1000),)
if options['CHROME_USER_DATA_DIR']:
cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))
return cmd_args
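For illustration, a hedged sketch of the headless-mode gate inside chrome_args() above (the version string is an assumed example; the real value comes from CHROME_VERSION in archivebox.config):

import re

CHROME_VERSION = 'Chromium 116.0.5845.180'   # example value, assumed

# Pull the major version out of the `chromium --version` style string.
chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
assert chrome_major_version == 116

# Chrome/Chromium >= 111 ships the rewritten headless implementation behind
# --headless=new; older builds only understand the legacy --headless flag.
flag = '--headless=new' if chrome_major_version >= 111 else '--headless'
print(flag)   # --headless=new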
def chrome_cleanup():
"""
Cleans up any state or runtime files that chrome leaves behind when killed by
a timeout or other error
"""
from .config import IN_DOCKER
if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
remove_file("/home/archivebox/.config/chromium/SingletonLock")
def ansi_to_html(text):
"""

assets/README.md Normal file (5 lines)
View File

@@ -0,0 +1,5 @@
# assets/
This folder contains assets used by the Jekyll Static Site Generator for ArchiveBox.io.
It cannot be moved or renamed or the custom CSS on ArchiveBox.io will break.

View File

@@ -1,11 +1,13 @@
#!/bin/bash
#!/usr/bin/env bash
# DEPRECATED: this helper exists for backwards compatibility with <v0.4 only
# Do not use this to call archivebox, instead use the archivebox binary directly.
if python3 -m django --version >/dev/null 2>&1; then
python3 -m archivebox "$*"
else
echo '[X] ArchiveBox must be installed before using:'
echo " pip install archivebox"
echo '[X] ArchiveBox not found, is it installed and present in your $PATH?'
echo ' pip3 install archivebox'
echo
echo "Hint: Did you forget to activate a virtuenv or set your $$PATH?"
echo 'Hint: Did you forget to activate a virtualenv?'
exit 2
fi

View File

@@ -23,7 +23,24 @@ fi
cd "$REPO_DIR/brew_dist"
# make sure archivebox.rb is up-to-date with the dependencies
git pull
git status | grep 'up to date'
echo "[+] Building Homebrew bottle"
brew install --build-bottle ./archivebox.rb
echo
echo "[+] Uninstalling any exisitng archivebox versions..."
brew uninstall archivebox || true
brew untap archivebox/archivebox || true
# echo "[*] Running Formula linters and test build..."
# brew test-bot --tap=ArchiveBox/homebrew-archivebox archivebox/archivebox/archivebox || true
# brew uninstall archivebox || true
# brew untap archivebox/archivebox || true
echo
echo "[+] Installing and building hombrew bottle from https://Github.com/ArchiveBox/homebrew-archivebox#main"
brew tap archivebox/archivebox
brew install --build-bottle archivebox
brew bottle archivebox
echo
echo "[√] Finished. Make sure to commit the outputted .tar.gz and bottle files!"

bin/build_dev.sh Executable file (35 lines)
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# ./bin/build_docker.sh dev 'linux/arm/v7'
### Bash Environment Setup
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
set -o errexit
set -o errtrace
set -o nounset
set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
cd "$REPO_DIR"
which docker > /dev/null || exit 1
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
REQUIRED_PLATFORMS="${2:-"linux/arm64,linux/amd64,linux/arm/v7"}"
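For illustration, the SHORT_VERSION derivation above just drops the patch component from the semver string; a rough Python equivalent of the perl one-liner:

import re

def short_version(version: str) -> str:
    # perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g' keeps only major.minor
    return re.sub(r'(\d+)\.(\d+)\.(\d+)', r'\1.\2', version)

assert short_version('0.7.1') == '0.7'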
echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$REQUIRED_PLATFORMS"
echo "[+] Building archivebox:$VERSION docker image..."
# docker builder prune
docker build . --no-cache -t archivebox-dev --load
# docker buildx build --platform "$REQUIRED_PLATFORMS" --load . \
# -t archivebox \
# -t archivebox:$TAG_NAME \
# -t archivebox:$VERSION \
# -t archivebox:$SHORT_VERSION

View File

@@ -1,4 +1,5 @@
#!/usr/bin/env bash
# ./bin/build_docker.sh dev 'linux/arm/v7'
### Bash Environment Setup
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
@@ -11,23 +12,83 @@ set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
cd "$REPO_DIR"
which docker > /dev/null
which docker > /dev/null || exit 1
which jq > /dev/null || exit 1
# which pdm > /dev/null || exit 1
SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
function check_platforms() {
INSTALLED_PLATFORMS="$(docker buildx inspect | grep 'Platforms:' )"
for REQUIRED_PLATFORM in ${SELECTED_PLATFORMS//,/$IFS}; do
echo "[+] Checking for: $REQUIRED_PLATFORM..."
if ! (echo "$INSTALLED_PLATFORMS" | grep -q "$REQUIRED_PLATFORM"); then
return 1
fi
done
echo
return 0
}
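A rough Python equivalent of check_platforms() (behavior inferred from the shell above): parse `docker buildx inspect` and confirm every selected platform appears on the builder's "Platforms:" line.

import subprocess

SELECTED_PLATFORMS = 'linux/amd64,linux/arm64,linux/arm/v7'

def check_platforms(selected: str = SELECTED_PLATFORMS) -> bool:
    output = subprocess.run(
        ['docker', 'buildx', 'inspect'],
        capture_output=True, text=True, check=True,
    ).stdout
    installed = next(
        (line for line in output.splitlines() if line.strip().startswith('Platforms:')),
        '',
    )
    return all(platform in installed for platform in selected.split(','))

if not check_platforms():
    print('builder is missing some platforms, recreate it')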
function remove_builder() {
# remove existing xbuilder
docker buildx stop xbuilder || true
docker buildx rm xbuilder || true
}
function create_builder() {
docker buildx use xbuilder && return 0
echo "[+] Creating new xbuilder for: $SELECTED_PLATFORMS"
echo
# Switch to buildx builder if already present / previously created
docker buildx create --name xbuilder --driver docker-container --bootstrap --use --platform "$SELECTED_PLATFORMS" || true
docker buildx inspect --bootstrap || true
}
function recreate_builder() {
# Install QEMU binaries for cross-platform building if not installed
docker run --privileged --rm 'tonistiigi/binfmt' --install all
remove_builder
create_builder
}
# Check if docker is ready for cross-platform builds, if not, recreate builder
docker buildx use xbuilder >/dev/null 2>&1 || create_builder
check_platforms || (recreate_builder && check_platforms) || exit 1
# Build python package lists
echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..."
pdm lock --group=':all' --strategy="cross_platform" --production
pdm export --group=':all' --production --without-hashes -o requirements.txt
echo "[+] Building archivebox:$VERSION docker image..."
docker build . -t archivebox \
-t archivebox:latest \
-t archivebox:$VERSION \
-t archivebox:$SHORT_VERSION \
-t docker.io/nikisweeting/archivebox:latest \
-t docker.io/nikisweeting/archivebox:$VERSION \
-t docker.io/nikisweeting/archivebox:$SHORT_VERSION \
-t docker.io/archivebox/archivebox:latest \
-t docker.io/archivebox/archivebox:$VERSION \
-t docker.io/archivebox/archivebox:$SHORT_VERSION \
-t docker.pkg.github.com/archivebox/archivebox/archivebox:latest \
-t docker.pkg.github.com/archivebox/archivebox/archivebox:$VERSION \
-t docker.pkg.github.com/archivebox/archivebox/archivebox:$SHORT_VERSION
# docker builder prune
# docker build . --no-cache -t archivebox-dev \
# replace --load with --push to deploy
docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
-t archivebox/archivebox \
-t archivebox/archivebox:$TAG_NAME \
-t archivebox/archivebox:$VERSION \
-t archivebox/archivebox:$SHORT_VERSION \
-t archivebox/archivebox:latest \
-t nikisweeting/archivebox \
-t nikisweeting/archivebox:$TAG_NAME \
-t nikisweeting/archivebox:$VERSION \
-t nikisweeting/archivebox:$SHORT_VERSION \
-t nikisweeting/archivebox:latest \
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION

View File

@@ -15,17 +15,25 @@ REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && p
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
source "$REPO_DIR/.venv/bin/activate"
else
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR/.venv, creating one now..."
python3 -m venv --system-site-packages --symlinks $REPO_DIR/.venv
fi
cd "$REPO_DIR"
echo "[*] Cleaning up build dirs"
cd "$REPO_DIR"
rm -Rf build dist
echo "[+] Building sdist, bdist_wheel, and egg_info"
python3 setup.py \
sdist --dist-dir=./pip_dist \
bdist_wheel --dist-dir=./pip_dist \
egg_info --egg-base=./pip_dist
rm -f archivebox/package.json
cp package.json archivebox/package.json
pdm self update
pdm install
pdm build
pdm export --without-hashes -o ./pip_dist/requirements.txt
cp dist/* ./pip_dist/
echo
echo "[√] Finished. Don't forget to commit the new sdist and wheel files in ./pip_dist/"

View File

@@ -1,45 +1,75 @@
#!/usr/bin/env bash
#!/bin/bash
DATA_DIR="${DATA_DIR:-/data}"
ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
export DATA_DIR="${DATA_DIR:-/data}"
export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
# default PUID and PGID if data dir is empty and no PUID+PGID is set
export DEFAULT_PUID=911
export DEFAULT_PGID=911
# Set the archivebox user UID & GID
if [[ -n "$PUID" && "$PUID" != 0 ]]; then
usermod -u "$PUID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
fi
if [[ -n "$PGID" && "$PGID" != 0 ]]; then
groupmod -g "$PGID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
fi
# if the data directory already exists, autodetect the owner by looking at files within
export DETECTED_UID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
export DETECTED_GID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
# Set the archivebox user to use the configured UID & GID
# prefers PUID and PGID env vars passed in explicitly, falls back to autodetected defaults
groupmod -o -g "${PUID:-$DETECTED_UID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
usermod -o -u "${PGID:-$DETECTED_GID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
# Set the permissions of the data dir to match the archivebox user
# re-set PUID and PGID to values reported by system instead of values we tried to set,
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
export PUID="$(id -u archivebox)"
export PGID="$(id -g archivebox)"
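A hedged Python sketch of the autodetection above: adopt the owner of a file that already exists in the data dir, otherwise fall back to the default UID (the probe path mirrors the stat call above):

import os

DEFAULT_PUID = 911

def detect_uid(probe_path: str = '/data/logs/errors.log') -> int:
    try:
        return os.stat(probe_path).st_uid   # same value `stat -c '%u'` prints
    except FileNotFoundError:
        return DEFAULT_PUID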
# Check the permissions of the data dir (or create if it doesn't exist)
if [[ -d "$DATA_DIR/archive" ]]; then
# check data directory permissions
if [[ ! "$(stat -c %u $DATA_DIR/archive)" = "$(id -u archivebox)" ]]; then
echo "Change in ownership detected, please be patient while we chown existing files"
echo "This could take some time..."
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER -R "$DATA_DIR"
if touch "$DATA_DIR/archive/.permissions_test_safe_to_delete" 2>/dev/null; then
# It's fine, we are able to write to the data directory (as root inside the container)
rm -f "$DATA_DIR/archive/.permissions_test_safe_to_delete"
# echo "[√] Permissions are correct"
else
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
echo -e " \$ chown -R $PUID:$PGID ./data\n" >&2
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" >&2
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
exit 1
fi
else
# create data directory
mkdir -p "$DATA_DIR/logs"
chown -R $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
fi
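A hedged sketch of the touch-based probe above: creating and deleting a sentinel file is the most reliable test that the mounted volume is actually writable, since permission bits alone can lie on NFS/overlay mounts:

import os

def data_dir_writable(archive_dir: str = '/data/archive') -> bool:
    probe = os.path.join(archive_dir, '.permissions_test_safe_to_delete')
    try:
        open(probe, 'w').close()   # equivalent of the `touch` above
        os.remove(probe)
        return True
    except OSError:
        return False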
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
# force set the ownership of the data dir contents to the archivebox user and group
# this is needed because Docker Desktop often does not map user permissions from the host properly
chown $PUID:$PGID "$DATA_DIR"
chown $PUID:$PGID "$DATA_DIR"/*
# also chown BROWSERS_DIR because otherwise 'archivebox setup' won't be able to install chrome
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
chown $PUID:$PGID "${PLAYWRIGHT_BROWSERS_PATH}/*"
# (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
export IN_QEMU="$(pmap 1 | grep qemu | wc -l | grep -E '^0$' >/dev/null && echo 'False' || echo 'True')"
if [[ "$IN_QEMU" == 'True' ]]; then
echo -e "\n[!] Warning: Running $(uname -m) emulated container in QEMU, some things will break!" >&2
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." >&2
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" >&2
fi
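A hedged Python sketch of the same QEMU check: `pmap 1` reads PID 1's memory maps, which reference a qemu binary when the container is running under emulation.

def in_qemu() -> bool:
    try:
        with open('/proc/1/maps') as f:   # what `pmap 1` reads under the hood
            return 'qemu' in f.read()
    except OSError:
        return False

print(in_qemu())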
# Drop permissions to run commands as the archivebox user
if [[ "$1" == /* || "$1" == "echo" || "$1" == "archivebox" ]]; then
# arg 1 is a binary, execute it verbatim
# e.g. "archivebox init"
# "/bin/bash"
# "echo"
exec gosu "$ARCHIVEBOX_USER" bash -c "$*"
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
# handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
# e.g. "docker run archivebox /venv/bin/archivebox-alt init"
# "docker run archivebox /bin/bash -c '...'"
# "docker run archivebox echo test"
exec gosu "$PUID" bash -c "$*"
else
# no command given, assume args were meant to be passed to archivebox cmd
# e.g. "add https://example.com"
# "manage createsupseruser"
# "server 0.0.0.0:8000"
exec gosu "$ARCHIVEBOX_USER" bash -c "archivebox $*"
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
# e.g. "docker run archivebox add --depth=1 https://example.com"
# "docker run archivebox manage createsupseruser"
# "docker run archivebox server 0.0.0.0:8000"
exec gosu "$PUID" bash -c "archivebox $*"
fi
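A hedged Python sketch of the dispatch logic above: absolute paths and a small allowlist of shells/utilities run verbatim, anything else is treated as an archivebox subcommand (both branches then drop privileges via gosu):

import sys

PASSTHROUGH = {'bash', 'sh', 'echo', 'cat', 'archivebox'}

def build_command(args):
    if args and (args[0].startswith('/') or args[0] in PASSTHROUGH):
        return ' '.join(args)                  # e.g. "/bin/bash -c '...'"
    return 'archivebox ' + ' '.join(args)      # e.g. "archivebox add https://example.com"

print(build_command(sys.argv[1:]))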

View File

@@ -1,4 +1,15 @@
#!/bin/bash
#!/usr/bin/env bash
# Helper script to export browser history and bookmarks to a format ArchiveBox can ingest.
# Usage:
# curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/export_browser_history.sh'
# bash export_browser_history.sh --chrome
# bash export_browser_history.sh --firefox
# bash export_browser_history.sh --safari
# ls
# chrome_history.json
# firefox_history.json
# firefox_bookmarks.json
# safari_history.json
OUTPUT_DIR="$(pwd)"
@@ -16,9 +27,9 @@ if [[ "$1" == "--chrome" ]]; then
sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_time, 'description', title, 'href', url)) || \"]\" FROM urls;" > "$OUTPUT_DIR/chrome_history.json"
jq < "$(dirname "${2:-$default}")"/Bookmarks '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' > "$OUTPUT_DIR/chrome_bookmarks.json"
rm "$DATA_DIR"/output/sources/chrome_history.db.*
rm "$OUTPUT_DIR"/chrome_history.db.*
echo "Chrome history exported to:"
echo " output/sources/chrome_history.json"
echo " $OUTPUT_DIR/chrome_history.json"
fi
if [[ "$1" == "--firefox" ]]; then
@@ -33,12 +44,29 @@ if [[ "$1" == "--firefox" ]]; then
fi
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || \"]\" FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json"
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', b.dateAdded, 'description', b.title, 'href', f.url)) || \"]\" FROM moz_bookmarks AS b JOIN moz_places AS f ON f.id = b.fk" > "$OUTPUT_DIR/firefox_bookmarks.json"
rm "$DATA_DIR"/output/sources/firefox_history.db.*
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "
with recursive tags AS (
select id, title, '' AS tags
FROM moz_bookmarks
where parent == 0
UNION ALL
select c.id, p.title, c.title || ',' || tags AS tags
from moz_bookmarks AS c
JOIN tags AS p
ON c.parent = p.id
)
SELECT '[' || group_concat(json_object('timestamp', b.dateAdded, 'description', b.title, 'href', f.url, 'tags', tags.tags)) || ']'
FROM moz_bookmarks AS b
JOIN moz_places AS f ON f.id = b.fk
JOIN tags ON tags.id = b.parent
WHERE f.url LIKE '%://%';" > "$OUTPUT_DIR/firefox_bookmarks.json"
rm "$OUTPUT_DIR"/firefox_history.db.*
echo "Firefox history exported to:"
echo " output/sources/firefox_history.json"
echo " output/sources/firefox_bookmarks.json"
echo " $OUTPUT_DIR/firefox_history.json"
echo " $OUTPUT_DIR/firefox_bookmarks.json"
fi
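The recursive CTE above walks the moz_bookmarks folder tree so each bookmark carries its folder path as comma-separated tags. A hedged sketch of the same query via Python's sqlite3 module (database file name assumed, plain columns instead of the json_object() output used by the shell version):

import sqlite3

QUERY = """
WITH RECURSIVE tags AS (
    SELECT id, title, '' AS tags FROM moz_bookmarks WHERE parent == 0
    UNION ALL
    SELECT c.id, p.title, c.title || ',' || tags AS tags
    FROM moz_bookmarks AS c JOIN tags AS p ON c.parent = p.id
)
SELECT b.dateAdded, b.title, f.url, tags.tags
FROM moz_bookmarks AS b
JOIN moz_places AS f ON f.id = b.fk
JOIN tags ON tags.id = b.parent
WHERE f.url LIKE '%://%';
"""

with sqlite3.connect('firefox_history.db.tmp') as db:
    for date_added, title, url, tag_path in db.execute(QUERY):
        print(date_added, title, url, tag_path)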
if [[ "$1" == "--safari" ]]; then
@@ -54,7 +82,7 @@ if [[ "$1" == "--safari" ]]; then
sqlite3 "$OUTPUT_DIR/safari_history.db.tmp" "select url from history_items" > "$OUTPUT_DIR/safari_history.json"
rm "$DATA_DIR"/output/sources/safari_history.db.*
rm "$OUTPUT_DIR"/safari_history.db.*
echo "Safari history exported to:"
echo " output/sources/safari_history.json"
echo " $OUTPUT_DIR/safari_history.json"
fi

View File

@@ -15,7 +15,8 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
source "$DIR/.venv/bin/activate"
echo "[*] Running flake8..."
flake8 archivebox && echo "√ No errors found."
cd archivebox
flake8 . && echo "√ No errors found."
echo

View File

@@ -11,9 +11,20 @@ set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
CURRENT_PLATFORM="$(uname)"
REQUIRED_PLATFORM="Darwin"
if [[ "$CURRENT_PLATFORM" != "$REQUIRED_PLATFORM" ]]; then
echo "[!] Skipping the Homebrew package build on $CURRENT_PLATFORM (it can only be run on $REQUIRED_PLATFORM)."
exit 0
fi
cd "$REPO_DIR"
# TODO
exit 0
./bin/build_brew.sh
git add '*.bottle.tar.gz'
git commit -m "add new release bottle"
git pull
git push

View File

@@ -11,15 +11,35 @@ set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
cd "$REPO_DIR"
SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
# echo "[*] Logging in to Docker Hub & Github Container Registry"
# docker login --username=nikisweeting
# docker login ghcr.io --username=pirate
echo "[^] Building docker image"
./bin/build_docker.sh "$TAG_NAME" "$SELECTED_PLATFORMS"
echo "[^] Uploading docker image"
# docker login --username=nikisweeting
# docker login docker.pkg.github.com --username=pirate
docker push archivebox/archivebox:$VERSION archivebox/archivebox:$SHORT_VERSION archivebox/archivebox:latest
docker push docker.io/nikisweeting/archivebox
docker push docker.io/archivebox/archivebox
docker push docker.pkg.github.com/archivebox/archivebox/archivebox
docker buildx build --platform "$SELECTED_PLATFORMS" --push . \
-t archivebox/archivebox \
-t archivebox/archivebox:$TAG_NAME \
-t archivebox/archivebox:$VERSION \
-t archivebox/archivebox:$SHORT_VERSION \
-t archivebox/archivebox:latest \
-t nikisweeting/archivebox \
-t nikisweeting/archivebox:$TAG_NAME \
-t nikisweeting/archivebox:$VERSION \
-t nikisweeting/archivebox:$SHORT_VERSION \
-t nikisweeting/archivebox:latest \
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION

View File

@@ -11,17 +11,11 @@ set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
cd "$REPO_DIR"
source "$REPO_DIR/.venv/bin/activate"
echo "[^] Publishing to Test PyPI..."
pdm publish --repository testpypi
# apt install python3 python3-all python3-dev
# pip install '.[dev]'
echo "[^] Uploading to test.pypi.org"
python3 -m twine upload --repository testpypi pip_dist/archivebox-${VERSION}*.{whl,tar.gz}
echo "[^] Uploading to pypi.org"
python3 -m twine upload --repository pypi pip_dist/archivebox-${VERSION}*.{whl,tar.gz}
echo "[^] Publishing to PyPI..."
pdm publish --no-build

View File

@@ -1,120 +1,200 @@
#!/usr/bin/env bash
# ArchiveBox Setup Script
# https://github.com/ArchiveBox/ArchiveBox
#!/usr/bin/env sh
# ArchiveBox Setup Script: https://github.com/ArchiveBox/ArchiveBox
# Supported Platforms: Ubuntu/Debian/FreeBSD/macOS
# Usage:
# curl -sSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/setup.sh' | sh
clear
if [ $(id -u) -eq 0 ]; then
echo ""
echo "[X] You cannot run this script as root. You must run it as a non-root user with sudo ability."
echo " Create a new non-privileged user 'archivebox' if necessary."
echo " adduser archivebox && usermod -a archivebox -G sudo && su archivebox"
echo " https://www.digitalocean.com/community/tutorials/how-to-create-a-new-sudo-enabled-user-on-ubuntu-20-04-quickstart"
echo " https://www.vultr.com/docs/create-a-sudo-user-on-freebsd"
echo " Then re-run this script as the non-root user."
echo ""
exit 2
fi
if (which docker-compose > /dev/null && docker pull archivebox/archivebox:latest); then
echo "[+] Initializing an ArchiveBox data folder at ~/archivebox/data using Docker Compose..."
mkdir -p ~/archivebox
cd ~/archivebox
mkdir -p data
if [ -f "./index.sqlite3" ]; then
mv ~/archivebox/* ~/archivebox/data/
fi
curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
docker-compose run --rm archivebox init --setup
echo
echo "[+] Starting ArchiveBox server using: docker-compose up -d..."
docker-compose up -d
sleep 7
open http://127.0.0.1:8000 || true
echo
echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox/data. Usage:"
echo " cd ~/archivebox"
echo " docker-compose ps"
echo " docker-compose down"
echo " docker-compose pull"
echo " docker-compose up"
echo " docker-compose run archivebox manage createsuperuser"
echo " docker-compose run archivebox add 'https://example.com'"
echo " docker-compose run archivebox list"
echo " docker-compose run archivebox help"
exit 0
elif (which docker > /dev/null && docker pull archivebox/archivebox:latest); then
echo "[+] Initializing an ArchiveBox data folder at ~/archivebox using Docker..."
mkdir -p ~/archivebox
cd ~/archivebox
if [ -f "./data/index.sqlite3" ]; then
cd ./data
fi
docker run -v "$PWD":/data -it --rm archivebox/archivebox:latest init --setup
echo
echo "[+] Starting ArchiveBox server using: docker run -d archivebox/archivebox..."
docker run -v "$PWD":/data -it -d -p 8000:8000 --name=archivebox archivebox/archivebox:latest
sleep 7
open http://127.0.0.1:8000 || true
echo
echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox. Usage:"
echo " cd ~/archivebox"
echo " docker ps --filter name=archivebox"
echo " docker kill archivebox"
echo " docker pull archivebox/archivebox"
echo " docker run -v $PWD:/data -d -p 8000:8000 --name=archivebox archivebox/archivebox"
echo " docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser"
echo " docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'"
echo " docker run -v $PWD:/data -it archivebox/archivebox list"
echo " docker run -v $PWD:/data -it archivebox/archivebox help"
exit 0
fi
echo ""
echo "[!] It's highly recommended to use ArchiveBox with Docker, but Docker wasn't found."
echo ""
echo " ⚠️ If you want to use Docker, press [Ctrl-C] to cancel now. ⚠️"
echo " Get Docker: https://docs.docker.com/get-docker/"
echo " After you've installed Docker, run this script again."
echo ""
echo "Otherwise, install will continue with apt/brew/pip in 12s... (press [Ctrl+C] to cancel)"
echo ""
sleep 12 || exit 1
echo "Proceeding with system package manager..."
echo ""
echo "[i] ArchiveBox Setup Script 📦"
echo ""
echo " This is a helper script which installs the ArchiveBox dependencies on your system using homebrew/aptitude."
echo " You may be prompted for a password in order to install the following:"
echo " This is a helper script which installs the ArchiveBox dependencies on your system using brew/apt/pip3."
echo " You may be prompted for a sudo password in order to install the following:"
echo ""
echo " - python3, python3-pip, python3-distutils"
echo " - curl"
echo " - wget"
echo " - git"
echo " - youtube-dl"
echo " - chromium-browser (skip this if Chrome/Chromium is already installed)"
echo " - nodejs (used for singlefile, readability, mercury, and more)"
echo " - archivebox"
echo " - python3, pip, nodejs, npm (languages used by ArchiveBox, and its extractor modules)"
echo " - curl, wget, git, youtube-dl, yt-dlp (used for extracting title, favicon, git, media, and more)"
echo " - chromium (skips this if any Chrome/Chromium version is already installed)"
echo ""
echo " If you'd rather install these manually, you can find documentation here:"
echo " If you'd rather install these manually as-needed, you can find detailed documentation here:"
echo " https://github.com/ArchiveBox/ArchiveBox/wiki/Install"
echo ""
read -p "Press [enter] to continue with the automatic install, or Ctrl+C to cancel..." REPLY
echo "Continuing in 12s... (press [Ctrl+C] to cancel)"
echo ""
sleep 12 || exit 1
echo "Proceeding to install dependencies..."
echo ""
# On Linux:
if which apt-get > /dev/null; then
echo "[+] Adding ArchiveBox apt repo to sources..."
sudo apt install software-properties-common
sudo add-apt-repository -u ppa:archivebox/archivebox
echo "[+] Installing python3, wget, curl..."
sudo apt install -y git python3 python3-pip python3-distutils wget curl youtube-dl nodejs npm ripgrep
# sudo apt install archivebox
if which google-chrome; then
echo "[i] You already have google-chrome installed, if you would like to download chromium instead (they work pretty much the same), follow the Manual Setup instructions"
google-chrome --version
elif which chromium-browser; then
echo "[i] chromium-browser already installed, using existing installation."
chromium-browser --version
elif which chromium; then
echo "[i] chromium already installed, using existing installation."
chromium --version
else
echo "[+] Installing chromium..."
sudo apt install chromium || sudo apt install chromium-browser
echo "[+] Adding ArchiveBox apt repo and signing key to sources..."
if ! (sudo apt install -y software-properties-common && sudo add-apt-repository -u ppa:archivebox/archivebox); then
echo "deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main" | sudo tee /etc/apt/sources.list.d/archivebox.list
echo "deb-src http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main" | sudo tee -a /etc/apt/sources.list.d/archivebox.list
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C258F79DCC02E369
sudo apt-get update -qq
fi
echo
echo "[+] Installing ArchiveBox system dependencies using apt..."
sudo apt-get install -y git python3 python3-pip python3-distutils wget curl youtube-dl yt-dlp ffmpeg git nodejs npm ripgrep
sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb libgbm-dev || sudo apt-get install -y chromium || sudo apt-get install -y chromium-browser || true
sudo apt-get install -y archivebox
sudo apt-get --only-upgrade install -y archivebox
echo ""
echo "[+] Installing ArchiveBox python dependencies using pip3..."
sudo python3 -m pip install --upgrade --ignore-installed archivebox
# On Mac:
elif which brew > /dev/null; then # 🐍 eye of newt
echo "[+] Installing python3, wget, curl (ignore 'already installed' warnings)..."
brew install git wget curl youtube-dl ripgrep node
if which python3; then
if python3 -c 'import sys; raise SystemExit(sys.version_info < (3,5,0))'; then
echo "[√] Using existing $(which python3)..."
elif which brew > /dev/null; then
echo "[+] Installing ArchiveBox system dependencies using brew..."
brew tap archivebox/archivebox
brew update
brew install --fetch-HEAD -f archivebox
echo ""
echo "[+] Installing ArchiveBox python dependencies using pip3..."
python3 -m pip install --upgrade --ignore-installed archivebox
elif which pkg > /dev/null; then
echo "[+] Installing ArchiveBox system dependencies using pkg and pip (python3.9)..."
sudo pkg install -y python3 py39-pip py39-sqlite3 npm wget curl youtube_dl ffmpeg git ripgrep
sudo pkg install -y chromium
echo ""
echo "[+] Installing ArchiveBox python dependencies using pip..."
# don't use sudo here so that pip installs in $HOME/.local instead of into /usr/local
python3 -m pip install --upgrade --ignore-installed archivebox
else
echo "[+] Installing python3..."
brew install python3
fi
else
echo "[+] Installing python3..."
brew install python3
fi
if ls /Applications/Google\ Chrome*.app > /dev/null; then
echo "[√] Using existing /Applications/Google Chrome.app"
elif ls /Applications/Chromium.app; then
echo "[√] Using existing /Applications/Chromium.app"
elif which chromium-browser; then
echo "[√] Using existing $(which chromium-browser)"
elif which chromium; then
echo "[√] Using existing $(which chromium)"
else
echo "[+] Installing chromium..."
brew cask install chromium
fi
else
echo "[X] Could not find aptitude or homebrew! ‼️"
echo "[!] Warning: Could not find aptitude/homebrew/pkg! May not be able to install all dependencies automatically."
echo ""
echo " If you're on macOS, make sure you have homebrew installed: https://brew.sh/"
echo " If you're on Ubuntu/Debian, make sure you have apt installed: https://help.ubuntu.com/lts/serverguide/apt.html"
echo " (those are the only currently supported systems for the automatic setup script)"
echo " If you're on Linux, only Ubuntu/Debian/BSD systems are officially supported with this script."
echo " If you're on Windows, this script is not officially supported (Docker is recommeded instead)."
echo ""
echo "See the README.md for Manual Setup & Troubleshooting instructions."
echo "See the README.md for Manual Setup & Troubleshooting instructions if you you're unable to run ArchiveBox after this script completes."
fi
echo ""
if ! (python3 --version && python3 -m pip --version && python3 -m django --version); then
echo "[X] Python 3 pip was not found on your system!"
echo " You must first install Python >= 3.7 (and pip3):"
echo " https://www.python.org/downloads/"
echo " https://wiki.python.org/moin/BeginnersGuide/Download"
echo " After installing, run this script again."
exit 1
fi
npm i -g npm
pip3 install --upgrade pip setuptools
pip3 install --upgrade archivebox
npm install -g 'git+https://github.com/ArchiveBox/ArchiveBox.git'
# Check:
if ! (python3 -m django --version && python3 -m archivebox version --quiet); then
echo "[X] Django and ArchiveBox were not found after installing!"
echo " Check to see if a previous step failed."
echo ""
echo "[*] Checking installed versions:"
echo "---------------------------------------------------"
which python3 &&
python3 --version | head -n 1 &&
echo "" &&
which git &&
git --version | head -n 1 &&
echo "" &&
which wget &&
wget --version | head -n 1 &&
echo "" &&
which curl &&
curl --version | head -n 1 &&
echo "" &&
which youtube-dl &&
youtube-dl --version | head -n 1 &&
echo "---------------------------------------------------" &&
archivebox version &&
echo "[√] All dependencies installed. ✅" &&
exit 0
echo "---------------------------------------------------"
echo "[X] Failed to install some dependencies! ‼️"
echo " - Try the Manual Setup instructions in the README.md"
echo " - Try the Troubleshooting: Dependencies instructions in the README.md"
echo " - Open an issue on github to get help: https://github.com/ArchiveBox/ArchiveBox/issues"
exit 1
fi
# echo ""
# echo "[+] Upgrading npm and pip..."
# sudo npm i -g npm || true
# sudo python3 -m pip install --upgrade pip setuptools || true
echo
echo "[+] Initializing ArchiveBox data folder at ~/archivebox..."
mkdir -p ~/archivebox
cd ~/archivebox
if [ -f "./data/index.sqlite3" ]; then
cd ./data
fi
: | python3 -m archivebox init --setup || true # pipe in empty command to make sure stdin is closed
echo
echo "[+] Starting ArchiveBox server using: nohup archivebox server &..."
nohup python3 -m archivebox server 0.0.0.0:8000 > ./logs/server.log 2>&1 &
sleep 7
which open > /dev/null && open http://127.0.0.1:8000 || true
echo
echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox. Usage:"
echo " cd ~/archivebox"
echo " ps aux | grep archivebox"
echo " pkill -f archivebox"
echo " python3 -m pip install --upgrade archivebox"
echo " archivebox server --quick-init 0.0.0.0:8000"
echo " archivebox manage createsuperuser"
echo " archivebox add 'https://example.com'"
echo " archivebox list"
echo " archivebox help"

@@ -1 +1 @@
Subproject commit 95a1c1a0875841d076f06106bd4c2307504928c2
Subproject commit ec64946796d46a31e5a4d6449908b4060edb3079

@@ -1 +1 @@
Subproject commit f8e3a0247c09a2f9aaea2848ee7da9c486e14669
Subproject commit 88e4b7e5a6c8998f781e45be1e264c48f3ed5e65

docker (submodule)

@@ -1 +1 @@
Subproject commit 236f7881e3105b218864d9b3185b17c44b306106
Subproject commit 2cbe77e39c275b4a5a274ff7e75c0b13b39a9dbe

View File

@@ -1,72 +1,133 @@
# Usage:
# docker-compose run archivebox init --setup
# docker-compose up
# echo "https://example.com" | docker-compose run archivebox archivebox add
# docker-compose run archivebox add --depth=1 https://example.com/some/feed.rss
# docker-compose run archivebox config --set PUBLIC_INDEX=True
# docker-compose run archivebox help
# docker compose run archivebox init --setup
# docker compose up
# echo "https://example.com" | docker compose run archivebox archivebox add
# docker compose run archivebox add --depth=1 https://example.com/some/feed.rss
# docker compose run archivebox config --set MEDIA_MAX_SIZE=750m
# docker compose run archivebox help
# Documentation:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
version: '2.4'
version: '3.9'
services:
archivebox:
# build: . # for developers working on archivebox
image: ${DOCKER_IMAGE:-archivebox/archivebox:master}
image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
command: server --quick-init 0.0.0.0:8000
ports:
- 8000:8000
environment:
- ALLOWED_HOSTS=* # add any config options you want as env vars
- MEDIA_MAX_SIZE=750m
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these if you enable sonic below
# - SEARCH_BACKEND_HOST_NAME=sonic
# - SEARCH_BACKEND_PASSWORD=SecretPassword
volumes:
- ./data:/data
# - ./archivebox:/app/archivebox # for developers working on archivebox
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
environment:
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
# - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
# - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo
# - ADMIN_PASSWORD=SomeSecretPassword
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=911
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
# - SEARCH_BACKEND_HOST_NAME=sonic
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# ...
# add further configuration options from archivebox/config.py as needed (to apply them only to this container)
# or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers)
# For ad-blocking during archiving, uncomment this section and pihole service section below
# networks:
# - dns
# dns:
# - 172.20.0.53
######## Optional Addons: tweak examples below as needed for your specific use case ########
### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
# $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
# After starting, backfill any existing Snapshots into the full-text index:
# $ docker-compose run archivebox update --index-only
# To run the Sonic full-text search backend, first download the config file to sonic.cfg
# curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
# after starting, backfill any existing Snapshots into the index: docker-compose run archivebox update --index-only
# sonic:
# image: valeriansaliou/sonic:v1.3.0
# image: valeriansaliou/sonic:latest
# expose:
# - 1491
# environment:
# - SEARCH_BACKEND_PASSWORD=SecretPassword
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# volumes:
# - ./sonic.cfg:/etc/sonic.cfg:ro
# - ./data/sonic:/var/lib/sonic/store
### Optional Addons: tweak these examples as needed for your specific use case
### Example: To run pihole in order to block ad/tracker requests during archiving,
# uncomment this block and set up pihole using its admin interface
# Example: Run scheduled imports in a docker instead of using cron on the
# host machine, add tasks and see more info with archivebox schedule --help
# scheduler:
# image: archivebox/archivebox:latest
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all'
# pihole:
# image: pihole/pihole:latest
# ports:
# - 127.0.0.1:8090:80 # uncomment to access the admin HTTP interface on http://localhost:8090
# environment:
# - USE_COLOR=True
# - SHOW_PROGRESS=False
# - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
# - DNSMASQ_LISTENING=all
# dns:
# - 127.0.0.1
# - 1.1.1.1
# networks:
# dns:
# ipv4_address: 172.20.0.53
# volumes:
# - ./etc/pihole:/etc/pihole
# - ./etc/dnsmasq:/etc/dnsmasq.d
### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container
# $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml'
# then restart the scheduler container to apply the changes to the schedule
# $ docker compose restart archivebox_scheduler
# archivebox_scheduler:
# image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
# command: schedule --foreground
# environment:
# - MEDIA_MAX_SIZE=750m # increase this number to allow archiving larger audio/video files
# # - TIMEOUT=60 # increase if you see timeouts often during archiving / on slow networks
# # - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them
# # - CHECK_SSL_VALIDITY=True # set to False to allow saving URLs w/ broken SSL certs
# # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting URLs to Archive.org when archiving
# # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues
# # - PGID=20
# volumes:
# - ./data:/data
# - ./etc/crontabs:/var/spool/cron/crontabs
# # cpus: 2 # uncomment / edit these values to limit container resource consumption
# # mem_limit: 2048m
# # shm_size: 1024m
### Example: Put Nginx in front of the ArchiveBox server for SSL termination
# Example: Put Nginx in front of the ArchiveBox server for SSL termination
# nginx:
# image: nginx:alpine
# ports:
# - 443:443
# - 80:80
# volumes:
# - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf
# - ./etc/nginx.conf:/etc/nginx/nginx.conf
# - ./data:/var/www
# Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
# wireguard:
# image: linuxserver/wireguard
# image: linuxserver/wireguard:latest
# network_mode: 'service:archivebox'
# cap_add:
# - NET_ADMIN
@@ -78,14 +139,26 @@ services:
# - /lib/modules:/lib/modules
# - ./wireguard.conf:/config/wg0.conf:ro
# Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
# pywb:
# image: webrecorder/pywb:latest
# entrypoint: /bin/sh 'wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback --proxy;'
# entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;'
# environment:
# - INIT_COLLECTION=archivebox
# ports:
# - 8080:8080
# volumes:
# ./data:/archivebox
# ./data/wayback:/webarchive
# - ./data:/archivebox
# - ./data/wayback:/webarchive
networks:
# network needed for pihole container to offer :53 dns resolving on fixed ip for archivebox container
dns:
ipam:
driver: default
config:
- subnet: 172.20.0.0/24

View File

@@ -55,7 +55,7 @@
# CURL_BINARY = curl
# GIT_BINARY = git
# WGET_BINARY = wget
# YOUTUBEDL_BINARY = youtube-dl
# YOUTUBEDL_BINARY = yt-dlp
# CHROME_BINARY = chromium
# CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"

View File

@@ -2,7 +2,7 @@
In this folder are some example config files you can use for setting up ArchiveBox on your machine.
E.g. see `etc/nginx` for an example nginx config to serve your archive, or `etc/cron` for an example cron job that crawls a feed every 24 hours.
E.g. see `nginx.conf` for an example nginx config to serve your archive with SSL, or `fly.toml` for an example deployment to the Fly.io hosting platform.
Please contribute your etc files here! Example contributions are welcome.

etc/archivebox.service Normal file (29 lines)
View File

@@ -0,0 +1,29 @@
# This is an example systemd service config definition for ArchiveBox.
#
# Link it into place on your system to use systemd to auto-start the ArchiveBox server on boot:
# https://unix.stackexchange.com/questions/224992/where-do-i-put-my-systemd-unit-file
#
# Review and change these lines as-needed for your specific environment and needs:
# WorkingDirectory, ExecStart, User, Group
[Unit]
Description=Open source self-hosted web archiving
Documentation=https://github.com/ArchiveBox/ArchiveBox/wiki
[Service]
Type=simple
WorkingDirectory=/home/archivebox/archivebox/
ExecStart=/usr/local/bin/archivebox server 0.0.0.0:8000
ExecReload=/bin/kill -s HUP $MAINPID
ExecStop=/bin/kill -s QUIT $MAINPID
Restart=always
RestartSec=2
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=archivebox
User=archivebox
Group=archivebox
[Install]
WantedBy=multi-user.target

View File

@@ -1 +0,0 @@
0 24 * * * www-data /opt/ArchiveBox/bin/archive "https://getpocket.com/users/example/feed/all" >> /var/log/ArchiveBox.log

etc/fly.toml Normal file (40 lines)
View File

@@ -0,0 +1,40 @@
# fly.toml file generated for archivebox on 2021-04-23T16:35:11-04:00
app = "archivebox"
kill_signal = "SIGINT"
kill_timeout = 5
[env]
[mounts]
source="archivebox_data"
destination="/data"
[experimental]
auto_rollback = true
[[services]]
http_checks = []
internal_port = 8000
protocol = "tcp"
script_checks = []
[services.concurrency]
hard_limit = 25
soft_limit = 20
type = "connections"
[[services.ports]]
handlers = ["http"]
port = 80
[[services.ports]]
handlers = ["tls", "http"]
port = 443
[[services.tcp_checks]]
grace_period = "1s"
interval = "15s"
restart_limit = 6
timeout = "2s"

View File

@@ -35,11 +35,13 @@ http {
listen 80 default_server;
server_name _;
root /var/www;
index index.html;
autoindex on;
try_files $uri $uri/ $uri.html =404;
location /archive {
root /var/www/archive;
}
}
}

package-lock.json generated (3224 lines changed)

File diff suppressed because it is too large

View File

@@ -1,13 +1,13 @@
{
"name": "archivebox",
"version": "0.6.2",
"version": "0.7.1",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"repository": "github:ArchiveBox/ArchiveBox",
"license": "MIT",
"dependencies": {
"@postlight/mercury-parser": "^2.2.0",
"@postlight/parser": "^2.2.3",
"readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
"single-file": "git+https://github.com/gildas-lormeau/SingleFile.git"
"single-file-cli": "^1.1.12"
}
}

pdm.lock Normal file (920 lines)
View File

@@ -0,0 +1,920 @@
# This file is @generated by PDM.
# It is not intended for manual editing.
[metadata]
groups = ["default", "ldap", "sonic"]
strategy = ["cross_platform"]
lock_version = "4.4"
content_hash = "sha256:3355b57d87304093c9176a6387d80f5c5226b169964d8039f14a5998046faf4d"
[[package]]
name = "appnope"
version = "0.1.3"
summary = "Disable App Nap on macOS >= 10.9"
files = [
{file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"},
{file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
]
[[package]]
name = "asgiref"
version = "3.7.2"
requires_python = ">=3.7"
summary = "ASGI specs, helper code, and adapters"
dependencies = [
"typing-extensions>=4; python_version < \"3.11\"",
]
files = [
{file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"},
{file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"},
]
[[package]]
name = "asttokens"
version = "2.4.1"
summary = "Annotate AST trees with source code positions"
dependencies = [
"six>=1.12.0",
]
files = [
{file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"},
{file = "asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0"},
]
[[package]]
name = "brotli"
version = "1.1.0"
summary = "Python bindings for the Brotli compression library"
files = [
{file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"},
{file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"},
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ae56aca0402a0f9a3431cddda62ad71666ca9d4dc3a10a142b9dce2e3c0cda3"},
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43ce1b9935bfa1ede40028054d7f48b5469cd02733a365eec8a329ffd342915d"},
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c4855522edb2e6ae7fdb58e07c3ba9111e7621a8956f481c68d5d979c93032e"},
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38025d9f30cf4634f8309c6874ef871b841eb3c347e90b0851f63d1ded5212da"},
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6a904cb26bfefc2f0a6f240bdf5233be78cd2488900a2f846f3c3ac8489ab80"},
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"},
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"},
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"},
{file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"},
{file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"},
{file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"},
{file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6"},
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd"},
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf"},
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61"},
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327"},
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd"},
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"},
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"},
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"},
{file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"},
{file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"},
{file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"},
{file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"},
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"},
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91"},
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408"},
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0"},
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc"},
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"},
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"},
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"},
{file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"},
{file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"},
{file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"},
{file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7905193081db9bfa73b1219140b3d315831cbff0d8941f22da695832f0dd188f"},
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a77def80806c421b4b0af06f45d65a136e7ac0bdca3c09d9e2ea4e515367c7e9"},
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dadd1314583ec0bf2d1379f7008ad627cd6336625d6679cf2f8e67081b83acf"},
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:901032ff242d479a0efa956d853d16875d42157f98951c0230f69e69f9c09bac"},
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:22fc2a8549ffe699bfba2256ab2ed0421a7b8fadff114a3d201794e45a9ff578"},
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae15b066e5ad21366600ebec29a7ccbc86812ed267e4b28e860b8ca16a2bc474"},
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"},
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"},
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"},
{file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"},
{file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"},
{file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"},
]

[[package]]
name = "brotlicffi"
version = "1.1.0.0"
requires_python = ">=3.7"
summary = "Python CFFI bindings to the Brotli library"
dependencies = [
"cffi>=1.0.0",
]
files = [
{file = "brotlicffi-1.1.0.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9b7ae6bd1a3f0df532b6d67ff674099a96d22bc0948955cb338488c31bfb8851"},
{file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19ffc919fa4fc6ace69286e0a23b3789b4219058313cf9b45625016bf7ff996b"},
{file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9feb210d932ffe7798ee62e6145d3a757eb6233aa9a4e7db78dd3690d7755814"},
{file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84763dbdef5dd5c24b75597a77e1b30c66604725707565188ba54bab4f114820"},
{file = "brotlicffi-1.1.0.0-cp37-abi3-win32.whl", hash = "sha256:1b12b50e07c3911e1efa3a8971543e7648100713d4e0971b13631cce22c587eb"},
{file = "brotlicffi-1.1.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:994a4f0681bb6c6c3b0925530a1926b7a189d878e6e5e38fae8efa47c5d9c613"},
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2e4aeb0bd2540cb91b069dbdd54d458da8c4334ceaf2d25df2f4af576d6766ca"},
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b7b0033b0d37bb33009fb2fef73310e432e76f688af76c156b3594389d81391"},
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54a07bb2374a1eba8ebb52b6fafffa2afd3c4df85ddd38fcc0511f2bb387c2a8"},
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7901a7dc4b88f1c1475de59ae9be59799db1007b7d059817948d8e4f12e24e35"},
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce01c7316aebc7fce59da734286148b1d1b9455f89cf2c8a4dfce7d41db55c2d"},
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:246f1d1a90279bb6069de3de8d75a8856e073b8ff0b09dcca18ccc14cec85979"},
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc4bc5d82bc56ebd8b514fb8350cfac4627d6b0743382e46d033976a5f80fab6"},
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37c26ecb14386a44b118ce36e546ce307f4810bc9598a6e6cb4f7fca725ae7e6"},
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca72968ae4eaf6470498d5c2887073f7efe3b1e7d7ec8be11a06a79cc810e990"},
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:add0de5b9ad9e9aa293c3aa4e9deb2b61e99ad6c1634e01d01d98c03e6a354cc"},
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9b6068e0f3769992d6b622a1cd2e7835eae3cf8d9da123d7f51ca9c1e9c333e5"},
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8557a8559509b61e65083f8782329188a250102372576093c88930c875a69838"},
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a7ae37e5d79c5bdfb5b4b99f2715a6035e6c5bf538c3746abc8e26694f92f33"},
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391151ec86bb1c683835980f4816272a87eaddc46bb91cbf44f62228b84d8cca"},
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2f3711be9290f0453de8eed5275d93d286abe26b08ab4a35d7452caa1fef532f"},
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a807d760763e398bbf2c6394ae9da5815901aa93ee0a37bca5efe78d4ee3171"},
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa8ca0623b26c94fccc3a1fdd895be1743b838f3917300506d04aa3346fd2a14"},
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3de0cf28a53a3238b252aca9fed1593e9d36c1d116748013339f0949bfc84112"},
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be5ec0e88a4925c91f3dea2bb0013b3a2accda6f77238f76a34a1ea532a1cb0"},
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d9eb71bb1085d996244439154387266fd23d6ad37161f6f52f1cd41dd95a3808"},
{file = "brotlicffi-1.1.0.0.tar.gz", hash = "sha256:b77827a689905143f87915310b93b273ab17888fd43ef350d4832c4a71083c13"},
]

[[package]]
name = "certifi"
version = "2023.7.22"
requires_python = ">=3.6"
summary = "Python package for providing Mozilla's CA Bundle."
files = [
{file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
{file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
]

[[package]]
name = "cffi"
version = "1.16.0"
requires_python = ">=3.8"
summary = "Foreign Function Interface for Python calling C code."
dependencies = [
"pycparser",
]
files = [
{file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
{file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"},
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"},
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"},
{file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"},
{file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"},
{file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"},
{file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"},
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"},
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"},
{file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"},
{file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"},
{file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"},
{file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"},
{file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"},
{file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"},
{file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"},
{file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"},
{file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"},
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"},
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"},
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"},
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"},
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"},
{file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"},
{file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"},
{file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"},
{file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"},
{file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"},
]

[[package]]
name = "charset-normalizer"
version = "3.3.2"
requires_python = ">=3.7.0"
summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
files = [
{file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"},
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"},
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"},
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"},
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"},
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"},
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"},
{file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"},
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"},
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"},
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"},
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"},
{file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"},
{file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"},
{file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"},
{file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"},
{file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"},
{file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"},
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"},
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"},
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"},
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"},
{file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"},
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"},
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"},
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"},
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"},
{file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"},
{file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"},
{file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"},
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"},
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"},
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"},
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"},
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"},
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"},
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"},
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"},
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"},
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"},
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"},
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"},
{file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"},
{file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"},
{file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"},
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"},
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"},
{file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"},
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"},
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"},
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"},
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"},
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"},
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"},
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"},
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"},
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"},
{file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"},
{file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"},
{file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"},
{file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
]

[[package]]
name = "colorama"
version = "0.4.6"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
summary = "Cross-platform colored terminal text."
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]

[[package]]
name = "croniter"
version = "2.0.1"
requires_python = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "croniter provides iteration for datetime object with cron like format"
dependencies = [
"python-dateutil",
"pytz>2021.1",
]
files = [
{file = "croniter-2.0.1-py2.py3-none-any.whl", hash = "sha256:4cb064ce2d8f695b3b078be36ff50115cf8ac306c10a7e8653ee2a5b534673d7"},
{file = "croniter-2.0.1.tar.gz", hash = "sha256:d199b2ec3ea5e82988d1f72022433c5f9302b3b3ea9e6bfd6a1518f6ea5e700a"},
]

[[package]]
name = "dateparser"
version = "1.1.8"
requires_python = ">=3.7"
summary = "Date parsing library designed to parse dates from HTML pages"
dependencies = [
"python-dateutil",
"pytz",
"regex!=2019.02.19,!=2021.8.27",
"tzlocal",
]
files = [
{file = "dateparser-1.1.8-py2.py3-none-any.whl", hash = "sha256:070b29b5bbf4b1ec2cd51c96ea040dc68a614de703910a91ad1abba18f9f379f"},
{file = "dateparser-1.1.8.tar.gz", hash = "sha256:86b8b7517efcc558f085a142cdb7620f0921543fcabdb538c8a4c4001d8178e3"},
]

[[package]]
name = "decorator"
version = "5.1.1"
requires_python = ">=3.5"
summary = "Decorators for Humans"
files = [
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]

[[package]]
name = "django"
version = "3.1.14"
requires_python = ">=3.6"
summary = "A high-level Python Web framework that encourages rapid development and clean, pragmatic design."
dependencies = [
"asgiref<4,>=3.2.10",
"pytz",
"sqlparse>=0.2.2",
]
files = [
{file = "Django-3.1.14-py3-none-any.whl", hash = "sha256:0fabc786489af16ad87a8c170ba9d42bfd23f7b699bd5ef05675864e8d012859"},
{file = "Django-3.1.14.tar.gz", hash = "sha256:72a4a5a136a214c39cf016ccdd6b69e2aa08c7479c66d93f3a9b5e4bb9d8a347"},
]

[[package]]
name = "django-auth-ldap"
version = "4.1.0"
requires_python = ">=3.7"
summary = "Django LDAP authentication backend."
dependencies = [
"Django>=2.2",
"python-ldap>=3.1",
]
files = [
{file = "django-auth-ldap-4.1.0.tar.gz", hash = "sha256:77f749d3b17807ce8eb56a9c9c8e5746ff316567f81d5ba613495d9c7495a949"},
{file = "django_auth_ldap-4.1.0-py3-none-any.whl", hash = "sha256:68870e7921e84b1a9867e268a9c8a3e573e8a0d95ea08bcf31be178f5826ff36"},
]

[[package]]
name = "django-extensions"
version = "3.1.5"
requires_python = ">=3.6"
summary = "Extensions for Django"
dependencies = [
"Django>=2.2",
]
files = [
{file = "django-extensions-3.1.5.tar.gz", hash = "sha256:28e1e1bf49f0e00307ba574d645b0af3564c981a6dfc87209d48cb98f77d0b1a"},
{file = "django_extensions-3.1.5-py3-none-any.whl", hash = "sha256:9238b9e016bb0009d621e05cf56ea8ce5cce9b32e91ad2026996a7377ca28069"},
]

[[package]]
name = "exceptiongroup"
version = "1.1.3"
requires_python = ">=3.7"
summary = "Backport of PEP 654 (exception groups)"
files = [
{file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
{file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
]

[[package]]
name = "executing"
version = "2.0.1"
requires_python = ">=3.5"
summary = "Get the currently executing AST node of a frame, and other information"
files = [
{file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"},
{file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"},
]

[[package]]
name = "idna"
version = "3.4"
requires_python = ">=3.5"
summary = "Internationalized Domain Names in Applications (IDNA)"
files = [
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
]

[[package]]
name = "ipython"
version = "8.17.2"
requires_python = ">=3.9"
summary = "IPython: Productive Interactive Computing"
dependencies = [
"appnope; sys_platform == \"darwin\"",
"colorama; sys_platform == \"win32\"",
"decorator",
"exceptiongroup; python_version < \"3.11\"",
"jedi>=0.16",
"matplotlib-inline",
"pexpect>4.3; sys_platform != \"win32\"",
"prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30",
"pygments>=2.4.0",
"stack-data",
"traitlets>=5",
"typing-extensions; python_version < \"3.10\"",
]
files = [
{file = "ipython-8.17.2-py3-none-any.whl", hash = "sha256:1e4d1d666a023e3c93585ba0d8e962867f7a111af322efff6b9c58062b3e5444"},
{file = "ipython-8.17.2.tar.gz", hash = "sha256:126bb57e1895594bb0d91ea3090bbd39384f6fe87c3d57fd558d0670f50339bb"},
]

[[package]]
name = "jedi"
version = "0.19.1"
requires_python = ">=3.6"
summary = "An autocompletion tool for Python that can be used for text editors."
dependencies = [
"parso<0.9.0,>=0.8.3",
]
files = [
{file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"},
{file = "jedi-0.19.1.tar.gz", hash = "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd"},
]

[[package]]
name = "matplotlib-inline"
version = "0.1.6"
requires_python = ">=3.5"
summary = "Inline Matplotlib backend for Jupyter"
dependencies = [
"traitlets",
]
files = [
{file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"},
{file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"},
]

[[package]]
name = "mutagen"
version = "1.47.0"
requires_python = ">=3.7"
summary = "read and write audio tags for many formats"
files = [
{file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"},
{file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"},
]

[[package]]
name = "mypy-extensions"
version = "1.0.0"
requires_python = ">=3.5"
summary = "Type system extensions for programs checked with the mypy type checker."
files = [
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]

[[package]]
name = "parso"
version = "0.8.3"
requires_python = ">=3.6"
summary = "A Python Parser"
files = [
{file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"},
{file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"},
]

[[package]]
name = "pexpect"
version = "4.8.0"
summary = "Pexpect allows easy control of interactive console applications."
dependencies = [
"ptyprocess>=0.5",
]
files = [
{file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"},
{file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"},
]

[[package]]
name = "prompt-toolkit"
version = "3.0.39"
requires_python = ">=3.7.0"
summary = "Library for building powerful interactive command lines in Python"
dependencies = [
"wcwidth",
]
files = [
{file = "prompt_toolkit-3.0.39-py3-none-any.whl", hash = "sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88"},
{file = "prompt_toolkit-3.0.39.tar.gz", hash = "sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac"},
]

[[package]]
name = "ptyprocess"
version = "0.7.0"
summary = "Run a subprocess in a pseudo terminal"
files = [
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
{file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
]

[[package]]
name = "pure-eval"
version = "0.2.2"
summary = "Safely evaluate AST nodes without side effects"
files = [
{file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"},
{file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"},
]

[[package]]
name = "pyasn1"
version = "0.5.0"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
summary = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
files = [
{file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"},
{file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"},
]

[[package]]
name = "pyasn1-modules"
version = "0.3.0"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
summary = "A collection of ASN.1-based protocols modules"
dependencies = [
"pyasn1<0.6.0,>=0.4.6",
]
files = [
{file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"},
{file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"},
]

[[package]]
name = "pycparser"
version = "2.21"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "C parser in Python"
files = [
{file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
]

[[package]]
name = "pycryptodomex"
version = "3.19.0"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
summary = "Cryptographic library for Python"
files = [
{file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:a77b79852175064c822b047fee7cf5a1f434f06ad075cc9986aa1c19a0c53eb0"},
{file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5b883e1439ab63af976656446fb4839d566bb096f15fc3c06b5a99cde4927188"},
{file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3866d68e2fc345162b1b9b83ef80686acfe5cec0d134337f3b03950a0a8bf56"},
{file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74eb1f73f788facece7979ce91594dc177e1a9b5d5e3e64697dd58299e5cb4d"},
{file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cb51096a6a8d400724104db8a7e4f2206041a1f23e58924aa3d8d96bcb48338"},
{file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a588a1cb7781da9d5e1c84affd98c32aff9c89771eac8eaa659d2760666f7139"},
{file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:d4dd3b381ff5a5907a3eb98f5f6d32c64d319a840278ceea1dcfcc65063856f3"},
{file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:263de9a96d2fcbc9f5bd3a279f14ea0d5f072adb68ebd324987576ec25da084d"},
{file = "pycryptodomex-3.19.0-cp35-abi3-win32.whl", hash = "sha256:67c8eb79ab33d0fbcb56842992298ddb56eb6505a72369c20f60bc1d2b6fb002"},
{file = "pycryptodomex-3.19.0-cp35-abi3-win_amd64.whl", hash = "sha256:09c9401dc06fb3d94cb1ec23b4ea067a25d1f4c6b7b118ff5631d0b5daaab3cc"},
{file = "pycryptodomex-3.19.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:edbe083c299835de7e02c8aa0885cb904a75087d35e7bab75ebe5ed336e8c3e2"},
{file = "pycryptodomex-3.19.0-pp27-pypy_73-win32.whl", hash = "sha256:136b284e9246b4ccf4f752d435c80f2c44fc2321c198505de1d43a95a3453b3c"},
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5d73e9fa3fe830e7b6b42afc49d8329b07a049a47d12e0ef9225f2fd220f19b2"},
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f1982c5bc311f0aab8c293524b861b485d76f7c9ab2c3ac9a25b6f7655975"},
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb040b5dda1dff1e197d2ef71927bd6b8bfcb9793bc4dfe0bb6df1e691eaacb"},
{file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:800a2b05cfb83654df80266692f7092eeefe2a314fa7901dcefab255934faeec"},
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c01678aee8ac0c1a461cbc38ad496f953f9efcb1fa19f5637cbeba7544792a53"},
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2126bc54beccbede6eade00e647106b4f4c21e5201d2b0a73e9e816a01c50905"},
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b801216c48c0886742abf286a9a6b117e248ca144d8ceec1f931ce2dd0c9cb40"},
{file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:50cb18d4dd87571006fd2447ccec85e6cec0136632a550aa29226ba075c80644"},
{file = "pycryptodomex-3.19.0.tar.gz", hash = "sha256:af83a554b3f077564229865c45af0791be008ac6469ef0098152139e6bd4b5b6"},
]

[[package]]
name = "pygments"
version = "2.16.1"
requires_python = ">=3.7"
summary = "Pygments is a syntax highlighting package written in Python."
files = [
{file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
{file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
]

[[package]]
name = "python-crontab"
version = "3.0.0"
summary = "Python Crontab API"
dependencies = [
"python-dateutil",
]
files = [
{file = "python-crontab-3.0.0.tar.gz", hash = "sha256:79fb7465039ddfd4fb93d072d6ee0d45c1ac8bf1597f0686ea14fd4361dba379"},
{file = "python_crontab-3.0.0-py3-none-any.whl", hash = "sha256:6d5ba3c190ec76e4d252989a1644fcb233dbf53fbc8fceeb9febe1657b9fb1d4"},
]

[[package]]
name = "python-dateutil"
version = "2.8.2"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
summary = "Extensions to the standard Python datetime module"
dependencies = [
"six>=1.5",
]
files = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]

[[package]]
name = "python-ldap"
version = "3.4.3"
requires_python = ">=3.6"
summary = "Python modules for implementing LDAP clients"
dependencies = [
"pyasn1-modules>=0.1.5",
"pyasn1>=0.3.7",
]
files = [
{file = "python-ldap-3.4.3.tar.gz", hash = "sha256:ab26c519a0ef2a443a2a10391fa3c5cb52d7871323399db949ebfaa9f25ee2a0"},
]

[[package]]
name = "pytz"
version = "2023.3.post1"
summary = "World timezone definitions, modern and historical"
files = [
{file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
{file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
]

[[package]]
name = "regex"
version = "2023.10.3"
requires_python = ">=3.7"
summary = "Alternative regular expression module, to replace re."
files = [
{file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"},
{file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"},
{file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"},
{file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"},
{file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"},
{file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"},
{file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"},
{file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"},
{file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"},
{file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"},
{file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"},
{file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"},
{file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"},
{file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"},
{file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"},
{file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"},
{file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"},
{file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"},
{file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"},
{file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"},
{file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"},
{file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"},
{file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"},
{file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"},
{file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"},
{file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"},
{file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"},
{file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"},
{file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"},
{file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"},
{file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"},
{file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"},
{file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"},
{file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"},
{file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"},
{file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"},
{file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"},
{file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"},
{file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"},
{file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"},
{file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"},
{file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"},
{file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"},
{file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"},
{file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"},
{file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"},
{file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"},
{file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"},
{file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"},
{file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"},
{file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"},
{file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"},
{file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"},
{file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"},
{file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"},
{file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"},
{file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"},
{file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"},
{file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"},
]

[[package]]
name = "requests"
version = "2.31.0"
requires_python = ">=3.7"
summary = "Python HTTP for Humans."
dependencies = [
"certifi>=2017.4.17",
"charset-normalizer<4,>=2",
"idna<4,>=2.5",
"urllib3<3,>=1.21.1",
]
files = [
{file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
{file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
]

[[package]]
name = "six"
version = "1.16.0"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
summary = "Python 2 and 3 compatibility utilities"
files = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]

[[package]]
name = "sonic-client"
version = "1.0.0"
summary = "python client for sonic search backend"
files = [
{file = "sonic-client-1.0.0.tar.gz", hash = "sha256:fe324c7354670488ed84847f6a6727d3cb5fb3675cb9b61396dcf5720e5aca66"},
{file = "sonic_client-1.0.0-py3-none-any.whl", hash = "sha256:291bf292861e97a2dd765ff0c8754ea9631383680d31a63ec3da6f5aa5f4beda"},
]

[[package]]
name = "sqlparse"
version = "0.4.4"
requires_python = ">=3.5"
summary = "A non-validating SQL parser."
files = [
{file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"},
{file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"},
]

[[package]]
name = "stack-data"
version = "0.6.3"
summary = "Extract data from python stack frames and tracebacks for informative displays"
dependencies = [
"asttokens>=2.1.0",
"executing>=1.2.0",
"pure-eval",
]
files = [
{file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"},
{file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"},
]

[[package]]
name = "traitlets"
version = "5.13.0"
requires_python = ">=3.8"
summary = "Traitlets Python configuration system"
files = [
{file = "traitlets-5.13.0-py3-none-any.whl", hash = "sha256:baf991e61542da48fe8aef8b779a9ea0aa38d8a54166ee250d5af5ecf4486619"},
{file = "traitlets-5.13.0.tar.gz", hash = "sha256:9b232b9430c8f57288c1024b34a8f0251ddcc47268927367a0dd3eeaca40deb5"},
]

[[package]]
name = "typing-extensions"
version = "4.8.0"
requires_python = ">=3.8"
summary = "Backported and Experimental Type Hints for Python 3.8+"
files = [
{file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
{file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
]

[[package]]
name = "tzdata"
version = "2023.3"
requires_python = ">=2"
summary = "Provider of IANA time zone data"
files = [
{file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
{file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
]

[[package]]
name = "tzlocal"
version = "5.2"
requires_python = ">=3.8"
summary = "tzinfo object for the local timezone"
dependencies = [
"tzdata; platform_system == \"Windows\"",
]
files = [
{file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"},
{file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"},
]

[[package]]
name = "urllib3"
version = "2.0.7"
requires_python = ">=3.7"
summary = "HTTP library with thread-safe connection pooling, file post, and more."
files = [
{file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"},
{file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"},
]

[[package]]
name = "w3lib"
version = "2.1.2"
requires_python = ">=3.7"
summary = "Library of web-related functions"
files = [
{file = "w3lib-2.1.2-py3-none-any.whl", hash = "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7"},
{file = "w3lib-2.1.2.tar.gz", hash = "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1"},
]

[[package]]
name = "wcwidth"
version = "0.2.9"
summary = "Measures the displayed width of unicode strings in a terminal"
files = [
{file = "wcwidth-0.2.9-py2.py3-none-any.whl", hash = "sha256:9a929bd8380f6cd9571a968a9c8f4353ca58d7cd812a4822bba831f8d685b223"},
{file = "wcwidth-0.2.9.tar.gz", hash = "sha256:a675d1a4a2d24ef67096a04b85b02deeecd8e226f57b5e3a72dbb9ed99d27da8"},
]

[[package]]
name = "websockets"
version = "12.0"
requires_python = ">=3.8"
summary = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
files = [
{file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
{file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
{file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"},
{file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"},
{file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"},
{file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"},
{file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"},
{file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"},
{file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"},
{file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"},
{file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"},
{file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"},
{file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"},
{file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"},
{file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"},
{file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"},
{file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"},
{file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"},
{file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"},
{file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"},
{file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"},
{file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"},
{file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"},
{file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"},
{file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"},
{file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"},
{file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"},
{file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"},
{file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"},
{file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"},
{file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"},
{file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"},
{file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"},
{file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"},
{file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"},
{file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"},
{file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"},
{file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"},
{file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"},
{file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"},
{file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"},
{file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"},
{file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"},
{file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"},
{file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"},
{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"},
{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"},
{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"},
{file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"},
{file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"},
{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"},
{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"},
{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"},
{file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"},
{file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"},
{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"},
{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"},
{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"},
{file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"},
{file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"},
{file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
]
[[package]]
name = "yt-dlp"
version = "2023.10.13"
requires_python = ">=3.7"
summary = "A youtube-dl fork with additional features and patches"
dependencies = [
"brotli; platform_python_implementation == \"CPython\"",
"brotlicffi; platform_python_implementation != \"CPython\"",
"certifi",
"mutagen",
"pycryptodomex",
"websockets",
]
files = [
{file = "yt-dlp-2023.10.13.tar.gz", hash = "sha256:e026ea1c435ff36eef1215bc4c5bb8c479938b90054997ba99f63a4541fe63b4"},
{file = "yt_dlp-2023.10.13-py2.py3-none-any.whl", hash = "sha256:2b069f22675532eebacdfd6372b1825651a751fef848de9ae6efe6491b2dc38a"},
]
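For context on the brotli/brotlicffi split in the dependency list above: those are PEP 508 environment markers, evaluated per interpreter at install time, so CPython gets the C extension and PyPy gets the cffi build. A minimal sketch of how such a marker resolves, using the packaging library (an illustration only; packaging ships as a dependency of pip):

# Sketch: evaluating a PEP 508 environment marker (assumes `packaging` is installed).
from packaging.markers import Marker

marker = Marker('platform_python_implementation == "CPython"')
# True on CPython, False on PyPy -- this is what selects brotli vs. brotlicffi.
print(marker.evaluate())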

@ -1 +1 @@
Subproject commit 534998571c9a2ddff462a9c8f3ed5ea825f91958
Subproject commit 5323fc773d33ef3f219c35c946f3b353b1251d37

pyproject.toml Normal file
View File

@ -0,0 +1,129 @@
[project]
name = "archivebox"
version = "0.7.1"
description = "Self-hosted internet archiving solution."
authors = [
{name = "Nick Sweeting", email = "setup.py@archivebox.io"},
]
dependencies = [
# "setuptools>=68.2.2",
"croniter>=0.3.34",
"dateparser>=1.0.0",
"django-extensions>=3.0.3",
"django>=3.1.3,<3.2",
"ipython>5.0.0",
"mypy-extensions>=0.4.3",
"python-crontab>=2.5.1",
"requests>=2.24.0",
"w3lib>=1.22.0",
# "youtube-dl>=2021.04.17",
"yt-dlp>=2021.4.11",
# "playwright>=1.39.0; platform_machine != 'armv7l'",
]
requires-python = ">=3.9"
readme = "README.md"
license = {text = "MIT"}
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Environment :: Web Environment",
"Framework :: Django",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Information Technology",
"Intended Audience :: Legal Industry",
"Intended Audience :: System Administrators",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
"Topic :: Sociology :: History",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: System :: Archiving",
"Topic :: System :: Archiving :: Backup",
"Topic :: System :: Recovery Tools",
"Topic :: Utilities",
"Typing :: Typed",
]
# pdm lock -G:all
# pdm install -G:all
[tool.pdm.dev-dependencies]
build = [
"setuptools",
"wheel",
"pdm",
# "bottle",
# "stdeb",
# "twine",
]
lint = [
"flake8",
"mypy",
"django-stubs",
]
test = [
"pytest",
]
debug = [
"django-debug-toolbar",
"djdt_flamegraph",
"ipdb",
]
doc = [
"recommonmark",
"sphinx",
"sphinx-rtd-theme",
]
dev = [
"homebrew-pypi-poet>=0.10.0",
]
[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}
[project.optional-dependencies]
sonic = [
# echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
# curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
"sonic-client>=0.0.5",
]
ldap = [
# apt install libldap2-dev libsasl2-dev
"python-ldap>=3.4.3",
"django-auth-ldap>=4.1.0",
]
# playwright = [
# platform_machine isn't respected by pdm export -o requirements.txt; this breaks arm/v7
# "playwright>=1.39.0; platform_machine != 'armv7l'",
# ]
[project.scripts]
archivebox = "archivebox.cli:main"
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project.urls]
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
Source = "https://github.com/ArchiveBox/ArchiveBox"
Documentation = "https://github.com/ArchiveBox/ArchiveBox/wiki"
"Bug Tracker" = "https://github.com/ArchiveBox/ArchiveBox/issues"
Changelog = "https://github.com/ArchiveBox/ArchiveBox/releases"
Roadmap = "https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap"
Community = "https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community"
Demo = "https://demo.archivebox.io"
Donate = "https://github.com/ArchiveBox/ArchiveBox/wiki/Donations"
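A note on the optional-dependency groups above: extras like sonic and ldap are only installed on request (e.g. pip install archivebox[sonic,ldap]), so any code that uses them must tolerate their absence. A minimal sketch of the usual guarded-import pattern (illustrative only, not ArchiveBox's actual module layout):

# Sketch: guarding an optional extra at import time (hypothetical helper, not real ArchiveBox code).
try:
    from sonic import SearchClient  # provided by the "sonic" extra (sonic-client package)
    SONIC_AVAILABLE = True
except ImportError:
    SearchClient = None
    SONIC_AVAILABLE = False

def sonic_search_enabled() -> bool:
    # Callers check this before routing queries to the Sonic backend.
    return SONIC_AVAILABLE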

setup.py
View File

@ -1,142 +0,0 @@
import json
import setuptools
from setuptools.command.test import test
from pathlib import Path
PKG_NAME = "archivebox"
DESCRIPTION = "The self-hosted internet archive."
LICENSE = "MIT"
AUTHOR = "Nick Sweeting"
AUTHOR_EMAIL="git@nicksweeting.com"
REPO_URL = "https://github.com/ArchiveBox/ArchiveBox"
PROJECT_URLS = {
"Source": f"{REPO_URL}",
"Documentation": f"{REPO_URL}/wiki",
"Bug Tracker": f"{REPO_URL}/issues",
"Changelog": f"{REPO_URL}/wiki/Changelog",
"Roadmap": f"{REPO_URL}/wiki/Roadmap",
"Community": f"{REPO_URL}/wiki/Web-Archiving-Community",
"Donate": f"{REPO_URL}/wiki/Donations",
}
ROOT_DIR = Path(__file__).parent.resolve()
PACKAGE_DIR = ROOT_DIR / PKG_NAME
README = (PACKAGE_DIR / "README.md").read_text(encoding='utf-8', errors='ignore')
VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['version']
PYTHON_REQUIRES = ">=3.7"
SETUP_REQUIRES = ["wheel"]
INSTALL_REQUIRES = [
# only add things here that have corresponding apt python3-packages available
# anything added here also needs to be added to our package dependencies in
# stdeb.cfg (apt), archivebox.rb (brew), Dockerfile, etc.
# if there is no apt python3-package equivalent, then vendor it instead in
# ./archivebox/vendor/
"requests>=2.24.0",
"mypy-extensions>=0.4.3",
"django>=3.1.3,<3.2",
"django-extensions>=3.0.3",
"dateparser",
"ipython",
"youtube-dl",
"python-crontab>=2.5.1",
"croniter>=0.3.34",
"w3lib>=1.22.0",
]
EXTRAS_REQUIRE = {
'sonic': [
"sonic-client>=0.0.5",
],
'dev': [
"setuptools",
"twine",
"wheel",
"flake8",
"ipdb",
"mypy",
"django-stubs",
"sphinx",
"sphinx-rtd-theme",
"recommonmark",
"pytest",
"bottle",
"stdeb",
"django-debug-toolbar",
"djdt_flamegraph",
],
}
# To see when setup.py gets called (uncomment for debugging):
# import sys
# print(PACKAGE_DIR, f" (v{VERSION})")
# print('>', sys.executable, *sys.argv)
class DisabledTestCommand(test):
def run(self):
# setup.py test is deprecated, disable it here by force so stdeb doesn't run it
print()
print('[X] Running tests via setup.py test is deprecated.')
print(' Hint: Use the ./bin/test.sh script or pytest instead')
setuptools.setup(
name=PKG_NAME,
version=VERSION,
license=LICENSE,
author=AUTHOR,
author_email=AUTHOR_EMAIL,
description=DESCRIPTION,
long_description=README,
long_description_content_type="text/markdown",
url=REPO_URL,
project_urls=PROJECT_URLS,
python_requires=PYTHON_REQUIRES,
setup_requires=SETUP_REQUIRES,
install_requires=INSTALL_REQUIRES,
extras_require=EXTRAS_REQUIRE,
packages=[PKG_NAME],
include_package_data=True, # see MANIFEST.in
entry_points={
"console_scripts": [
f"{PKG_NAME} = {PKG_NAME}.cli:main",
],
},
classifiers=[
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Development Status :: 4 - Beta",
"Topic :: Utilities",
"Topic :: System :: Archiving",
"Topic :: System :: Archiving :: Backup",
"Topic :: System :: Recovery Tools",
"Topic :: Sociology :: History",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
"Topic :: Software Development :: Libraries :: Python Modules",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Information Technology",
"Intended Audience :: Legal Industry",
"Intended Audience :: System Administrators",
"Environment :: Console",
"Environment :: Web Environment",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Framework :: Django",
"Typing :: Typed",
],
cmdclass={
"test": DisabledTestCommand,
},
)
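With setup.py deleted, the metadata it used to compute at import time (the version read from package.json, install_requires, entry points) is now declared statically in pyproject.toml and served by pdm-backend. The same information is available at runtime through the standard library; a small sketch, assuming an installed archivebox package:

# Sketch: querying package metadata without setup.py (stdlib, Python 3.8+).
from importlib.metadata import version, requires

print(version("archivebox"))          # e.g. "0.7.1", per pyproject.toml above
for dep in requires("archivebox") or []:
    print(dep)                        # replaces parsing setup.py's install_requires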

View File

@ -4,7 +4,8 @@ Package: archivebox
Package3: archivebox
Suite: focal
Suite3: focal
Build-Depends: dh-python, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
Depends3: nodejs, wget, curl, git, ffmpeg, youtube-dl, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
XS-Python-Version: >= 3.7
Build-Depends: debhelper, dh-python, python3-all, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
Depends3: nodejs, wget, curl, git, ffmpeg, yt-dlp, ripgrep, python3-all, python3-pip, python3-setuptools, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib
X-Python3-Version: >= 3.9
XS-Python-Version: >= 3.9
Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck

View File

@ -13,7 +13,46 @@ def test_ignore_methods():
Takes the passed methods out of the default methods list and returns the remaining methods
"""
ignored = ignore_methods(['title'])
assert should_save_title not in ignored
assert "title" not in ignored
def test_save_allowdenylist_works(tmp_path, process, disable_extractors_dict):
allow_list = {
r'/static': ["headers", "singlefile"],
r'example\.com\.html$': ["headers"],
}
deny_list = {
"/static": ["singlefile"],
}
disable_extractors_dict.update({
"SAVE_HEADERS": "true",
"USE_SINGLEFILE": "true",
"SAVE_ALLOWLIST": pyjson.dumps(allow_list),
"SAVE_DENYLIST": pyjson.dumps(deny_list),
})
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
capture_output=True, env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
singlefile_file = archived_item_path / "singlefile.html"
assert not singlefile_file.exists()
headers_file = archived_item_path / "headers.json"
assert headers_file.exists()
def test_save_denylist_works(tmp_path, process, disable_extractors_dict):
deny_list = {
"/static": ["singlefile"],
}
disable_extractors_dict.update({
"SAVE_HEADERS": "true",
"USE_SINGLEFILE": "true",
"SAVE_DENYLIST": pyjson.dumps(deny_list),
})
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
capture_output=True, env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
singlefile_file = archived_item_path / "singlefile.html"
assert not singlefile_file.exists()
headers_file = archived_item_path / "headers.json"
assert headers_file.exists()
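The two tests above exercise SAVE_ALLOWLIST/SAVE_DENYLIST: each key is a regular expression matched against the URL, the allowlist opts extractors in, and the denylist strips them afterwards (hence singlefile.html is never written while headers.json is). A simplified sketch of the matching logic these assertions imply (not the actual ArchiveBox implementation):

# Sketch: regex-based allow/deny resolution, simplified for illustration.
import re

def methods_for_url(url, allow_list, deny_list, default_methods):
    allowed = {m for pattern, methods in allow_list.items()
                 if re.search(pattern, url) for m in methods}
    allowed = allowed or set(default_methods)   # no allowlist hit -> fall back to defaults
    for pattern, methods in deny_list.items():
        if re.search(pattern, url):
            allowed.difference_update(methods)
    return allowed

url = 'http://127.0.0.1:8080/static/example.com.html'
print(methods_for_url(url,
                      {r'/static': ["headers", "singlefile"]},
                      {"/static": ["singlefile"]},
                      ["title", "headers"]))     # -> {'headers'}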
def test_singlefile_works(tmp_path, process, disable_extractors_dict):
disable_extractors_dict.update({"USE_SINGLEFILE": "true"})

View File

@ -11,6 +11,8 @@ from archivebox.config import OUTPUT_PERMISSIONS
from .fixtures import *
DIR_PERMISSIONS = OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
def test_init(tmp_path, process):
assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
@ -55,7 +57,7 @@ def test_correct_permissions_output_folder(tmp_path, process):
index_files = ['index.sqlite3', 'archive']
for file in index_files:
file_path = tmp_path / file
assert oct(file_path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS
assert oct(file_path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path)
@ -63,7 +65,7 @@ def test_correct_permissions_add_command_results(tmp_path, process, disable_extr
env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
for path in archived_item_path.iterdir():
assert oct(path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS
assert oct(path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path)
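On the DIR_PERMISSIONS line above: directories need the execute bit wherever files grant read, so the expected directory mode is derived from the configured file mode by mapping each octal 6 (rw-) to 7 (rwx) and each 4 (r--) to 5 (r-x). A worked example, assuming the common default file mode of 644:

# Worked example of the DIR_PERMISSIONS derivation used in these tests.
OUTPUT_PERMISSIONS = '644'  # assumed default file mode (rw-r--r--)
DIR_PERMISSIONS = OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
print(DIR_PERMISSIONS)      # '755' (rwxr-xr-x)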

View File

@ -26,3 +26,35 @@ def test_oneshot_command_saves_page_in_right_folder(tmp_path, disable_extractors
assert not "index.sqlite3" in current_path
assert "output.html" in items
def test_oneshot_command_succeeds(tmp_path, disable_extractors_dict):
disable_extractors_dict.update({"SAVE_DOM": "true"})
process = subprocess.run(
[
"archivebox",
"oneshot",
f"--out-dir={tmp_path}",
"--extract=title,favicon,dom",
"http://127.0.0.1:8080/static/example.com.html",
],
capture_output=True,
env=disable_extractors_dict,
)
assert process.returncode == 0
def test_oneshot_command_logs_archiving_finished(tmp_path, disable_extractors_dict):
disable_extractors_dict.update({"SAVE_DOM": "true"})
process = subprocess.run(
[
"archivebox",
"oneshot",
f"--out-dir={tmp_path}",
"--extract=title,favicon,dom",
"http://127.0.0.1:8080/static/example.com.html",
],
capture_output=True,
env=disable_extractors_dict,
)
output_str = process.stdout.decode("utf-8")
assert "4 files" in output_str