Merge pull request #721 from ArchiveBox/dev

Commit: 6fd7d1f0bd
@@ -5,16 +5,21 @@ __pycache__/
 .mypy_cache/
 .pytest_cache/
 .github/
+.git/
+.pdm-build/
+.pdm-python/
+.eggs/

 venv/
 .venv/
 .docker-venv/
+node_modules/

 build/
 dist/
-pip_dist/
-!pip_dist/archivebox.egg-info/requires.txt
 brew_dist/
+deb_dist/
+pip_dist/
 assets/

 data/
.flake8  (6 lines removed)

@@ -1,6 +0,0 @@
-[flake8]
-ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E272,E701,E731,W293,W503,W291,W391
-select = F,E9,W
-max-line-length = 130
-max-complexity = 10
-exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv
.github/workflows/debian.yml  (12 changed lines)

@@ -9,10 +9,10 @@ env:
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04

     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 1

@@ -38,17 +38,17 @@ jobs:
        run: |
          cd deb_dist/
          sudo apt-get install ./archivebox*.deb
+         cd ..
+         python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > ./requirements.txt
+         python3 -m pip install -r ./requirements.txt

      - name: Check ArchiveBox version
        run: |
          # must create dir needed for snaps to run as non-root on github actions
          sudo mkdir -p /run/user/1001 && sudo chmod -R 777 /run/user/1001
          mkdir "${{ github.workspace }}/data" && cd "${{ github.workspace }}/data"
-         archivebox init
-         archivebox config --set SAVE_READABILITY=False
-         archivebox config --set SAVE_MERCURY=False
-         archivebox config --set SAVE_SINGLEFILE=False
          archivebox --version
+         archivebox init --setup

      - name: Add some links to test
        run: |
.github/workflows/docker.yml  (15 changed lines)

@@ -18,20 +18,21 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
         with:
           submodules: true
           fetch-depth: 1

       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v1
+        uses: docker/setup-qemu-action@v3

       - name: Set up Docker Buildx
         id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@v3
         with:
           version: latest
           install: true
+          platforms: linux/amd64,linux/arm64,linux/arm/v7

       - name: Builder instance name
         run: echo ${{ steps.buildx.outputs.name }}

@@ -40,7 +41,7 @@ jobs:
         run: echo ${{ steps.buildx.outputs.platforms }}

       - name: Cache Docker layers
-        uses: actions/cache@v2
+        uses: actions/cache@v3
         with:
           path: /tmp/.buildx-cache
           key: ${{ runner.os }}-buildx-${{ github.sha }}

@@ -48,7 +49,7 @@ jobs:
             ${{ runner.os }}-buildx-

       - name: Docker Login
-        uses: docker/login-action@v1
+        uses: docker/login-action@v3
         if: github.event_name != 'pull_request'
         with:
           username: ${{ secrets.DOCKER_USERNAME }}

@@ -56,7 +57,7 @@ jobs:

       - name: Collect Docker tags
         id: docker_meta
-        uses: crazy-max/ghaction-docker-meta@v2
+        uses: docker/metadata-action@v5
         with:
           images: archivebox/archivebox,nikisweeting/archivebox
           flavor: |

@@ -69,7 +70,7 @@ jobs:

       - name: Build and push
         id: docker_build
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v5
         with:
           context: ./
           file: ./Dockerfile
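The workflow now registers QEMU and Buildx with explicit linux/amd64, linux/arm64, and linux/arm/v7 platforms. For illustration only, a minimal local sketch of the same multi-arch build (the tag name is just an example; the equivalent commands also appear in the Dockerfile usage notes below):

    # assumes Docker with BuildKit/buildx available locally
    docker buildx create --use
    docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 -t archivebox/archivebox:dev --push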
.github/workflows/homebrew.yml  (3 changed lines)

@@ -23,11 +23,12 @@ jobs:
          cd brew_dist/
          brew install --build-bottle ./archivebox.rb
          # brew bottle archivebox
+         archivebox version

      - name: Add some links to test
        run: |
          mkdir data && cd data
-         archivebox init
+         archivebox init --setup
          archivebox add 'https://example.com'
          archivebox version
          archivebox status
.github/workflows/lint.yml  (5 changed lines)

@@ -28,7 +28,8 @@ jobs:

      - name: Lint with flake8
        run: |
+         cd archivebox
          # one pass for show-stopper syntax errors or undefined names
-         flake8 archivebox --count --show-source --statistics
+         flake8 . --count --show-source --statistics
          # one pass for small stylistic things
-         flake8 archivebox --count --max-line-length="$MAX_LINE_LENGTH" --statistics
+         flake8 . --count --max-line-length="$MAX_LINE_LENGTH" --statistics
.github/workflows/pip.yml  (33 changed lines)

@@ -7,29 +7,35 @@ on:

 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04

+    permissions:
+      id-token: write

     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1

      - name: Set up Python
-       uses: actions/setup-python@v1
+       uses: actions/setup-python@v4
        with:
-         python-version: 3.9
+         python-version: 3.11
          architecture: x64

-     - name: Build Python Package
+     - uses: pdm-project/setup-pdm@v3
+
+     - name: Install dependencies
+       run: pdm install --fail-fast --no-lock --group :all --no-self
+
+     - name: Build package
        run: |
-         pip3 install --upgrade pip setuptools wheel
-         rm -Rf pip_dist/*.whl
-         python3 setup.py \
-             sdist --dist-dir=./pip_dist \
-             bdist_wheel --dist-dir=./pip_dist \
-             egg_info --egg-base=./pip_dist
-         pip install pip_dist/archivebox-*.whl
+         rm ./dist/archivebox-*.whl
+         pdm build
+
+     - name: Install from build
+       run: pip install ./dist/archivebox-*.whl

      - name: Add some links to test
        run: |

@@ -39,6 +45,9 @@ jobs:
          archivebox version
          archivebox status

+     - name: Publish package distributions to PyPI
+       run: pdm publish --no-build

      # - name: Push build to PyPI
      #   run: |
      #     cd pip_dist/
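Packaging now goes through PDM instead of setup.py. A rough sketch of reproducing the CI steps locally (publishing is left commented out because it needs PyPI credentials; in CI the added id-token: write permission is presumably what enables PyPI trusted publishing):

    # assumes pdm is installed locally
    pdm install --fail-fast --no-lock --group :all --no-self
    pdm build
    pip install ./dist/archivebox-*.whl
    # pdm publish --no-build    # CI-only step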
.github/workflows/test.yml  (15 changed lines)

@@ -14,7 +14,7 @@ jobs:
    strategy:
      matrix:
        os: [ubuntu-20.04, macos-latest, windows-latest]
-       python: [3.7]
+       python: [3.9]

    steps:
      - uses: actions/checkout@v2

@@ -24,15 +24,18 @@ jobs:

      ### Setup Python & JS Languages
      - name: Set up Python ${{ matrix.python }}
-       uses: actions/setup-python@v1
+       uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python }}
          architecture: x64

      - name: Set up Node JS 14.7.0
-       uses: actions/setup-node@v1
+       uses: actions/setup-node@v3
        with:
-         node-version: 14.7.0
+         node-version: 18.12.0

+     - name: Setup PDM
+       uses: pdm-project/setup-pdm@v3
+
      ### Install Python & JS Dependencies
      - name: Get pip cache dir

@@ -51,9 +54,9 @@ jobs:

      - name: Install pip dependencies
        run: |
-         python -m pip install --upgrade pip setuptools wheel pytest bottle
+         python -m pip install --upgrade pip setuptools wheel pytest bottle build
          ./bin/build_pip.sh
-         python -m pip install .
+         pdm install

      - name: Get npm cache dir
        id: npm-cache
.gitignore  (5 changed lines)

@@ -13,6 +13,8 @@ venv/
 node_modules/

 # Packaging artifacts
+.pdm-python
+.pdm-build
 archivebox.egg-info
 archivebox-*.tar.gz
 build/

@@ -24,3 +26,6 @@ data1/
 data2/
 data3/
 output/
+
+# vim
+*.sw?
Dockerfile  (320 changed lines)

@@ -1,125 +1,297 @@
 # This is the Dockerfile for ArchiveBox, it bundles the following dependencies:
-#     python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, single-file
+#     python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
 # Usage:
+#     git submodule update --init --recursive
+#     git pull --recurse-submodules
 #     docker build . -t archivebox --no-cache
 #     docker run -v "$PWD/data":/data archivebox init
 #     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
 #     docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
 #     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
+# Multi-arch build:
+#     docker buildx create --use
+#     docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
+#
+# Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).

-FROM python:3.9-slim-buster
+# Use Debian 12 w/ faster package updates: https://packages.debian.org/bookworm-backports/
+FROM python:3.11-slim-bookworm

 LABEL name="archivebox" \
-    maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
+    maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
     description="All-in-one personal internet archiving container" \
     homepage="https://github.com/ArchiveBox/ArchiveBox" \
     documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"

-# System-level base config
+ARG TARGETPLATFORM
+ARG TARGETOS
+ARG TARGETARCH
+ARG TARGETVARIANT
+
+######### Environment Variables #################################
+
+# Global system-level config
 ENV TZ=UTC \
     LANGUAGE=en_US:en \
     LC_ALL=C.UTF-8 \
     LANG=C.UTF-8 \
+    DEBIAN_FRONTEND=noninteractive \
+    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
     PYTHONIOENCODING=UTF-8 \
     PYTHONUNBUFFERED=1 \
-    DEBIAN_FRONTEND=noninteractive \
-    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    npm_config_loglevel=error

-# Application-level base config
+# Version config
+ENV PYTHON_VERSION=3.11 \
+    NODE_VERSION=21
+
+# User config
+ENV ARCHIVEBOX_USER="archivebox" \
+    DEFAULT_PUID=911 \
+    DEFAULT_PGID=911
+
+# Global paths
 ENV CODE_DIR=/app \
-    VENV_PATH=/venv \
     DATA_DIR=/data \
-    NODE_DIR=/node \
-    ARCHIVEBOX_USER="archivebox"
+    GLOBAL_VENV=/venv \
+    PLAYWRIGHT_BROWSERS_PATH=/browsers
+
+# Application-level paths
+ENV APP_VENV=/app/.venv \
+    NODE_MODULES=/app/node_modules
+
+# Build shell config
+ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
+SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "errtrace", "-o", "nounset", "-c"]
+
+######### System Environment ####################################
+
+# Detect ArchiveBox version number by reading package.json
+COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
+RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
+
+# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
+RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
+
+# Print debug info about build and save it to disk, for human eyes only, not used by anything else
+RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
+    && echo "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
+    && echo "BUILD_START_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ} LANG=${LANG}" \
+    && echo \
+    && echo "GLOBAL_VENV=${GLOBAL_VENV} APP_VENV=${APP_VENV} NODE_MODULES=${NODE_MODULES}" \
+    && echo "PYTHON=${PYTHON_VERSION} NODE=${NODE_VERSION} PATH=${PATH}" \
+    && echo "CODE_DIR=${CODE_DIR} DATA_DIR=${DATA_DIR}" \
+    && echo \
+    && uname -a \
+    && cat /etc/os-release | head -n7 \
+    && which bash && bash --version | head -n1 \
+    && which dpkg && dpkg --version | head -n1 \
+    && echo -e '\n\n' && env && echo -e '\n\n' \
+    ) | tee -a /VERSION.txt

 # Create non-privileged user for archivebox and chrome
-RUN groupadd --system $ARCHIVEBOX_USER \
-    && useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER
+RUN echo "[*] Setting up $ARCHIVEBOX_USER user uid=${DEFAULT_PUID}..." \
+    && groupadd --system $ARCHIVEBOX_USER \
+    && useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER \
+    && usermod -u "$DEFAULT_PUID" "$ARCHIVEBOX_USER" \
+    && groupmod -g "$DEFAULT_PGID" "$ARCHIVEBOX_USER" \
+    && echo -e "\nARCHIVEBOX_USER=$ARCHIVEBOX_USER PUID=$(id -u $ARCHIVEBOX_USER) PGID=$(id -g $ARCHIVEBOX_USER)\n\n" \
+    | tee -a /VERSION.txt
+# DEFAULT_PUID and DEFAULT_PID are overriden by PUID and PGID in /bin/docker_entrypoint.sh at runtime
+# https://docs.linuxserver.io/general/understanding-puid-and-pgid

-# Install system dependencies
-RUN apt-get update -qq \
-    && apt-get install -qq -y --no-install-recommends \
-        apt-transport-https ca-certificates gnupg2 zlib1g-dev \
-        dumb-init gosu cron unzip curl \
+# Install system apt dependencies (adding backports to access more recent apt updates)
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Installing APT base system dependencies for $TARGETPLATFORM..." \
+    && echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
+    && mkdir -p /etc/apt/keyrings \
+    && apt-get update -qq \
+    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        # 1. packaging dependencies
+        apt-transport-https ca-certificates apt-utils gnupg2 curl wget \
+        # 2. docker and init system dependencies
+        zlib1g-dev dumb-init gosu cron unzip grep \
+        # 3. frivolous CLI helpers to make debugging failed archiving easier
+        # nano iputils-ping dnsutils htop procps jq yq
    && rm -rf /var/lib/apt/lists/*

-# Install apt dependencies
-RUN apt-get update -qq \
-    && apt-get install -qq -y --no-install-recommends \
-        wget curl chromium git ffmpeg youtube-dl ripgrep \
-        fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
-    && rm -rf /var/lib/apt/lists/*
+######### Language Environments ####################################

 # Install Node environment
-RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
-    && echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
+    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
+    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && apt-get update -qq \
-    && apt-get install -qq -y --no-install-recommends \
-        nodejs \
-    # && npm install -g npm \
-    && rm -rf /var/lib/apt/lists/*
+    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        nodejs libatomic1 python3-minimal \
+    && rm -rf /var/lib/apt/lists/* \
+    # Update NPM to latest version
+    && npm i -g npm --cache /root/.npm \
+    # Save version info
+    && ( \
+        which node && node --version \
+        && which npm && npm --version \
+        && echo -e '\n\n' \
+    ) | tee -a /VERSION.txt
+
+# Install Python environment
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Setting up Python $PYTHON_VERSION runtime..." \
+    # tell PDM to allow using global system python site packages
+    # && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
+    # create global virtual environment GLOBAL_VENV to use (better than using pip install --global)
+    # && python3 -m venv --system-site-packages --symlinks $GLOBAL_VENV \
+    # && python3 -m venv --system-site-packages $GLOBAL_VENV \
+    # && python3 -m venv $GLOBAL_VENV \
+    # install global dependencies / python build dependencies in GLOBAL_VENV
+    # && pip install --upgrade pip setuptools wheel \
+    # Save version info
+    && ( \
+        which python3 && python3 --version | grep " $PYTHON_VERSION" \
+        && which pip && pip --version \
+        # && which pdm && pdm --version \
+        && echo -e '\n\n' \
+    ) | tee -a /VERSION.txt
+
+######### Extractor Dependencies ##################################
+
+# Install apt dependencies
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Installing APT extractor dependencies globally using apt..." \
+    && apt-get update -qq \
+    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        curl wget git yt-dlp ffmpeg ripgrep \
+        # Packages we have also needed in the past:
+        # youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
+        # fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
+    && rm -rf /var/lib/apt/lists/* \
+    # Save version info
+    && ( \
+        which curl && curl --version | head -n1 \
+        && which wget && wget --version | head -n1 \
+        && which yt-dlp && yt-dlp --version | head -n1 \
+        && which git && git --version | head -n1 \
+        && which rg && rg --version | head -n1 \
+        && echo -e '\n\n' \
+    ) | tee -a /VERSION.txt
+
+# Install chromium browser using playwright
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
+    && apt-get update -qq \
+    && if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
+        # install Chromium using playwright
+        pip install playwright \
+        && cp -r /root/.cache/ms-playwright "$PLAYWRIGHT_BROWSERS_PATH" \
+        && playwright install --with-deps chromium \
+        && export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
+    else \
+        # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
+        apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+            chromium fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
+        && export CHROME_BINARY="$(which chromium)"; \
+    fi \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
+    && mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
+    && chown -R $ARCHIVEBOX_USER "/home/${ARCHIVEBOX_USER}/.config" \
+    && chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
+    # Save version info
+    && ( \
+        which chromium-browser && /usr/bin/chromium-browser --version \
+        && echo -e '\n\n' \
+    ) | tee -a /VERSION.txt

 # Install Node dependencies
-WORKDIR "$NODE_DIR"
-ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
-    npm_config_loglevel=error
-ADD ./package.json ./package.json
-ADD ./package-lock.json ./package-lock.json
-RUN npm ci
-
-# Install Python dependencies
 WORKDIR "$CODE_DIR"
-ENV PATH="${PATH}:$VENV_PATH/bin"
-RUN python -m venv --clear --symlinks "$VENV_PATH" \
-    && pip install --upgrade --quiet pip setuptools
-ADD "./setup.py" "$CODE_DIR/"
-ADD "./package.json" "$CODE_DIR/archivebox/"
-RUN apt-get update -qq \
-    && apt-get install -qq -y --no-install-recommends \
-        build-essential python-dev python3-dev \
-    && echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
-    && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
-    && pip install --quiet -r /tmp/requirements.txt \
-    && apt-get purge -y build-essential python-dev python3-dev \
+COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/"
+RUN --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Installing NPM extractor dependencies from package.json into $NODE_MODULES..." \
+    && npm ci --prefer-offline --no-audit --cache /root/.npm \
+    && ( \
+        which node && node --version \
+        && which npm && npm version \
+        && echo -e '\n\n' \
+    ) | tee -a /VERSION.txt
+
+######### Build Dependencies ####################################
+
+# Install ArchiveBox Python dependencies
+WORKDIR "$CODE_DIR"
+COPY --chown=root:root --chmod=755 "./pyproject.toml" "requirements.txt" "$CODE_DIR/"
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
+    echo "[+] Installing PIP ArchiveBox dependencies from requirements.txt for ${TARGETPLATFORM}..." \
+    && apt-get update -qq \
+    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        build-essential \
+        libssl-dev libldap2-dev libsasl2-dev \
+        python3-ldap python3-msgpack python3-mutagen python3-regex python3-pycryptodome procps \
+    # && ln -s "$GLOBAL_VENV" "$APP_VENV" \
+    # && pdm use --venv in-project \
+    # && pdm run python -m ensurepip \
+    # && pdm sync --fail-fast --no-editable --group :all --no-self \
+    # && pdm export -o requirements.txt --without-hashes \
+    # && source $GLOBAL_VENV/bin/activate \
+    && pip install -r requirements.txt \
+    && apt-get purge -y \
+        build-essential \
+        # these are only needed to build CPython libs, we discard after build phase to shrink layer size
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

-# Install apt development dependencies
-# RUN apt-get install -qq \
-#     && apt-get install -qq -y --no-install-recommends \
-#         python3 python3-dev python3-pip python3-venv python3-all \
-#         dh-python debhelper devscripts dput software-properties-common \
-#         python3-distutils python3-setuptools python3-wheel python3-stdeb
-# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
-#     && pip install --quiet -r /tmp/dev_requirements.txt
+# Install ArchiveBox Python package from source
+COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
+    echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
+    && apt-get update -qq \
+    # install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
+    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        build-essential \
+    # INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
+    && pip install -e "$CODE_DIR"[sonic,ldap] \
+    # save docker image size and always remove compilers / build tools after building is complete
+    && apt-get purge -y build-essential \
+    && apt-get autoremove -y \
+    && rm -rf /var/lib/apt/lists/*

-# Install ArchiveBox Python package and its dependencies
-WORKDIR "$CODE_DIR"
-ADD . "$CODE_DIR"
-RUN pip install -e .
+####################################################

 # Setup ArchiveBox runtime config
 WORKDIR "$DATA_DIR"
-ENV IN_DOCKER=True \
-    CHROME_SANDBOX=False \
-    CHROME_BINARY="chromium" \
-    USE_SINGLEFILE=True \
-    SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
-    USE_READABILITY=True \
-    READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
-    USE_MERCURY=True \
-    MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser"
+ENV IN_DOCKER=True
+## No need to set explicitly, these values will be autodetected by archivebox in docker:
+#    CHROME_SANDBOX=False \
+#    WGET_BINARY="wget" \
+#    YOUTUBEDL_BINARY="yt-dlp" \
+#    CHROME_BINARY="/usr/bin/chromium-browser" \
+#    USE_SINGLEFILE=True \
+#    SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
+#    USE_READABILITY=True \
+#    READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
+#    USE_MERCURY=True \
+#    MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"

 # Print version for nice docker finish summary
-# RUN archivebox version
-RUN /app/bin/docker_entrypoint.sh archivebox version
+RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
+    && echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
+    && echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
+    && "$CODE_DIR/bin/docker_entrypoint.sh" \
+        archivebox version 2>&1 \
+    ) | tee -a /VERSION.txt
+
+####################################################

 # Open up the interfaces to the outside world
+WORKDIR "$DATA_DIR"
 VOLUME "$DATA_DIR"
 EXPOSE 8000

-HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
-    CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
+# Optional:
+# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
+#     CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1

 ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
 CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]
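Per the comment in the Dockerfile above, DEFAULT_PUID/DEFAULT_PGID (911) are only build-time defaults and are meant to be overridden by PUID and PGID in docker_entrypoint.sh at runtime, following the linuxserver.io convention it links to. A hedged usage sketch, assuming they are passed as environment variables (the UID/GID values are only examples):

    # run the container so files created under ./data are owned by your host user
    docker run -v "$PWD/data":/data -e PUID=1000 -e PGID=1000 archivebox/archivebox init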
@@ -1,6 +0,0 @@
-graft archivebox
-global-exclude .DS_Store
-global-exclude __pycache__
-global-exclude *.pyc
-
-prune tests/
Pipfile  (12 lines removed)

@@ -1,12 +0,0 @@
-[[source]]
-name = "pypi"
-url = "https://pypi.org/simple"
-verify_ssl = true
-
-[packages]
-# see setup.py for package dependency list
-"e1839a8" = {path = ".", editable = true}
-
-[dev-packages]
-# see setup.py for dev package dependency list
-"e1839a8" = {path = ".", extras = ["dev"], editable = true}
SECURITY.md  (new file, 34 lines)

@@ -0,0 +1,34 @@
+# Security Policy
+
+---
+
+## Security Information
+
+Please see this wiki page for important notices about ArchiveBox security, publishing your archives securely, and the dangers of executing archived JS:
+
+https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview
+
+Also see this section of the README about important caveats when running ArchiveBox:
+
+https://github.com/ArchiveBox/ArchiveBox?tab=readme-ov-file#caveats
+
+You can also read these pages for more information about ArchiveBox's internals, development environment, DB schema, and more:
+
+- https://github.com/ArchiveBox/ArchiveBox#archive-layout
+- https://github.com/ArchiveBox/ArchiveBox#archivebox-development
+- https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives
+- https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting
+
+---
+
+## Reporting a Vulnerability
+
+We use Github's built-in [Private Reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing-information-about-vulnerabilities/privately-reporting-a-security-vulnerability) feature to accept vulnerability reports.
+
+1. Go to the Security tab on our Github repo: https://github.com/ArchiveBox/ArchiveBox/security
+
+2. Click the ["Report a Vulnerability"](https://github.com/ArchiveBox/ArchiveBox/security/advisories/new) button
+
+3. Fill out the form to submit the details of the report and it will be securely sent to the maintainers
+
+You can also contact the maintainers via our public [Zulip Chat Server zulip.archivebox.io](https://zulip.archivebox.io) or [Twitter DMs @ArchiveBoxApp](https://twitter.com/ArchiveBoxApp).
@@ -1 +1,3 @@
-theme: jekyll-theme-merlot
+production_url: https://archivebox.io
+theme: jekyll-theme-merlot
+# Github Pages static site settings for https://archivebox.io
@@ -3,4 +3,4 @@ ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E272,E701,E731,W293,W503,W291,W391
 select = F,E9,W
 max-line-length = 130
 max-complexity = 10
-exclude = migrations,tests,node_modules,vendor,static,venv,.venv,.venv2,.docker-venv
+exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv
archivebox/__init__.py  (0 changed lines; file mode changed: Normal file → Executable file)
@@ -30,11 +30,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
     )
     parser.add_argument(
-        '--update-all', #'-n',
+        '--update', #'-u',
         action='store_true',
         default=not ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
         help="Also retry previously skipped/failed links when adding new links",
     )
+    parser.add_argument(
+        '--update-all', #'-n',
+        action='store_true',
+        default=False,
+        help="Also update ALL links in index when finished adding new links",
+    )
     parser.add_argument(
         '--index-only', #'-o',
         action='store_true',

@@ -104,6 +110,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         urls=stdin_urls or urls,
         depth=command.depth,
         tag=command.tag,
+        update=command.update,
         update_all=command.update_all,
         index_only=command.index_only,
         overwrite=command.overwrite,
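The add command now separates the two behaviours described in the help text above: --update retries previously skipped/failed links among the URLs being added, while --update-all additionally updates every link already in the index once the new ones are done. An illustrative sketch:

    # retry missing/failed ArchiveResults only for the URLs being added
    archivebox add --update 'https://example.com'
    # also update ALL existing links in the index when finished
    archivebox add --update-all 'https://example.com'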
@@ -51,6 +51,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         action='store_true',
         help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots',
     )
+    parser.add_argument(
+        '--update',
+        action='store_true',
+        help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults',
+    )
     group.add_argument(
         '--clear', # '-c'
         action='store_true',

@@ -94,6 +99,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         every=command.every,
         depth=command.depth,
         overwrite=command.overwrite,
+        update=command.update,
         import_path=command.import_path,
         out_dir=pwd or OUTPUT_DIR,
     )
@@ -26,11 +26,12 @@ import io
 import re
 import sys
 import json
+import inspect
 import getpass
 import platform
 import shutil
-import sqlite3
 import django
+from sqlite3 import dbapi2 as sqlite3

 from hashlib import md5
 from pathlib import Path

@@ -39,6 +40,7 @@ from typing import Optional, Type, Tuple, Dict, Union, List
 from subprocess import run, PIPE, DEVNULL
 from configparser import ConfigParser
 from collections import defaultdict
+import importlib.metadata

 from .config_stubs import (
     SimpleConfigValueDict,

@@ -48,6 +50,26 @@ from .config_stubs import (
     ConfigDefaultDict,
 )

+
+### Pre-Fetch Minimal System Config
+
+SYSTEM_USER = getpass.getuser() or os.getlogin()
+
+try:
+    import pwd
+    SYSTEM_USER = pwd.getpwuid(os.geteuid()).pw_name or SYSTEM_USER
+except KeyError:
+    # Process' UID might not map to a user in cases such as running the Docker image
+    # (where `archivebox` is 999) as a different UID.
+    pass
+except ModuleNotFoundError:
+    # pwd is only needed for some linux systems, doesn't exist on windows
+    pass
+except Exception:
+    # this should never happen, uncomment to debug
+    # raise
+    pass
+
 ############################### Config Schema ##################################

 CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
@@ -56,6 +78,9 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'USE_COLOR':        {'type': bool, 'default': lambda c: c['IS_TTY']},
     'SHOW_PROGRESS':    {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')},  # progress bars are buggy on mac, disable for now
     'IN_DOCKER':        {'type': bool, 'default': False},
+    'IN_QEMU':          {'type': bool, 'default': False},
+    'PUID':             {'type': int,  'default': os.getuid()},
+    'PGID':             {'type': int,  'default': os.getgid()},
     # TODO: 'SHOW_HINTS': {'type: bool, 'default': True},
 },

@@ -65,23 +90,47 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'ONLY_NEW':             {'type': bool, 'default': True},
     'TIMEOUT':              {'type': int,  'default': 60},
     'MEDIA_TIMEOUT':        {'type': int,  'default': 3600},
-    'OUTPUT_PERMISSIONS':   {'type': str,  'default': '755'},
+    'OUTPUT_PERMISSIONS':   {'type': str,  'default': '644'},
     'RESTRICT_FILE_NAMES':  {'type': str,  'default': 'windows'},
-    'URL_BLACKLIST':        {'type': str,  'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'},  # to avoid downloading code assets as their own pages
+
+    'URL_DENYLIST':         {'type': str,  'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)},  # to avoid downloading code assets as their own pages
+    'URL_ALLOWLIST':        {'type': str,  'default': None, 'aliases': ('URL_WHITELIST',)},
+
+    'ADMIN_USERNAME':       {'type': str,  'default': None},
+    'ADMIN_PASSWORD':       {'type': str,  'default': None},
+
+    'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
+    'TAG_SEPARATOR_PATTERN': {'type': str,  'default': r'[,]'},
 },

 'SERVER_CONFIG': {
     'SECRET_KEY':           {'type': str,  'default': None},
     'BIND_ADDR':            {'type': str,  'default': lambda c: ['127.0.0.1:8000', '0.0.0.0:8000'][c['IN_DOCKER']]},
     'ALLOWED_HOSTS':        {'type': str,  'default': '*'},
     'DEBUG':                {'type': bool, 'default': False},
     'PUBLIC_INDEX':         {'type': bool, 'default': True},
     'PUBLIC_SNAPSHOTS':     {'type': bool, 'default': True},
     'PUBLIC_ADD_VIEW':      {'type': bool, 'default': False},
     'FOOTER_INFO':          {'type': str,  'default': 'Content is hosted for personal archiving purposes only.  Contact server owner for any takedown requests.'},
     'SNAPSHOTS_PER_PAGE':   {'type': int,  'default': 40},
     'CUSTOM_TEMPLATES_DIR': {'type': str,  'default': None},
     'TIME_ZONE':            {'type': str,  'default': 'UTC'},
+    'TIMEZONE':             {'type': str,  'default': 'UTC'},
+    'REVERSE_PROXY_USER_HEADER': {'type': str, 'default': 'Remote-User'},
+    'REVERSE_PROXY_WHITELIST':   {'type': str, 'default': ''},
+    'LOGOUT_REDIRECT_URL':  {'type': str,  'default': '/'},
+    'PREVIEW_ORIGINALS':    {'type': bool, 'default': True},
+
+    'LDAP':                 {'type': bool, 'default': False},
+    'LDAP_SERVER_URI':      {'type': str,  'default': None},
+    'LDAP_BIND_DN':         {'type': str,  'default': None},
+    'LDAP_BIND_PASSWORD':   {'type': str,  'default': None},
+    'LDAP_USER_BASE':       {'type': str,  'default': None},
+    'LDAP_USER_FILTER':     {'type': str,  'default': None},
+    'LDAP_USERNAME_ATTR':   {'type': str,  'default': None},
+    'LDAP_FIRSTNAME_ATTR':  {'type': str,  'default': None},
+    'LDAP_LASTNAME_ATTR':   {'type': str,  'default': None},
+    'LDAP_EMAIL_ATTR':      {'type': str,  'default': None},
 },

 'ARCHIVE_METHOD_TOGGLES': {
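URL_BLACKLIST/URL_WHITELIST are renamed to URL_DENYLIST/URL_ALLOWLIST, with the old names kept as aliases. A hypothetical override via the CLI (the pattern shown is only an example, not the default regex above):

    # illustrative only: skip archiving URLs ending in .pdf or .zip
    archivebox config --set URL_DENYLIST='\.(pdf|zip)(\?.*)?$'
    # the old key name still works through the alias
    archivebox config --set URL_BLACKLIST='\.(pdf|zip)(\?.*)?$'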
@@ -100,6 +149,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'SAVE_GIT':             {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
     'SAVE_MEDIA':           {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
     'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
+    'SAVE_ALLOWLIST':       {'type': dict, 'default': {},},
+    'SAVE_DENYLIST':        {'type': dict, 'default': {},},
 },

 'ARCHIVE_METHOD_OPTIONS': {

@@ -108,13 +159,14 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'CHECK_SSL_VALIDITY':   {'type': bool, 'default': True},
     'MEDIA_MAX_SIZE':       {'type': str,  'default': '750m'},

-    'CURL_USER_AGENT':      {'type': str,  'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
+    'CURL_USER_AGENT':      {'type': str,  'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
-    'WGET_USER_AGENT':      {'type': str,  'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
+    'WGET_USER_AGENT':      {'type': str,  'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
-    'CHROME_USER_AGENT':    {'type': str,  'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.61 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
+    'CHROME_USER_AGENT':    {'type': str,  'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},

     'COOKIES_FILE':         {'type': str,  'default': None},
     'CHROME_USER_DATA_DIR': {'type': str,  'default': None},

+    'CHROME_TIMEOUT':       {'type': int,  'default': 0},
     'CHROME_HEADLESS':      {'type': bool, 'default': True},
     'CHROME_SANDBOX':       {'type': bool, 'default': lambda c: not c['IN_DOCKER']},
     'YOUTUBEDL_ARGS':       {'type': list, 'default': lambda c: [
@@ -124,17 +176,22 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         '--write-thumbnail',
         '--no-call-home',
         '--write-sub',
-        '--all-subs',
-        '--write-auto-sub',
+        '--write-auto-subs',
         '--convert-subs=srt',
         '--yes-playlist',
         '--continue',
+        # This flag doesn't exist in youtube-dl
+        # only in yt-dlp
+        '--no-abort-on-error',
+        # --ignore-errors must come AFTER
+        # --no-abort-on-error
+        # https://github.com/yt-dlp/yt-dlp/issues/4914
         '--ignore-errors',
         '--geo-bypass',
         '--add-metadata',
-        '--max-filesize={}'.format(c['MEDIA_MAX_SIZE']),
+        '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
     ]},


     'WGET_ARGS': {'type': list, 'default': ['--no-verbose',
         '--adjust-extension',
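With the default MEDIA_MAX_SIZE of 750m, the new format selector expands to roughly the following yt-dlp invocation (a sketch for illustration only; the URL is a placeholder):

    yt-dlp --format='(bv*+ba/b)[filesize<=750m][filesize_approx<=?750m]/(bv*+ba/b)' \
        --no-abort-on-error --ignore-errors 'https://example.com/video'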
@@ -150,6 +207,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         '--compressed'
     ]},
     'GIT_ARGS': {'type': list, 'default': ['--recursive']},
+    'SINGLEFILE_ARGS': {'type': list, 'default' : None},
+    'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'},
 },

 'SEARCH_BACKEND_CONFIG' : {

@@ -163,6 +222,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'SONIC_COLLECTION':       {'type': str, 'default': 'archivebox'},
     'SONIC_BUCKET':           {'type': str, 'default': 'snapshots'},
     'SEARCH_BACKEND_TIMEOUT': {'type': int, 'default': 90},
+    # SQLite3 FTS5
+    'FTS_SEPARATE_DATABASE':  {'type': bool, 'default': True},
+    'FTS_TOKENIZERS':         {'type': str, 'default': 'porter unicode61 remove_diacritics 2'},
+    # Default from https://www.sqlite.org/limits.html#max_length
+    'FTS_SQLITE_MAX_LENGTH':  {'type': int, 'default': int(1e9)},
 },

 'DEPENDENCY_CONFIG': {

@@ -176,20 +240,22 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'USE_NODE':             {'type': bool, 'default': True},
     'USE_YOUTUBEDL':        {'type': bool, 'default': True},
     'USE_RIPGREP':          {'type': bool, 'default': True},

     'CURL_BINARY':          {'type': str, 'default': 'curl'},
     'GIT_BINARY':           {'type': str, 'default': 'git'},
-    'WGET_BINARY':          {'type': str, 'default': 'wget'},
+    'WGET_BINARY':          {'type': str, 'default': 'wget'},  # also can accept wget2
     'SINGLEFILE_BINARY':    {'type': str, 'default': lambda c: bin_path('single-file')},
     'READABILITY_BINARY':   {'type': str, 'default': lambda c: bin_path('readability-extractor')},
-    'MERCURY_BINARY':       {'type': str, 'default': lambda c: bin_path('mercury-parser')},
+    'MERCURY_BINARY':       {'type': str, 'default': lambda c: bin_path('postlight-parser')},
-    'YOUTUBEDL_BINARY':     {'type': str, 'default': 'youtube-dl'},
+    'YOUTUBEDL_BINARY':     {'type': str, 'default': 'yt-dlp'},  # also can accept youtube-dl
     'NODE_BINARY':          {'type': str, 'default': 'node'},
     'RIPGREP_BINARY':       {'type': str, 'default': 'rg'},
     'CHROME_BINARY':        {'type': str, 'default': None},

     'POCKET_CONSUMER_KEY':  {'type': str, 'default': None},
     'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
+
+    'READWISE_READER_TOKENS': {'type': dict, 'default': {}},
 },
 }
|
@@ -257,7 +323,7 @@ STATICFILE_EXTENSIONS = {
 # that can be downloaded as-is, not html pages that need to be rendered
 'gif', 'jpeg', 'jpg', 'png', 'tif', 'tiff', 'wbmp', 'ico', 'jng', 'bmp',
 'svg', 'svgz', 'webp', 'ps', 'eps', 'ai',
 'mp3', 'mp4', 'm4a', 'mpeg', 'mpg', 'mkv', 'mov', 'webm', 'm4v',
 'flv', 'wmv', 'avi', 'ogg', 'ts', 'm3u8',
 'pdf', 'txt', 'rtf', 'rtfd', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx',
 'atom', 'rss', 'css', 'js', 'json',

@@ -266,7 +332,7 @@ STATICFILE_EXTENSIONS = {

 # Less common extensions to consider adding later
 # jar, swf, bin, com, exe, dll, deb
 # ear, hqx, eot, wmlc, kml, kmz, cco, jardiff, jnlp, run, msi, msp, msm,
 # pl pm, prc pdb, rar, rpm, sea, sit, tcl tk, der, pem, crt, xpi, xspf,
 # ra, mng, asx, asf, 3gpp, 3gp, mid, midi, kar, jad, wml, htc, mml

@@ -292,6 +358,7 @@ ALLOWED_IN_OUTPUT_DIR = {
 'yarn.lock',
 'static',
 'sonic',
+'search.sqlite3',
 ARCHIVE_DIR_NAME,
 SOURCES_DIR_NAME,
 LOGS_DIR_NAME,

@@ -307,12 +374,23 @@ ALLOWED_IN_OUTPUT_DIR = {
 'static_index.json',
 }

+def get_version(config):
+return importlib.metadata.version(__package__ or 'archivebox')

+def get_commit_hash(config):
+try:
+return list((config['PACKAGE_DIR'] / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
+except Exception:
+return None

 ############################## Derived Config ##################################

+ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE

 DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
 'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
-'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
+'USER': {'default': lambda c: SYSTEM_USER},
 'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},

 'PACKAGE_DIR': {'default': lambda c: Path(__file__).resolve().parent},

@@ -326,17 +404,25 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
 'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
-'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
+'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
+'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
+'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},

 'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
-'VERSION': {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text(encoding='utf-8').strip())['version']},
+'VERSION': {'default': lambda c: get_version(c)},
+'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},

 'PYTHON_BINARY': {'default': lambda c: sys.executable},
 'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
 'PYTHON_VERSION': {'default': lambda c: '{}.{}.{}'.format(*sys.version_info[:3])},

-'DJANGO_BINARY': {'default': lambda c: django.__file__.replace('__init__.py', 'bin/django-admin.py')},
+'DJANGO_BINARY': {'default': lambda c: inspect.getfile(django)},
 'DJANGO_VERSION': {'default': lambda c: '{}.{}.{} {} ({})'.format(*django.VERSION)},

+'SQLITE_BINARY': {'default': lambda c: inspect.getfile(sqlite3)},
+'SQLITE_VERSION': {'default': lambda c: sqlite3.version},
+#'SQLITE_JOURNAL_MODE': {'default': lambda c: 'wal'}, # set at runtime below, interesting but unused for now
+#'SQLITE_OPTIONS': {'default': lambda c: ['JSON1']}, # set at runtime below

 'USE_CURL': {'default': lambda c: c['USE_CURL'] and (c['SAVE_FAVICON'] or c['SAVE_TITLE'] or c['SAVE_ARCHIVE_DOT_ORG'])},
 'CURL_VERSION': {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},

@@ -357,12 +443,13 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {

 'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
 'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
+'SINGLEFILE_ARGS': {'default': lambda c: c['SINGLEFILE_ARGS'] or []},

 'USE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']},
 'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},

 'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
-'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury is unversioned
+'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750

 'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},

@@ -376,14 +463,14 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
 'CHROME_BINARY': {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
 'USE_CHROME': {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
 'CHROME_VERSION': {'default': lambda c: bin_version(c['CHROME_BINARY']) if c['USE_CHROME'] else None},

 'SAVE_PDF': {'default': lambda c: c['USE_CHROME'] and c['SAVE_PDF']},
 'SAVE_SCREENSHOT': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SCREENSHOT']},
 'SAVE_DOM': {'default': lambda c: c['USE_CHROME'] and c['SAVE_DOM']},
 'SAVE_SINGLEFILE': {'default': lambda c: c['USE_CHROME'] and c['SAVE_SINGLEFILE'] and c['USE_NODE']},
 'SAVE_READABILITY': {'default': lambda c: c['USE_READABILITY'] and c['USE_NODE']},
 'SAVE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['USE_NODE']},

 'USE_NODE': {'default': lambda c: c['USE_NODE'] and (c['SAVE_READABILITY'] or c['SAVE_SINGLEFILE'] or c['SAVE_MERCURY'])},
 'NODE_VERSION': {'default': lambda c: bin_version(c['NODE_BINARY']) if c['USE_NODE'] else None},

@@ -392,10 +479,11 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
 'EXTERNAL_LOCATIONS': {'default': lambda c: get_external_locations(c)},
 'DATA_LOCATIONS': {'default': lambda c: get_data_locations(c)},
 'CHROME_OPTIONS': {'default': lambda c: get_chrome_info(c)},
+'SAVE_ALLOWLIST_PTN': {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
+'SAVE_DENYLIST_PTN': {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
 }


 ################################### Helpers ####################################

@@ -433,7 +521,7 @@ def load_config_val(key: str,
 elif val.lower() in ('false', 'no', '0'):
 return False
 else:
 raise ValueError(f'Invalid configuration option {key}={val} (expected a boolean: True/False)')

 elif type is str:
 if val.lower() in ('true', 'false', 'yes', 'no', '1', '0'):

@@ -458,7 +546,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
 config_path = Path(out_dir) / CONFIG_FILENAME
 if config_path.exists():
 config_file = ConfigParser()
 config_file.optionxform = str
 config_file.read(config_path)
 # flatten into one namespace
 config_file_vars = {

@@ -482,7 +570,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
 #
 # You can add options here manually in INI format, or automatically by running:
 # archivebox config --set KEY=VALUE
 #
 # If you modify this file manually, make sure to update your archive after by running:
 # archivebox init
 #

@@ -493,7 +581,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:

 out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
 config_path = Path(out_dir) / CONFIG_FILENAME

 if not config_path.exists():
 atomic_write(config_path, CONFIG_HEADER)

@@ -531,7 +619,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:

 with open(config_path, 'w+', encoding='utf-8') as new:
 config_file.write(new)

 try:
 # validate the config by attempting to re-parse it
 CONFIG = load_all_config()

@@ -544,20 +632,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:

 if Path(f'{config_path}.bak').exists():
 os.remove(f'{config_path}.bak')

 return {
 key.upper(): CONFIG.get(key.upper())
 for key in config.keys()
 }


 def load_config(defaults: ConfigDefaultDict,
 config: Optional[ConfigDict]=None,
 out_dir: Optional[str]=None,
 env_vars: Optional[os._Environ]=None,
 config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict:

 env_vars = env_vars or os.environ
 config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)

@@ -587,7 +675,7 @@ def load_config(defaults: ConfigDefaultDict,
 stderr()
 # raise
 raise SystemExit(2)

 return extended_config

 # def write_config(config: ConfigDict):

@@ -636,7 +724,10 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
 return None

 try:
-version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
+bin_env = os.environ | {'LANG': 'C'}
+version_str = run([abspath, "--version"], stdout=PIPE, env=bin_env).stdout.strip().decode()
+if not version_str:
+version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
 # take first 3 columns of first line of version info
 return ' '.join(version_str.split('\n')[0].strip().split()[:3])
 except OSError:

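Note on the bin_version change above: forcing LANG=C makes `--version` output English/ASCII so the first line can be parsed reliably, with a fallback to the inherited environment if the tool prints nothing under the C locale. A rough standalone sketch of the same idea (probe_version is a hypothetical name, not ArchiveBox's API; the real logic lives in bin_version above):

import os
from subprocess import run, PIPE

def probe_version(binary: str) -> str:
    env = {**os.environ, 'LANG': 'C'}  # same effect as `os.environ | {'LANG': 'C'}`, but also works before Python 3.9
    out = run([binary, '--version'], stdout=PIPE, env=env).stdout.decode().strip()
    if not out:
        # some tools print nothing under LANG=C, so retry with the normal environment
        out = run([binary, '--version'], stdout=PIPE).stdout.decode().strip()
    return ' '.join(out.split('\n')[0].split()[:3])  # keep the first 3 columns of the first line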
@@ -670,7 +761,7 @@ def bin_hash(binary: Optional[str]) -> Optional[str]:
 with io.open(abs_path, mode='rb') as f:
 for chunk in iter(lambda: f.read(io.DEFAULT_BUFFER_SIZE), b''):
 file_hash.update(chunk)

 return f'md5:{file_hash.hexdigest()}'

 def find_chrome_binary() -> Optional[str]:

@@ -695,7 +786,7 @@ def find_chrome_binary() -> Optional[str]:
 full_path_exists = shutil.which(name)
 if full_path_exists:
 return name

 return None

 def find_chrome_data_dir() -> Optional[str]:

@@ -779,6 +870,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
 'path': config['OUTPUT_DIR'].resolve(),
 'enabled': True,
 'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
+'is_mount': os.path.ismount(config['OUTPUT_DIR'].resolve()),
 },
 'SOURCES_DIR': {
 'path': config['SOURCES_DIR'].resolve(),

@@ -794,6 +886,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
 'path': config['ARCHIVE_DIR'].resolve(),
 'enabled': True,
 'is_valid': config['ARCHIVE_DIR'].exists(),
+'is_mount': os.path.ismount(config['ARCHIVE_DIR'].resolve()),
 },
 'CONFIG_FILE': {
 'path': config['CONFIG_FILE'].resolve(),

@@ -804,18 +897,12 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
 'path': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve(),
 'enabled': True,
 'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
+'is_mount': os.path.ismount((config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).resolve()),
 },
 }

 def get_dependency_info(config: ConfigDict) -> ConfigValue:
 return {
-'ARCHIVEBOX_BINARY': {
-'path': bin_path(config['ARCHIVEBOX_BINARY']),
-'version': config['VERSION'],
-'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
-'enabled': True,
-'is_valid': True,
-},
 'PYTHON_BINARY': {
 'path': bin_path(config['PYTHON_BINARY']),
 'version': config['PYTHON_VERSION'],

@@ -823,6 +910,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
 'enabled': True,
 'is_valid': bool(config['PYTHON_VERSION']),
 },
+'SQLITE_BINARY': {
+'path': bin_path(config['SQLITE_BINARY']),
+'version': config['SQLITE_VERSION'],
+'hash': bin_hash(config['SQLITE_BINARY']),
+'enabled': True,
+'is_valid': bool(config['SQLITE_VERSION']),
+},
 'DJANGO_BINARY': {
 'path': bin_path(config['DJANGO_BINARY']),
 'version': config['DJANGO_VERSION'],

@@ -830,6 +924,14 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
 'enabled': True,
 'is_valid': bool(config['DJANGO_VERSION']),
 },
+'ARCHIVEBOX_BINARY': {
+'path': bin_path(config['ARCHIVEBOX_BINARY']),
+'version': config['VERSION'],
+'hash': bin_hash(config['ARCHIVEBOX_BINARY']),
+'enabled': True,
+'is_valid': True,
+},

 'CURL_BINARY': {
 'path': bin_path(config['CURL_BINARY']),
 'version': config['CURL_VERSION'],

@@ -915,7 +1017,8 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
 'TIMEOUT': config['TIMEOUT'],
 'RESOLUTION': config['RESOLUTION'],
 'CHECK_SSL_VALIDITY': config['CHECK_SSL_VALIDITY'],
-'CHROME_BINARY': config['CHROME_BINARY'],
+'CHROME_BINARY': bin_path(config['CHROME_BINARY']),
+'CHROME_TIMEOUT': config['CHROME_TIMEOUT'],
 'CHROME_HEADLESS': config['CHROME_HEADLESS'],
 'CHROME_SANDBOX': config['CHROME_SANDBOX'],
 'CHROME_USER_AGENT': config['CHROME_USER_AGENT'],

@@ -956,13 +1059,22 @@ globals().update(CONFIG)


 # Set timezone to UTC and umask to OUTPUT_PERMISSIONS
-os.environ["TZ"] = 'UTC'
-os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
+assert TIMEZONE == 'UTC', 'The server timezone should always be set to UTC' # we may allow this to change later
+os.environ["TZ"] = TIMEZONE
+os.umask(0o777 - int(DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821

 # add ./node_modules/.bin to $PATH so we can use node scripts in extractors
 NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
 sys.path.append(NODE_BIN_PATH)

+# OPTIONAL: also look around the host system for node modules to use
+# avoid enabling this unless absolutely needed,
+# having overlapping potential sources of libs is a big source of bugs/confusing to users
+# DEV_NODE_BIN_PATH = str((Path(CONFIG["PACKAGE_DIR"]).absolute() / '..' / 'node_modules' / '.bin'))
+# sys.path.append(DEV_NODE_BIN_PATH)
+# USER_NODE_BIN_PATH = str(Path('~/.node_modules/.bin').resolve())
+# sys.path.append(USER_NODE_BIN_PATH)

 # disable stderr "you really shouldnt disable ssl" warnings with library config
 if not CONFIG['CHECK_SSL_VALIDITY']:
 import urllib3

@@ -970,6 +1082,13 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
 requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+# get SQLite database version, compile options, and runtime options
+# TODO: make this a less hacky proper assertion checker helper function in somewhere like setup_django
+#cursor = sqlite3.connect(':memory:').cursor()
+#DYNAMIC_CONFIG_SCHEMA['SQLITE_VERSION'] = lambda c: cursor.execute("SELECT sqlite_version();").fetchone()[0]
+#DYNAMIC_CONFIG_SCHEMA['SQLITE_JOURNAL_MODE'] = lambda c: cursor.execute('PRAGMA journal_mode;').fetchone()[0]
+#DYNAMIC_CONFIG_SCHEMA['SQLITE_OPTIONS'] = lambda c: [option[0] for option in cursor.execute('PRAGMA compile_options;').fetchall()]
+#cursor.close()

 ########################### Config Validity Checkers ###########################

@@ -988,6 +1107,11 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
 stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
 raise SystemExit(2)

+if int(CONFIG['DJANGO_VERSION'].split('.')[0]) < 3:
+stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
+stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
+raise SystemExit(2)

 if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
 stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
 stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')

@@ -1060,6 +1184,7 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
 stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
 stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
 stderr()


 def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
 output_dir = out_dir or config['OUTPUT_DIR']

@@ -1099,7 +1224,7 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO

 def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
 check_system_config()

 output_dir = out_dir or Path(config['OUTPUT_DIR'])

 assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)

@@ -1134,12 +1259,11 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
 # Otherwise use default sqlite3 file-based database and initialize django
 # without running migrations automatically (user runs them manually by calling init)
 django.setup()


 from django.conf import settings

 # log startup message to the error log
-with open(settings.ERROR_LOG, "a+", encoding='utf-8') as f:
+with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
 command = ' '.join(sys.argv)
 ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
 f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")

@@ -1149,10 +1273,17 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
 # Enable WAL mode in sqlite3
 from django.db import connection
 with connection.cursor() as cursor:

+# Set Journal mode to WAL to allow for multiple writers
 current_mode = cursor.execute("PRAGMA journal_mode")
 if current_mode != 'wal':
 cursor.execute("PRAGMA journal_mode=wal;")

+# Set max blocking delay for concurrent writes and write sync mode
+# https://litestream.io/tips/#busy-timeout
+cursor.execute("PRAGMA busy_timeout = 5000;")
+cursor.execute("PRAGMA synchronous = NORMAL;")

 # Create cache table in DB if needed
 try:
 from django.core.cache import cache

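The added PRAGMAs above are the usual recipe for letting the web UI and CLI write to the same index concurrently: WAL journaling, a 5-second busy timeout instead of immediate "database is locked" errors, and synchronous=NORMAL (safe under WAL). A minimal sketch of the same setup on a raw sqlite3 connection, outside Django (the file name is just an example; ArchiveBox applies these through the Django connection in setup_django):

import sqlite3

conn = sqlite3.connect('index.sqlite3')  # example path
cur = conn.cursor()
if cur.execute('PRAGMA journal_mode;').fetchone()[0] != 'wal':
    cur.execute('PRAGMA journal_mode=wal;')      # allow concurrent readers alongside a writer
cur.execute('PRAGMA busy_timeout = 5000;')       # block up to 5s waiting for a lock
cur.execute('PRAGMA synchronous = NORMAL;')      # fewer fsyncs; still durable enough with WAL
conn.close()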
@@ -1160,7 +1291,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
 except django.db.utils.OperationalError:
 call_command("createcachetable", verbosity=0)


 # if archivebox gets imported multiple times, we have to close
 # the sqlite3 whenever we init from scratch to avoid multiple threads
 # sharing the same connection by accident

@@ -41,7 +41,7 @@ class ConfigDict(BaseConfig, total=False):
 MEDIA_TIMEOUT: int
 OUTPUT_PERMISSIONS: str
 RESTRICT_FILE_NAMES: str
-URL_BLACKLIST: str
+URL_DENYLIST: str

 SECRET_KEY: Optional[str]
 BIND_ADDR: str

@@ -74,6 +74,7 @@ class ConfigDict(BaseConfig, total=False):
 CHROME_USER_AGENT: str
 COOKIES_FILE: Union[str, Path, None]
 CHROME_USER_DATA_DIR: Union[str, Path, None]
+CHROME_TIMEOUT: int
 CHROME_HEADLESS: bool
 CHROME_SANDBOX: bool

@@ -98,6 +99,7 @@ class ConfigDict(BaseConfig, total=False):
 WGET_ARGS: List[str]
 CURL_ARGS: List[str]
 GIT_ARGS: List[str]
+TAG_SEPARATOR_PATTERN: str


 ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]

@@ -41,7 +41,7 @@ class AddLinkForm(forms.Form):
 # label="Exclude patterns",
 # min_length='1',
 # required=False,
-# initial=URL_BLACKLIST,
+# initial=URL_DENYLIST,
 # )
 # timeout = forms.IntegerField(
 # initial=TIMEOUT,

@@ -1,8 +1,11 @@
 __package__ = 'archivebox.core'

+import ipaddress
 from django.utils import timezone
+from django.contrib.auth.middleware import RemoteUserMiddleware
+from django.core.exceptions import ImproperlyConfigured

-from ..config import PUBLIC_SNAPSHOTS
+from ..config import PUBLIC_SNAPSHOTS, REVERSE_PROXY_USER_HEADER, REVERSE_PROXY_WHITELIST


 def detect_timezone(request, activate: bool=True):

@@ -35,3 +38,23 @@ def CacheControlMiddleware(get_response):
 return response

 return middleware

+class ReverseProxyAuthMiddleware(RemoteUserMiddleware):
+header = 'HTTP_{normalized}'.format(normalized=REVERSE_PROXY_USER_HEADER.replace('-', '_').upper())

+def process_request(self, request):
+if REVERSE_PROXY_WHITELIST == '':
+return

+ip = request.META.get('REMOTE_ADDR')

+for cidr in REVERSE_PROXY_WHITELIST.split(','):
+try:
+network = ipaddress.ip_network(cidr)
+except ValueError:
+raise ImproperlyConfigured(
+"The REVERSE_PROXY_WHITELIST config paramater is in invalid format, or "
+"contains invalid CIDR. Correct format is a coma-separated list of IPv4/IPv6 CIDRs.")

+if ipaddress.ip_address(ip) in network:
+return super().process_request(request)
archivebox/core/migrations/0021_auto_20220914_0934.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+# Generated by Django 3.1.14 on 2022-09-14 09:34
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+dependencies = [
+('core', '0020_auto_20210410_1031'),
+]
+
+operations = [
+migrations.AlterField(
+model_name='archiveresult',
+name='extractor',
+field=models.CharField(choices=[('favicon', 'favicon'), ('headers', 'headers'), ('singlefile', 'singlefile'), ('pdf', 'pdf'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('wget', 'wget'), ('title', 'title'), ('readability', 'readability'), ('mercury', 'mercury'), ('git', 'git'), ('media', 'media'), ('archive_org', 'archive_org')], max_length=32),
+),
+]

@@ -19,7 +19,18 @@ from ..config import (
 SQL_INDEX_FILENAME,
 OUTPUT_DIR,
 LOGS_DIR,
-TIME_ZONE,
+TIMEZONE,

+LDAP,
+LDAP_SERVER_URI,
+LDAP_BIND_DN,
+LDAP_BIND_PASSWORD,
+LDAP_USER_BASE,
+LDAP_USER_FILTER,
+LDAP_USERNAME_ATTR,
+LDAP_FIRSTNAME_ATTR,
+LDAP_LASTNAME_ATTR,
+LDAP_EMAIL_ATTR,
 )

 IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]

@@ -34,7 +45,8 @@ WSGI_APPLICATION = 'core.wsgi.application'
 ROOT_URLCONF = 'core.urls'

 LOGIN_URL = '/accounts/login/'
-LOGOUT_REDIRECT_URL = '/'
+LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')

 PASSWORD_RESET_URL = '/accounts/password_reset/'
 APPEND_SLASH = True

@@ -54,6 +66,12 @@ INSTALLED_APPS = [
 ]


+# For usage with https://www.jetadmin.io/integrations/django
+# INSTALLED_APPS += ['jet_django']
+# JET_PROJECT = 'archivebox'
+# JET_TOKEN = 'some-api-token-here'


 MIDDLEWARE = [
 'core.middleware.TimezoneMiddleware',
 'django.middleware.security.SecurityMiddleware',

@@ -61,14 +79,63 @@ MIDDLEWARE = [
 'django.middleware.common.CommonMiddleware',
 'django.middleware.csrf.CsrfViewMiddleware',
 'django.contrib.auth.middleware.AuthenticationMiddleware',
+'core.middleware.ReverseProxyAuthMiddleware',
 'django.contrib.messages.middleware.MessageMiddleware',
 'core.middleware.CacheControlMiddleware',
 ]

+################################################################################
+### Authentication Settings
+################################################################################

 AUTHENTICATION_BACKENDS = [
+'django.contrib.auth.backends.RemoteUserBackend',
 'django.contrib.auth.backends.ModelBackend',
 ]

+if LDAP:
+try:
+import ldap
+from django_auth_ldap.config import LDAPSearch

+global AUTH_LDAP_SERVER_URI
+global AUTH_LDAP_BIND_DN
+global AUTH_LDAP_BIND_PASSWORD
+global AUTH_LDAP_USER_SEARCH
+global AUTH_LDAP_USER_ATTR_MAP

+AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
+AUTH_LDAP_BIND_DN = LDAP_BIND_DN
+AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD

+assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'

+AUTH_LDAP_USER_SEARCH = LDAPSearch(
+LDAP_USER_BASE,
+ldap.SCOPE_SUBTREE,
+'(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
+)

+AUTH_LDAP_USER_ATTR_MAP = {
+'username': LDAP_USERNAME_ATTR,
+'first_name': LDAP_FIRSTNAME_ATTR,
+'last_name': LDAP_LASTNAME_ATTR,
+'email': LDAP_EMAIL_ATTR,
+}

+AUTHENTICATION_BACKENDS = [
+'django_auth_ldap.backend.LDAPBackend',
+]
+except ModuleNotFoundError:
+sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
+# dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap
+# sys.exit(1)


+################################################################################
+### Debug Settings
+################################################################################

 # only enable debug toolbar when in DEBUG mode with --nothreading (it doesnt work in multithreaded mode)
 DEBUG_TOOLBAR = DEBUG and ('--nothreading' in sys.argv) and ('--reload' not in sys.argv)
 if DEBUG_TOOLBAR:

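For reference, the LDAP block above builds its user search filter by plain string concatenation from the LDAP_USERNAME_ATTR and LDAP_USER_FILTER options. A worked example with placeholder values (illustrative only, not shipped defaults):

# placeholder values for illustration only
LDAP_USERNAME_ATTR = 'uid'
LDAP_USER_FILTER = '(objectClass=person)'
search_filter = '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')'
assert search_filter == '(&(uid=%(user)s)(objectClass=person))'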
@@ -154,7 +221,7 @@ DATABASES = {
 'timeout': 60,
 'check_same_thread': False,
 },
-'TIME_ZONE': 'UTC',
+'TIME_ZONE': TIMEZONE,
 # DB setup is sometimes modified at runtime by setup_django() in config.py
 }
 }

@@ -224,7 +291,8 @@ USE_L10N = True
 USE_TZ = True
 DATETIME_FORMAT = 'Y-m-d g:iA'
 SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
-TIME_ZONE = TIME_ZONE # noqa
+TIME_ZONE = TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent


 from django.conf.locale.en import formats as en_formats

@@ -263,8 +331,8 @@ class NoisyRequestsFilter(logging.Filter):
 if LOGS_DIR.exists():
 ERROR_LOG = (LOGS_DIR / 'errors.log')
 else:
-# meh too many edge cases here around creating log dir w/ correct permissions
-# cant be bothered, just trash the log and let them figure it out via stdout/stderr
+# historically too many edge cases here around creating log dir w/ correct permissions early on
+# if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr
 ERROR_LOG = tempfile.NamedTemporaryFile().name

 LOGGING = {

@@ -6,7 +6,7 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
 from django.conf import settings
 from django.views.generic.base import RedirectView

-from core.views import HomepageView, SnapshotView, PublicIndexView, AddView
+from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView


 # print('DEBUG', settings.DEBUG)

@@ -24,14 +24,19 @@ urlpatterns = [

 path('admin/core/snapshot/add/', RedirectView.as_view(url='/add/')),
 path('add/', AddView.as_view(), name='add'),

 path('accounts/login/', RedirectView.as_view(url='/admin/login/')),
 path('accounts/logout/', RedirectView.as_view(url='/admin/logout/')),


 path('accounts/', include('django.contrib.auth.urls')),
 path('admin/', admin.site.urls),

+path('health/', HealthCheckView.as_view(), name='healthcheck'),
+path('error/', lambda _: 1/0),

+# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django

 path('index.html', RedirectView.as_view(url='/')),
 path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}),
 path('', HomepageView.as_view(), name='Home'),

@@ -11,6 +11,8 @@ from django.views.generic.list import ListView
 from django.views.generic import FormView
 from django.db.models import Q
 from django.contrib.auth.mixins import UserPassesTestMixin
+from django.views.decorators.csrf import csrf_exempt
+from django.utils.decorators import method_decorator

 from core.models import Snapshot
 from core.forms import AddLinkForm

@@ -36,7 +38,7 @@ class HomepageView(View):

 if PUBLIC_INDEX:
 return redirect('/public')

 return redirect(f'/admin/login/?next={request.path}')


@@ -203,7 +205,7 @@ class SnapshotView(View):
 content_type="text/html",
 status=404,
 )


 class PublicIndexView(ListView):
 template_name = 'public_index.html'

@@ -218,7 +220,7 @@ class PublicIndexView(ListView):
 'FOOTER_INFO': FOOTER_INFO,
 }

 def get_queryset(self, **kwargs):
 qs = super().get_queryset(**kwargs)
 query = self.request.GET.get('q')
 if query and query.strip():

@@ -236,7 +238,7 @@ class PublicIndexView(ListView):
 else:
 return redirect(f'/admin/login/?next={self.request.path}')

+@method_decorator(csrf_exempt, name='dispatch')
 class AddView(UserPassesTestMixin, FormView):
 template_name = "add.html"
 form_class = AddLinkForm

@@ -247,7 +249,7 @@ class AddView(UserPassesTestMixin, FormView):
 url = self.request.GET.get('url', None)
 if url:
 return {'url': url if '://' in url else f'https://{url}'}

 return super().get_initial()

 def test_func(self):

@@ -293,3 +295,18 @@ class AddView(UserPassesTestMixin, FormView):
 "form": AddLinkForm()
 })
 return render(template_name=self.template_name, request=self.request, context=context)


+class HealthCheckView(View):
+"""
+A Django view that renders plain text "OK" for service discovery tools
+"""
+def get(self, request):
+"""
+Handle a GET request
+"""
+return HttpResponse(
+'OK',
+content_type='text/plain',
+status=200
+)

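Once a server is running, the new endpoint can be probed by any service-discovery or uptime tool; for example (host/port are whatever you deployed, shown here as the usual dev-server address):

from urllib.request import urlopen

resp = urlopen('http://127.0.0.1:8000/health/')  # example address, adjust to your deployment
assert resp.status == 200 and resp.read() == b'OK'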
@@ -1,13 +1,19 @@
 __package__ = 'archivebox.extractors'

 import os
+import sys
 from pathlib import Path

-from typing import Optional, List, Iterable, Union
+from typing import Callable, Optional, List, Iterable, Union
 from datetime import datetime, timezone
 from django.db.models import QuerySet

-from ..index.schema import Link
+from ..config import (
+SAVE_ALLOWLIST_PTN,
+SAVE_DENYLIST_PTN,
+)
+from ..core.settings import ERROR_LOG
+from ..index.schema import ArchiveResult, Link
 from ..index.sql import write_link_to_sql_index
 from ..index import (
 load_link_details,

@@ -40,9 +46,12 @@ from .archive_org import should_save_archive_dot_org, save_archive_dot_org
 from .headers import should_save_headers, save_headers


-def get_default_archive_methods():
+ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
+SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
+ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]

+def get_default_archive_methods() -> List[ArchiveMethodEntry]:
 return [
-('title', should_save_title, save_title),
 ('favicon', should_save_favicon, save_favicon),
 ('headers', should_save_headers, save_headers),
 ('singlefile', should_save_singlefile, save_singlefile),

@@ -50,21 +59,39 @@ def get_default_archive_methods():
 ('screenshot', should_save_screenshot, save_screenshot),
 ('dom', should_save_dom, save_dom),
 ('wget', should_save_wget, save_wget),
-('readability', should_save_readability, save_readability), # keep readability below wget and singlefile, as it depends on them
+('title', should_save_title, save_title), # keep title and readability below wget and singlefile, as it depends on them
+('readability', should_save_readability, save_readability),
 ('mercury', should_save_mercury, save_mercury),
 ('git', should_save_git, save_git),
 ('media', should_save_media, save_media),
 ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
 ]

+@enforce_types
+def get_archive_methods_for_link(link: Link) -> Iterable[ArchiveMethodEntry]:
+DEFAULT_METHODS = get_default_archive_methods()
+allowed_methods = {
+m for pat, methods in
+SAVE_ALLOWLIST_PTN.items()
+if pat.search(link.url)
+for m in methods
+} or { m[0] for m in DEFAULT_METHODS }
+denied_methods = {
+m for pat, methods in
+SAVE_DENYLIST_PTN.items()
+if pat.search(link.url)
+for m in methods
+}
+allowed_methods -= denied_methods

+return (m for m in DEFAULT_METHODS if m[0] in allowed_methods)

 ARCHIVE_METHODS_INDEXING_PRECEDENCE = [('readability', 1), ('singlefile', 2), ('dom', 3), ('wget', 4)]

 @enforce_types
-def ignore_methods(to_ignore: List[str]):
+def ignore_methods(to_ignore: List[str]) -> Iterable[str]:
 ARCHIVE_METHODS = get_default_archive_methods()
-methods = filter(lambda x: x[0] not in to_ignore, ARCHIVE_METHODS)
-methods = map(lambda x: x[0], methods)
-return list(methods)
+return [x[0] for x in ARCHIVE_METHODS if x[0] not in to_ignore]

 @enforce_types
 def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:

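get_archive_methods_for_link above keeps only the extractor names matched by a SAVE_ALLOWLIST pattern (falling back to all default methods when nothing matches) and then subtracts anything matched by SAVE_DENYLIST. A self-contained illustration of that set logic with made-up patterns and a trimmed method list (not the real config values):

import re

DEFAULTS = ['favicon', 'headers', 'singlefile', 'wget', 'media']   # trimmed example list
SAVE_ALLOWLIST = {r'youtube\.com': ['media', 'title']}             # example user config
SAVE_DENYLIST = {r'\.pdf$': ['media']}                             # example user config

def methods_for(url):
    allowed = {m for pat, ms in SAVE_ALLOWLIST.items() if re.search(pat, url) for m in ms} or set(DEFAULTS)
    denied = {m for pat, ms in SAVE_DENYLIST.items() if re.search(pat, url) for m in ms}
    return [m for m in DEFAULTS if m in (allowed - denied)]

assert methods_for('https://youtube.com/watch?v=xyz') == ['media']     # allowlist narrows the set
assert 'media' not in methods_for('https://example.com/paper.pdf')     # denylist removes methods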
@ -77,11 +104,11 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
except Snapshot.DoesNotExist:
|
except Snapshot.DoesNotExist:
|
||||||
snapshot = write_link_to_sql_index(link)
|
snapshot = write_link_to_sql_index(link)
|
||||||
|
|
||||||
ARCHIVE_METHODS = get_default_archive_methods()
|
active_methods = get_archive_methods_for_link(link)
|
||||||
|
|
||||||
if methods:
|
if methods:
|
||||||
ARCHIVE_METHODS = [
|
active_methods = [
|
||||||
method for method in ARCHIVE_METHODS
|
method for method in active_methods
|
||||||
if method[0] in methods
|
if method[0] in methods
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -98,7 +125,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
|
||||||
start_ts = datetime.now(timezone.utc)
|
start_ts = datetime.now(timezone.utc)
|
||||||
|
|
||||||
for method_name, should_run, method_function in ARCHIVE_METHODS:
|
for method_name, should_run, method_function in active_methods:
|
||||||
try:
|
try:
|
||||||
if method_name not in link.history:
|
if method_name not in link.history:
|
||||||
link.history[method_name] = []
|
link.history[method_name] = []
|
||||||
|
|
@ -127,10 +154,27 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
# print('{black} X {}{reset}'.format(method_name, **ANSI))
|
# print('{black} X {}{reset}'.format(method_name, **ANSI))
|
||||||
stats['skipped'] += 1
|
stats['skipped'] += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
|
||||||
|
# and https://github.com/ArchiveBox/ArchiveBox/issues/1014
|
||||||
|
# are fixed.
|
||||||
|
"""
|
||||||
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
|
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
|
||||||
method_name,
|
method_name,
|
||||||
link.url,
|
link.url,
|
||||||
)) from e
|
)) from e
|
||||||
|
"""
|
||||||
|
# Instead, use the kludgy workaround from
|
||||||
|
# https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627
|
||||||
|
with open(ERROR_LOG, "a", encoding='utf-8') as f:
|
||||||
|
command = ' '.join(sys.argv)
|
||||||
|
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
||||||
|
f.write(("\n" + 'Exception in archive_methods.save_{}(Link(url={})) command={}; ts={}'.format(
|
||||||
|
method_name,
|
||||||
|
link.url,
|
||||||
|
command,
|
||||||
|
ts
|
||||||
|
) + "\n"))
|
||||||
|
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
|
||||||
|
|
||||||
# print(' ', stats)
|
# print(' ', stats)
|
||||||
|
|
||||||
|
|
@ -143,7 +187,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
|
||||||
|
|
||||||
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
write_link_details(link, out_dir=out_dir, skip_sql_index=False)
|
||||||
|
|
||||||
log_link_archiving_finished(link, link.link_dir, is_new, stats, start_ts)
|
log_link_archiving_finished(link, out_dir, is_new, stats, start_ts)
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
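Note: the active_methods filtering above keeps only the (name, should_run, function) entries whose name appears in the caller-supplied methods list. A minimal, self-contained sketch of that selection logic follows; the method names and no-op functions below are invented for illustration and are not ArchiveBox's real extractor registry.

    # Sketch of filtering archive-method tuples by a user-supplied list of names.
    from typing import Callable, Iterable, List, Optional, Tuple

    MethodEntry = Tuple[str, Callable[[], bool], Callable[[], None]]

    def pick_methods(all_methods: List[MethodEntry],
                     methods: Optional[Iterable[str]] = None) -> List[MethodEntry]:
        if not methods:
            return all_methods                     # no explicit selection: keep everything
        wanted = set(methods)
        # method[0] is the extractor name, mirroring `if method[0] in methods` above
        return [method for method in all_methods if method[0] in wanted]

    ALL = [
        ('title',   lambda: True, lambda: print('saving title')),
        ('favicon', lambda: True, lambda: print('saving favicon')),
        ('media',   lambda: True, lambda: print('saving media')),
    ]

    for name, should_run, save in pick_methods(ALL, methods=['title', 'favicon']):
        if should_run():
            save()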
@@ -9,6 +9,7 @@ from ..util import (
     enforce_types,
     is_static_file,
     chrome_args,
+    chrome_cleanup,
 )
 from ..config import (
     TIMEOUT,
@@ -26,7 +27,8 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona

     out_dir = out_dir or Path(link.link_dir)
     if not overwrite and (out_dir / 'output.html').exists():
-        return False
+        if (out_dir / 'output.html').stat().st_size > 1:
+            return False

     return SAVE_DOM

@@ -38,7 +40,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     output: ArchiveOutput = 'output.html'
     output_path = out_dir / output
     cmd = [
-        *chrome_args(TIMEOUT=timeout),
+        *chrome_args(),
         '--dump-dom',
         link.url
     ]
@@ -56,6 +58,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     except Exception as err:
         status = 'failed'
         output = err
+        chrome_cleanup()
     finally:
         timer.end()

@@ -10,6 +10,7 @@ from ..util import enforce_types, domain
 from ..config import (
     TIMEOUT,
     SAVE_FAVICON,
+    FAVICON_PROVIDER,
     CURL_BINARY,
     CURL_ARGS,
     CURL_VERSION,
@@ -40,7 +41,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
         '--output', str(output),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
-        'https://www.google.com/s2/favicons?domain={}'.format(domain(link.url)),
+        FAVICON_PROVIDER.format(domain(link.url)),
     ]
     status = 'failed'
     timer = TimedProgress(timeout, prefix=' ')
@@ -33,7 +33,7 @@ def should_save_media(link: Link, out_dir: Optional[Path]=None, overwrite: Optio

 @enforce_types
 def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult:
-    """Download playlists or individual video, audio, and subtitles using youtube-dl"""
+    """Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""

     out_dir = out_dir or Path(link.link_dir)
     output: ArchiveOutput = 'media'
@@ -43,6 +43,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
         YOUTUBEDL_BINARY,
         *YOUTUBEDL_ARGS,
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
+        # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
         link.url,
     ]
     status = 'succeeded'
@@ -60,7 +61,7 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
             pass
         else:
             hints = (
-                'Got youtube-dl response code: {}.'.format(result.returncode),
+                'Got youtube-dl (or yt-dlp) response code: {}.'.format(result.returncode),
                 *result.stderr.decode().split('\n'),
             )
             raise ArchiveError('Failed to save media', hints)
@@ -71,8 +72,18 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
         timer.end()

     # add video description and subtitles to full-text index
+    # Let's try a few different
     index_texts = [
-        text_file.read_text(encoding='utf-8').strip()
+        # errors:
+        # * 'strict' to raise a ValueError exception if there is an
+        #   encoding error. The default value of None has the same effect.
+        # * 'ignore' ignores errors. Note that ignoring encoding errors
+        #   can lead to data loss.
+        # * 'xmlcharrefreplace' is only supported when writing to a
+        #   file. Characters not supported by the encoding are replaced with
+        #   the appropriate XML character reference &#nnn;.
+        # There are a few more options described in https://docs.python.org/3/library/functions.html#open
+        text_file.read_text(encoding='utf-8', errors='xmlcharrefreplace').strip()
        for text_file in (
            *output_path.glob('*.description'),
            *output_path.glob('*.srt'),
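For context on the errors= options listed in the comment above, here is a small, self-contained demonstration of how the decode-side error handlers behave when reading possibly-broken subtitle text (the byte string is an invented example; note that 'xmlcharrefreplace' itself is an encoding-side handler, as the comment says):

    # Demonstration of codec error handlers when decoding bytes that are not valid UTF-8.
    bad_utf8 = b'subtitle text \xff\xfe with stray non-UTF-8 bytes'

    try:
        bad_utf8.decode('utf-8')                         # errors='strict' is the default
    except UnicodeDecodeError as err:
        print('strict decoding raises:', err.reason)

    print(bad_utf8.decode('utf-8', errors='ignore'))     # silently drops the bad bytes
    print(bad_utf8.decode('utf-8', errors='replace'))    # substitutes U+FFFD for them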
@@ -9,6 +9,7 @@ from ..util import (
     enforce_types,
     is_static_file,
     chrome_args,
+    chrome_cleanup,
 )
 from ..config import (
     TIMEOUT,
@@ -37,7 +38,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     out_dir = out_dir or Path(link.link_dir)
     output: ArchiveOutput = 'output.pdf'
     cmd = [
-        *chrome_args(TIMEOUT=timeout),
+        *chrome_args(),
         '--print-to-pdf',
         link.url,
     ]
@@ -54,6 +55,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
     except Exception as err:
         status = 'failed'
         output = err
+        chrome_cleanup()
     finally:
         timer.end()

@@ -10,9 +10,7 @@ from ..index.schema import Link, ArchiveResult, ArchiveError
 from ..system import run, atomic_write
 from ..util import (
     enforce_types,
-    download_url,
     is_static_file,
-
 )
 from ..config import (
     TIMEOUT,
@@ -22,28 +20,8 @@ from ..config import (
     READABILITY_VERSION,
 )
 from ..logging_util import TimedProgress
+from .title import get_html

-
-@enforce_types
-def get_html(link: Link, path: Path) -> str:
-    """
-    Try to find wget, singlefile and then dom files.
-    If none is found, download the url again.
-    """
-    canonical = link.canonical_outputs()
-    abs_path = path.absolute()
-    sources = [canonical["singlefile_path"], canonical["wget_path"], canonical["dom_path"]]
-    document = None
-    for source in sources:
-        try:
-            with open(abs_path / source, "r", encoding="utf-8") as f:
-                document = f.read()
-                break
-        except (FileNotFoundError, TypeError):
-            continue
-    if document is None:
-        return download_url(link.url)
-    else:
-        return document
-
 @enforce_types
 def should_save_readability(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
@@ -87,12 +65,13 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
     cmd = [
         DEPENDENCIES['READABILITY_BINARY']['path'],
         temp_doc.name,
+        link.url,
     ]

     result = run(cmd, cwd=out_dir, timeout=timeout)
     try:
         result_json = json.loads(result.stdout)
-        assert result_json and 'content' in result_json
+        assert result_json and 'content' in result_json, 'Readability output is not valid JSON'
     except json.JSONDecodeError:
         raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr)

@@ -106,7 +85,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
     # "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
     output_tail = [
         line.strip()
-        for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
+        for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
         if line.strip()
     ]
     hints = (
@@ -9,6 +9,7 @@ from ..util import (
     enforce_types,
     is_static_file,
     chrome_args,
+    chrome_cleanup,
 )
 from ..config import (
     TIMEOUT,
@@ -37,7 +38,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
     out_dir = out_dir or Path(link.link_dir)
     output: ArchiveOutput = 'screenshot.png'
     cmd = [
-        *chrome_args(TIMEOUT=timeout),
+        *chrome_args(),
         '--screenshot',
         link.url,
     ]
@@ -54,6 +55,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
     except Exception as err:
         status = 'failed'
         output = err
+        chrome_cleanup()
     finally:
         timer.end()

@@ -17,6 +17,7 @@ from ..config import (
     SAVE_SINGLEFILE,
     DEPENDENCIES,
     SINGLEFILE_VERSION,
+    SINGLEFILE_ARGS,
     CHROME_BINARY,
 )
 from ..logging_util import TimedProgress
@@ -41,14 +42,35 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
     out_dir = out_dir or Path(link.link_dir)
     output = "singlefile.html"

-    browser_args = chrome_args(TIMEOUT=0)
+    browser_args = chrome_args(CHROME_TIMEOUT=0)

     # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
     browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:]))
-    cmd = [
-        DEPENDENCIES['SINGLEFILE_BINARY']['path'],
+    options = [
+        *SINGLEFILE_ARGS,
         '--browser-executable-path={}'.format(CHROME_BINARY),
         browser_args,
+    ]
+
+    # Deduplicate options (single-file doesn't like when you use the same option two times)
+    #
+    # NOTE: Options names that come first clobber conflicting names that come later
+    # My logic is SINGLEFILE_ARGS is the option that affects the singlefile command with most
+    # specificity, therefore the user sets it with a lot intent, therefore it should take precedence
+    # kind of like the ergonomic principle of lexical scope in programming languages.
+    seen_option_names = []
+    def test_seen(argument):
+        option_name = argument.split("=")[0]
+        if option_name in seen_option_names:
+            return False
+        else:
+            seen_option_names.append(option_name)
+            return True
+    deduped_options = list(filter(test_seen, options))
+
+    cmd = [
+        DEPENDENCIES['SINGLEFILE_BINARY']['path'],
+        *deduped_options,
         link.url,
         output,
     ]
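The deduplication comment above describes first-occurrence-wins behavior: user-supplied SINGLEFILE_ARGS come first, so they override the defaults appended later. A stand-alone sketch of that idea follows; the flag names are invented examples, not necessarily real single-file options.

    # First-occurrence-wins deduplication of CLI options keyed on the part before '='.
    def dedupe_options(options):
        seen = set()
        deduped = []
        for argument in options:
            name = argument.split('=')[0]
            if name not in seen:           # earlier options clobber later duplicates
                seen.add(name)
                deduped.append(argument)
        return deduped

    options = [
        '--browser-headless=true',         # user-supplied, comes first, so it wins
        '--compress-html=false',
        '--browser-headless=false',        # later duplicate, dropped
    ]
    print(dedupe_options(options))
    # ['--browser-headless=true', '--compress-html=false']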
@@ -26,7 +26,7 @@ from ..logging_util import TimedProgress

 HTML_TITLE_REGEX = re.compile(
     r'<title.*?>'  # start matching text after <title> tag
-    r'(.[^<>]+)',  # get everything up to these symbols
+    r'([^<>]+)',  # get everything up to these symbols
     re.IGNORECASE | re.MULTILINE | re.DOTALL | re.UNICODE,
 )

@@ -58,6 +58,27 @@ class TitleParser(HTMLParser):
         if tag.lower() == "title":
             self.inside_title_tag = False

+
+@enforce_types
+def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
+    """
+    Try to find wget, singlefile and then dom files.
+    If none is found, download the url again.
+    """
+    canonical = link.canonical_outputs()
+    abs_path = path.absolute()
+    sources = [canonical["singlefile_path"], canonical["wget_path"], canonical["dom_path"]]
+    document = None
+    for source in sources:
+        try:
+            with open(abs_path / source, "r", encoding="utf-8") as f:
+                document = f.read()
+                break
+        except (FileNotFoundError, TypeError):
+            continue
+    if document is None:
+        return download_url(link.url, timeout=timeout)
+    else:
+        return document
+
 @enforce_types
 def should_save_title(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool:
@@ -90,7 +111,7 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
     status = 'succeeded'
     timer = TimedProgress(timeout, prefix=' ')
     try:
-        html = download_url(link.url, timeout=timeout)
+        html = get_html(link, out_dir, timeout=timeout)
         try:
             # try using relatively strict html parser first
             parser = TitleParser()
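The get_html() helper moved here tries cached snapshot files before re-downloading the page. A minimal sketch of that "reuse what is already on disk, fall back to fetching" pattern, with placeholder paths and a stand-in fetch function:

    # Return the first readable candidate file, otherwise call the fallback fetcher.
    from pathlib import Path

    def read_first_existing(out_dir: Path, candidates, fetch):
        for name in candidates:
            try:
                return (out_dir / name).read_text(encoding='utf-8')
            except (FileNotFoundError, TypeError):
                continue
        return fetch()   # nothing cached locally, download again

    html = read_first_existing(
        Path('archive/1234567890'),                       # example snapshot dir
        ['singlefile.html', 'output.html'],               # example candidate files
        fetch=lambda: '<html>downloaded fallback</html>',
    )
    print(html[:20])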
@@ -22,7 +22,8 @@ from ..config import (
     JSON_INDEX_FILENAME,
     OUTPUT_DIR,
     TIMEOUT,
-    URL_BLACKLIST_PTN,
+    URL_DENYLIST_PTN,
+    URL_ALLOWLIST_PTN,
     stderr,
     OUTPUT_PERMISSIONS
 )
@@ -141,7 +142,9 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
             continue
         if scheme(link.url) not in ('http', 'https', 'ftp'):
             continue
-        if URL_BLACKLIST_PTN and URL_BLACKLIST_PTN.search(link.url):
+        if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(link.url):
+            continue
+        if URL_ALLOWLIST_PTN and (not URL_ALLOWLIST_PTN.search(link.url)):
             continue

         yield link
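The hunk above adds an allowlist check alongside the renamed denylist check: a URL is skipped if it matches the denylist, or if an allowlist is configured and the URL does not match it. A small self-contained sketch of that combined filter (the patterns below are illustrative, not ArchiveBox defaults):

    # Combined denylist/allowlist URL filtering, mirroring archivable_links() above.
    import re

    URL_DENYLIST_PTN = re.compile(r'\.(png|jpg|gif)$', re.IGNORECASE)   # example pattern
    URL_ALLOWLIST_PTN = re.compile(r'^https://example\.com/')           # example pattern

    def is_archivable(url: str) -> bool:
        if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(url):
            return False                      # explicitly denied
        if URL_ALLOWLIST_PTN and not URL_ALLOWLIST_PTN.search(url):
            return False                      # an allowlist is set and this URL is not on it
        return True

    print(is_archivable('https://example.com/post/1'))     # True
    print(is_archivable('https://example.com/logo.png'))   # False (denylisted)
    print(is_archivable('https://other.org/post/2'))       # False (not allowlisted)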
@@ -24,6 +24,7 @@ from ..config import (
     FOOTER_INFO,
     HTML_INDEX_FILENAME,
     SAVE_ARCHIVE_DOT_ORG,
+    PREVIEW_ORIGINALS,
 )

 MAIN_INDEX_TEMPLATE = 'static_index.html'
@@ -105,6 +106,7 @@ def link_details_template(link: Link) -> str:
         'status_color': 'success' if link.is_archived else 'danger',
         'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
         'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
+        'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
     })

 @enforce_types
@@ -175,7 +177,7 @@ def snapshot_icons(snapshot) -> str:
     # The check for archive_org is different, so it has to be handled separately

     # get from db (faster)
-    exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
+    exists = extractor in extractor_outputs and extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
     # get from filesystem (slower)
     # target_path = Path(path) / "archive.org.txt"
     # exists = target_path.exists()
@@ -20,7 +20,7 @@ from django.utils.functional import cached_property

 from ..system import get_dir_size
 from ..util import ts_to_date_str, parse_date
-from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME
+from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER

 class ArchiveError(Exception):
     def __init__(self, message, hints=None):
@@ -423,7 +423,7 @@ class Link:
         canonical = {
             'index_path': 'index.html',
             'favicon_path': 'favicon.ico',
-            'google_favicon_path': 'https://www.google.com/s2/favicons?domain={}'.format(self.domain),
+            'google_favicon_path': FAVICON_PROVIDER.format(self.domain),
             'wget_path': wget_output_path(self),
             'warc_path': 'warc/',
             'singlefile_path': 'singlefile.html',
@@ -1,5 +1,7 @@
 __package__ = 'archivebox.index'

+import re
+
 from io import StringIO
 from pathlib import Path
 from typing import List, Tuple, Iterator
@@ -8,7 +10,10 @@ from django.db import transaction

 from .schema import Link
 from ..util import enforce_types, parse_date
-from ..config import OUTPUT_DIR
+from ..config import (
+    OUTPUT_DIR,
+    TAG_SEPARATOR_PATTERN,
+)


 ### Main Links Index
@@ -33,9 +38,11 @@ def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir:
 def write_link_to_sql_index(link: Link):
     from core.models import Snapshot, ArchiveResult
     info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
-    tags = info.pop("tags")
-    if tags is None:
-        tags = []
+    tag_list = list(dict.fromkeys(
+        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
+    ))
+    info.pop('tags')

     try:
         info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
@@ -44,7 +51,7 @@ def write_link_to_sql_index(link: Link):
         info["timestamp"] = str(float(info["timestamp"]) + 1.0)

     snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
-    snapshot.save_tags(tags)
+    snapshot.save_tags(tag_list)

     for extractor, entries in link.history.items():
         for entry in entries:
@@ -104,10 +111,9 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
         snap = write_link_to_sql_index(link)
     snap.title = link.title

-    tag_set = (
-        set(tag.strip() for tag in (link.tags or '').split(','))
-    )
-    tag_list = list(tag_set) or []
+    tag_list = list(dict.fromkeys(
+        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
+    ))

     snap.save()
     snap.save_tags(tag_list)
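Both hunks above switch tag parsing from a plain comma split to a configurable separator pattern, and from an unordered set to dict.fromkeys, which deduplicates while preserving first-seen order. A minimal sketch of that parsing step (the separator pattern below is an illustrative value; the real one comes from config):

    # Split a raw tag string on a separator pattern, strip whitespace, dedupe in order.
    import re

    TAG_SEPARATOR_PATTERN = r'[,;:]'    # example pattern, not the real config value

    def parse_tags(tags_str: str):
        return list(dict.fromkeys(
            tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, tags_str or '')
        ))

    print(parse_tags('news, python;  python : archived'))
    # ['news', 'python', 'archived']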
@@ -108,12 +108,12 @@ def reject_stdin(caller: str, stdin: Optional[IO]=sys.stdin) -> None:
     if not stdin.isatty():
         # stderr('READING STDIN TO REJECT...')
         stdin_raw_text = stdin.read()
-        if stdin_raw_text:
+        if stdin_raw_text.strip():
             # stderr('GOT STDIN!', len(stdin_str))
-            stderr(f'[X] The "{caller}" command does not accept stdin.', color='red')
+            stderr(f'[!] The "{caller}" command does not accept stdin (ignoring).', color='red')
             stderr(f'    Run archivebox "{caller} --help" to see usage and examples.')
             stderr()
-            raise SystemExit(1)
+            # raise SystemExit(1)
     return None

@@ -432,10 +432,16 @@ def log_archive_method_finished(result: "ArchiveResult"):
     # Prettify error output hints string and limit to five lines
     hints = getattr(result.output, 'hints', None) or ()
     if hints:
-        hints = hints if isinstance(hints, (list, tuple)) else hints.split('\n')
+        if isinstance(hints, (list, tuple, type(_ for _ in ()))):
+            hints = [hint.decode() for hint in hints if isinstance(hint, bytes)]
+        else:
+            if isinstance(hints, bytes):
+                hints = hints.decode()
+            hints = hints.split('\n')

         hints = (
             '    {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
-            for line in hints[:5] if line.strip()
+            for line in list(hints)[:5] if line.strip()
         )

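The hunk above normalizes hints that may arrive as bytes, a str, or a list/tuple/generator of either before they are truncated to five lines. A sketch of the same normalization idea follows; unlike the stricter filter above, this version keeps plain-str entries in a mixed sequence rather than dropping them.

    # Normalize hints of mixed types into a list of str.
    def normalize_hints(hints):
        if isinstance(hints, (list, tuple, type(_ for _ in ()))):   # sequences and generators
            return [h.decode() if isinstance(h, bytes) else h for h in hints]
        if isinstance(hints, bytes):
            hints = hints.decode()
        return hints.split('\n')

    print(normalize_hints(b'line one\nline two'))
    print(normalize_hints(['already str', b'raw bytes']))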
@@ -527,11 +533,27 @@ def log_shell_welcome_msg():
 ### Helpers

 @enforce_types
-def pretty_path(path: Union[Path, str]) -> str:
+def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=OUTPUT_DIR) -> str:
     """convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
-    pwd = Path('.').resolve()
-    # parent = os.path.abspath(os.path.join(pwd, os.path.pardir))
-    return str(path).replace(str(pwd) + '/', './')
+    pwd = str(Path(pwd))  # .resolve()
+    path = str(path)
+
+    if not path:
+        return path
+
+    # replace long absolute paths with ./ relative ones to save on terminal output width
+    if path.startswith(pwd) and (pwd != '/'):
+        path = path.replace(pwd, '.', 1)
+
+    # quote paths containing spaces
+    if ' ' in path:
+        path = f'"{path}"'
+
+    # if path is just a plain dot, replace it back with the absolute path for clarity
+    if path == '.':
+        path = pwd
+
+    return path


 @enforce_types
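A short usage sketch of the reworked pretty_path() above, using example paths and a hard-coded data dir standing in for OUTPUT_DIR:

    # Stand-alone version of pretty_path() to show the expected outputs.
    from pathlib import Path

    def pretty_path(path, pwd='/data'):
        pwd, path = str(Path(pwd)), str(path)
        if not path:
            return path
        if path.startswith(pwd) and pwd != '/':
            path = path.replace(pwd, '.', 1)      # shorten paths under the data dir
        if ' ' in path:
            path = f'"{path}"'                    # quote paths containing spaces
        if path == '.':
            path = pwd                            # a bare dot reads better as the full path
        return path

    print(pretty_path('/data/archive/169 abc/index.html'))   # "./archive/169 abc/index.html"
    print(pretty_path('/data'))                               # /data
    print(pretty_path('/usr/bin/curl'))                       # /usr/bin/curl (unchanged)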
@@ -566,12 +588,13 @@ def printable_config(config: ConfigDict, prefix: str='') -> str:
 def printable_folder_status(name: str, folder: Dict) -> str:
     if folder['enabled']:
         if folder['is_valid']:
-            color, symbol, note = 'green', '√', 'valid'
+            color, symbol, note, num_files = 'green', '√', 'valid', ''
         else:
             color, symbol, note, num_files = 'red', 'X', 'invalid', '?'
     else:
         color, symbol, note, num_files = 'lightyellow', '-', 'disabled', '-'


     if folder['path']:
         if Path(folder['path']).exists():
             num_files = (
@@ -581,14 +604,12 @@ def printable_folder_status(name: str, folder: Dict) -> str:
             )
         else:
             num_files = 'missing'

+    if folder.get('is_mount'):
+        # add symbol @ next to filecount if path is a remote filesystem mount
+        num_files = f'{num_files} @' if num_files else '@'
+
-    path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
-    if path and ' ' in path:
-        path = f'"{path}"'
-
-    # if path is just a plain dot, replace it back with the full path for clarity
-    if path == '.':
-        path = str(OUTPUT_DIR)
+    path = pretty_path(folder['path'])

     return ' '.join((
         ANSI[color],
@@ -619,9 +640,7 @@ def printable_dependency_version(name: str, dependency: Dict) -> str:
     else:
         color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'

-    path = str(dependency["path"]).replace(str(OUTPUT_DIR), '.') if dependency["path"] else ''
-    if path and ' ' in path:
-        path = f'"{path}"'
+    path = pretty_path(dependency['path'])

     return ' '.join((
         ANSI[color],
archivebox/main.py (235 changes, Normal file → Executable file)
@@ -4,8 +4,9 @@ import os
 import sys
 import shutil
 import platform
+from django.utils import timezone
 from pathlib import Path
-from datetime import date
+from datetime import date, datetime

 from typing import Dict, List, Optional, Iterable, IO, Union
 from crontab import CronTab, CronSlices
@@ -70,7 +71,13 @@ from .config import (
     IS_TTY,
     DEBUG,
     IN_DOCKER,
+    IN_QEMU,
+    PUID,
+    PGID,
     USER,
+    TIMEZONE,
+    ENFORCE_ATOMIC_WRITES,
+    OUTPUT_PERMISSIONS,
     PYTHON_BINARY,
     ARCHIVEBOX_BINARY,
     ONLY_NEW,
@@ -90,11 +97,11 @@ from .config import (
     check_data_folder,
     write_config_file,
     VERSION,
+    COMMIT_HASH,
     CODE_LOCATIONS,
     EXTERNAL_LOCATIONS,
     DATA_LOCATIONS,
     DEPENDENCIES,
-    USE_CHROME,
     CHROME_BINARY,
     CHROME_VERSION,
     YOUTUBEDL_BINARY,
@@ -102,12 +109,12 @@ from .config import (
     SINGLEFILE_VERSION,
     READABILITY_VERSION,
     MERCURY_VERSION,
-    USE_YOUTUBEDL,
-    USE_NODE,
     NODE_VERSION,
     load_all_config,
     CONFIG,
     USER_CONFIG,
+    ADMIN_USERNAME,
+    ADMIN_PASSWORD,
     get_real_name,
     setup_django,
 )
@@ -206,48 +213,62 @@ def help(out_dir: Path=OUTPUT_DIR) -> None:
 def version(quiet: bool=False,
             out_dir: Path=OUTPUT_DIR) -> None:
     """Print the ArchiveBox version and dependency information"""

-    if quiet:
-        print(VERSION)
-    else:
-        # ArchiveBox v0.5.6
-        # Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
-        print('ArchiveBox v{}'.format(VERSION))
+    print(VERSION)
+
+    if not quiet:
+        # 0.6.3
+        # ArchiveBox v0.6.3 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
+        # DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep

         p = platform.uname()
         print(
+            'ArchiveBox v{}'.format(VERSION),
+            *((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
             sys.implementation.name.title(),
             p.system,
             platform.platform(),
             p.machine,
         )
+        OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
         print(
-            f'IN_DOCKER={IN_DOCKER}',
             f'DEBUG={DEBUG}',
+            f'IN_DOCKER={IN_DOCKER}',
+            f'IN_QEMU={IN_QEMU}',
             f'IS_TTY={IS_TTY}',
-            f'TZ={os.environ.get("TZ", "UTC")}',
-            f'SEARCH_BACKEND_ENGINE={SEARCH_BACKEND_ENGINE}',
+            f'TZ={TIMEZONE}',
+            #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})',  # add this if we have more useful info to show eventually
+            f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
+            f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
+            f'FS_USER={PUID}:{PGID}',
+            f'FS_PERMS={OUTPUT_PERMISSIONS}',
+            f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
         )
         print()

         print('{white}[i] Dependency versions:{reset}'.format(**ANSI))
         for name, dependency in DEPENDENCIES.items():
             print(printable_dependency_version(name, dependency))
+
+            # add a newline between core dependencies and extractor dependencies for easier reading
+            if name == 'ARCHIVEBOX_BINARY':
+                print()

         print()
         print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
-        for name, folder in CODE_LOCATIONS.items():
-            print(printable_folder_status(name, folder))
+        for name, path in CODE_LOCATIONS.items():
+            print(printable_folder_status(name, path))

         print()
         print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
-        for name, folder in EXTERNAL_LOCATIONS.items():
-            print(printable_folder_status(name, folder))
+        for name, path in EXTERNAL_LOCATIONS.items():
+            print(printable_folder_status(name, path))

         print()
         if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
             print('{white}[i] Data locations:{reset}'.format(**ANSI))
-            for name, folder in DATA_LOCATIONS.items():
-                print(printable_folder_status(name, folder))
+            for name, path in DATA_LOCATIONS.items():
+                print(printable_folder_status(name, path))
         else:
             print()
             print('{white}[i] Data locations:{reset}'.format(**ANSI))
@@ -403,6 +424,13 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
         write_main_index(list(pending_links.values()), out_dir=out_dir)

     print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
+
+    from django.contrib.auth.models import User
+
+    if (ADMIN_USERNAME and ADMIN_PASSWORD) and not User.objects.filter(username=ADMIN_USERNAME).exists():
+        print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI))
+        User.objects.create_superuser(username=ADMIN_USERNAME, password=ADMIN_PASSWORD)
+
     if existing_index:
         print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
     else:
@@ -425,7 +453,7 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
         print('        archivebox server  # then visit http://127.0.0.1:8000')
         print()
         print('    To add new links, you can run:')
-        print("        archivebox add ~/some/path/or/url/to/list_of_links.txt")
+        print("        archivebox add < ~/some/path/to/list_of_links.txt")
         print()
         print('    For more usage and examples, run:')
         print('        archivebox help')
@@ -552,7 +580,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
 def add(urls: Union[str, List[str]],
         tag: str='',
         depth: int=0,
-        update_all: bool=not ONLY_NEW,
+        update: bool=not ONLY_NEW,
+        update_all: bool=False,
         index_only: bool=False,
         overwrite: bool=False,
         # duplicate: bool=False,  # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically
@@ -585,6 +614,7 @@ def add(urls: Union[str, List[str]],
     # save verbatim args to sources
     write_ahead_log = save_text_as_source('\n'.join(urls), filename='{ts}-import.txt', out_dir=out_dir)

+
     new_links += parse_links_from_source(write_ahead_log, root_url=None, parser=parser)

     # If we're going one level deeper, download each link and look for more links
@@ -592,8 +622,11 @@ def add(urls: Union[str, List[str]],
     if new_links and depth == 1:
         log_crawl_started(new_links)
         for new_link in new_links:
-            downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
-            new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
+            try:
+                downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
+                new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
+            except Exception as err:
+                stderr('[!] Failed to get contents of URL {new_link.url}', err, color='red')

     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())

@@ -616,11 +649,21 @@ def add(urls: Union[str, List[str]],
     if extractors:
         archive_kwargs["methods"] = extractors

-    if update_all:
+    stderr()
+
+    ts = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
+
+    if update:
+        stderr(f'[*] [{ts}] Archiving + updating {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
+        archive_links(imported_links, overwrite=overwrite, **archive_kwargs)
+    elif update_all:
+        stderr(f'[*] [{ts}] Archiving + updating {len(all_links)}/{len(all_links)}', len(all_links), 'URLs from entire library...', color='green')
         archive_links(all_links, overwrite=overwrite, **archive_kwargs)
     elif overwrite:
+        stderr(f'[*] [{ts}] Archiving + overwriting {len(imported_links)}/{len(all_links)}', len(imported_links), 'URLs from added set...', color='green')
         archive_links(imported_links, overwrite=True, **archive_kwargs)
     elif new_links:
+        stderr(f'[*] [{ts}] Archiving {len(new_links)}/{len(all_links)} URLs from added set...', color='green')
         archive_links(new_links, overwrite=False, **archive_kwargs)

@@ -910,34 +953,35 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
     stderr('\n[+] Installing enabled ArchiveBox dependencies automatically...', color='green')

     stderr('\n    Installing YOUTUBEDL_BINARY automatically using pip...')
-    if USE_YOUTUBEDL:
-        if YOUTUBEDL_VERSION:
+    if YOUTUBEDL_VERSION:
         print(f'{YOUTUBEDL_VERSION} is already installed', YOUTUBEDL_BINARY)
     else:
         try:
             run_shell([
                 PYTHON_BINARY, '-m', 'pip',
                 'install',
                 '--upgrade',
                 '--no-cache-dir',
                 '--no-warn-script-location',
                 'youtube_dl',
             ], capture_output=False, cwd=out_dir)
             pkg_path = run_shell([
                 PYTHON_BINARY, '-m', 'pip',
                 'show',
                 'youtube_dl',
-            ], capture_output=True, text=True, cwd=out_dir).stdout.split('Location: ')[-1].split('\n', 1)[0]
+            ], capture_output=True, text=True, cwd=out_dir).stdout.decode().split('Location: ')[-1].split('\n', 1)[0]
             NEW_YOUTUBEDL_BINARY = Path(pkg_path) / 'youtube_dl' / '__main__.py'
             os.chmod(NEW_YOUTUBEDL_BINARY, 0o777)
             assert NEW_YOUTUBEDL_BINARY.exists(), f'youtube_dl must exist inside {pkg_path}'
             config(f'YOUTUBEDL_BINARY={NEW_YOUTUBEDL_BINARY}', set=True, out_dir=out_dir)
-        except BaseException as e:
+        except BaseException as e:      # lgtm [py/catch-base-exception]
             stderr(f'[X] Failed to install python packages: {e}', color='red')
             raise SystemExit(1)

-    stderr('\n    Installing CHROME_BINARY automatically using playwright...')
-    if USE_CHROME:
+    if platform.machine() == 'armv7l':
+        stderr('\n    Skip the automatic installation of CHROME_BINARY because playwright is not available on armv7.')
+    else:
+        stderr('\n    Installing CHROME_BINARY automatically using playwright...')
         if CHROME_VERSION:
             print(f'{CHROME_VERSION} is already installed', CHROME_BINARY)
         else:
@@ -955,56 +999,57 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
                 NEW_CHROME_BINARY = proc.stdout.decode().strip() if isinstance(proc.stdout, bytes) else proc.stdout.strip()
                 assert NEW_CHROME_BINARY and len(NEW_CHROME_BINARY), 'CHROME_BINARY must contain a path'
                 config(f'CHROME_BINARY={NEW_CHROME_BINARY}', set=True, out_dir=out_dir)
-            except BaseException as e:
+            except BaseException as e:      # lgtm [py/catch-base-exception]
                 stderr(f'[X] Failed to install chromium using playwright: {e.__class__.__name__} {e}', color='red')
                 raise SystemExit(1)

     stderr('\n    Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
-    if USE_NODE:
-        if not NODE_VERSION:
-            stderr('[X] You must first install node using your system package manager', color='red')
-            hint([
-                'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
-                'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
-            ])
-            raise SystemExit(1)
-
-    if all((SINGLEFILE_VERSION, READABILITY_VERSION, MERCURY_VERSION)):
-        print('SINGLEFILE_BINARY, READABILITY_BINARY, and MERCURURY_BINARY are already installed')
-    else:
-        try:
-            # clear out old npm package locations
-            paths = (
-                out_dir / 'package.json',
-                out_dir / 'package_lock.json',
-                out_dir / 'node_modules',
-            )
-            for path in paths:
-                if path.is_dir():
-                    shutil.rmtree(path, ignore_errors=True)
-                elif path.is_file():
-                    os.remove(path)
-
-            shutil.copyfile(PACKAGE_DIR / 'package.json', out_dir / 'package.json')
-            run_shell([
-                'npm',
-                'install',
-                '--prefix', str(out_dir),
-                '--force',
-                '--no-save',
-                '--no-audit',
-                '--no-fund',
-                '--loglevel', 'error',
-            ], capture_output=False, cwd=out_dir)
-            os.remove(out_dir / 'package.json')
-        except BaseException as e:
-            stderr(f'[X] Failed to install npm packages: {e}', color='red')
-            hint(f'Try deleting {out_dir}/node_modules and running it again')
-            raise SystemExit(1)
+    if not NODE_VERSION:
+        stderr('[X] You must first install node using your system package manager', color='red')
+        hint([
+            'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
+            'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
+        ])
+        raise SystemExit(1)
+
+    if all((SINGLEFILE_VERSION, READABILITY_VERSION, MERCURY_VERSION)):
+        print('SINGLEFILE_BINARY, READABILITY_BINARY, and MERCURURY_BINARY are already installed')
+    else:
+        try:
+            # clear out old npm package locations
+            paths = (
+                out_dir / 'package.json',
+                out_dir / 'package_lock.json',
+                out_dir / 'node_modules',
+            )
+            for path in paths:
+                if path.is_dir():
+                    shutil.rmtree(path, ignore_errors=True)
+                elif path.is_file():
+                    os.remove(path)
+
+            shutil.copyfile(PACKAGE_DIR / 'package.json', out_dir / 'package.json')    # copy the js requirements list from the source install into the data dir
+            # lets blindly assume that calling out to npm via shell works reliably cross-platform 🤡 (until proven otherwise via support tickets)
+            run_shell([
+                'npm',
+                'install',
+                '--prefix', str(out_dir),       # force it to put the node_modules dir in this folder
+                '--force',                      # overwrite any existing node_modules
+                '--no-save',                    # don't bother saving updating the package.json or package-lock.json file
+                '--no-audit',                   # don't bother checking for newer versions with security vuln fixes
+                '--no-fund',                    # hide "please fund our project" messages
+                '--loglevel', 'error',          # only show erros (hide warn/info/debug) during installation
+                # these args are written in blood, change with caution
+            ], capture_output=False, cwd=out_dir)
+            os.remove(out_dir / 'package.json')
+        except BaseException as e:                  # lgtm [py/catch-base-exception]
+            stderr(f'[X] Failed to install npm packages: {e}', color='red')
+            hint(f'Try deleting {out_dir}/node_modules and running it again')
+            raise SystemExit(1)

     stderr('\n[√] Set up ArchiveBox and its dependencies successfully.', color='green')

-    run_shell([ARCHIVEBOX_BINARY, '--version'], capture_output=False, cwd=out_dir)
+    run_shell([PYTHON_BINARY, ARCHIVEBOX_BINARY, '--version'], capture_output=False, cwd=out_dir)

 @enforce_types
 def config(config_options_str: Optional[str]=None,
@@ -1112,6 +1157,7 @@ def schedule(add: bool=False,
              every: Optional[str]=None,
              depth: int=0,
              overwrite: bool=False,
+             update: bool=not ONLY_NEW,
              import_path: Optional[str]=None,
              out_dir: Path=OUTPUT_DIR):
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
@@ -1141,6 +1187,7 @@ def schedule(add: bool=False,
         *([
             'add',
             *(['--overwrite'] if overwrite else []),
+            *(['--update'] if update else []),
             f'--depth={depth}',
             f'"{import_path}"',
         ] if import_path else ['update']),
@@ -1 +0,0 @@
-../package.json

archivebox/package.json (13 additions, new file)
@@ -0,0 +1,13 @@
+{
+    "name": "archivebox",
+    "version": "0.7.0",
+    "description": "ArchiveBox: The self-hosted internet archive",
+    "author": "Nick Sweeting <archivebox-npm@sweeting.me>",
+    "repository": "github:ArchiveBox/ArchiveBox",
+    "license": "MIT",
+    "dependencies": {
+        "@postlight/parser": "^2.2.3",
+        "readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
+        "single-file-cli": "^1.1.12"
+    }
+}
@@ -34,6 +34,7 @@ from ..index.schema import Link
 from ..logging_util import TimedProgress, log_source_saved

 from . import pocket_api
+from . import readwise_reader_api
 from . import wallabag_atom
 from . import pocket_html
 from . import pinboard_rss
@@ -51,6 +52,7 @@ from . import url_list
 PARSERS = {
     # Specialized parsers
     pocket_api.KEY: (pocket_api.NAME, pocket_api.PARSER),
+    readwise_reader_api.KEY: (readwise_reader_api.NAME, readwise_reader_api.PARSER),
     wallabag_atom.KEY: (wallabag_atom.NAME, wallabag_atom.PARSER),
     pocket_html.KEY: (pocket_html.NAME, pocket_html.PARSER),
     pinboard_rss.KEY: (pinboard_rss.NAME, pinboard_rss.PARSER),
@@ -149,7 +151,17 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
 def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
     ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
     source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
-    atomic_write(source_path, raw_text)
+
+    referenced_texts = ''
+
+    for entry in raw_text.split():
+        try:
+            if Path(entry).exists():
+                referenced_texts += Path(entry).read_text()
+        except Exception as err:
+            print(err)
+
+    atomic_write(source_path, raw_text + '\n' + referenced_texts)
     log_source_saved(source_file=source_path)
     return source_path

@@ -176,7 +188,7 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
             ANSI['reset'],
         ))
         print('    ', e)
-        raise SystemExit(1)
+        raise e

     else:
         # Source is a path to a local file on the filesystem
@@ -223,6 +235,10 @@ _test_url_strs = {
     'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
     'https://example.com?what=1#how-about-this=1&2%20baf': 1,
     '<test>http://example7.com</test>': 1,
+    'https://<test>': 0,
+    'https://[test]': 0,
+    'http://"test"': 0,
+    'http://\'test\'': 0,
     '[https://example8.com/what/is/this.php?what=1]': 1,
     '[and http://example9.com?what=1&other=3#and-thing=2]': 1,
     '<what>https://example10.com#and-thing=2 "</about>': 1,
@@ -17,7 +17,10 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)"""

     json_file.seek(0)
-    links = json.load(json_file)
+
+    # sometimes the first line is a comment or filepath, so we get everything after the first {
+    json_file_json_str = '{' + json_file.read().split('{', 1)[-1]
+    links = json.loads(json_file_json_str)
     json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z')

     for link in links:
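The change above makes the JSON parser tolerant of a junk first line by keeping only the text from the first '{' onward before parsing. A tiny sketch of that string surgery on an invented export payload:

    # Discard anything before the first '{' (e.g. a comment or filepath header) and parse.
    import io, json

    raw = io.StringIO('# exported by some-bookmark-tool\n{"href": "https://example.com", "tags": "news"}')
    raw.seek(0)
    cleaned = '{' + raw.read().split('{', 1)[-1]
    link = json.loads(cleaned)
    print(link['href'])    # https://example.com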
@@ -21,13 +21,18 @@ def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
     root = ElementTree.parse(rss_file).getroot()
     items = root.findall("{http://purl.org/rss/1.0/}item")
     for item in items:
-        find = lambda p: item.find(p).text.strip() if item.find(p) else None  # type: ignore
+        find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None  # type: ignore

         url = find("{http://purl.org/rss/1.0/}link")
         tags = find("{http://purl.org/dc/elements/1.1/}subject")
         title = find("{http://purl.org/rss/1.0/}title")
         ts_str = find("{http://purl.org/dc/elements/1.1/}date")

+        if url is None:
+            # Yielding a Link with no URL will
+            # crash on a URL validation assertion
+            continue
+
         # Pinboard includes a colon in its date stamp timezone offsets, which
         # Python can't parse. Remove it:
         if ts_str and ts_str[-3:-2] == ":":
@@ -47,11 +47,11 @@ def get_pocket_articles(api: Pocket, since=None, page=0):


 def link_from_article(article: dict, sources: list):
-    url: str = article['resolved_url'] or article['given_url']
+    url: str = article.get('resolved_url') or article['given_url']
     broken_protocol = _BROKEN_PROTOCOL_RE.match(url)
     if broken_protocol:
         url = url.replace(f'{broken_protocol.group(1)}:/', f'{broken_protocol.group(1)}://')
-    title = article['resolved_title'] or article['given_title'] or url
+    title = article.get('resolved_title') or article.get('given_title') or url

     return Link(
         url=url,

archivebox/parsers/readwise_reader_api.py (new file, 123 lines)
@@ -0,0 +1,123 @@
+__package__ = "archivebox.parsers"
+
+
+import re
+import requests
+from datetime import datetime
+
+from typing import IO, Iterable, Optional
+from configparser import ConfigParser
+
+from pathlib import Path
+
+from ..index.schema import Link
+from ..util import enforce_types
+from ..system import atomic_write
+from ..config import (
+    SOURCES_DIR,
+    READWISE_READER_TOKENS,
+)
+
+
+API_DB_PATH = Path(SOURCES_DIR) / "readwise_reader_api.db"
+
+
+class ReadwiseReaderAPI:
+    cursor: Optional[str]
+
+    def __init__(self, api_token, cursor=None) -> None:
+        self.api_token = api_token
+        self.cursor = cursor
+
+    def get_archive(self):
+        response = requests.get(
+            url="https://readwise.io/api/v3/list/",
+            headers={"Authorization": f"Token {self.api_token}"},
+            params={
+                "location": "archive",
+                "pageCursor": self.cursor,
+            }
+        )
+        response.raise_for_status()
+        return response
+
+
+def get_readwise_reader_articles(api: ReadwiseReaderAPI):
+    response = api.get_archive()
+    body = response.json()
+    articles = body["results"]
+
+    yield from articles
+
+    if body['nextPageCursor']:
+        api.cursor = body["nextPageCursor"]
+        yield from get_readwise_reader_articles(api)
+
+
+def link_from_article(article: dict, sources: list):
+    url: str = article['source_url']
+    title = article["title"] or url
+    timestamp = datetime.fromisoformat(article['updated_at']).timestamp()
+
+    return Link(
+        url=url,
+        timestamp=str(timestamp),
+        title=title,
+        tags="",
+        sources=sources,
+    )
+
+
+def write_cursor(username: str, since: str):
+    if not API_DB_PATH.exists():
+        atomic_write(API_DB_PATH, "")
+
+    since_file = ConfigParser()
+    since_file.optionxform = str
+    since_file.read(API_DB_PATH)
+
+    since_file[username] = {"since": since}
+
+    with open(API_DB_PATH, "w+") as new:
+        since_file.write(new)
+
+
+def read_cursor(username: str) -> Optional[str]:
+    if not API_DB_PATH.exists():
+        atomic_write(API_DB_PATH, "")
+
+    config_file = ConfigParser()
+    config_file.optionxform = str
+    config_file.read(API_DB_PATH)
+
+    return config_file.get(username, "since", fallback=None)
+
+
+@enforce_types
+def should_parse_as_readwise_reader_api(text: str) -> bool:
+    return text.startswith("readwise-reader://")
+
+
+@enforce_types
+def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
+    """Parse bookmarks from the Readwise Reader API"""
+
+    input_buffer.seek(0)
+    pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
+    for line in input_buffer:
+        if should_parse_as_readwise_reader_api(line):
+            username = pattern.search(line).group(1)
+            api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
+
+            for article in get_readwise_reader_articles(api):
+                yield link_from_article(article, sources=[line])
+
+            if api.cursor:
+                write_cursor(username, api.cursor)
+
+
+KEY = "readwise_reader_api"
+NAME = "Readwise Reader API"
+PARSER = parse_readwise_reader_api_export

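For context, the new parser above is triggered by input lines of the form readwise-reader://<username>, where the username selects an API token from READWISE_READER_TOKENS. A small standalone sketch of that trigger match (not ArchiveBox code; 'myaccount' is a hypothetical username used only for illustration):

    import re

    line = "readwise-reader://myaccount"
    # same pattern the parser uses to pull the username out of the trigger line
    match = re.compile(r"^readwise-reader:\/\/(\w+)").search(line)
    print(match.group(1))   # -> myaccount
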
@@ -34,13 +34,19 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:

         trailing_removed = entry.split('</entry>', 1)[0]
         leading_removed = trailing_removed.strip()
-        rows = leading_removed.split('\n')
+        splits_fixed = leading_removed.replace('"\n href="', '" href="')
+        rows = splits_fixed.split('\n')

-        def get_row(key):
-            return [r.strip() for r in rows if r.strip().startswith('<{}'.format(key))][0]
+        def get_row(prefix):
+            return [
+                row.strip()
+                for row in rows
+                if row.strip().startswith('<{}'.format(prefix))
+            ][0]

         title = str_between(get_row('title'), '<title><![CDATA[', ']]></title>').strip()
-        url = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
+        url_inside_link = str_between(get_row('link rel="via"'), '<link rel="via">', '</link>')
+        url_inside_attr = str_between(get_row('link rel="via"'), 'href="', '"/>')
         ts_str = str_between(get_row('published'), '<published>', '</published>')
         time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
         try:

@@ -49,7 +55,7 @@ def parse_wallabag_atom_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
             tags = None

         yield Link(
-            url=htmldecode(url),
+            url=htmldecode(url_inside_attr or url_inside_link),
             timestamp=str(time.timestamp()),
             title=htmldecode(title) or None,
             tags=tags or '',

archivebox/search/backends/sqlite.py (new file, 195 lines)
@@ -0,0 +1,195 @@
+import codecs
+from typing import List, Generator
+import sqlite3
+
+from archivebox.util import enforce_types
+from archivebox.config import (
+    FTS_SEPARATE_DATABASE,
+    FTS_TOKENIZERS,
+    FTS_SQLITE_MAX_LENGTH
+)
+
+FTS_TABLE = "snapshot_fts"
+FTS_ID_TABLE = "snapshot_id_fts"
+FTS_COLUMN = "texts"
+
+if FTS_SEPARATE_DATABASE:
+    database = sqlite3.connect("search.sqlite3")
+    # Make get_connection callable, because `django.db.connection.cursor()`
+    # has to be called to get a context manager, but sqlite3.Connection
+    # is a context manager without being called.
+    def get_connection():
+        return database
+    SQLITE_BIND = "?"
+else:
+    from django.db import connection as database  # type: ignore[no-redef, assignment]
+    get_connection = database.cursor
+    SQLITE_BIND = "%s"
+
+# Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
+# so fall back to the default if the API to get the real value isn't present
+try:
+    limit_id = sqlite3.SQLITE_LIMIT_LENGTH
+    try:
+        with database.temporary_connection() as cursor:  # type: ignore[attr-defined]
+            SQLITE_LIMIT_LENGTH = cursor.connection.getlimit(limit_id)
+    except AttributeError:
+        SQLITE_LIMIT_LENGTH = database.getlimit(limit_id)
+except AttributeError:
+    SQLITE_LIMIT_LENGTH = FTS_SQLITE_MAX_LENGTH
+
+
+def _escape_sqlite3(value: str, *, quote: str, errors='strict') -> str:
+    assert isinstance(quote, str), "quote is not a str"
+    assert len(quote) == 1, "quote must be a single character"
+
+    encodable = value.encode('utf-8', errors).decode('utf-8')
+
+    nul_index = encodable.find("\x00")
+    if nul_index >= 0:
+        error = UnicodeEncodeError("NUL-terminated utf-8", encodable,
+                                   nul_index, nul_index + 1, "NUL not allowed")
+        error_handler = codecs.lookup_error(errors)
+        replacement, _ = error_handler(error)
+        assert isinstance(replacement, str), "handling a UnicodeEncodeError should return a str replacement"
+        encodable = encodable.replace("\x00", replacement)
+
+    return quote + encodable.replace(quote, quote * 2) + quote
+
+def _escape_sqlite3_value(value: str, errors='strict') -> str:
+    return _escape_sqlite3(value, quote="'", errors=errors)
+
+def _escape_sqlite3_identifier(value: str) -> str:
+    return _escape_sqlite3(value, quote='"', errors='strict')
+
+@enforce_types
+def _create_tables():
+    table = _escape_sqlite3_identifier(FTS_TABLE)
+    # Escape as value, because fts5() expects
+    # string literal column names
+    column = _escape_sqlite3_value(FTS_COLUMN)
+    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
+    tokenizers = _escape_sqlite3_value(FTS_TOKENIZERS)
+    trigger_name = _escape_sqlite3_identifier(f"{FTS_ID_TABLE}_ad")
+
+    with get_connection() as cursor:
+        # Create a contentless-delete FTS5 table that indexes
+        # but does not store the texts of snapshots
+        try:
+            cursor.execute(
+                f"CREATE VIRTUAL TABLE {table}"
+                f" USING fts5({column},"
+                f" tokenize={tokenizers},"
+                " content='', contentless_delete=1);"
+            )
+        except Exception as e:
+            msg = str(e)
+            if 'unrecognized option: "contentlessdelete"' in msg:
+                sqlite_version = getattr(sqlite3, "sqlite_version", "Unknown")
+                raise RuntimeError(
+                    "SQLite full-text search requires SQLite >= 3.43.0;"
+                    f" the running version is {sqlite_version}"
+                ) from e
+            else:
+                raise
+        # Create a one-to-one mapping between ArchiveBox snapshot_id
+        # and FTS5 rowid, because the column type of rowid can't be
+        # customized.
+        cursor.execute(
+            f"CREATE TABLE {id_table}("
+            " rowid INTEGER PRIMARY KEY AUTOINCREMENT,"
+            " snapshot_id char(32) NOT NULL UNIQUE"
+            ");"
+        )
+        # Create a trigger to delete items from the FTS5 index when
+        # the snapshot_id is deleted from the mapping, to maintain
+        # consistency and make the `flush()` query simpler.
+        cursor.execute(
+            f"CREATE TRIGGER {trigger_name}"
+            f" AFTER DELETE ON {id_table} BEGIN"
+            f" DELETE FROM {table} WHERE rowid=old.rowid;"
+            " END;"
+        )
+
+def _handle_query_exception(exc: Exception):
+    message = str(exc)
+    if message.startswith("no such table:"):
+        raise RuntimeError(
+            "SQLite full-text search index has not yet"
+            " been created; run `archivebox update --index-only`."
+        )
+    else:
+        raise exc
+
+@enforce_types
+def index(snapshot_id: str, texts: List[str]):
+    text = ' '.join(texts)[:SQLITE_LIMIT_LENGTH]
+
+    table = _escape_sqlite3_identifier(FTS_TABLE)
+    column = _escape_sqlite3_identifier(FTS_COLUMN)
+    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
+
+    with get_connection() as cursor:
+        retries = 2
+        while retries > 0:
+            retries -= 1
+            try:
+                # If there is already an FTS index rowid to snapshot_id mapping,
+                # then don't insert a new one, silently ignoring the operation.
+                # {id_table}.rowid is AUTOINCREMENT, so will generate an unused
+                # rowid for the index if it is an unindexed snapshot_id.
+                cursor.execute(
+                    f"INSERT OR IGNORE INTO {id_table}(snapshot_id) VALUES({SQLITE_BIND})",
+                    [snapshot_id])
+                # Fetch the FTS index rowid for the given snapshot_id
+                id_res = cursor.execute(
+                    f"SELECT rowid FROM {id_table} WHERE snapshot_id = {SQLITE_BIND}",
+                    [snapshot_id])
+                rowid = id_res.fetchone()[0]
+                # (Re-)index the content
+                cursor.execute(
+                    "INSERT OR REPLACE INTO"
+                    f" {table}(rowid, {column}) VALUES ({SQLITE_BIND}, {SQLITE_BIND})",
+                    [rowid, text])
+                # All statements succeeded; return
+                return
+            except Exception as e:
+                if str(e).startswith("no such table:") and retries > 0:
+                    _create_tables()
+                else:
+                    raise
+
+        raise RuntimeError("Failed to create tables for SQLite FTS5 search")
+
+@enforce_types
+def search(text: str) -> List[str]:
+    table = _escape_sqlite3_identifier(FTS_TABLE)
+    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
+
+    with get_connection() as cursor:
+        try:
+            res = cursor.execute(
+                f"SELECT snapshot_id FROM {table}"
+                f" INNER JOIN {id_table}"
+                f" ON {id_table}.rowid = {table}.rowid"
+                f" WHERE {table} MATCH {SQLITE_BIND}",
+                [text])
+        except Exception as e:
+            _handle_query_exception(e)

+        snap_ids = [row[0] for row in res.fetchall()]
+    return snap_ids
+
+@enforce_types
+def flush(snapshot_ids: Generator[str, None, None]):
+    snapshot_ids = list(snapshot_ids)  # type: ignore[assignment]
+
+    id_table = _escape_sqlite3_identifier(FTS_ID_TABLE)
+
+    with get_connection() as cursor:
+        try:
+            cursor.executemany(
+                f"DELETE FROM {id_table} WHERE snapshot_id={SQLITE_BIND}",
+                [snapshot_ids])
+        except Exception as e:
+            _handle_query_exception(e)

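For context, the index()/search()/flush() functions in the new backend above all revolve around two tables: a rowid-to-snapshot_id mapping plus an FTS5 table keyed by that rowid. A minimal standalone sketch of that pattern (not ArchiveBox code; it uses an in-memory database and a plain FTS5 table instead of the contentless-delete variant, which requires SQLite >= 3.43):

    import sqlite3

    db = sqlite3.connect(":memory:")
    # mapping table: gives each snapshot_id a stable integer rowid
    db.execute("CREATE TABLE snapshot_id_fts (rowid INTEGER PRIMARY KEY AUTOINCREMENT, snapshot_id TEXT UNIQUE)")
    # FTS5 table: indexed text, addressed by the same rowid
    db.execute("CREATE VIRTUAL TABLE snapshot_fts USING fts5(texts)")

    db.execute("INSERT INTO snapshot_id_fts(snapshot_id) VALUES (?)", ("abc123",))
    rowid = db.execute("SELECT rowid FROM snapshot_id_fts WHERE snapshot_id = ?", ("abc123",)).fetchone()[0]
    db.execute("INSERT INTO snapshot_fts(rowid, texts) VALUES (?, ?)", (rowid, "example page text about archiving"))

    # full-text query joined back to snapshot_ids, like search() does
    hits = db.execute(
        "SELECT snapshot_id FROM snapshot_fts "
        "JOIN snapshot_id_fts ON snapshot_id_fts.rowid = snapshot_fts.rowid "
        "WHERE snapshot_fts MATCH ?", ("archiving",)
    ).fetchall()
    print(hits)   # -> [('abc123',)]
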
@@ -14,11 +14,11 @@ from crontab import CronTab

 from .vendor.atomicwrites import atomic_write as lib_atomic_write

 from .util import enforce_types, ExtendedEncoder
-from .config import OUTPUT_PERMISSIONS
+from .config import PYTHON_BINARY, OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES



-def run(*args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):
+def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):
     """Patched of subprocess.run to kill forked child subprocesses and fix blocking io making timeout=innefective
     Mostly copied from https://github.com/python/cpython/blob/master/Lib/subprocess.py
     """

@@ -37,7 +37,10 @@ def run(*args, input=None, capture_output=True, timeout=None, check=False, text=

     pgid = None
     try:
-        with Popen(*args, start_new_session=start_new_session, **kwargs) as process:
+        if isinstance(cmd, (list, tuple)) and cmd[0].endswith('.py'):
+            cmd = (PYTHON_BINARY, *cmd)
+
+        with Popen(cmd, *args, start_new_session=start_new_session, **kwargs) as process:
             pgid = os.getpgid(process.pid)
             try:
                 stdout, stderr = process.communicate(input, timeout=timeout)

@@ -89,14 +92,24 @@ def atomic_write(path: Union[Path, str], contents: Union[dict, str, bytes], over
         elif isinstance(contents, (bytes, str)):
             f.write(contents)
     except OSError as e:
-        print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})")
-        print("    You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,")
-        print("    but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.")
-        raise SystemExit(1)
+        if ENFORCE_ATOMIC_WRITES:
+            print(f"[X] OSError: Failed to write {path} with fcntl.F_FULLFSYNC. ({e})")
+            print("    You can store the archive/ subfolder on a hard drive or network share that doesn't support support syncronous writes,")
+            print("    but the main folder containing the index.sqlite3 and ArchiveBox.conf files must be on a filesystem that supports FSYNC.")
+            raise SystemExit(1)
+
+        # retry the write without forcing FSYNC (aka atomic mode)
+        with open(path, mode=mode, encoding=encoding) as f:
+            if isinstance(contents, dict):
+                dump(contents, f, indent=4, sort_keys=True, cls=ExtendedEncoder)
+            elif isinstance(contents, (bytes, str)):
+                f.write(contents)
+
+    # set file permissions
     os.chmod(path, int(OUTPUT_PERMISSIONS, base=8))

 @enforce_types
-def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) -> None:
+def chmod_file(path: str, cwd: str='.') -> None:
     """chmod -R <permissions> <cwd>/<path>"""

     root = Path(cwd) / path

@@ -104,10 +117,15 @@ def chmod_file(path: str, cwd: str='.', permissions: str=OUTPUT_PERMISSIONS) ->
         raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))

     if not root.is_dir():
+        # path is just a plain file
         os.chmod(root, int(OUTPUT_PERMISSIONS, base=8))
     else:
         for subpath in Path(path).glob('**/*'):
-            os.chmod(subpath, int(OUTPUT_PERMISSIONS, base=8))
+            if subpath.is_dir():
+                # directories need execute permissions to be able to list contents
+                os.chmod(subpath, int(DIR_OUTPUT_PERMISSIONS, base=8))
+            else:
+                os.chmod(subpath, int(OUTPUT_PERMISSIONS, base=8))

 @enforce_types

@@ -197,7 +197,7 @@

         // select the action button from the dropdown
         container.find('select[name=action]')
-            .find('op:selected').removeAttr('selected').end()
+            .find('[selected]').removeAttr('selected').end()
             .find('[value=' + action_type + ']').attr('selected', 'selected').click()

         // click submit & replace the archivebox logo with a spinner

@@ -1,62 +1,3 @@
-{% extends "base.html" %}
-{% load static %}
-
-{% block body %}
-<div id="toolbar">
-    <form id="changelist-search" action="{% url 'public-index' %}" method="get">
-        <div>
-            <label for="searchbar"><img src="/static/admin/img/search.svg" alt="Search"></label>
-            <input type="text" size="40" name="q" value="" id="searchbar" autofocus placeholder="Title, URL, tags, timestamp, or content...".>
-            <input type="submit" value="Search" style="height: 36px; padding-top: 6px; margin: 8px"/>
-            <input type="button"
-                   value="♺"
-                   title="Refresh..."
-                   onclick="location.href='{% url 'public-index' %}'"
-                   style="background-color: rgba(121, 174, 200, 0.8); height: 30px; font-size: 0.8em; margin-top: 12px; padding-top: 6px; float:right">
-            </input>
-        </div>
-    </form>
-</div>
-<table id="table-bookmarks">
-    <thead>
-        <tr>
-            <th style="width: 100px;">Bookmarked</th>
-            <th style="width: 26vw;">Snapshot ({{object_list|length}})</th>
-            <th style="width: 140px">Files</th>
-            <th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
-        </tr>
-    </thead>
-    <tbody>
-        {% for link in object_list %}
-            {% include 'main_index_row.html' with link=link %}
-        {% endfor %}
-    </tbody>
-</table>
-<center>
-    <span class="step-links">
-        {% if page_obj.has_previous %}
-            <a href="{% url 'public-index' %}?page=1">« first</a>
-            <a href="{% url 'public-index' %}?page={{ page_obj.previous_page_number }}">previous</a>
-        {% endif %}
-
-        <span class="current">
-            Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}.
-        </span>
-
-        {% if page_obj.has_next %}
-            <a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
-            <a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last »</a>
-        {% endif %}
-    </span>
-
-    {% if page_obj.has_next %}
-        <a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
-        <a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last »</a>
-    {% endif %}
-    </span>
-    <br>
-</center>
-{% endblock %}
 {% extends "admin/base_site.html" %}
 {% load i18n admin_urls static admin_list %}
 {% load core_tags %}

@@ -28,6 +28,14 @@
     <a href="/add" id="submit"> Add more URLs ➕</a>
 </center>
 {% else %}
+    <div id="in-progress" style="display: none;">
+        <center><h3>Adding URLs to index and running archive methods...</h3>
+        <br/>
+        <div class="loader"></div>
+        <br/>
+        Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for detailed progress...
+        </center>
+    </div>
     <form id="add-form" method="POST" class="p-form">{% csrf_token %}
         <h1>Add new URLs to your archive</h1>
         <br/>

@@ -38,20 +46,22 @@
     </form>
     <br/><br/><br/>
     <center id="delay-warning" style="display: none">
-        <small>(it's safe to leave this page, adding will continue in the background)</small>
+        <small>(you will be redirected to your <a href="/">Snapshot list</a> momentarily, its safe to close this page at any time)</small>
     </center>
     {% if absolute_add_path %}
-        <center id="bookmarklet">
+        <!-- <center id="bookmarklet">
             <p>Bookmark this link to quickly add to your archive:
-            <a href="javascript:void(window.open('{{ absolute_add_path }}?url='+document.location.href));">Add to ArchiveBox</a></p>
-        </center>
+            <a href="javascript:void(window.open('{{ absolute_add_path }}?url='+encodeURIComponent(document.location.href)));">Add to ArchiveBox</a></p>
+        </center> -->
     {% endif %}
     <script>
         document.getElementById('add-form').addEventListener('submit', function(event) {
+            document.getElementById('in-progress').style.display = 'block'
+            document.getElementById('add-form').style.display = 'none'
+            document.getElementById('delay-warning').style.display = 'block'
             setTimeout(function() {
-                document.getElementById('add-form').innerHTML = '<center><h3>Adding URLs to index and running archive methods...<h3><br/><div class="loader"></div><br/>Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for progress...</center>'
-                document.getElementById('delay-warning').style.display = 'block'
-            }, 200)
+                window.location = '/'
+            }, 2000)
             return true
         })
     </script>

@@ -36,7 +36,7 @@
     {% endif %}
     </span>
 </td>
-<td style="text-align:left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; title="{{link.url}}">
+<td style="text-align:left; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;" title="{{link.url}}">
     <a href="{{link.url}}">
         {{link.url}}
     </a>

@@ -414,17 +414,21 @@
         </div>
     </div>
 {% endif %}
+{% if PREVIEW_ORIGINALS %}
     <div class="col-lg-2">
         <div class="card">
-            <iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
+            <iframe class="card-img-top" src="{{url}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy" referrerpolicy="no-referrer"></iframe>
             <div class="card-body">
-                <a href="{{url}}" title="Open in new tab..." target="_blank" rel="noopener">
+                <a href="{{url}}" title="Open in new tab..." target="_blank" rel="noopener" referrerpolicy="no-referrer">
                     <p class="card-text"><code>🌐 {{domain}}</code></p>
                 </a>
-                <a href="{{url}}" target="preview" id="original-btn"><h4 class="card-title">Original</h4></a>
+                <a href="{{url}}" target="preview" id="original-btn" referrerpolicy="no-referrer">
+                    <h4 class="card-title">Original</h4>
+                </a>
             </div>
         </div>
     </div>
+{% endif %}
     <div class="col-lg-2">
         <div class="card">
             <iframe class="card-img-top" src="{{headers_path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>

@@ -17,6 +17,8 @@ from requests.exceptions import RequestException, ReadTimeout

 from .vendor.base32_crockford import encode as base32_encode  # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
+from os.path import lexists
+from os import remove as remove_file

 try:
     import chardet

@@ -59,7 +61,7 @@ URL_REGEX = re.compile(
     r'(?=('
     r'http[s]?://'                    # start matching from allowed schemes
     r'(?:[a-zA-Z]|[0-9]'              # followed by allowed alphanum characters
-    r'|[$-_@.&+]|[!*\(\),]'           # or allowed symbols
+    r'|[-_$@.&+!*\(\),]'              # or allowed symbols (keep hyphen first to match literal hyphen)
     r'|(?:%[0-9a-fA-F][0-9a-fA-F]))'  # or allowed unicode bytes
     r'[^\]\[\(\)<>"\'\s]+'            # stop parsing at these symbols
     r'))',

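A note on the regex fix above: inside a character class, '$-_' is a range (codepoints 36 through 95) rather than three literal characters, so the old pattern accidentally accepted a lot of extra punctuation. A quick standalone illustration of the difference (not ArchiveBox code):

    import re

    old_class = re.compile(r'[$-_@.&+]')   # '$-_' is a range covering ':' ';' '=' '?' etc.
    new_class = re.compile(r'[-_$@.&+]')   # leading '-' is literal, only listed chars match
    print(bool(old_class.match(':')))   # True
    print(bool(new_class.match(':')))   # False
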
@@ -219,7 +221,7 @@ def get_headers(url: str, timeout: int=None) -> str:
 def chrome_args(**options) -> List[str]:
     """helper to build up a chrome shell command with arguments"""

-    from .config import CHROME_OPTIONS
+    from .config import CHROME_OPTIONS, CHROME_VERSION

     options = {**CHROME_OPTIONS, **options}

@@ -229,19 +231,29 @@ def chrome_args(**options) -> List[str]:
     cmd_args = [options['CHROME_BINARY']]

     if options['CHROME_HEADLESS']:
-        cmd_args += ('--headless',)
+        chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
+        if chrome_major_version >= 111:
+            cmd_args += ("--headless=new",)
+        else:
+            cmd_args += ('--headless',)

     if not options['CHROME_SANDBOX']:
         # assume this means we are running inside a docker container
         # in docker, GPU support is limited, sandboxing is unecessary,
         # and SHM is limited to 64MB by default (which is too low to be usable).
         cmd_args += (
-            '--no-sandbox',
-            '--disable-gpu',
-            '--disable-dev-shm-usage',
-            '--disable-software-rasterizer',
-            '--run-all-compositor-stages-before-draw',
-            '--hide-scrollbars',
+            "--no-sandbox",
+            "--no-zygote",
+            "--disable-dev-shm-usage",
+            "--disable-software-rasterizer",
+            "--run-all-compositor-stages-before-draw",
+            "--hide-scrollbars",
+            "--window-size=1440,2000",
+            "--autoplay-policy=no-user-gesture-required",
+            "--no-first-run",
+            "--use-fake-ui-for-media-stream",
+            "--use-fake-device-for-media-stream",
+            "--disable-sync",
         )

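For reference, the headless-mode switch added above keys off the major version parsed out of CHROME_VERSION. A small sketch of that parse (not ArchiveBox code; the CHROME_VERSION value is a hypothetical example of what a `chromium --version` style string looks like):

    import re

    CHROME_VERSION = "Chromium 112.0.5615.49"   # hypothetical example value
    chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
    flag = "--headless=new" if chrome_major_version >= 111 else "--headless"
    print(chrome_major_version, flag)   # -> 112 --headless=new
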
@@ -254,14 +266,24 @@ def chrome_args(**options) -> List[str]:
     if options['RESOLUTION']:
         cmd_args += ('--window-size={}'.format(options['RESOLUTION']),)

-    if options['TIMEOUT']:
-        cmd_args += ('--timeout={}'.format(options['TIMEOUT'] * 1000),)
+    if options['CHROME_TIMEOUT']:
+        cmd_args += ('--timeout={}'.format(options['CHROME_TIMEOUT'] * 1000),)

     if options['CHROME_USER_DATA_DIR']:
         cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))

     return cmd_args

+def chrome_cleanup():
+    """
+    Cleans up any state or runtime files that chrome leaves behind when killed by
+    a timeout or other error
+    """
+
+    from .config import IN_DOCKER
+
+    if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
+        remove_file("/home/archivebox/.config/chromium/SingletonLock")
+
 def ansi_to_html(text):
     """

assets/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
+# assets/
+
+This folder contains assets used by the Jekyll Static Site Generator for ArchiveBox.io.
+
+It cannot be moved or renamed or the custom CSS on ArchiveBox.io will break.

bin/archive (10 lines changed)
@@ -1,11 +1,13 @@
-#!/bin/bash
+#!/usr/bin/env bash
+# DEPRECATED: this helper exists for backwards compatibility with <v0.4 only
+# Do not use this to call archivebox, instead use the archivebox binary directly.

 if python3 -m django --version >/dev/null 2>&1; then
     python3 -m archivebox "$*"
 else
-    echo '[X] ArchiveBox must be installed before using:'
-    echo "    pip install archivebox"
+    echo '[X] ArchiveBox not found, is it installed and present in your $PATH?'
+    echo '    pip3 install archivebox'
     echo
-    echo "Hint: Did you forget to activate a virtuenv or set your $$PATH?"
+    echo 'Hint: Did you forget to activate a virtualenv?'
     exit 2
 fi

@@ -23,7 +23,24 @@ fi

 cd "$REPO_DIR/brew_dist"
 # make sure archivebox.rb is up-to-date with the dependencies
+git pull
+git status | grep 'up to date'

-echo "[+] Building Homebrew bottle"
-brew install --build-bottle ./archivebox.rb
+echo
+echo "[+] Uninstalling any exisitng archivebox versions..."
+brew uninstall archivebox || true
+brew untap archivebox/archivebox || true
+
+# echo "[*] Running Formula linters and test build..."
+# brew test-bot --tap=ArchiveBox/homebrew-archivebox archivebox/archivebox/archivebox || true
+# brew uninstall archivebox || true
+# brew untap archivebox/archivebox || true
+
+echo
+echo "[+] Installing and building hombrew bottle from https://Github.com/ArchiveBox/homebrew-archivebox#main"
+brew tap archivebox/archivebox
+brew install --build-bottle archivebox
 brew bottle archivebox
+
+echo
+echo "[√] Finished. Make sure to commit the outputted .tar.gz and bottle files!"

bin/build_dev.sh (new executable file, 35 lines)
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# ./bin/build_docker.sh dev 'linux/arm/v7'
+
+### Bash Environment Setup
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/
+# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
+# set -o xtrace
+set -o errexit
+set -o errtrace
+set -o nounset
+set -o pipefail
+IFS=$'\n'
+
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
+cd "$REPO_DIR"
+which docker > /dev/null || exit 1
+
+TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
+VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
+SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
+REQUIRED_PLATFORMS="${2:-"linux/arm64,linux/amd64,linux/arm/v7"}"
+
+echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$REQUIRED_PLATFORMS"
+
+echo "[+] Building archivebox:$VERSION docker image..."
+# docker builder prune
+docker build . --no-cache -t archivebox-dev --load
+
+# docker buildx build --platform "$REQUIRED_PLATFORMS" --load . \
+#     -t archivebox \
+#     -t archivebox:$TAG_NAME \
+#     -t archivebox:$VERSION \
+#     -t archivebox:$SHORT_VERSION

@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# ./bin/build_docker.sh dev 'linux/arm/v7'

 ### Bash Environment Setup
 # http://redsymbol.net/articles/unofficial-bash-strict-mode/

@@ -11,23 +12,83 @@ set -o pipefail
 IFS=$'\n'

 REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
-VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
-SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
 cd "$REPO_DIR"

-which docker > /dev/null
+which docker > /dev/null || exit 1
+which jq > /dev/null || exit 1
+# which pdm > /dev/null || exit 1
+
+SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
+
+TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
+VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
+SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
+SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
+
+echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
+
+function check_platforms() {
+    INSTALLED_PLATFORMS="$(docker buildx inspect | grep 'Platforms:' )"
+
+    for REQUIRED_PLATFORM in ${SELECTED_PLATFORMS//,/$IFS}; do
+        echo "[+] Checking for: $REQUIRED_PLATFORM..."
+        if ! (echo "$INSTALLED_PLATFORMS" | grep -q "$REQUIRED_PLATFORM"); then
+            return 1
+        fi
+    done
+    echo
+    return 0
+}
+
+function remove_builder() {
+    # remove existing xbuilder
+    docker buildx stop xbuilder || true
+    docker buildx rm xbuilder || true
+}
+
+function create_builder() {
+    docker buildx use xbuilder && return 0
+    echo "[+] Creating new xbuilder for: $SELECTED_PLATFORMS"
+    echo
+
+    # Switch to buildx builder if already present / previously created
+    docker buildx create --name xbuilder --driver docker-container --bootstrap --use --platform "$SELECTED_PLATFORMS" || true
+    docker buildx inspect --bootstrap || true
+}
+
+function recreate_builder() {
+    # Install QEMU binaries for cross-platform building if not installed
+    docker run --privileged --rm 'tonistiigi/binfmt' --install all
+
+    remove_builder
+    create_builder
+}
+
+# Check if docker is ready for cross-plaform builds, if not, recreate builder
+docker buildx use xbuilder 2>&1 >/dev/null || create_builder
+check_platforms || (recreate_builder && check_platforms) || exit 1
+
+
+# Build python package lists
+echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..."
+pdm lock --group=':all' --strategy="cross_platform" --production
+pdm export --group=':all' --production --without-hashes -o requirements.txt

 echo "[+] Building archivebox:$VERSION docker image..."
-docker build . -t archivebox \
-    -t archivebox:latest \
-    -t archivebox:$VERSION \
-    -t archivebox:$SHORT_VERSION \
-    -t docker.io/nikisweeting/archivebox:latest \
-    -t docker.io/nikisweeting/archivebox:$VERSION \
-    -t docker.io/nikisweeting/archivebox:$SHORT_VERSION \
-    -t docker.io/archivebox/archivebox:latest \
-    -t docker.io/archivebox/archivebox:$VERSION \
-    -t docker.io/archivebox/archivebox:$SHORT_VERSION \
-    -t docker.pkg.github.com/archivebox/archivebox/archivebox:latest \
-    -t docker.pkg.github.com/archivebox/archivebox/archivebox:$VERSION \
-    -t docker.pkg.github.com/archivebox/archivebox/archivebox:$SHORT_VERSION
+# docker builder prune
+# docker build . --no-cache -t archivebox-dev \
+# replace --load with --push to deploy
+docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
+    -t archivebox/archivebox \
+    -t archivebox/archivebox:$TAG_NAME \
+    -t archivebox/archivebox:$VERSION \
+    -t archivebox/archivebox:$SHORT_VERSION \
+    -t archivebox/archivebox:latest \
+    -t nikisweeting/archivebox \
+    -t nikisweeting/archivebox:$TAG_NAME \
+    -t nikisweeting/archivebox:$VERSION \
+    -t nikisweeting/archivebox:$SHORT_VERSION \
+    -t nikisweeting/archivebox:latest \
+    -t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
+    -t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
+    -t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION

@@ -15,17 +15,25 @@ REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && p
 if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
     source "$REPO_DIR/.venv/bin/activate"
 else
-    echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
+    echo "[!] Warning: No virtualenv presesnt in $REPO_DIR/.venv, creating one now..."
+    python3 -m venv --system-site-packages --symlinks $REPO_DIR/.venv
 fi
 cd "$REPO_DIR"

 echo "[*] Cleaning up build dirs"
 cd "$REPO_DIR"
 rm -Rf build dist

 echo "[+] Building sdist, bdist_wheel, and egg_info"
-python3 setup.py \
-    sdist --dist-dir=./pip_dist \
-    bdist_wheel --dist-dir=./pip_dist \
-    egg_info --egg-base=./pip_dist
+rm -f archivebox/package.json
+cp package.json archivebox/package.json
+
+pdm self update
+pdm install
+pdm build
+pdm export --without-hashes -o ./pip_dist/requirements.txt
+
+cp dist/* ./pip_dist/
+
+echo
+echo "[√] Finished. Don't forget to commit the new sdist and wheel files in ./pip_dist/"

@ -1,45 +1,75 @@
|
||||||
#!/usr/bin/env bash
|
#!/bin/bash
|
||||||
|
|
||||||
DATA_DIR="${DATA_DIR:-/data}"
|
export DATA_DIR="${DATA_DIR:-/data}"
|
||||||
ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
|
export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
|
||||||
|
|
||||||
|
# default PUID and PGID if data dir is empty and no PUID+PGID is set
|
||||||
|
export DEFAULT_PUID=911
|
||||||
|
export DEFAULT_PGID=911
|
||||||
|
|
||||||
# Set the archivebox user UID & GID
|
# if data directory already exists, autodetect detect owner by looking at files within
|
||||||
if [[ -n "$PUID" && "$PUID" != 0 ]]; then
|
export DETECTED_UID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
|
||||||
usermod -u "$PUID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
export DETECTED_GID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
|
||||||
fi
|
|
||||||
if [[ -n "$PGID" && "$PGID" != 0 ]]; then
|
|
||||||
groupmod -g "$PGID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
# Set the archivebox user to use the configured UID & GID
|
||||||
|
# prefers PUID and PGID env vars passsed in explicitly, falls back to autodetected defaults
|
||||||
|
groupmod -o -g "${PUID:-$DETECTED_UID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||||
|
usermod -o -u "${PGID:-$DETECTED_GID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||||
|
|
||||||
# Set the permissions of the data dir to match the archivebox user
|
# re-set PUID and PGID to values reported by system instead of values we tried to set,
|
||||||
|
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
|
||||||
|
export PUID="$(id -u archivebox)"
|
||||||
|
export PGID="$(id -g archivebox)"
|
||||||
|
|
||||||
|
# Check the permissions of the data dir (or create if it doesn't exist)
|
||||||
if [[ -d "$DATA_DIR/archive" ]]; then
|
if [[ -d "$DATA_DIR/archive" ]]; then
|
||||||
# check data directory permissions
|
if touch "$DATA_DIR/archive/.permissions_test_safe_to_delete" 2>/dev/null; then
|
||||||
if [[ ! "$(stat -c %u $DATA_DIR/archive)" = "$(id -u archivebox)" ]]; then
|
# It's fine, we are able to write to the data directory (as root inside the container)
|
||||||
echo "Change in ownership detected, please be patient while we chown existing files"
|
rm -f "$DATA_DIR/archive/.permissions_test_safe_to_delete"
|
||||||
echo "This could take some time..."
|
# echo "[√] Permissions are correct"
|
||||||
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER -R "$DATA_DIR"
|
else
|
||||||
|
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive dissapeared, etc.)
|
||||||
|
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
|
||||||
|
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
|
||||||
|
echo -e " \$ chown -R $PUID:$PGID ./data\n" >&2
|
||||||
|
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" >&2
|
||||||
|
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
# create data directory
|
# create data directory
|
||||||
mkdir -p "$DATA_DIR/logs"
|
mkdir -p "$DATA_DIR/logs"
|
||||||
chown -R $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
|
|
||||||
fi
|
fi
|
||||||
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
|
|
||||||
|
|
||||||
|
# force set the ownership of the data dir contents to the archivebox user and group
|
||||||
|
# this is needed because Docker Desktop often does not map user permissions from the host properly
|
||||||
|
chown $PUID:$PGID "$DATA_DIR"
|
||||||
|
chown $PUID:$PGID "$DATA_DIR"/*
|
||||||
|
|
||||||
|
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome
|
||||||
|
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
|
||||||
|
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
|
||||||
|
chown $PUID:$PGID "${PLAYWRIGHT_BROWSERS_PATH}/*"
|
||||||
|
|
||||||
|
# (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
|
||||||
|
export IN_QEMU="$(pmap 1 | grep qemu | wc -l | grep -E '^0$' >/dev/null && echo 'False' || echo 'True')"
|
||||||
|
if [[ "$IN_QEMU" == 'True' ]]; then
|
||||||
|
echo -e "\n[!] Warning: Running $(uname -m) emulated container in QEMU, some things will break!" >&2
|
||||||
|
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." >&2
|
||||||
|
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
# Drop permissions to run commands as the archivebox user
|
# Drop permissions to run commands as the archivebox user
|
||||||
if [[ "$1" == /* || "$1" == "echo" || "$1" == "archivebox" ]]; then
|
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
|
||||||
# arg 1 is a binary, execute it verbatim
|
# handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
|
||||||
# e.g. "archivebox init"
|
# e.g. "docker run archivebox /venv/bin/archivebox-alt init"
|
||||||
# "/bin/bash"
|
# "docker run archivebox /bin/bash -c '...'"
|
||||||
# "echo"
|
# "docker run archivebox echo test"
|
||||||
exec gosu "$ARCHIVEBOX_USER" bash -c "$*"
|
exec gosu "$PUID" bash -c "$*"
|
||||||
else
|
else
|
||||||
# no command given, assume args were meant to be passed to archivebox cmd
|
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
|
||||||
# e.g. "add https://example.com"
|
# e.g. "docker run archivebox add --depth=1 https://example.com"
|
||||||
# "manage createsupseruser"
|
# "docker run archivebox manage createsupseruser"
|
||||||
# "server 0.0.0.0:8000"
|
# "docker run archivebox server 0.0.0.0:8000"
|
||||||
exec gosu "$ARCHIVEBOX_USER" bash -c "archivebox $*"
|
exec gosu "$PUID" bash -c "archivebox $*"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,15 @@
|
||||||
#!/bin/bash
|
#!/usr/bin/env bash
|
||||||
|
# Helper script to export browser history and bookmarks to a format ArchiveBox can ingest.
|
||||||
|
# Usage:
|
||||||
|
# curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/export_browser_history.sh'
|
||||||
|
# bash export_browser_history.sh --chrome
|
||||||
|
# bash export_browser_history.sh --firefox
|
||||||
|
# bash export_browser_history.sh --safari
|
||||||
|
# ls
|
||||||
|
# chrome_history.json
|
||||||
|
# firefox_history.json
|
||||||
|
# firefox_bookmarks.json
|
||||||
|
# safari_history.json
|
||||||
|
|
||||||
OUTPUT_DIR="$(pwd)"
|
OUTPUT_DIR="$(pwd)"
|
||||||
|
|
||||||
|
|
@ -16,9 +27,9 @@ if [[ "$1" == "--chrome" ]]; then
|
||||||
sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_time, 'description', title, 'href', url)) || \"]\" FROM urls;" > "$OUTPUT_DIR/chrome_history.json"
|
sqlite3 "$OUTPUT_DIR/chrome_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_time, 'description', title, 'href', url)) || \"]\" FROM urls;" > "$OUTPUT_DIR/chrome_history.json"
|
||||||
jq < "$(dirname "${2:-$default}")"/Bookmarks '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' > "$OUTPUT_DIR/chrome_bookmarks.json"
|
jq < "$(dirname "${2:-$default}")"/Bookmarks '.roots.other.children[] | {href: .url, description: .name, timestamp: .date_added}' > "$OUTPUT_DIR/chrome_bookmarks.json"
|
||||||
|
|
||||||
rm "$DATA_DIR"/output/sources/chrome_history.db.*
|
rm "$OUTPUT_DIR"/chrome_history.db.*
|
||||||
echo "Chrome history exported to:"
|
echo "Chrome history exported to:"
|
||||||
echo " output/sources/chrome_history.json"
|
echo " $OUTPUT_DIR/chrome_history.json"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$1" == "--firefox" ]]; then
|
if [[ "$1" == "--firefox" ]]; then
|
||||||
|
|
@ -33,12 +44,29 @@ if [[ "$1" == "--firefox" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || \"]\" FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json"
|
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', last_visit_date, 'description', title, 'href', url)) || \"]\" FROM moz_places;" > "$OUTPUT_DIR/firefox_history.json"
|
||||||
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "SELECT \"[\" || group_concat(json_object('timestamp', b.dateAdded, 'description', b.title, 'href', f.url)) || \"]\" FROM moz_bookmarks AS b JOIN moz_places AS f ON f.id = b.fk" > "$OUTPUT_DIR/firefox_bookmarks.json"
|
|
||||||
|
sqlite3 "$OUTPUT_DIR/firefox_history.db.tmp" "
|
||||||
|
with recursive tags AS (
|
||||||
|
select id, title, '' AS tags
|
||||||
|
FROM moz_bookmarks
|
||||||
|
where parent == 0
|
||||||
|
UNION ALL
|
||||||
|
select c.id, p.title, c.title || ',' || tags AS tags
|
||||||
|
from moz_bookmarks AS c
|
||||||
|
JOIN tags AS p
|
||||||
|
ON c.parent = p.id
|
||||||
|
)
|
||||||
|
|
||||||
|
SELECT '[' || group_concat(json_object('timestamp', b.dateAdded, 'description', b.title, 'href', f.url, 'tags', tags.tags)) || ']'
|
||||||
|
FROM moz_bookmarks AS b
|
||||||
|
JOIN moz_places AS f ON f.id = b.fk
|
||||||
|
JOIN tags ON tags.id = b.parent
|
||||||
|
WHERE f.url LIKE '%://%';" > "$OUTPUT_DIR/firefox_bookmarks.json"
|
||||||
|
|
||||||
rm "$DATA_DIR"/output/sources/firefox_history.db.*
|
rm "$OUTPUT_DIR"/firefox_history.db.*
|
||||||
echo "Firefox history exported to:"
|
echo "Firefox history exported to:"
|
||||||
echo " output/sources/firefox_history.json"
|
echo " $OUTPUT_DIR/firefox_history.json"
|
||||||
echo " output/sources/firefox_bookmarks.json"
|
echo " $OUTPUT_DIR/firefox_bookmarks.json"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$1" == "--safari" ]]; then
|
if [[ "$1" == "--safari" ]]; then
|
||||||
|
|
@ -54,7 +82,7 @@ if [[ "$1" == "--safari" ]]; then
|
||||||
|
|
||||||
sqlite3 "$OUTPUT_DIR/safari_history.db.tmp" "select url from history_items" > "$OUTPUT_DIR/safari_history.json"
|
sqlite3 "$OUTPUT_DIR/safari_history.db.tmp" "select url from history_items" > "$OUTPUT_DIR/safari_history.json"
|
||||||
|
|
||||||
rm "$DATA_DIR"/output/sources/safari_history.db.*
|
rm "$OUTPUT_DIR"/safari_history.db.*
|
||||||
echo "Safari history exported to:"
|
echo "Safari history exported to:"
|
||||||
echo " output/sources/safari_history.json"
|
echo " $OUTPUT_DIR/safari_history.json"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,8 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||||
source "$DIR/.venv/bin/activate"
|
source "$DIR/.venv/bin/activate"
|
||||||
|
|
||||||
echo "[*] Running flake8..."
|
echo "[*] Running flake8..."
|
||||||
flake8 archivebox && echo "√ No errors found."
|
cd archivebox
|
||||||
|
flake8 . && echo "√ No errors found."
|
||||||
|
|
||||||
echo
|
echo
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,20 @@ set -o pipefail
|
||||||
IFS=$'\n'
|
IFS=$'\n'
|
||||||
|
|
||||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
|
||||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
|
||||||
|
CURRENT_PLATFORM="$(uname)"
|
||||||
|
REQUIRED_PLATFORM="Darwin"
|
||||||
|
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
|
||||||
|
echo "[!] Skipping the Homebrew package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
cd "$REPO_DIR"
|
cd "$REPO_DIR"
|
||||||
|
|
||||||
# TODO
|
./bin/build_brew.sh
|
||||||
exit 0
|
|
||||||
|
git add '*.bottle.tar.gz'
|
||||||
|
git commit -m "add new release bottle"
|
||||||
|
git pull
|
||||||
|
git push
|
||||||
|
|
@ -11,15 +11,35 @@ set -o pipefail
|
||||||
IFS=$'\n'
|
IFS=$'\n'
|
||||||
|
|
||||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
|
||||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
|
||||||
cd "$REPO_DIR"
|
cd "$REPO_DIR"
|
||||||
|
|
||||||
|
SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
|
||||||
|
|
||||||
|
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
||||||
|
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||||
|
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
||||||
|
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
||||||
|
|
||||||
|
|
||||||
|
# echo "[*] Logging in to Docker Hub & Github Container Registry"
|
||||||
|
# docker login --username=nikisweeting
|
||||||
|
# docker login ghcr.io --username=pirate
|
||||||
|
|
||||||
|
echo "[^] Building docker image"
|
||||||
|
./bin/build_docker.sh "$TAG_NAME" "$SELECTED_PLATFORMS"
|
||||||
|
|
||||||
echo "[^] Uploading docker image"
|
echo "[^] Uploading docker image"
|
||||||
# docker login --username=nikisweeting
|
docker buildx build --platform "$SELECTED_PLATFORMS" --push . \
|
||||||
# docker login docker.pkg.github.com --username=pirate
|
-t archivebox/archivebox \
|
||||||
docker push archivebox/archivebox:$VERSION archivebox/archivebox:$SHORT_VERSION archivebox/archivebox:latest
|
-t archivebox/archivebox:$TAG_NAME \
|
||||||
docker push docker.io/nikisweeting/archivebox
|
-t archivebox/archivebox:$VERSION \
|
||||||
docker push docker.io/archivebox/archivebox
|
-t archivebox/archivebox:$SHORT_VERSION \
|
||||||
docker push docker.pkg.github.com/archivebox/archivebox/archivebox
|
-t archivebox/archivebox:latest \
|
||||||
|
-t nikisweeting/archivebox \
|
||||||
|
-t nikisweeting/archivebox:$TAG_NAME \
|
||||||
|
-t nikisweeting/archivebox:$VERSION \
|
||||||
|
-t nikisweeting/archivebox:$SHORT_VERSION \
|
||||||
|
-t nikisweeting/archivebox:latest \
|
||||||
|
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
||||||
|
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
||||||
|
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
|
||||||
|
|
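Note: the multi-platform `docker buildx build --push` invocation above assumes the host already has a buildx builder with QEMU emulation registered for the non-native architectures. A rough one-time setup sketch (the builder name `multiarch` is arbitrary and not part of the release script):

    docker run --privileged --rm tonistiigi/binfmt --install all   # register QEMU handlers for arm64 / arm/v7
    docker buildx create --name multiarch --use                    # create and select a multi-platform builder
    docker buildx inspect --bootstrap                              # confirm the supported platforms are listed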
@ -11,17 +11,11 @@ set -o pipefail
|
||||||
IFS=$'\n'
|
IFS=$'\n'
|
||||||
|
|
||||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
|
||||||
cd "$REPO_DIR"
|
cd "$REPO_DIR"
|
||||||
source "$REPO_DIR/.venv/bin/activate"
|
source "$REPO_DIR/.venv/bin/activate"
|
||||||
|
|
||||||
|
echo "[^] Publishing to Test PyPI..."
|
||||||
|
pdm publish --repository testpypi
|
||||||
|
|
||||||
# apt install python3 python3-all python3-dev
|
echo "[^] Publishing to PyPI..."
|
||||||
# pip install '.[dev]'
|
pdm publish --no-build
|
||||||
|
|
||||||
|
|
||||||
echo "[^] Uploading to test.pypi.org"
|
|
||||||
python3 -m twine upload --repository testpypi pip_dist/archivebox-${VERSION}*.{whl,tar.gz}
|
|
||||||
|
|
||||||
echo "[^] Uploading to pypi.org"
|
|
||||||
python3 -m twine upload --repository pypi pip_dist/archivebox-${VERSION}*.{whl,tar.gz}
|
|
||||||
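Note: `pdm publish` builds the sdist/wheel and uploads it in one step, and `testpypi` / `pypi` are repository names it resolves itself (the script above relies on that). Credentials can also be passed explicitly, e.g. with PyPI API tokens; the token variables here are placeholders, not part of the script:

    pdm publish --repository testpypi --username __token__ --password "$TESTPYPI_TOKEN"
    pdm publish --no-build --username __token__ --password "$PYPI_TOKEN"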
274
bin/setup.sh
274
bin/setup.sh
|
|
@ -1,120 +1,200 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env sh
|
||||||
# ArchiveBox Setup Script
|
# ArchiveBox Setup Script: https://github.com/ArchiveBox/ArchiveBox
|
||||||
# https://github.com/ArchiveBox/ArchiveBox
|
# Supported Platforms: Ubuntu/Debian/FreeBSD/macOS
|
||||||
|
# Usage:
|
||||||
|
# curl -sSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/dev/bin/setup.sh' | sh
|
||||||
|
|
||||||
|
clear
|
||||||
|
|
||||||
|
if [ $(id -u) -eq 0 ]; then
|
||||||
|
echo ""
|
||||||
|
echo "[X] You cannot run this script as root. You must run it as a non-root user with sudo ability."
|
||||||
|
echo " Create a new non-privileged user 'archivebox' if necessary."
|
||||||
|
echo " adduser archivebox && usermod -a archivebox -G sudo && su archivebox"
|
||||||
|
echo " https://www.digitalocean.com/community/tutorials/how-to-create-a-new-sudo-enabled-user-on-ubuntu-20-04-quickstart"
|
||||||
|
echo " https://www.vultr.com/docs/create-a-sudo-user-on-freebsd"
|
||||||
|
echo " Then re-run this script as the non-root user."
|
||||||
|
echo ""
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if (which docker-compose > /dev/null && docker pull archivebox/archivebox:latest); then
|
||||||
|
echo "[+] Initializing an ArchiveBox data folder at ~/archivebox/data using Docker Compose..."
|
||||||
|
mkdir -p ~/archivebox
|
||||||
|
cd ~/archivebox
|
||||||
|
mkdir -p data
|
||||||
|
if [ -f "./index.sqlite3" ]; then
|
||||||
|
mv ~/archivebox/* ~/archivebox/data/
|
||||||
|
fi
|
||||||
|
curl -O 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/docker-compose.yml'
|
||||||
|
docker-compose run --rm archivebox init --setup
|
||||||
|
echo
|
||||||
|
echo "[+] Starting ArchiveBox server using: docker-compose up -d..."
|
||||||
|
docker-compose up -d
|
||||||
|
sleep 7
|
||||||
|
open http://127.0.0.1:8000 || true
|
||||||
|
echo
|
||||||
|
echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox/data. Usage:"
|
||||||
|
echo " cd ~/archivebox"
|
||||||
|
echo " docker-compose ps"
|
||||||
|
echo " docker-compose down"
|
||||||
|
echo " docker-compose pull"
|
||||||
|
echo " docker-compose up"
|
||||||
|
echo " docker-compose run archivebox manage createsuperuser"
|
||||||
|
echo " docker-compose run archivebox add 'https://example.com'"
|
||||||
|
echo " docker-compose run archivebox list"
|
||||||
|
echo " docker-compose run archivebox help"
|
||||||
|
exit 0
|
||||||
|
elif (which docker > /dev/null && docker pull archivebox/archivebox:latest); then
|
||||||
|
echo "[+] Initializing an ArchiveBox data folder at ~/archivebox using Docker..."
|
||||||
|
mkdir -p ~/archivebox
|
||||||
|
cd ~/archivebox
|
||||||
|
if [ -f "./data/index.sqlite3" ]; then
|
||||||
|
cd ./data
|
||||||
|
fi
|
||||||
|
docker run -v "$PWD":/data -it --rm archivebox/archivebox:latest init --setup
|
||||||
|
echo
|
||||||
|
echo "[+] Starting ArchiveBox server using: docker run -d archivebox/archivebox..."
|
||||||
|
docker run -v "$PWD":/data -it -d -p 8000:8000 --name=archivebox archivebox/archivebox:latest
|
||||||
|
sleep 7
|
||||||
|
open http://127.0.0.1:8000 || true
|
||||||
|
echo
|
||||||
|
echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox. Usage:"
|
||||||
|
echo " cd ~/archivebox"
|
||||||
|
echo " docker ps --filter name=archivebox"
|
||||||
|
echo " docker kill archivebox"
|
||||||
|
echo " docker pull archivebox/archivebox"
|
||||||
|
echo " docker run -v $PWD:/data -d -p 8000:8000 --name=archivebox archivebox/archivebox"
|
||||||
|
echo " docker run -v $PWD:/data -it archivebox/archivebox manage createsuperuser"
|
||||||
|
echo " docker run -v $PWD:/data -it archivebox/archivebox add 'https://example.com'"
|
||||||
|
echo " docker run -v $PWD:/data -it archivebox/archivebox list"
|
||||||
|
echo " docker run -v $PWD:/data -it archivebox/archivebox help"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "[!] It's highly recommended to use ArchiveBox with Docker, but Docker wasn't found."
|
||||||
|
echo ""
|
||||||
|
echo " ⚠️ If you want to use Docker, press [Ctrl-C] to cancel now. ⚠️"
|
||||||
|
echo " Get Docker: https://docs.docker.com/get-docker/"
|
||||||
|
echo " After you've installed Docker, run this script again."
|
||||||
|
echo ""
|
||||||
|
echo "Otherwise, install will continue with apt/brew/pip in 12s... (press [Ctrl+C] to cancel)"
|
||||||
|
echo ""
|
||||||
|
sleep 12 || exit 1
|
||||||
|
echo "Proceeding with system package manager..."
|
||||||
|
echo ""
|
||||||
|
|
||||||
echo "[i] ArchiveBox Setup Script 📦"
|
echo "[i] ArchiveBox Setup Script 📦"
|
||||||
echo ""
|
echo ""
|
||||||
echo " This is a helper script which installs the ArchiveBox dependencies on your system using homebrew/aptitude."
|
echo " This is a helper script which installs the ArchiveBox dependencies on your system using brew/apt/pip3."
|
||||||
echo " You may be prompted for a password in order to install the following:"
|
echo " You may be prompted for a sudo password in order to install the following:"
|
||||||
echo ""
|
echo ""
|
||||||
echo " - python3, python3-pip, python3-distutils"
|
echo " - archivebox"
|
||||||
echo " - curl"
|
echo " - python3, pip, nodejs, npm (languages used by ArchiveBox, and its extractor modules)"
|
||||||
echo " - wget"
|
echo " - curl, wget, git, youtube-dl, yt-dlp (used for extracting title, favicon, git, media, and more)"
|
||||||
echo " - git"
|
echo " - chromium (skips this if any Chrome/Chromium version is already installed)"
|
||||||
echo " - youtube-dl"
|
|
||||||
echo " - chromium-browser (skip this if Chrome/Chromium is already installed)"
|
|
||||||
echo " - nodejs (used for singlefile, readability, mercury, and more)"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo " If you'd rather install these manually, you can find documentation here:"
|
echo " If you'd rather install these manually as-needed, you can find detailed documentation here:"
|
||||||
echo " https://github.com/ArchiveBox/ArchiveBox/wiki/Install"
|
echo " https://github.com/ArchiveBox/ArchiveBox/wiki/Install"
|
||||||
echo ""
|
echo ""
|
||||||
read -p "Press [enter] to continue with the automatic install, or Ctrl+C to cancel..." REPLY
|
echo "Continuing in 12s... (press [Ctrl+C] to cancel)"
|
||||||
|
echo ""
|
||||||
|
sleep 12 || exit 1
|
||||||
|
echo "Proceeding to install dependencies..."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# On Linux:
|
# On Linux:
|
||||||
if which apt-get > /dev/null; then
|
if which apt-get > /dev/null; then
|
||||||
echo "[+] Adding ArchiveBox apt repo to sources..."
|
echo "[+] Adding ArchiveBox apt repo and signing key to sources..."
|
||||||
sudo apt install software-properties-common
|
if ! (sudo apt install -y software-properties-common && sudo add-apt-repository -u ppa:archivebox/archivebox); then
|
||||||
sudo add-apt-repository -u ppa:archivebox/archivebox
|
echo "deb http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main" | sudo tee /etc/apt/sources.list.d/archivebox.list
|
||||||
echo "[+] Installing python3, wget, curl..."
|
echo "deb-src http://ppa.launchpad.net/archivebox/archivebox/ubuntu focal main" | sudo tee -a /etc/apt/sources.list.d/archivebox.list
|
||||||
sudo apt install -y git python3 python3-pip python3-distutils wget curl youtube-dl nodejs npm ripgrep
|
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C258F79DCC02E369
|
||||||
# sudo apt install archivebox
|
sudo apt-get update -qq
|
||||||
|
|
||||||
if which google-chrome; then
|
|
||||||
echo "[i] You already have google-chrome installed, if you would like to download chromium instead (they work pretty much the same), follow the Manual Setup instructions"
|
|
||||||
google-chrome --version
|
|
||||||
elif which chromium-browser; then
|
|
||||||
echo "[i] chromium-browser already installed, using existing installation."
|
|
||||||
chromium-browser --version
|
|
||||||
elif which chromium; then
|
|
||||||
echo "[i] chromium already installed, using existing installation."
|
|
||||||
chromium --version
|
|
||||||
else
|
|
||||||
echo "[+] Installing chromium..."
|
|
||||||
sudo apt install chromium || sudo apt install chromium-browser
|
|
||||||
fi
|
fi
|
||||||
|
echo
|
||||||
|
echo "[+] Installing ArchiveBox system dependencies using apt..."
|
||||||
|
sudo apt-get install -y git python3 python3-pip python3-distutils wget curl youtube-dl yt-dlp ffmpeg nodejs npm ripgrep
|
||||||
|
sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb libgbm-dev || sudo apt-get install -y chromium || sudo apt-get install -y chromium-browser || true
|
||||||
|
sudo apt-get install -y archivebox
|
||||||
|
sudo apt-get --only-upgrade install -y archivebox
|
||||||
|
echo ""
|
||||||
|
echo "[+] Installing ArchiveBox python dependencies using pip3..."
|
||||||
|
sudo python3 -m pip install --upgrade --ignore-installed archivebox
|
||||||
# On Mac:
|
# On Mac:
|
||||||
elif which brew > /dev/null; then # 🐍 eye of newt
|
elif which brew > /dev/null; then
|
||||||
echo "[+] Installing python3, wget, curl (ignore 'already installed' warnings)..."
|
echo "[+] Installing ArchiveBox system dependencies using brew..."
|
||||||
brew install git wget curl youtube-dl ripgrep node
|
brew tap archivebox/archivebox
|
||||||
if which python3; then
|
brew update
|
||||||
if python3 -c 'import sys; raise SystemExit(sys.version_info < (3,5,0))'; then
|
brew install --fetch-HEAD -f archivebox
|
||||||
echo "[√] Using existing $(which python3)..."
|
echo ""
|
||||||
else
|
echo "[+] Installing ArchiveBox python dependencies using pip3..."
|
||||||
echo "[+] Installing python3..."
|
python3 -m pip install --upgrade --ignore-installed archivebox
|
||||||
brew install python3
|
elif which pkg > /dev/null; then
|
||||||
fi
|
echo "[+] Installing ArchiveBox system dependencies using pkg and pip (python3.9)..."
|
||||||
else
|
sudo pkg install -y python3 py39-pip py39-sqlite3 npm wget curl youtube_dl ffmpeg git ripgrep
|
||||||
echo "[+] Installing python3..."
|
sudo pkg install -y chromium
|
||||||
brew install python3
|
echo ""
|
||||||
fi
|
echo "[+] Installing ArchiveBox python dependencies using pip..."
|
||||||
|
# don't use sudo here so that pip installs in $HOME/.local instead of into /usr/local
|
||||||
if ls /Applications/Google\ Chrome*.app > /dev/null; then
|
python3 -m pip install --upgrade --ignore-installed archivebox
|
||||||
echo "[√] Using existing /Applications/Google Chrome.app"
|
|
||||||
elif ls /Applications/Chromium.app; then
|
|
||||||
echo "[√] Using existing /Applications/Chromium.app"
|
|
||||||
elif which chromium-browser; then
|
|
||||||
echo "[√] Using existing $(which chromium-browser)"
|
|
||||||
elif which chromium; then
|
|
||||||
echo "[√] Using existing $(which chromium)"
|
|
||||||
else
|
|
||||||
echo "[+] Installing chromium..."
|
|
||||||
brew cask install chromium
|
|
||||||
fi
|
|
||||||
else
|
else
|
||||||
echo "[X] Could not find aptitude or homebrew! ‼️"
|
echo "[!] Warning: Could not find aptitude/homebrew/pkg! May not be able to install all dependencies automatically."
|
||||||
echo ""
|
echo ""
|
||||||
echo " If you're on macOS, make sure you have homebrew installed: https://brew.sh/"
|
echo " If you're on macOS, make sure you have homebrew installed: https://brew.sh/"
|
||||||
echo " If you're on Ubuntu/Debian, make sure you have apt installed: https://help.ubuntu.com/lts/serverguide/apt.html"
|
echo " If you're on Linux, only Ubuntu/Debian/BSD systems are officially supported with this script."
|
||||||
echo " (those are the only currently supported systems for the automatic setup script)"
|
echo " If you're on Windows, this script is not officially supported (Docker is recommeded instead)."
|
||||||
echo ""
|
echo ""
|
||||||
echo "See the README.md for Manual Setup & Troubleshooting instructions."
|
echo "See the README.md for Manual Setup & Troubleshooting instructions if you you're unable to run ArchiveBox after this script completes."
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
if ! (python3 --version && python3 -m pip --version && python3 -m django --version); then
|
||||||
|
echo "[X] Python 3 pip was not found on your system!"
|
||||||
|
echo " You must first install Python >= 3.7 (and pip3):"
|
||||||
|
echo " https://www.python.org/downloads/"
|
||||||
|
echo " https://wiki.python.org/moin/BeginnersGuide/Download"
|
||||||
|
echo " After installing, run this script again."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
npm i -g npm
|
if ! (python3 -m django --version && python3 -m archivebox version --quiet); then
|
||||||
pip3 install --upgrade pip setuptools
|
echo "[X] Django and ArchiveBox were not found after installing!"
|
||||||
|
echo " Check to see if a previous step failed."
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
pip3 install --upgrade archivebox
|
# echo ""
|
||||||
npm install -g 'git+https://github.com/ArchiveBox/ArchiveBox.git'
|
# echo "[+] Upgrading npm and pip..."
|
||||||
|
# sudo npm i -g npm || true
|
||||||
|
# sudo python3 -m pip install --upgrade pip setuptools || true
|
||||||
|
|
||||||
# Check:
|
echo
|
||||||
echo ""
|
echo "[+] Initializing ArchiveBox data folder at ~/archivebox..."
|
||||||
echo "[*] Checking installed versions:"
|
mkdir -p ~/archivebox
|
||||||
echo "---------------------------------------------------"
|
cd ~/archivebox
|
||||||
which python3 &&
|
if [ -f "./data/index.sqlite3" ]; then
|
||||||
python3 --version | head -n 1 &&
|
cd ./data
|
||||||
echo "" &&
|
fi
|
||||||
which git &&
|
: | python3 -m archivebox init --setup || true # pipe in empty command to make sure stdin is closed
|
||||||
git --version | head -n 1 &&
|
|
||||||
echo "" &&
|
|
||||||
which wget &&
|
|
||||||
wget --version | head -n 1 &&
|
|
||||||
echo "" &&
|
|
||||||
which curl &&
|
|
||||||
curl --version | head -n 1 &&
|
|
||||||
echo "" &&
|
|
||||||
which youtube-dl &&
|
|
||||||
youtube-dl --version | head -n 1 &&
|
|
||||||
echo "---------------------------------------------------" &&
|
|
||||||
archivebox version &&
|
|
||||||
echo "[√] All dependencies installed. ✅" &&
|
|
||||||
exit 0
|
|
||||||
|
|
||||||
echo "---------------------------------------------------"
|
echo
|
||||||
echo "[X] Failed to install some dependencies! ‼️"
|
echo "[+] Starting ArchiveBox server using: nohup archivebox server &..."
|
||||||
echo " - Try the Manual Setup instructions in the README.md"
|
nohup python3 -m archivebox server 0.0.0.0:8000 > ./logs/server.log 2>&1 &
|
||||||
echo " - Try the Troubleshooting: Dependencies instructions in the README.md"
|
sleep 7
|
||||||
echo " - Open an issue on github to get help: https://github.com/ArchiveBox/ArchiveBox/issues"
|
which open > /dev/null && open http://127.0.0.1:8000 || true
|
||||||
exit 1
|
|
||||||
|
echo
|
||||||
|
echo "[√] Server started on http://0.0.0.0:8000 and data directory initialized in ~/archivebox. Usage:"
|
||||||
|
echo " cd ~/archivebox"
|
||||||
|
echo " ps aux | grep archivebox"
|
||||||
|
echo " pkill -f archivebox"
|
||||||
|
echo " python3 -m pip install --upgrade archivebox"
|
||||||
|
echo " archivebox server --quick-init 0.0.0.0:8000"
|
||||||
|
echo " archivebox manage createsuperuser"
|
||||||
|
echo " archivebox add 'https://example.com'"
|
||||||
|
echo " archivebox list"
|
||||||
|
echo " archivebox help"
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
Subproject commit 95a1c1a0875841d076f06106bd4c2307504928c2
|
Subproject commit ec64946796d46a31e5a4d6449908b4060edb3079
|
||||||
2
deb_dist
2
deb_dist
|
|
@ -1 +1 @@
|
||||||
Subproject commit f8e3a0247c09a2f9aaea2848ee7da9c486e14669
|
Subproject commit 88e4b7e5a6c8998f781e45be1e264c48f3ed5e65
|
||||||
2
docker
2
docker
|
|
@ -1 +1 @@
|
||||||
Subproject commit 236f7881e3105b218864d9b3185b17c44b306106
|
Subproject commit 2cbe77e39c275b4a5a274ff7e75c0b13b39a9dbe
|
||||||
|
|
@ -1,72 +1,133 @@
|
||||||
# Usage:
|
# Usage:
|
||||||
# docker-compose run archivebox init --setup
|
# docker compose run archivebox init --setup
|
||||||
# docker-compose up
|
# docker compose up
|
||||||
# echo "https://example.com" | docker-compose run archivebox archivebox add
|
# echo "https://example.com" | docker compose run archivebox archivebox add
|
||||||
# docker-compose run archivebox add --depth=1 https://example.com/some/feed.rss
|
# docker compose run archivebox add --depth=1 https://example.com/some/feed.rss
|
||||||
# docker-compose run archivebox config --set PUBLIC_INDEX=True
|
# docker compose run archivebox config --set MEDIA_MAX_SIZE=750m
|
||||||
# docker-compose run archivebox help
|
# docker compose run archivebox help
|
||||||
# Documentation:
|
# Documentation:
|
||||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
|
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
|
||||||
|
|
||||||
version: '2.4'
|
version: '3.9'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
archivebox:
|
archivebox:
|
||||||
# build: . # for developers working on archivebox
|
image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
|
||||||
image: ${DOCKER_IMAGE:-archivebox/archivebox:master}
|
|
||||||
command: server --quick-init 0.0.0.0:8000
|
command: server --quick-init 0.0.0.0:8000
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
environment:
|
|
||||||
- ALLOWED_HOSTS=* # add any config options you want as env vars
|
|
||||||
- MEDIA_MAX_SIZE=750m
|
|
||||||
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these if you enable sonic below
|
|
||||||
# - SEARCH_BACKEND_HOST_NAME=sonic
|
|
||||||
# - SEARCH_BACKEND_PASSWORD=SecretPassword
|
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
# - ./archivebox:/app/archivebox # for developers working on archivebox
|
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
|
||||||
|
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
|
||||||
|
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
|
||||||
|
|
||||||
|
environment:
|
||||||
|
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
|
||||||
|
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
|
||||||
|
# - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
|
||||||
|
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
|
||||||
|
# - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo
|
||||||
|
# - ADMIN_PASSWORD=SomeSecretPassword
|
||||||
|
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
|
||||||
|
# - PGID=911
|
||||||
|
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
|
||||||
|
# - SEARCH_BACKEND_HOST_NAME=sonic
|
||||||
|
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||||
|
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
|
||||||
|
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
|
||||||
|
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
|
||||||
|
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
|
||||||
|
# ...
|
||||||
|
# add further configuration options from archivebox/config.py as needed (to apply them only to this container)
|
||||||
|
# or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers)
|
||||||
|
|
||||||
|
# For ad-blocking during archiving, uncomment this section and pihole service section below
|
||||||
|
# networks:
|
||||||
|
# - dns
|
||||||
|
# dns:
|
||||||
|
# - 172.20.0.53
|
||||||
|
|
||||||
|
|
||||||
|
######## Optional Addons: tweak examples below as needed for your specific use case ########
|
||||||
|
|
||||||
|
### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
|
||||||
|
# $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
|
||||||
|
# After starting, backfill any existing Snapshots into the full-text index:
|
||||||
|
# $ docker-compose run archivebox update --index-only
|
||||||
|
|
||||||
# To run the Sonic full-text search backend, first download the config file to sonic.cfg
|
|
||||||
# curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
|
|
||||||
# after starting, backfill any existing Snapshots into the index: docker-compose run archivebox update --index-only
|
|
||||||
# sonic:
|
# sonic:
|
||||||
# image: valeriansaliou/sonic:v1.3.0
|
# image: valeriansaliou/sonic:latest
|
||||||
# expose:
|
# expose:
|
||||||
# - 1491
|
# - 1491
|
||||||
# environment:
|
# environment:
|
||||||
# - SEARCH_BACKEND_PASSWORD=SecretPassword
|
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||||
# volumes:
|
# volumes:
|
||||||
# - ./sonic.cfg:/etc/sonic.cfg:ro
|
# - ./sonic.cfg:/etc/sonic.cfg:ro
|
||||||
# - ./data/sonic:/var/lib/sonic/store
|
# - ./data/sonic:/var/lib/sonic/store
|
||||||
|
|
||||||
|
|
||||||
|
### Example: To run pihole in order to block ad/tracker requests during archiving,
|
||||||
|
# uncomment this block and set up pihole using its admin interface
|
||||||
|
|
||||||
|
# pihole:
|
||||||
|
# image: pihole/pihole:latest
|
||||||
|
# ports:
|
||||||
|
# - 127.0.0.1:8090:80 # uncomment to access the admin HTTP interface on http://localhost:8090
|
||||||
|
# environment:
|
||||||
|
# - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
|
||||||
|
# - DNSMASQ_LISTENING=all
|
||||||
|
# dns:
|
||||||
|
# - 127.0.0.1
|
||||||
|
# - 1.1.1.1
|
||||||
|
# networks:
|
||||||
|
# dns:
|
||||||
|
# ipv4_address: 172.20.0.53
|
||||||
|
# volumes:
|
||||||
|
# - ./etc/pihole:/etc/pihole
|
||||||
|
# - ./etc/dnsmasq:/etc/dnsmasq.d
|
||||||
|
|
||||||
|
|
||||||
### Optional Addons: tweak these examples as needed for your specific use case
|
### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container
|
||||||
|
# $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml'
|
||||||
|
# then restart the scheduler container to apply the changes to the schedule
|
||||||
|
# $ docker compose restart archivebox_scheduler
|
||||||
|
|
||||||
# Example: Run scheduled imports in a docker instead of using cron on the
|
# archivebox_scheduler:
|
||||||
# host machine, add tasks and see more info with archivebox schedule --help
|
# image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
|
||||||
# scheduler:
|
# command: schedule --foreground
|
||||||
# image: archivebox/archivebox:latest
|
|
||||||
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all'
|
|
||||||
# environment:
|
# environment:
|
||||||
# - USE_COLOR=True
|
# - MEDIA_MAX_SIZE=750m # increase this number to allow archiving larger audio/video files
|
||||||
# - SHOW_PROGRESS=False
|
# # - TIMEOUT=60 # increase if you see timeouts often during archiving / on slow networks
|
||||||
|
# # - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them
|
||||||
|
# # - CHECK_SSL_VALIDITY=True # set to False to allow saving URLs w/ broken SSL certs
|
||||||
|
# # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting URLs to Archive.org when archiving
|
||||||
|
# # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues
|
||||||
|
# # - PGID=20
|
||||||
# volumes:
|
# volumes:
|
||||||
# - ./data:/data
|
# - ./data:/data
|
||||||
|
# - ./etc/crontabs:/var/spool/cron/crontabs
|
||||||
|
# # cpus: 2 # uncomment / edit these values to limit container resource consumption
|
||||||
|
# # mem_limit: 2048m
|
||||||
|
# # shm_size: 1024m
|
||||||
|
|
||||||
|
|
||||||
|
### Example: Put Nginx in front of the ArchiveBox server for SSL termination
|
||||||
|
|
||||||
# Example: Put Nginx in front of the ArchiveBox server for SSL termination
|
|
||||||
# nginx:
|
# nginx:
|
||||||
# image: nginx:alpine
|
# image: nginx:alpine
|
||||||
# ports:
|
# ports:
|
||||||
# - 443:443
|
# - 443:443
|
||||||
# - 80:80
|
# - 80:80
|
||||||
# volumes:
|
# volumes:
|
||||||
# - ./etc/nginx/nginx.conf:/etc/nginx/nginx.conf
|
# - ./etc/nginx.conf:/etc/nginx/nginx.conf
|
||||||
# - ./data:/var/www
|
# - ./data:/var/www
|
||||||
|
|
||||||
# Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
|
|
||||||
|
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
|
||||||
|
|
||||||
# wireguard:
|
# wireguard:
|
||||||
# image: linuxserver/wireguard
|
# image: linuxserver/wireguard:latest
|
||||||
# network_mode: 'service:archivebox'
|
# network_mode: 'service:archivebox'
|
||||||
# cap_add:
|
# cap_add:
|
||||||
# - NET_ADMIN
|
# - NET_ADMIN
|
||||||
|
|
@ -78,14 +139,26 @@ services:
|
||||||
# - /lib/modules:/lib/modules
|
# - /lib/modules:/lib/modules
|
||||||
# - ./wireguard.conf:/config/wg0.conf:ro
|
# - ./wireguard.conf:/config/wg0.conf:ro
|
||||||
|
|
||||||
# Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
|
|
||||||
|
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
|
||||||
|
|
||||||
# pywb:
|
# pywb:
|
||||||
# image: webrecorder/pywb:latest
|
# image: webrecorder/pywb:latest
|
||||||
# entrypoint: /bin/sh 'wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback --proxy;'
|
# entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;'
|
||||||
# environment:
|
# environment:
|
||||||
# - INIT_COLLECTION=archivebox
|
# - INIT_COLLECTION=archivebox
|
||||||
# ports:
|
# ports:
|
||||||
# - 8080:8080
|
# - 8080:8080
|
||||||
# volumes:
|
# volumes:
|
||||||
# ./data:/archivebox
|
# - ./data:/archivebox
|
||||||
# ./data/wayback:/webarchive
|
# - ./data/wayback:/webarchive
|
||||||
|
|
||||||
|
|
||||||
|
networks:
|
||||||
|
|
||||||
|
# network needed for pihole container to offer :53 dns resolving on fixed ip for archivebox container
|
||||||
|
dns:
|
||||||
|
ipam:
|
||||||
|
driver: default
|
||||||
|
config:
|
||||||
|
- subnet: 172.20.0.0/24
|
||||||
|
|
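Note: the compose file above defaults to the archivebox/archivebox:dev image via the ${DOCKER_IMAGE:-...} substitution, so a different tag can be selected per-invocation without editing the file, and config set this way persists in ./data for all containers:

    DOCKER_IMAGE=archivebox/archivebox:latest docker compose up -d
    docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False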
@ -55,7 +55,7 @@
|
||||||
# CURL_BINARY = curl
|
# CURL_BINARY = curl
|
||||||
# GIT_BINARY = git
|
# GIT_BINARY = git
|
||||||
# WGET_BINARY = wget
|
# WGET_BINARY = wget
|
||||||
# YOUTUBEDL_BINARY = youtube-dl
|
# YOUTUBEDL_BINARY = yt-dlp
|
||||||
# CHROME_BINARY = chromium
|
# CHROME_BINARY = chromium
|
||||||
|
|
||||||
# CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"
|
# CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"
|
||||||
|
|
|
||||||
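Note: the example config above swaps the default extractor binary from youtube-dl to yt-dlp; the same change can be applied to an existing collection from the command line instead of editing the config file by hand (the data-folder path is an example):

    cd ~/archivebox/data
    archivebox config --set YOUTUBEDL_BINARY=yt-dlp
    archivebox config | grep YOUTUBEDL_BINARY    # confirm the new value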
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
In this folder are some example config files you can use for setting up ArchiveBox on your machine.
|
In this folder are some example config files you can use for setting up ArchiveBox on your machine.
|
||||||
|
|
||||||
E.g. see `etc/nginx` for an example nginx config to serve your archive, or `etc/cron` for an example cron job that crawls a feed every 24 hours.
|
E.g. see `nginx.conf` for an example nginx config to serve your archive with SSL, or `fly.toml` for an example deployment to the Fly.io hosting platform.
|
||||||
|
|
||||||
Please contribute your etc files here! Example contributions
|
Please contribute your etc files here! Example contributions
|
||||||
|
|
||||||
|
|
|
||||||
29
etc/archivebox.service
Normal file
29
etc/archivebox.service
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
# This is an example systemd service config definition for ArchiveBox.
|
||||||
|
#
|
||||||
|
# Link it into place on your system to use systemd to auto-start the ArchiveBox server on boot:
|
||||||
|
# https://unix.stackexchange.com/questions/224992/where-do-i-put-my-systemd-unit-file
|
||||||
|
#
|
||||||
|
# Review and change these lines as-needed for your specific environment and needs:
|
||||||
|
# WorkingDirectory, ExecStart, User, Group
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=Open source self-hosted web archiving
|
||||||
|
Documentation=https://github.com/ArchiveBox/ArchiveBox/wiki
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
WorkingDirectory=/home/archivebox/archivebox/
|
||||||
|
ExecStart=/usr/local/bin/archivebox server 0.0.0.0:8000
|
||||||
|
ExecReload=/bin/kill -s HUP $MAINPID
|
||||||
|
ExecStop=/bin/kill -s QUIT $MAINPID
|
||||||
|
Restart=always
|
||||||
|
RestartSec=2
|
||||||
|
StandardOutput=syslog
|
||||||
|
StandardError=syslog
|
||||||
|
SyslogIdentifier=archivebox
|
||||||
|
User=archivebox
|
||||||
|
Group=archivebox
|
||||||
|
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
|
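Note: to use the archivebox.service unit above, copy it into systemd's unit directory and enable it; the WorkingDirectory/ExecStart/User values assume an `archivebox` user with a collection at /home/archivebox/archivebox/ and the CLI at /usr/local/bin/archivebox, exactly as written in the file. A minimal sketch:

    sudo cp etc/archivebox.service /etc/systemd/system/archivebox.service
    sudo systemctl daemon-reload
    sudo systemctl enable --now archivebox
    journalctl -u archivebox -f    # follow the server logs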
@ -1 +0,0 @@
|
||||||
0 24 * * * www-data /opt/ArchiveBox/bin/archive "https://getpocket.com/users/example/feed/all" >> /var/log/ArchiveBox.log
|
|
||||||
40
etc/fly.toml
Normal file
40
etc/fly.toml
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
# fly.toml file generated for archivebox on 2021-04-23T16:35:11-04:00
|
||||||
|
|
||||||
|
app = "archivebox"
|
||||||
|
|
||||||
|
kill_signal = "SIGINT"
|
||||||
|
kill_timeout = 5
|
||||||
|
|
||||||
|
[env]
|
||||||
|
|
||||||
|
[mounts]
|
||||||
|
source="archivebox_data"
|
||||||
|
destination="/data"
|
||||||
|
|
||||||
|
[experimental]
|
||||||
|
auto_rollback = true
|
||||||
|
|
||||||
|
[[services]]
|
||||||
|
http_checks = []
|
||||||
|
internal_port = 8000
|
||||||
|
protocol = "tcp"
|
||||||
|
script_checks = []
|
||||||
|
|
||||||
|
[services.concurrency]
|
||||||
|
hard_limit = 25
|
||||||
|
soft_limit = 20
|
||||||
|
type = "connections"
|
||||||
|
|
||||||
|
[[services.ports]]
|
||||||
|
handlers = ["http"]
|
||||||
|
port = 80
|
||||||
|
|
||||||
|
[[services.ports]]
|
||||||
|
handlers = ["tls", "http"]
|
||||||
|
port = 443
|
||||||
|
|
||||||
|
[[services.tcp_checks]]
|
||||||
|
grace_period = "1s"
|
||||||
|
interval = "15s"
|
||||||
|
restart_limit = 6
|
||||||
|
timeout = "2s"
|
||||||
|
|
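Note: the fly.toml above expects a Fly.io volume named archivebox_data mounted at /data and serves the app on internal port 8000. A rough deployment sketch with flyctl; the app name, region, and volume size are assumptions, not part of the config file:

    fly apps create archivebox
    fly volumes create archivebox_data --region ord --size 10
    fly deploy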
@ -34,12 +34,14 @@ http {
|
||||||
server {
|
server {
|
||||||
listen 80 default_server;
|
listen 80 default_server;
|
||||||
server_name _;
|
server_name _;
|
||||||
|
|
||||||
root /var/www;
|
|
||||||
index index.html;
|
index index.html;
|
||||||
autoindex on;
|
autoindex on;
|
||||||
|
|
||||||
try_files $uri $uri/ $uri.html =404;
|
try_files $uri $uri/ $uri.html =404;
|
||||||
|
|
||||||
|
location /archive {
|
||||||
|
root /var/www/archive;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
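Note: after placing the nginx.conf above (it serves the archive out of /var/www with autoindex enabled), the config can be validated and reloaded without downtime, assuming nginx runs directly on the host rather than via the compose example:

    sudo nginx -t          # validate the configuration
    sudo nginx -s reload   # apply it without dropping connections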
3310
package-lock.json
generated
3310
package-lock.json
generated
File diff suppressed because it is too large
|
|
@ -1,13 +1,13 @@
|
||||||
{
|
{
|
||||||
"name": "archivebox",
|
"name": "archivebox",
|
||||||
"version": "0.6.2",
|
"version": "0.7.1",
|
||||||
"description": "ArchiveBox: The self-hosted internet archive",
|
"description": "ArchiveBox: The self-hosted internet archive",
|
||||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||||
"repository": "github:ArchiveBox/ArchiveBox",
|
"repository": "github:ArchiveBox/ArchiveBox",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@postlight/mercury-parser": "^2.2.0",
|
"@postlight/parser": "^2.2.3",
|
||||||
"readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
|
"readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
|
||||||
"single-file": "git+https://github.com/gildas-lormeau/SingleFile.git"
|
"single-file-cli": "^1.1.12"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
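Note: the package.json change above replaces @postlight/mercury-parser with @postlight/parser and the SingleFile git dependency with the published single-file-cli package. From a source checkout, the Node extractors can be refreshed to match:

    npm install                                   # installs the dependencies listed above
    npm ls @postlight/parser single-file-cli      # confirm the resolved versions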
920
pdm.lock
Normal file
920
pdm.lock
Normal file
|
|
@ -0,0 +1,920 @@
|
||||||
|
# This file is @generated by PDM.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
groups = ["default", "ldap", "sonic"]
|
||||||
|
strategy = ["cross_platform"]
|
||||||
|
lock_version = "4.4"
|
||||||
|
content_hash = "sha256:3355b57d87304093c9176a6387d80f5c5226b169964d8039f14a5998046faf4d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "appnope"
|
||||||
|
version = "0.1.3"
|
||||||
|
summary = "Disable App Nap on macOS >= 10.9"
|
||||||
|
files = [
|
||||||
|
{file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"},
|
||||||
|
{file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "asgiref"
|
||||||
|
version = "3.7.2"
|
||||||
|
requires_python = ">=3.7"
|
||||||
|
summary = "ASGI specs, helper code, and adapters"
|
||||||
|
dependencies = [
|
||||||
|
"typing-extensions>=4; python_version < \"3.11\"",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"},
|
||||||
|
{file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "asttokens"
|
||||||
|
version = "2.4.1"
|
||||||
|
summary = "Annotate AST trees with source code positions"
|
||||||
|
dependencies = [
|
||||||
|
"six>=1.12.0",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"},
|
||||||
|
{file = "asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "brotli"
|
||||||
|
version = "1.1.0"
|
||||||
|
summary = "Python bindings for the Brotli compression library"
|
||||||
|
files = [
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ae56aca0402a0f9a3431cddda62ad71666ca9d4dc3a10a142b9dce2e3c0cda3"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43ce1b9935bfa1ede40028054d7f48b5469cd02733a365eec8a329ffd342915d"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c4855522edb2e6ae7fdb58e07c3ba9111e7621a8956f481c68d5d979c93032e"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38025d9f30cf4634f8309c6874ef871b841eb3c347e90b0851f63d1ded5212da"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6a904cb26bfefc2f0a6f240bdf5233be78cd2488900a2f846f3c3ac8489ab80"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"},
|
||||||
|
{file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"},
|
||||||
|
{file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"},
|
||||||
|
{file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7905193081db9bfa73b1219140b3d315831cbff0d8941f22da695832f0dd188f"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a77def80806c421b4b0af06f45d65a136e7ac0bdca3c09d9e2ea4e515367c7e9"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dadd1314583ec0bf2d1379f7008ad627cd6336625d6679cf2f8e67081b83acf"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:901032ff242d479a0efa956d853d16875d42157f98951c0230f69e69f9c09bac"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:22fc2a8549ffe699bfba2256ab2ed0421a7b8fadff114a3d201794e45a9ff578"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae15b066e5ad21366600ebec29a7ccbc86812ed267e4b28e860b8ca16a2bc474"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"},
|
||||||
|
{file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"},
|
||||||
|
{file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "brotlicffi"
|
||||||
|
version = "1.1.0.0"
|
||||||
|
requires_python = ">=3.7"
|
||||||
|
summary = "Python CFFI bindings to the Brotli library"
|
||||||
|
dependencies = [
|
||||||
|
"cffi>=1.0.0",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "brotlicffi-1.1.0.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9b7ae6bd1a3f0df532b6d67ff674099a96d22bc0948955cb338488c31bfb8851"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19ffc919fa4fc6ace69286e0a23b3789b4219058313cf9b45625016bf7ff996b"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9feb210d932ffe7798ee62e6145d3a757eb6233aa9a4e7db78dd3690d7755814"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84763dbdef5dd5c24b75597a77e1b30c66604725707565188ba54bab4f114820"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-cp37-abi3-win32.whl", hash = "sha256:1b12b50e07c3911e1efa3a8971543e7648100713d4e0971b13631cce22c587eb"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:994a4f0681bb6c6c3b0925530a1926b7a189d878e6e5e38fae8efa47c5d9c613"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2e4aeb0bd2540cb91b069dbdd54d458da8c4334ceaf2d25df2f4af576d6766ca"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b7b0033b0d37bb33009fb2fef73310e432e76f688af76c156b3594389d81391"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54a07bb2374a1eba8ebb52b6fafffa2afd3c4df85ddd38fcc0511f2bb387c2a8"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7901a7dc4b88f1c1475de59ae9be59799db1007b7d059817948d8e4f12e24e35"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce01c7316aebc7fce59da734286148b1d1b9455f89cf2c8a4dfce7d41db55c2d"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:246f1d1a90279bb6069de3de8d75a8856e073b8ff0b09dcca18ccc14cec85979"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc4bc5d82bc56ebd8b514fb8350cfac4627d6b0743382e46d033976a5f80fab6"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37c26ecb14386a44b118ce36e546ce307f4810bc9598a6e6cb4f7fca725ae7e6"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca72968ae4eaf6470498d5c2887073f7efe3b1e7d7ec8be11a06a79cc810e990"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:add0de5b9ad9e9aa293c3aa4e9deb2b61e99ad6c1634e01d01d98c03e6a354cc"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9b6068e0f3769992d6b622a1cd2e7835eae3cf8d9da123d7f51ca9c1e9c333e5"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8557a8559509b61e65083f8782329188a250102372576093c88930c875a69838"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a7ae37e5d79c5bdfb5b4b99f2715a6035e6c5bf538c3746abc8e26694f92f33"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391151ec86bb1c683835980f4816272a87eaddc46bb91cbf44f62228b84d8cca"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2f3711be9290f0453de8eed5275d93d286abe26b08ab4a35d7452caa1fef532f"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a807d760763e398bbf2c6394ae9da5815901aa93ee0a37bca5efe78d4ee3171"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa8ca0623b26c94fccc3a1fdd895be1743b838f3917300506d04aa3346fd2a14"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3de0cf28a53a3238b252aca9fed1593e9d36c1d116748013339f0949bfc84112"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be5ec0e88a4925c91f3dea2bb0013b3a2accda6f77238f76a34a1ea532a1cb0"},
|
||||||
|
{file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d9eb71bb1085d996244439154387266fd23d6ad37161f6f52f1cd41dd95a3808"},
|
||||||
|
{file = "brotlicffi-1.1.0.0.tar.gz", hash = "sha256:b77827a689905143f87915310b93b273ab17888fd43ef350d4832c4a71083c13"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "certifi"
|
||||||
|
version = "2023.7.22"
|
||||||
|
requires_python = ">=3.6"
|
||||||
|
summary = "Python package for providing Mozilla's CA Bundle."
|
||||||
|
files = [
|
||||||
|
{file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
|
||||||
|
{file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cffi"
|
||||||
|
version = "1.16.0"
|
||||||
|
requires_python = ">=3.8"
|
||||||
|
summary = "Foreign Function Interface for Python calling C code."
|
||||||
|
dependencies = [
|
||||||
|
"pycparser",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"},
|
||||||
|
{file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"},
|
||||||
|
{file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"},
|
||||||
|
{file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"},
|
||||||
|
{file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"},
|
||||||
|
{file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
name = "charset-normalizer"
version = "3.3.2"
requires_python = ">=3.7.0"
summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
files = [
    {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"},
    {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"},
    {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"},
    {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"},
    {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"},
    {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
]

[[package]]
name = "colorama"
version = "0.4.6"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
summary = "Cross-platform colored terminal text."
files = [
    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]

[[package]]
name = "croniter"
version = "2.0.1"
requires_python = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "croniter provides iteration for datetime object with cron like format"
dependencies = [
    "python-dateutil",
    "pytz>2021.1",
]
files = [
    {file = "croniter-2.0.1-py2.py3-none-any.whl", hash = "sha256:4cb064ce2d8f695b3b078be36ff50115cf8ac306c10a7e8653ee2a5b534673d7"},
    {file = "croniter-2.0.1.tar.gz", hash = "sha256:d199b2ec3ea5e82988d1f72022433c5f9302b3b3ea9e6bfd6a1518f6ea5e700a"},
]

[[package]]
name = "dateparser"
version = "1.1.8"
requires_python = ">=3.7"
summary = "Date parsing library designed to parse dates from HTML pages"
dependencies = [
    "python-dateutil",
    "pytz",
    "regex!=2019.02.19,!=2021.8.27",
    "tzlocal",
]
files = [
    {file = "dateparser-1.1.8-py2.py3-none-any.whl", hash = "sha256:070b29b5bbf4b1ec2cd51c96ea040dc68a614de703910a91ad1abba18f9f379f"},
    {file = "dateparser-1.1.8.tar.gz", hash = "sha256:86b8b7517efcc558f085a142cdb7620f0921543fcabdb538c8a4c4001d8178e3"},
]

[[package]]
name = "decorator"
version = "5.1.1"
requires_python = ">=3.5"
summary = "Decorators for Humans"
files = [
    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]

[[package]]
name = "django"
version = "3.1.14"
requires_python = ">=3.6"
summary = "A high-level Python Web framework that encourages rapid development and clean, pragmatic design."
dependencies = [
    "asgiref<4,>=3.2.10",
    "pytz",
    "sqlparse>=0.2.2",
]
files = [
    {file = "Django-3.1.14-py3-none-any.whl", hash = "sha256:0fabc786489af16ad87a8c170ba9d42bfd23f7b699bd5ef05675864e8d012859"},
    {file = "Django-3.1.14.tar.gz", hash = "sha256:72a4a5a136a214c39cf016ccdd6b69e2aa08c7479c66d93f3a9b5e4bb9d8a347"},
]

[[package]]
name = "django-auth-ldap"
version = "4.1.0"
requires_python = ">=3.7"
summary = "Django LDAP authentication backend."
dependencies = [
    "Django>=2.2",
    "python-ldap>=3.1",
]
files = [
    {file = "django-auth-ldap-4.1.0.tar.gz", hash = "sha256:77f749d3b17807ce8eb56a9c9c8e5746ff316567f81d5ba613495d9c7495a949"},
    {file = "django_auth_ldap-4.1.0-py3-none-any.whl", hash = "sha256:68870e7921e84b1a9867e268a9c8a3e573e8a0d95ea08bcf31be178f5826ff36"},
]

[[package]]
name = "django-extensions"
version = "3.1.5"
requires_python = ">=3.6"
summary = "Extensions for Django"
dependencies = [
    "Django>=2.2",
]
files = [
    {file = "django-extensions-3.1.5.tar.gz", hash = "sha256:28e1e1bf49f0e00307ba574d645b0af3564c981a6dfc87209d48cb98f77d0b1a"},
    {file = "django_extensions-3.1.5-py3-none-any.whl", hash = "sha256:9238b9e016bb0009d621e05cf56ea8ce5cce9b32e91ad2026996a7377ca28069"},
]

[[package]]
name = "exceptiongroup"
version = "1.1.3"
requires_python = ">=3.7"
summary = "Backport of PEP 654 (exception groups)"
files = [
    {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
    {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
]

[[package]]
name = "executing"
version = "2.0.1"
requires_python = ">=3.5"
summary = "Get the currently executing AST node of a frame, and other information"
files = [
    {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"},
    {file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"},
]

[[package]]
name = "idna"
version = "3.4"
requires_python = ">=3.5"
summary = "Internationalized Domain Names in Applications (IDNA)"
files = [
    {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
    {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
]

[[package]]
name = "ipython"
version = "8.17.2"
requires_python = ">=3.9"
summary = "IPython: Productive Interactive Computing"
dependencies = [
    "appnope; sys_platform == \"darwin\"",
    "colorama; sys_platform == \"win32\"",
    "decorator",
    "exceptiongroup; python_version < \"3.11\"",
    "jedi>=0.16",
    "matplotlib-inline",
    "pexpect>4.3; sys_platform != \"win32\"",
    "prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30",
    "pygments>=2.4.0",
    "stack-data",
    "traitlets>=5",
    "typing-extensions; python_version < \"3.10\"",
]
files = [
    {file = "ipython-8.17.2-py3-none-any.whl", hash = "sha256:1e4d1d666a023e3c93585ba0d8e962867f7a111af322efff6b9c58062b3e5444"},
    {file = "ipython-8.17.2.tar.gz", hash = "sha256:126bb57e1895594bb0d91ea3090bbd39384f6fe87c3d57fd558d0670f50339bb"},
]

[[package]]
name = "jedi"
version = "0.19.1"
requires_python = ">=3.6"
summary = "An autocompletion tool for Python that can be used for text editors."
dependencies = [
    "parso<0.9.0,>=0.8.3",
]
files = [
    {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"},
    {file = "jedi-0.19.1.tar.gz", hash = "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd"},
]

[[package]]
name = "matplotlib-inline"
version = "0.1.6"
requires_python = ">=3.5"
summary = "Inline Matplotlib backend for Jupyter"
dependencies = [
    "traitlets",
]
files = [
    {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"},
    {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"},
]

[[package]]
name = "mutagen"
version = "1.47.0"
requires_python = ">=3.7"
summary = "read and write audio tags for many formats"
files = [
    {file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719"},
    {file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99"},
]

[[package]]
name = "mypy-extensions"
version = "1.0.0"
requires_python = ">=3.5"
summary = "Type system extensions for programs checked with the mypy type checker."
files = [
    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]

[[package]]
name = "parso"
version = "0.8.3"
requires_python = ">=3.6"
summary = "A Python Parser"
files = [
    {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"},
    {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"},
]

[[package]]
name = "pexpect"
version = "4.8.0"
summary = "Pexpect allows easy control of interactive console applications."
dependencies = [
    "ptyprocess>=0.5",
]
files = [
    {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"},
    {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"},
]

[[package]]
name = "prompt-toolkit"
version = "3.0.39"
requires_python = ">=3.7.0"
summary = "Library for building powerful interactive command lines in Python"
dependencies = [
    "wcwidth",
]
files = [
    {file = "prompt_toolkit-3.0.39-py3-none-any.whl", hash = "sha256:9dffbe1d8acf91e3de75f3b544e4842382fc06c6babe903ac9acb74dc6e08d88"},
    {file = "prompt_toolkit-3.0.39.tar.gz", hash = "sha256:04505ade687dc26dc4284b1ad19a83be2f2afe83e7a828ace0c72f3a1df72aac"},
]

[[package]]
name = "ptyprocess"
version = "0.7.0"
summary = "Run a subprocess in a pseudo terminal"
files = [
    {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
    {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
]

[[package]]
name = "pure-eval"
version = "0.2.2"
summary = "Safely evaluate AST nodes without side effects"
files = [
    {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"},
    {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"},
]

[[package]]
name = "pyasn1"
version = "0.5.0"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
summary = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
files = [
    {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"},
    {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"},
]

[[package]]
name = "pyasn1-modules"
version = "0.3.0"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
summary = "A collection of ASN.1-based protocols modules"
dependencies = [
    "pyasn1<0.6.0,>=0.4.6",
]
files = [
    {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"},
    {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"},
]

[[package]]
name = "pycparser"
version = "2.21"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "C parser in Python"
files = [
    {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
    {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
]

[[package]]
name = "pycryptodomex"
version = "3.19.0"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
summary = "Cryptographic library for Python"
files = [
    {file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:a77b79852175064c822b047fee7cf5a1f434f06ad075cc9986aa1c19a0c53eb0"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:5b883e1439ab63af976656446fb4839d566bb096f15fc3c06b5a99cde4927188"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3866d68e2fc345162b1b9b83ef80686acfe5cec0d134337f3b03950a0a8bf56"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74eb1f73f788facece7979ce91594dc177e1a9b5d5e3e64697dd58299e5cb4d"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cb51096a6a8d400724104db8a7e4f2206041a1f23e58924aa3d8d96bcb48338"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a588a1cb7781da9d5e1c84affd98c32aff9c89771eac8eaa659d2760666f7139"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:d4dd3b381ff5a5907a3eb98f5f6d32c64d319a840278ceea1dcfcc65063856f3"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:263de9a96d2fcbc9f5bd3a279f14ea0d5f072adb68ebd324987576ec25da084d"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-win32.whl", hash = "sha256:67c8eb79ab33d0fbcb56842992298ddb56eb6505a72369c20f60bc1d2b6fb002"},
    {file = "pycryptodomex-3.19.0-cp35-abi3-win_amd64.whl", hash = "sha256:09c9401dc06fb3d94cb1ec23b4ea067a25d1f4c6b7b118ff5631d0b5daaab3cc"},
    {file = "pycryptodomex-3.19.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:edbe083c299835de7e02c8aa0885cb904a75087d35e7bab75ebe5ed336e8c3e2"},
    {file = "pycryptodomex-3.19.0-pp27-pypy_73-win32.whl", hash = "sha256:136b284e9246b4ccf4f752d435c80f2c44fc2321c198505de1d43a95a3453b3c"},
    {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5d73e9fa3fe830e7b6b42afc49d8329b07a049a47d12e0ef9225f2fd220f19b2"},
    {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f1982c5bc311f0aab8c293524b861b485d76f7c9ab2c3ac9a25b6f7655975"},
    {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb040b5dda1dff1e197d2ef71927bd6b8bfcb9793bc4dfe0bb6df1e691eaacb"},
    {file = "pycryptodomex-3.19.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:800a2b05cfb83654df80266692f7092eeefe2a314fa7901dcefab255934faeec"},
    {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c01678aee8ac0c1a461cbc38ad496f953f9efcb1fa19f5637cbeba7544792a53"},
    {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2126bc54beccbede6eade00e647106b4f4c21e5201d2b0a73e9e816a01c50905"},
    {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b801216c48c0886742abf286a9a6b117e248ca144d8ceec1f931ce2dd0c9cb40"},
    {file = "pycryptodomex-3.19.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:50cb18d4dd87571006fd2447ccec85e6cec0136632a550aa29226ba075c80644"},
    {file = "pycryptodomex-3.19.0.tar.gz", hash = "sha256:af83a554b3f077564229865c45af0791be008ac6469ef0098152139e6bd4b5b6"},
]

[[package]]
name = "pygments"
version = "2.16.1"
requires_python = ">=3.7"
summary = "Pygments is a syntax highlighting package written in Python."
files = [
    {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
    {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
]

[[package]]
name = "python-crontab"
version = "3.0.0"
summary = "Python Crontab API"
dependencies = [
    "python-dateutil",
]
files = [
    {file = "python-crontab-3.0.0.tar.gz", hash = "sha256:79fb7465039ddfd4fb93d072d6ee0d45c1ac8bf1597f0686ea14fd4361dba379"},
    {file = "python_crontab-3.0.0-py3-none-any.whl", hash = "sha256:6d5ba3c190ec76e4d252989a1644fcb233dbf53fbc8fceeb9febe1657b9fb1d4"},
]

[[package]]
name = "python-dateutil"
version = "2.8.2"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
summary = "Extensions to the standard Python datetime module"
dependencies = [
    "six>=1.5",
]
files = [
    {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
    {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]

[[package]]
name = "python-ldap"
version = "3.4.3"
requires_python = ">=3.6"
summary = "Python modules for implementing LDAP clients"
dependencies = [
    "pyasn1-modules>=0.1.5",
    "pyasn1>=0.3.7",
]
files = [
    {file = "python-ldap-3.4.3.tar.gz", hash = "sha256:ab26c519a0ef2a443a2a10391fa3c5cb52d7871323399db949ebfaa9f25ee2a0"},
]

[[package]]
name = "pytz"
version = "2023.3.post1"
summary = "World timezone definitions, modern and historical"
files = [
    {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
    {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
]

[[package]]
name = "regex"
version = "2023.10.3"
requires_python = ">=3.7"
summary = "Alternative regular expression module, to replace re."
files = [
    {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"},
    {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"},
    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"},
    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"},
    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"},
    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"},
    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"},
    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"},
    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"},
    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"},
    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"},
    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"},
    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"},
    {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"},
    {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"},
    {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"},
    {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"},
    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"},
    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"},
    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"},
    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"},
    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"},
    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"},
    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"},
    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"},
    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"},
    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"},
    {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"},
    {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"},
    {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"},
    {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"},
    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"},
    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"},
    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"},
    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"},
    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"},
    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"},
    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"},
    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"},
    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"},
    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"},
    {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"},
    {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"},
    {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"},
    {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"},
    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"},
    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"},
    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"},
    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"},
    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"},
    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"},
    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"},
    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"},
    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"},
    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"},
    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"},
    {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"},
    {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"},
    {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"},
]

[[package]]
name = "requests"
version = "2.31.0"
requires_python = ">=3.7"
summary = "Python HTTP for Humans."
dependencies = [
    "certifi>=2017.4.17",
    "charset-normalizer<4,>=2",
    "idna<4,>=2.5",
    "urllib3<3,>=1.21.1",
]
files = [
    {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
    {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
]

[[package]]
name = "six"
version = "1.16.0"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
summary = "Python 2 and 3 compatibility utilities"
files = [
    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]

[[package]]
name = "sonic-client"
version = "1.0.0"
summary = "python client for sonic search backend"
files = [
    {file = "sonic-client-1.0.0.tar.gz", hash = "sha256:fe324c7354670488ed84847f6a6727d3cb5fb3675cb9b61396dcf5720e5aca66"},
    {file = "sonic_client-1.0.0-py3-none-any.whl", hash = "sha256:291bf292861e97a2dd765ff0c8754ea9631383680d31a63ec3da6f5aa5f4beda"},
]

[[package]]
name = "sqlparse"
version = "0.4.4"
requires_python = ">=3.5"
summary = "A non-validating SQL parser."
files = [
    {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"},
    {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"},
]

[[package]]
name = "stack-data"
version = "0.6.3"
summary = "Extract data from python stack frames and tracebacks for informative displays"
dependencies = [
    "asttokens>=2.1.0",
    "executing>=1.2.0",
    "pure-eval",
]
files = [
    {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"},
    {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"},
]

[[package]]
name = "traitlets"
version = "5.13.0"
requires_python = ">=3.8"
summary = "Traitlets Python configuration system"
files = [
    {file = "traitlets-5.13.0-py3-none-any.whl", hash = "sha256:baf991e61542da48fe8aef8b779a9ea0aa38d8a54166ee250d5af5ecf4486619"},
    {file = "traitlets-5.13.0.tar.gz", hash = "sha256:9b232b9430c8f57288c1024b34a8f0251ddcc47268927367a0dd3eeaca40deb5"},
]

[[package]]
name = "typing-extensions"
version = "4.8.0"
requires_python = ">=3.8"
summary = "Backported and Experimental Type Hints for Python 3.8+"
files = [
    {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
    {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
]

[[package]]
name = "tzdata"
version = "2023.3"
requires_python = ">=2"
summary = "Provider of IANA time zone data"
files = [
    {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
    {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
]

[[package]]
name = "tzlocal"
version = "5.2"
requires_python = ">=3.8"
summary = "tzinfo object for the local timezone"
dependencies = [
    "tzdata; platform_system == \"Windows\"",
]
files = [
    {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"},
    {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"},
]

[[package]]
name = "urllib3"
version = "2.0.7"
requires_python = ">=3.7"
summary = "HTTP library with thread-safe connection pooling, file post, and more."
files = [
    {file = "urllib3-2.0.7-py3-none-any.whl", hash = "sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e"},
    {file = "urllib3-2.0.7.tar.gz", hash = "sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84"},
]

[[package]]
name = "w3lib"
version = "2.1.2"
requires_python = ">=3.7"
summary = "Library of web-related functions"
files = [
    {file = "w3lib-2.1.2-py3-none-any.whl", hash = "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7"},
    {file = "w3lib-2.1.2.tar.gz", hash = "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1"},
]

[[package]]
name = "wcwidth"
version = "0.2.9"
summary = "Measures the displayed width of unicode strings in a terminal"
files = [
    {file = "wcwidth-0.2.9-py2.py3-none-any.whl", hash = "sha256:9a929bd8380f6cd9571a968a9c8f4353ca58d7cd812a4822bba831f8d685b223"},
    {file = "wcwidth-0.2.9.tar.gz", hash = "sha256:a675d1a4a2d24ef67096a04b85b02deeecd8e226f57b5e3a72dbb9ed99d27da8"},
]

[[package]]
name = "websockets"
version = "12.0"
requires_python = ">=3.8"
summary = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
files = [
    {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
    {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
    {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"},
    {file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"},
    {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"},
    {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"},
    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"},
    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"},
    {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"},
    {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"},
    {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"},
    {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"},
    {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"},
    {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"},
    {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"},
    {file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"},
    {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"},
    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"},
    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"},
    {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"},
    {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"},
    {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"},
    {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"},
    {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"},
    {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"},
    {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"},
    {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"},
    {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"},
    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"},
    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"},
    {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"},
    {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"},
    {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"},
    {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"},
    {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"},
    {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"},
    {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"},
    {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"},
    {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"},
    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"},
    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"},
    {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"},
    {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"},
    {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"},
    {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"},
    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"},
    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"},
    {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"},
    {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"},
    {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"},
    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"},
    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"},
    {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"},
    {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"},
    {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"},
    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"},
    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"},
    {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"},
    {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"},
    {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"},
    {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
]
|
|
||||||
|
[[package]]
|
||||||
|
name = "yt-dlp"
|
||||||
|
version = "2023.10.13"
|
||||||
|
requires_python = ">=3.7"
|
||||||
|
summary = "A youtube-dl fork with additional features and patches"
|
||||||
|
dependencies = [
|
||||||
|
"brotli; platform_python_implementation == \"CPython\"",
|
||||||
|
"brotlicffi; platform_python_implementation != \"CPython\"",
|
||||||
|
"certifi",
|
||||||
|
"mutagen",
|
||||||
|
"pycryptodomex",
|
||||||
|
"websockets",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "yt-dlp-2023.10.13.tar.gz", hash = "sha256:e026ea1c435ff36eef1215bc4c5bb8c479938b90054997ba99f63a4541fe63b4"},
|
||||||
|
{file = "yt_dlp-2023.10.13-py2.py3-none-any.whl", hash = "sha256:2b069f22675532eebacdfd6372b1825651a751fef848de9ae6efe6491b2dc38a"},
|
||||||
|
]
|
||||||
pip_dist (submodule, 2 lines changed)
@@ -1 +1 @@
-Subproject commit 534998571c9a2ddff462a9c8f3ed5ea825f91958
+Subproject commit 5323fc773d33ef3f219c35c946f3b353b1251d37

pyproject.toml (new file, 129 lines)
@@ -0,0 +1,129 @@
[project]
name = "archivebox"
version = "0.7.1"
description = "Self-hosted internet archiving solution."
authors = [
    {name = "Nick Sweeting", email = "setup.py@archivebox.io"},
]
dependencies = [
    # "setuptools>=68.2.2",
    "croniter>=0.3.34",
    "dateparser>=1.0.0",
    "django-extensions>=3.0.3",
    "django>=3.1.3,<3.2",
    "ipython>5.0.0",
    "mypy-extensions>=0.4.3",
    "python-crontab>=2.5.1",
    "requests>=2.24.0",
    "w3lib>=1.22.0",
    # "youtube-dl>=2021.04.17",
    "yt-dlp>=2021.4.11",
    # "playwright>=1.39.0; platform_machine != 'armv7l'",
]
requires-python = ">=3.9"
readme = "README.md"
license = {text = "MIT"}
classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Environment :: Web Environment",
    "Framework :: Django",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: End Users/Desktop",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Legal Industry",
    "Intended Audience :: System Administrators",
    "License :: OSI Approved :: MIT License",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
    "Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
    "Topic :: Sociology :: History",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: System :: Archiving",
    "Topic :: System :: Archiving :: Backup",
    "Topic :: System :: Recovery Tools",
    "Topic :: Utilities",
    "Typing :: Typed",
]

# pdm lock -G:all
# pdm install -G:all
[tool.pdm.dev-dependencies]
build = [
    "setuptools",
    "wheel",
    "pdm",
    # "bottle",
    # "stdeb",
    # "twine",
]
lint = [
    "flake8",
    "mypy",
    "django-stubs",
]
test = [
    "pytest",
]
debug = [
    "django-debug-toolbar",
    "djdt_flamegraph",
    "ipdb",
]
doc = [
    "recommonmark",
    "sphinx",
    "sphinx-rtd-theme",
]
dev = [
    "homebrew-pypi-poet>=0.10.0",
]

[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}

[project.optional-dependencies]
sonic = [
    # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
    # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
    "sonic-client>=0.0.5",
]
ldap = [
    # apt install libldap2-dev libsasl2-dev
    "python-ldap>=3.4.3",
    "django-auth-ldap>=4.1.0",
]
# playwright = [
# platform_machine isnt respected by pdm export -o requirements.txt, this breaks arm/v7
# "playwright>=1.39.0; platform_machine != 'armv7l'",
# ]

[project.scripts]
archivebox = "archivebox.cli:main"

[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"


[project.urls]
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
Source = "https://github.com/ArchiveBox/ArchiveBox"
Documentation = "https://github.com/ArchiveBox/ArchiveBox/wiki"
"Bug Tracker" = "https://github.com/ArchiveBox/ArchiveBox/issues"
Changelog = "https://github.com/ArchiveBox/ArchiveBox/releases"
Roadmap = "https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap"
Community = "https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community"
Demo = "https://demo.archivebox.io"
Donate = "https://github.com/ArchiveBox/ArchiveBox/wiki/Donations"
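With setup.py removed, packaging and developer tooling for this release are driven by PDM and the [tool.pdm.*] tables above. A minimal local workflow might look like the following (a sketch, assuming PDM is already installed; the group names come from the file above, the rest is standard PDM usage):

    # install runtime deps plus every dev-dependency group (same command as the comment in the file)
    pdm install -G:all

    # or pull in only selected optional extras from [project.optional-dependencies]
    pdm install -G sonic -G ldap

    # run the helper scripts registered under [tool.pdm.scripts]
    pdm run lint    # wraps ./bin/lint.sh
    pdm run test    # wraps ./bin/test.sh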
setup.py (deleted, 142 lines)
@@ -1,142 +0,0 @@
import json
import setuptools
from setuptools.command.test import test

from pathlib import Path


PKG_NAME = "archivebox"
DESCRIPTION = "The self-hosted internet archive."
LICENSE = "MIT"
AUTHOR = "Nick Sweeting"
AUTHOR_EMAIL="git@nicksweeting.com"
REPO_URL = "https://github.com/ArchiveBox/ArchiveBox"
PROJECT_URLS = {
    "Source": f"{REPO_URL}",
    "Documentation": f"{REPO_URL}/wiki",
    "Bug Tracker": f"{REPO_URL}/issues",
    "Changelog": f"{REPO_URL}/wiki/Changelog",
    "Roadmap": f"{REPO_URL}/wiki/Roadmap",
    "Community": f"{REPO_URL}/wiki/Web-Archiving-Community",
    "Donate": f"{REPO_URL}/wiki/Donations",
}

ROOT_DIR = Path(__file__).parent.resolve()
PACKAGE_DIR = ROOT_DIR / PKG_NAME

README = (PACKAGE_DIR / "README.md").read_text(encoding='utf-8', errors='ignore')
VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['version']

PYTHON_REQUIRES = ">=3.7"
SETUP_REQUIRES = ["wheel"]
INSTALL_REQUIRES = [
    # only add things here that have corresponding apt python3-packages available
    # anything added here also needs to be added to our package dependencies in
    # stdeb.cfg (apt), archivebox.rb (brew), Dockerfile, etc.
    # if there is no apt python3-package equivalent, then vendor it instead in
    # ./archivebox/vendor/
    "requests>=2.24.0",
    "mypy-extensions>=0.4.3",
    "django>=3.1.3,<3.2",
    "django-extensions>=3.0.3",
    "dateparser",
    "ipython",
    "youtube-dl",
    "python-crontab>=2.5.1",
    "croniter>=0.3.34",
    "w3lib>=1.22.0",
]
EXTRAS_REQUIRE = {
    'sonic': [
        "sonic-client>=0.0.5",
    ],
    'dev': [
        "setuptools",
        "twine",
        "wheel",
        "flake8",
        "ipdb",
        "mypy",
        "django-stubs",
        "sphinx",
        "sphinx-rtd-theme",
        "recommonmark",
        "pytest",
        "bottle",
        "stdeb",
        "django-debug-toolbar",
        "djdt_flamegraph",
    ],
}

# To see when setup.py gets called (uncomment for debugging):
# import sys
# print(PACKAGE_DIR, f" (v{VERSION})")
# print('>', sys.executable, *sys.argv)


class DisabledTestCommand(test):
    def run(self):
        # setup.py test is deprecated, disable it here by force so stdeb doesnt run it
        print()
        print('[X] Running tests via setup.py test is deprecated.')
        print('    Hint: Use the ./bin/test.sh script or pytest instead')


setuptools.setup(
    name=PKG_NAME,
    version=VERSION,
    license=LICENSE,
    author=AUTHOR,
    author_email=AUTHOR_EMAIL,
    description=DESCRIPTION,
    long_description=README,
    long_description_content_type="text/markdown",
    url=REPO_URL,
    project_urls=PROJECT_URLS,
    python_requires=PYTHON_REQUIRES,
    setup_requires=SETUP_REQUIRES,
    install_requires=INSTALL_REQUIRES,
    extras_require=EXTRAS_REQUIRE,
    packages=[PKG_NAME],
    include_package_data=True,   # see MANIFEST.in
    entry_points={
        "console_scripts": [
            f"{PKG_NAME} = {PKG_NAME}.cli:main",
        ],
    },
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Operating System :: OS Independent",
        "Development Status :: 4 - Beta",

        "Topic :: Utilities",
        "Topic :: System :: Archiving",
        "Topic :: System :: Archiving :: Backup",
        "Topic :: System :: Recovery Tools",
        "Topic :: Sociology :: History",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
        "Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
        "Topic :: Software Development :: Libraries :: Python Modules",

        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: End Users/Desktop",
        "Intended Audience :: Information Technology",
        "Intended Audience :: Legal Industry",
        "Intended Audience :: System Administrators",

        "Environment :: Console",
        "Environment :: Web Environment",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Framework :: Django",
        "Typing :: Typed",
    ],
    cmdclass={
        "test": DisabledTestCommand,
    },
)
stdeb.cfg
@@ -4,7 +4,8 @@ Package: archivebox
 Package3: archivebox
 Suite: focal
 Suite3: focal
-Build-Depends: dh-python, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
-Depends3: nodejs, wget, curl, git, ffmpeg, youtube-dl, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
-XS-Python-Version: >= 3.7
+Build-Depends: debhelper, dh-python, python3-all, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
+Depends3: nodejs, wget, curl, git, ffmpeg, yt-dlp, ripgrep, python3-all, python3-pip, python3-setuptools, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib
+X-Python3-Version: >= 3.9
+XS-Python-Version: >= 3.9
 Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck
tests/test_extractors.py
@@ -13,12 +13,51 @@ def test_ignore_methods():
     Takes the passed method out of the default methods list and returns that value
     """
     ignored = ignore_methods(['title'])
-    assert should_save_title not in ignored
+    assert "title" not in ignored
 
+def test_save_allowdenylist_works(tmp_path, process, disable_extractors_dict):
+    allow_list = {
+        r'/static': ["headers", "singlefile"],
+        r'example\.com\.html$': ["headers"],
+    }
+    deny_list = {
+        "/static": ["singlefile"],
+    }
+    disable_extractors_dict.update({
+        "SAVE_HEADERS": "true",
+        "USE_SINGLEFILE": "true",
+        "SAVE_ALLOWLIST": pyjson.dumps(allow_list),
+        "SAVE_DENYLIST": pyjson.dumps(deny_list),
+    })
+    add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
+                                 capture_output=True, env=disable_extractors_dict)
+    archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
+    singlefile_file = archived_item_path / "singlefile.html"
+    assert not singlefile_file.exists()
+    headers_file = archived_item_path / "headers.json"
+    assert headers_file.exists()
+
+def test_save_denylist_works(tmp_path, process, disable_extractors_dict):
+    deny_list = {
+        "/static": ["singlefile"],
+    }
+    disable_extractors_dict.update({
+        "SAVE_HEADERS": "true",
+        "USE_SINGLEFILE": "true",
+        "SAVE_DENYLIST": pyjson.dumps(deny_list),
+    })
+    add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
+                                 capture_output=True, env=disable_extractors_dict)
+    archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
+    singlefile_file = archived_item_path / "singlefile.html"
+    assert not singlefile_file.exists()
+    headers_file = archived_item_path / "headers.json"
+    assert headers_file.exists()
+
 def test_singlefile_works(tmp_path, process, disable_extractors_dict):
     disable_extractors_dict.update({"USE_SINGLEFILE": "true"})
     add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
                                  capture_output=True, env=disable_extractors_dict)
     archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
     output_file = archived_item_path / "singlefile.html"
     assert output_file.exists()
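The two new tests exercise the SAVE_ALLOWLIST / SAVE_DENYLIST options by passing them in as environment variables, which also works outside the test suite. A minimal sketch (hypothetical URL, run inside an existing collection):

    # skip the singlefile extractor for any URL matching /static, but still save headers
    export USE_SINGLEFILE=true
    export SAVE_HEADERS=true
    export SAVE_DENYLIST='{"/static": ["singlefile"]}'
    archivebox add 'http://example.com/static/page.html'

As the allow_list/deny_list fixtures above suggest, the keys are regex patterns matched against the URL and the values are lists of extractor names to allow or deny for matching snapshots.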
tests/test_init.py
@@ -11,6 +11,8 @@ from archivebox.config import OUTPUT_PERMISSIONS
 from .fixtures import *
 
+DIR_PERMISSIONS = OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
+
 def test_init(tmp_path, process):
     assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
@@ -55,7 +57,7 @@ def test_correct_permissions_output_folder(tmp_path, process):
     index_files = ['index.sqlite3', 'archive']
     for file in index_files:
         file_path = tmp_path / file
-        assert oct(file_path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS
+        assert oct(file_path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
 
 def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
     os.chdir(tmp_path)
@@ -63,7 +65,7 @@ def test_correct_permissions_add_command_results(tmp_path, process, disable_extr
                                  env=disable_extractors_dict)
     archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
     for path in archived_item_path.iterdir():
-        assert oct(path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS
+        assert oct(path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
 
 def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
     os.chdir(tmp_path)
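The new DIR_PERMISSIONS constant simply swaps in the execute bit that directories need (e.g. an OUTPUT_PERMISSIONS of 644 becomes 755 for directories such as archive/). A quick way to spot-check the resulting modes on a live collection, on Linux and assuming the data dir is ./data:

    # print the octal mode next to each path; expect the file and directory forms to differ only by the exec bits
    stat -c '%a %n' ./data/index.sqlite3 ./data/archive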
tests/test_oneshot.py
@@ -25,4 +25,36 @@ def test_oneshot_command_saves_page_in_right_folder(tmp_path, disable_extractors
     assert "index.json" in items
     assert not "index.sqlite3" in current_path
     assert "output.html" in items
 
+def test_oneshot_command_succeeds(tmp_path, disable_extractors_dict):
+    disable_extractors_dict.update({"SAVE_DOM": "true"})
+    process = subprocess.run(
+        [
+            "archivebox",
+            "oneshot",
+            f"--out-dir={tmp_path}",
+            "--extract=title,favicon,dom",
+            "http://127.0.0.1:8080/static/example.com.html",
+        ],
+        capture_output=True,
+        env=disable_extractors_dict,
+    )
+
+    assert process.returncode == 0
+
+def test_oneshot_command_logs_archiving_finished(tmp_path, disable_extractors_dict):
+    disable_extractors_dict.update({"SAVE_DOM": "true"})
+    process = subprocess.run(
+        [
+            "archivebox",
+            "oneshot",
+            f"--out-dir={tmp_path}",
+            "--extract=title,favicon,dom",
+            "http://127.0.0.1:8080/static/example.com.html",
+        ],
+        capture_output=True,
+        env=disable_extractors_dict,
+    )
+
+    output_str = process.stdout.decode("utf-8")
+    assert "4 files" in output_str
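The flags used by these tests map directly onto the standalone oneshot command, so a rough by-hand equivalent (a sketch, with a hypothetical URL and output dir) is:

    mkdir -p /tmp/oneshot-demo
    archivebox oneshot --out-dir=/tmp/oneshot-demo --extract=title,favicon,dom 'https://example.com'

The first test only checks the exit code; the second simply checks that the command output mentions "4 files" in its summary for this test page.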