-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathdocker-compose.yml
216 lines (184 loc) · 9.86 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# Usage:
# mkdir -p ~/archivebox/data && cd ~/archivebox
# curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml
# docker compose run archivebox version
# docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
# docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
# docker compose run -T archivebox add < bookmarks.txt
# docker compose up -d && open 'http://localhost:8000'
# docker compose run archivebox help
# Documentation:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
# Container definitions. Each service name is nested under `services:` and its
# settings are nested one level deeper; the nesting is what tells Compose which
# image/ports/volumes/environment belong to which container.
services:
  archivebox:
    image: archivebox/archivebox:latest
    ports:
      # HOST:CONTAINER mapping, quoted so it is always read as a string
      - '8000:8000'
    volumes:
      - ./data:/data
      # - ./data/personas/Default/chrome_profile/Default:/data/personas/Default/chrome_profile/Default
    environment:
      # - ADMIN_USERNAME=admin  # creates an admin user on first run with the given user/pass combo
      # - ADMIN_PASSWORD=SomeSecretPassword
      - ALLOWED_HOSTS=*  # set this to the hostname(s) you're going to serve the site from!
      - CSRF_TRUSTED_ORIGINS=http://localhost:8000  # you MUST set this to the server's URL for admin login and the REST API to work
      - PUBLIC_INDEX=True  # set to False to prevent anonymous users from viewing snapshot list
      - PUBLIC_SNAPSHOTS=True  # set to False to prevent anonymous users from viewing snapshot content
      - PUBLIC_ADD_VIEW=False  # set to True to allow anonymous users to submit new URLs to archive
      - SEARCH_BACKEND_ENGINE=sonic  # tells ArchiveBox to use sonic container below for fast full-text search
      - SEARCH_BACKEND_HOST_NAME=sonic
      # NOTE(review): the same placeholder value is also set on the archivebox_scheduler
      # and sonic services below; presumably all three must match, so change them together
      - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
      # - PUID=911  # set to your host user's UID & GID if you encounter permissions issues
      # - PGID=911  # UID/GIDs lower than 500 may clash with system uids and are not recommended
      # For options below, it's better to set in data/ArchiveBox.conf or use
      # `docker compose run archivebox config --set SOME_KEY=someval` instead of setting here:
      # - MEDIA_MAX_SIZE=750m  # increase this filesize limit to allow archiving larger audio/video files
      # - TIMEOUT=60  # increase this number to 120+ seconds if you see many slow downloads timing out
      # - CHECK_SSL_VALIDITY=True  # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
      # - SAVE_ARCHIVE_DOT_ORG=True  # set to False to disable submitting all URLs to Archive.org when archiving
      # - USER_AGENT="..."  # set a custom USER_AGENT to avoid being blocked as a bot
      # ...
      # For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration
    # For ad-blocking during archiving, uncomment this section and the pihole service below
    # networks:
    #   - dns
    # dns:
    #   - 172.20.0.53

  ######## Optional Addons: tweak examples below as needed for your specific use case ########

  ### This optional container runs scheduled jobs in the background (and retries failed ones). To add a new job:
  #   $ docker compose run archivebox schedule --add --every=day --depth=1 'https://example.com/some/rss/feed.xml'
  # then restart the scheduler container to apply any changes to the scheduled task list:
  #   $ docker compose restart archivebox_scheduler
  # https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving
  archivebox_scheduler:
    image: archivebox/archivebox:latest
    command: schedule --foreground --update --every=day
    environment:
      # - PUID=911  # set to your host user's UID & GID if you encounter permissions issues
      # - PGID=911
      - TIMEOUT=120  # use a higher timeout than the main container to give slow tasks more time when retrying
      - SEARCH_BACKEND_ENGINE=sonic  # tells ArchiveBox to use sonic container below for fast full-text search
      - SEARCH_BACKEND_HOST_NAME=sonic
      - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
      # For other config it's better to set using
      # `docker compose run archivebox config --set SOME_KEY=someval` instead of setting here
      # ...
      # For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration
    volumes:
      # same host directory as the main archivebox service
      - ./data:/data
    # cpus: 2  # uncomment / edit these values to limit scheduler container resource consumption
    # mem_limit: 2048m
    # restart: always

  ### This runs the optional Sonic full-text search backend (much faster than default rg backend).
  # If Sonic is ever started after not running for a while, update its full-text index by running:
  #   $ docker compose run archivebox update --index-only
  # https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search
  sonic:
    image: archivebox/sonic:latest
    expose:
      # listed under `expose` rather than `ports`, so no host port is mapped for it
      - 1491
    environment:
      - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
    volumes:
      # - ./sonic.cfg:/etc/sonic.cfg:ro  # mount to customize: https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg
      - ./data/sonic:/var/lib/sonic/store

  ### This optional container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things,
  # or remote control it to set up a chrome profile w/ login credentials for sites you want to archive.
  # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile
  # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#docker-vnc-setup
  novnc:
    image: theasp/novnc:latest
    environment:
      - DISPLAY_WIDTH=1920
      - DISPLAY_HEIGHT=1080
      - RUN_XTERM=no
    ports:
      # to view/control ArchiveBox's browser, visit: http://127.0.0.1:8080/vnc.html
      # restricted to access from localhost by default because it has no authentication
      - '127.0.0.1:8080:8080'
### Example: Put Nginx in front of the ArchiveBox server for SSL termination and static file serving.
# You can also use any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc.
#   nginx:
#     image: nginx:alpine
#     ports:
#       - 443:443
#       - 80:80
#     volumes:
#       - ./etc/nginx.conf:/etc/nginx/nginx.conf
#       - ./data:/var/www
### Example: To run pihole in order to block ad/tracker requests during archiving,
# uncomment this optional block and set up pihole using its admin interface
#   pihole:
#     image: pihole/pihole:latest
#     ports:
#       # access the admin HTTP interface on http://localhost:8090
#       - 127.0.0.1:8090:80
#     environment:
#       - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
#       - DNSMASQ_LISTENING=all
#     dns:
#       - 127.0.0.1
#       - 1.1.1.1
#     networks:
#       dns:
#         ipv4_address: 172.20.0.53
#     volumes:
#       - ./etc/pihole:/etc/pihole
#       - ./etc/dnsmasq:/etc/dnsmasq.d
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel to avoid IP blocks.
# You can also use any other VPN that works at the docker/IP level, e.g. Tailscale, OpenVPN, etc.
#   wireguard:
#     image: linuxserver/wireguard:latest
#     network_mode: 'service:archivebox'
#     cap_add:
#       - NET_ADMIN
#       - SYS_MODULE
#     sysctls:
#       - net.ipv4.conf.all.rp_filter=2
#       - net.ipv4.conf.all.src_valid_mark=1
#     volumes:
#       - /lib/modules:/lib/modules
#       - ./wireguard.conf:/config/wg0.conf:ro
### Example: Run ChangeDetection.io to watch for changes to websites, then trigger ArchiveBox to archive them
# Documentation: https://github.com/dgtlmoon/changedetection.io
# More info: https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml
#   changedetection:
#     image: ghcr.io/dgtlmoon/changedetection.io
#     volumes:
#       - ./data-changedetection:/datastore
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
#   pywb:
#     image: webrecorder/pywb:latest
#     entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;'
#     environment:
#       - INIT_COLLECTION=archivebox
#     ports:
#       - 8686:8080
#     volumes:
#       - ./data:/archivebox
#       - ./data/wayback:/webarchive
# Top-level network definitions, referenced by name from a service's `networks:` list.
networks:
  # network just used for pihole container to offer :53 dns resolving on fixed ip for archivebox container
  dns:
    ipam:
      driver: default
      config:
        # contains the fixed 172.20.0.53 address used by the commented-out pihole example
        - subnet: 172.20.0.0/24
# HOW TO: Set up cloud storage for your ./data/archive (e.g. Amazon S3, Backblaze B2, Google Drive, OneDrive, SFTP, etc.)
# https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-Up-Storage
#
# Follow the steps here to set up the Docker RClone Plugin https://rclone.org/docker/
# $ docker plugin install rclone/docker-volume-rclone:amd64 --grant-all-permissions --alias rclone
# $ nano /var/lib/docker-plugins/rclone/config/rclone.conf
# [examplegdrive]
# type = drive
# scope = drive
# drive_id = 1234567...
# root_folder_id = 0Abcd...
# token = {"access_token":...}
# volumes:
#   archive:
#     driver: rclone
#     driver_opts:
#       remote: 'examplegdrive:archivebox'
#       allow_other: 'true'
#       vfs_cache_mode: full
#       poll_interval: 0