Skip to content

Commit 09553d8

Browse files
committed
hardcode EXTRACTOR_CHOICES to prevent nondeterministic migrations
1 parent 0a5b227 commit 09553d8

File tree

5 files changed

+44
-19
lines changed

5 files changed

+44
-19
lines changed

archivebox/core/admin.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,8 @@ class SnapshotActionForm(ActionForm):
180180
)
181181

182182
# TODO: allow selecting actions for specific extractors? is this useful?
183-
# EXTRACTOR_CHOICES = [
184-
# (name, name.title())
185-
# for name, _, _ in get_default_archive_methods()
186-
# ]
187183
# extractor = forms.ChoiceField(
188-
# choices=EXTRACTOR_CHOICES,
184+
# choices=ArchiveResult.EXTRACTOR_CHOICES,
189185
# required=False,
190186
# widget=forms.MultileChoiceField(attrs={'class': "form-control"})
191187
# )

archivebox/core/migrations/0023_alter_archiveresult_options_archiveresult_abid_and_more.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,21 @@ class Migration(migrations.Migration):
3838
migrations.AlterField(
3939
model_name='archiveresult',
4040
name='extractor',
41-
field=models.CharField(choices=[('htmltotext', 'htmltotext'), ('git', 'git'), ('singlefile', 'singlefile'), ('media', 'media'), ('archive_org', 'archive_org'), ('readability', 'readability'), ('mercury', 'mercury'), ('favicon', 'favicon'), ('pdf', 'pdf'), ('headers', 'headers'), ('screenshot', 'screenshot'), ('dom', 'dom'), ('title', 'title'), ('wget', 'wget')], max_length=32),
41+
field=models.CharField(choices=(
42+
('htmltotext', 'htmltotext'),
43+
('git', 'git'),
44+
('singlefile', 'singlefile'),
45+
('media', 'media'),
46+
('archive_org', 'archive_org'),
47+
('readability', 'readability'),
48+
('mercury', 'mercury'),
49+
('favicon', 'favicon'),
50+
('pdf', 'pdf'),
51+
('headers', 'headers'),
52+
('screenshot', 'screenshot'),
53+
('dom', 'dom'),
54+
('title', 'title'),
55+
('wget', 'wget'),
56+
), max_length=32),
4257
),
4358
]

archivebox/core/models.py

+22-8
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,6 @@
2828
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
2929

3030

31-
EXTRACTOR_CHOICES = [(extractor_name, extractor_name) for extractor_name in EXTRACTORS.keys()]
32-
STATUS_CHOICES = [
33-
("succeeded", "succeeded"),
34-
("failed", "failed"),
35-
("skipped", "skipped")
36-
]
37-
3831
def rand_int_id():
3932
return random.getrandbits(32)
4033

@@ -376,7 +369,28 @@ class ArchiveResult(ABIDModel):
376369
abid_uri_src = 'self.snapshot.url'
377370
abid_subtype_src = 'self.extractor'
378371
abid_rand_src = 'self.old_id'
379-
EXTRACTOR_CHOICES = EXTRACTOR_CHOICES
372+
373+
EXTRACTOR_CHOICES = (
374+
('htmltotext', 'htmltotext'),
375+
('git', 'git'),
376+
('singlefile', 'singlefile'),
377+
('media', 'media'),
378+
('archive_org', 'archive_org'),
379+
('readability', 'readability'),
380+
('mercury', 'mercury'),
381+
('favicon', 'favicon'),
382+
('pdf', 'pdf'),
383+
('headers', 'headers'),
384+
('screenshot', 'screenshot'),
385+
('dom', 'dom'),
386+
('title', 'title'),
387+
('wget', 'wget'),
388+
)
389+
STATUS_CHOICES = [
390+
("succeeded", "succeeded"),
391+
("failed", "failed"),
392+
("skipped", "skipped")
393+
]
380394

381395
old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID')
382396

archivebox/index/html.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def snapshot_icons(snapshot) -> str:
121121
cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons'
122122

123123
def calc_snapshot_icons():
124-
from core.models import EXTRACTOR_CHOICES
124+
from core.models import ArchiveResult
125125
# start = datetime.now(timezone.utc)
126126

127127
archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False)
@@ -147,12 +147,12 @@ def calc_snapshot_icons():
147147
# Missing specific entry for WARC
148148

149149
extractor_outputs = defaultdict(lambda: None)
150-
for extractor, _ in EXTRACTOR_CHOICES:
150+
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
151151
for result in archive_results:
152152
if result.extractor == extractor and result:
153153
extractor_outputs[extractor] = result
154154

155-
for extractor, _ in EXTRACTOR_CHOICES:
155+
for extractor, _ in ArchiveResult.EXTRACTOR_CHOICES:
156156
if extractor not in exclude:
157157
existing = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
158158
# Check filesystem to see if anything is actually present (too slow, needs optimization/caching)

archivebox/logging_util.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -529,8 +529,8 @@ def log_shell_welcome_msg():
529529
from .cli import list_subcommands
530530

531531
print('{green}# ArchiveBox Imports{reset}'.format(**ANSI))
532-
print('{green}from archivebox.core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
533-
print('{green}from archivebox.cli import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
532+
print('{green}from core.models import Snapshot, ArchiveResult, Tag, User{reset}'.format(**ANSI))
533+
print('{green}from cli import *\n {}{reset}'.format("\n ".join(list_subcommands().keys()), **ANSI))
534534
print()
535535
print('[i] Welcome to the ArchiveBox Shell!')
536536
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#Shell-Usage')

0 commit comments

Comments
 (0)