|
|
|
@ -59,7 +59,7 @@ WGET_AT = find_executable(
|
|
|
|
|
),
|
|
|
|
|
[
|
|
|
|
|
'./wget-at',
|
|
|
|
|
'/home/warrior/data/wget-at'
|
|
|
|
|
'/home/warrior/data/wget-at-gnutls'
|
|
|
|
|
]
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
@ -72,7 +72,7 @@ if not WGET_AT:
|
|
|
|
|
#
|
|
|
|
|
# Update this each time you make a non-cosmetic change.
|
|
|
|
|
# It will be added to the WARC files and reported to the tracker.
|
|
|
|
|
VERSION = '20231115.01'
|
|
|
|
|
VERSION = '20231118.01'
|
|
|
|
|
TRACKER_ID = 'reddit'
|
|
|
|
|
TRACKER_HOST = 'legacy-api.arpa.li'
|
|
|
|
|
MULTI_ITEM_SIZE = 100
|
|
|
|
@ -280,8 +280,7 @@ class WgetArgs(object):
|
|
|
|
|
'--warc-dedup-url-agnostic',
|
|
|
|
|
'--warc-compression-use-zstd',
|
|
|
|
|
'--warc-zstd-dict-no-include',
|
|
|
|
|
'--header', 'Accept-Language: en-US;q=0.9, en;q=0.8',
|
|
|
|
|
'--ciphers', 'HIGH:+CHACHA20'
|
|
|
|
|
'--header', 'Accept-Language: en-US;q=0.9, en;q=0.8'
|
|
|
|
|
]
|
|
|
|
|
dict_data = ZstdDict.get_dict()
|
|
|
|
|
with open(os.path.join(item['item_dir'], 'zstdict'), 'wb') as f:
|
|
|
|
|