diff --git a/pipeline.py b/pipeline.py index daf220c..764deb3 100644 --- a/pipeline.py +++ b/pipeline.py @@ -55,7 +55,7 @@ WGET_AT = find_executable( 'Wget+AT', HigherVersion( r'(GNU Wget 1\.[0-9]{2}\.[0-9]{1}-at\.[0-9]{8}\.[0-9]{2})[^0-9a-zA-Z\.-_]', - 'GNU Wget 1.21.3-at.20230623.01' + 'GNU Wget 1.21.3-at.20231213.03' ), [ './wget-at', @@ -72,7 +72,7 @@ if not WGET_AT: # # Update this each time you make a non-cosmetic change. # It will be added to the WARC files and reported to the tracker. -VERSION = '20231201.01' +VERSION = '20240216.01' TRACKER_ID = 'reddit' TRACKER_HOST = 'legacy-api.arpa.li' MULTI_ITEM_SIZE = 100 @@ -281,7 +281,7 @@ class WgetArgs(object): '--warc-compression-use-zstd', '--warc-zstd-dict-no-include', '--header', 'Accept-Language: en-US;q=0.9, en;q=0.8', - '--secure-protocol', 'PFS', + '--secure-protocol', 'TLSv1_2', #'--ciphers', '+ECDHE-RSA:+AES-256-CBC:+SHA384' ] dict_data = ZstdDict.get_dict() diff --git a/reddit.lua b/reddit.lua index ef257a5..45f4301 100644 --- a/reddit.lua +++ b/reddit.lua @@ -739,6 +739,8 @@ wget.callbacks.write_to_warc = function(url, http_stat) if ( string.match(url["url"], "^https?://[^/]+/svc/") and string.match(html, 'level%s*=%s*"') + and not string.match(html, '