From f5fcb21d7389443fdbfab619062de37ceb0e412f Mon Sep 17 00:00:00 2001 From: spike Date: Fri, 28 Aug 2020 13:41:07 +0100 Subject: [PATCH] update --- .gitignore | 3 +- __pycache__/schema.cpython-38.pyc | Bin 1282 -> 0 bytes decrypt_attachment.py | 62 ++++++++++++++++++++++++++++++ download_images.py | 62 ++++++++++++++++++++++-------- export_messages.py | 7 ++-- schema.py | 3 +- 6 files changed, 113 insertions(+), 24 deletions(-) delete mode 100644 __pycache__/schema.cpython-38.pyc create mode 100644 decrypt_attachment.py diff --git a/.gitignore b/.gitignore index 623ecd5..6af41d0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ thumbnails/ images/ -messages.html +*.html +__pycache__/ diff --git a/__pycache__/schema.cpython-38.pyc b/__pycache__/schema.cpython-38.pyc deleted file mode 100644 index 946068f1f817a573f81d8fbe89bb3af11d637297..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1282 zcmZuxO>fjN5Vf7`Ci~THTR=tPg2ZK2k>$hzp{jxkxX??>C8`vKc)A-JCtD{e?5a0< z<;D+akMNUx<-}j$0yECGDip6gI}?v5@4d0}ZZPOF98X7YqVEo4KPb34Tnu(_mQT<~ zCV9pb54jSdP)_J5H*{4iY;pFQNg_Ttp^mxn9Y-m2`&!8r?u zuqIdJ>N)$&!*%qBa?Sc1avjNU81F5VWjujJ`(d#4EZ902apP+^sZ$Md;T}E;wv4BX zLPe<@3GE{j)2s9Jq=x8YT1^-29lP(vi6R{fgy`%UPn8oYweVDsDg=u)y`;!1>W(vk>Kiw13E9y% zrq{8?ukG=)P%tiM=`oChtVrT)Tk8XOQnLV#al!u(Gt0b&KbL4mxu=mA)WhdACD%|5qrj|b%s*j!6dyxhIm`xjoHA^ zbnN>hOeIDQ2C-PX#y~RA>Mao0YCgbyOAwn^ATERymwX8Ym37nns4#S<;DMPW$&d7rz&GukignPGO-Dka=d%i=C1?DZE^vP}*F?gyxzPcVTvHpghlYxlq^8gNd<7@jZu1kqBX}35 q&De1>}=7Z}4gANQ2B0BxE-Ge=jut^G2L)x*4|Z51c=rHZ67l diff --git a/decrypt_attachment.py b/decrypt_attachment.py new file mode 100644 index 0000000..df2ce0b --- /dev/null +++ b/decrypt_attachment.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +import json +import unpaddedbase64 +from Crypto import Random +from Crypto.Cipher import AES +from Crypto.Hash import SHA256 +from Crypto.Util import Counter + +class EncryptionError(Exception): + pass + +def decrypt(ciphertext: bytes, key: str, hash: str, iv: str): + """Decrypt an encrypted attachment. + Args: + ciphertext (bytes): The data to decrypt. + key (str): AES_CTR JWK key object. + hash (str): Base64 encoded SHA-256 hash of the ciphertext. + iv (str): Base64 encoded 16 byte AES-CTR IV. + Returns: + The plaintext bytes. + Raises: + EncryptionError if the integrity check fails. + """ + expected_hash = unpaddedbase64.decode_base64(hash) + + h = SHA256.new() + h.update(ciphertext) + + if h.digest() != expected_hash: + raise EncryptionError("Mismatched SHA-256 digest.") + + try: + byte_key: bytes = unpaddedbase64.decode_base64(key) + except (BinAsciiError, TypeError): + raise EncryptionError("Error decoding key.") + + try: + # Drop last 8 bytes, which are 0 + byte_iv: bytes = unpaddedbase64.decode_base64(iv)[:8] + except (BinAsciiError, TypeError): + raise EncryptionError("Error decoding initial values.") + + ctr = Counter.new(64, prefix=byte_iv, initial_value=0) + + try: + cipher = AES.new(byte_key, AES.MODE_CTR, counter=ctr) + except ValueError as e: + raise EncryptionError(e) + + return cipher.decrypt(ciphertext) + + +# if __name__ == "__main__": + # with open('images/output', 'wb') as output: + # with open('images/LUJAssHxtTWsnYPbSlTcMdvl.octet-stream', 'rb') as cipher: + # with open('images/LUJAssHxtTWsnYPbSlTcMdvl.metadata', 'r') as rawmeta: + # meta = json.load(rawmeta) + # key = meta['file']['key'] + # decrypted = decrypt_attachment(cipher.read(), key['k'], meta['file']['hashes']['sha256'], meta['file']['iv']) + # output.write(decrypted) + + diff --git a/download_images.py b/download_images.py index 7ae35db..518d334 100644 --- a/download_images.py +++ b/download_images.py @@ -1,11 +1,15 @@ - +# Requires PyCrypto and python-magic +import sys from pathlib import Path from urllib.parse import urlparse +import json +import magic import click import requests import database_connection # noqa: F401 +import decrypt_attachment from matrix_connection import get_download_url from schema import Message @@ -19,20 +23,44 @@ def download_stem(message, prefer_thumbnails): def run_downloads(messages, download_dir, prefer_thumbnails): for msg in messages: image_url = (msg.thumbnail_url if prefer_thumbnails else None) or msg.image_url - res = requests.head(get_download_url(image_url)) - assert res.status_code == 200 - mtype, subtype = res.headers['content-type'].split('/', 2) - if mtype != 'image': - print(f"Skipping {image_url}: {res.headers['content-type']}") - continue - - res = requests.get(get_download_url(image_url)) - assert res.status_code == 200 - filename = (download_dir / download_stem(msg, prefer_thumbnails) + download_url = get_download_url(image_url).replace('//bitcointechweekly.com','//matrix.bitcointechweekly.com:8448') + try: + res = requests.head(download_url, verify=False) + assert res.status_code == 200 + print(res.status_code) + # mtype, subtype = res.headers['content-type'].split('/', 2) + # if mtype != 'image': + # print(f"Skipping {download_url}: {res.headers['content-type']}") + # continue + res = requests.get(download_url, verify=False) + # print(res.status_code) + assert res.status_code == 200 + + filename = (download_dir / download_stem(msg, prefer_thumbnails)) + print('Downloading', download_url, '->', filename) + + #Encryption details + file_meta = msg.content.get('file') + key_meta = file_meta['key']['k'] + iv = file_meta['iv'] + _hash = file_meta['hashes']['sha256'] + decrypted = decrypt_attachment.decrypt(res.content, key_meta, _hash, iv) + mtype, subtype = magic.from_buffer(decrypted, mime=True).split('/', 2) + + filename = (download_dir / download_stem(msg, prefer_thumbnails) ).with_suffix('.' + subtype) - print('Downloading', image_url, '->', filename) - with open(filename, 'wb') as fp: - fp.write(res.content) + + # metadata_file = (download_dir / download_stem(msg, prefer_thumbnails) + # ).with_suffix('.metadata') + + with open(filename, 'wb') as fp: + fp.write(decrypted) + # with open(metadata_file, 'w') as fp: + # json.dump(msg.content, fp) + except: + e = sys.exc_info()[0] + print( "

Error downloading '%s' : %s

" % (download_url, e) ) + # break @click.command() @@ -49,9 +77,9 @@ def download_images(thumbnails, output): new_messages = [msg for msg in messages if download_stem(msg, thumbnails) not in current_stems] - skip_count = len(messages) - len(new_messages) - if skip_count: - print(f"Skipping {skip_count} already-downloaded {noun}") + # skip_count = len(messages) - len(new_messages) + # if skip_count: + # print(f"Skipping {skip_count} already-downloaded {noun}") if new_messages: print(f"Downloading {len(new_messages)} new {noun}...") else: diff --git a/export_messages.py b/export_messages.py index b35d249..e501ac1 100644 --- a/export_messages.py +++ b/export_messages.py @@ -32,13 +32,12 @@ def replace_by_local_image(data): data = data.copy() content = data['content'] if content.get('msgtype') == 'm.image' and 'info' in content: - url = content['file']['url'] if 'file' in content else content['url'] - mimetype = content['info']['mimetype'] - if 'thumbnail_url' in content['info'] and content['info']['thumbnail_url'] != '': + url, mimetype = content['url'], content.get('info', {}).get('mimetype', 'image/jpeg') + if 'thumbnail_url' in content.get('info', {}) : url, mimetype = content['info']['thumbnail_url'], content['info']['thumbnail_info']['mimetype'] _, subtype = mimetype.split('/', 2) url = urlparse(url) - content['url'] = 'thumbnails/' + url.path.strip('/') + '.' + subtype + content['url'] = 'images/' + os.path.basename(url.path.strip('/')) + '.' + subtype return data diff --git a/schema.py b/schema.py index 4afab35..c39a082 100644 --- a/schema.py +++ b/schema.py @@ -21,5 +21,4 @@ class Message(Document): @property def thumbnail_url(self): - return (self.content['info'].get('thumbnail_url') - if self.is_image() else None) + return self.content['url'] if self.is_image() else None