master
spike 4 years ago
parent c68c0e138f
commit f5fcb21d73

3
.gitignore vendored

@ -1,3 +1,4 @@
thumbnails/ thumbnails/
images/ images/
messages.html *.html
__pycache__/

Binary file not shown.

@ -0,0 +1,62 @@
#!/usr/bin/env python
import json
from binascii import Error as BinAsciiError

import unpaddedbase64
from Crypto import Random
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto.Util import Counter
class EncryptionError(Exception):
    """Raised when an attachment fails its integrity check or cannot be decrypted."""
def decrypt(ciphertext: bytes, key: str, hash: str, iv: str) -> bytes:
    """Decrypt an encrypted attachment.

    The ciphertext is first checked against the supplied SHA-256 digest and
    only then decrypted with AES in CTR mode.

    Args:
        ciphertext (bytes): The data to decrypt.
        key (str): Base64 encoded AES key (callers pass the ``k`` field of
            the attachment's JWK key object).
        hash (str): Base64 encoded SHA-256 hash of the ciphertext.
        iv (str): Base64 encoded 16 byte AES-CTR IV.

    Returns:
        The plaintext bytes.

    Raises:
        EncryptionError: If the integrity check fails, if the key or IV
            cannot be base64-decoded, or if the cipher rejects the key or
            counter parameters.
    """
    # Verify integrity before touching the key material.
    expected_hash = unpaddedbase64.decode_base64(hash)
    digest = SHA256.new()
    digest.update(ciphertext)
    if digest.digest() != expected_hash:
        raise EncryptionError("Mismatched SHA-256 digest.")

    try:
        byte_key: bytes = unpaddedbase64.decode_base64(key)
    except (BinAsciiError, TypeError) as e:
        raise EncryptionError("Error decoding key.") from e

    try:
        # Drop last 8 bytes, which are 0
        byte_iv: bytes = unpaddedbase64.decode_base64(iv)[:8]
    except (BinAsciiError, TypeError) as e:
        raise EncryptionError("Error decoding initial values.") from e

    # 64-bit counter starting at 0, prefixed by the first 8 bytes of the IV.
    ctr = Counter.new(64, prefix=byte_iv, initial_value=0)

    try:
        cipher = AES.new(byte_key, AES.MODE_CTR, counter=ctr)
    except ValueError as e:
        raise EncryptionError(e) from e

    return cipher.decrypt(ciphertext)
# if __name__ == "__main__":
# with open('images/output', 'wb') as output:
# with open('images/LUJAssHxtTWsnYPbSlTcMdvl.octet-stream', 'rb') as cipher:
# with open('images/LUJAssHxtTWsnYPbSlTcMdvl.metadata', 'r') as rawmeta:
# meta = json.load(rawmeta)
# key = meta['file']['key']
# decrypted = decrypt(cipher.read(), key['k'], meta['file']['hashes']['sha256'], meta['file']['iv'])
# output.write(decrypted)

@ -1,11 +1,15 @@
# Requires PyCrypto and python-magic
import sys
from pathlib import Path from pathlib import Path
from urllib.parse import urlparse from urllib.parse import urlparse
import json
import magic
import click import click
import requests import requests
import database_connection # noqa: F401 import database_connection # noqa: F401
import decrypt_attachment
from matrix_connection import get_download_url from matrix_connection import get_download_url
from schema import Message from schema import Message
@ -19,20 +23,44 @@ def download_stem(message, prefer_thumbnails):
def run_downloads(messages, download_dir, prefer_thumbnails): def run_downloads(messages, download_dir, prefer_thumbnails):
for msg in messages: for msg in messages:
image_url = (msg.thumbnail_url if prefer_thumbnails else None) or msg.image_url image_url = (msg.thumbnail_url if prefer_thumbnails else None) or msg.image_url
res = requests.head(get_download_url(image_url)) download_url = get_download_url(image_url).replace('//bitcointechweekly.com','//matrix.bitcointechweekly.com:8448')
assert res.status_code == 200 try:
mtype, subtype = res.headers['content-type'].split('/', 2) res = requests.head(download_url, verify=False)
if mtype != 'image': assert res.status_code == 200
print(f"Skipping {image_url}: {res.headers['content-type']}") print(res.status_code)
continue # mtype, subtype = res.headers['content-type'].split('/', 2)
# if mtype != 'image':
res = requests.get(get_download_url(image_url)) # print(f"Skipping {download_url}: {res.headers['content-type']}")
assert res.status_code == 200 # continue
filename = (download_dir / download_stem(msg, prefer_thumbnails) res = requests.get(download_url, verify=False)
# print(res.status_code)
assert res.status_code == 200
filename = (download_dir / download_stem(msg, prefer_thumbnails))
print('Downloading', download_url, '->', filename)
#Encryption details
file_meta = msg.content.get('file')
key_meta = file_meta['key']['k']
iv = file_meta['iv']
_hash = file_meta['hashes']['sha256']
decrypted = decrypt_attachment.decrypt(res.content, key_meta, _hash, iv)
mtype, subtype = magic.from_buffer(decrypted, mime=True).split('/', 2)
filename = (download_dir / download_stem(msg, prefer_thumbnails)
).with_suffix('.' + subtype) ).with_suffix('.' + subtype)
print('Downloading', image_url, '->', filename)
with open(filename, 'wb') as fp: # metadata_file = (download_dir / download_stem(msg, prefer_thumbnails)
fp.write(res.content) # ).with_suffix('.metadata')
with open(filename, 'wb') as fp:
fp.write(decrypted)
# with open(metadata_file, 'w') as fp:
# json.dump(msg.content, fp)
except:
e = sys.exc_info()[0]
print( "<p>Error downloading '%s' : %s</p>" % (download_url, e) )
# break
@click.command() @click.command()
@ -49,9 +77,9 @@ def download_images(thumbnails, output):
new_messages = [msg for msg in messages new_messages = [msg for msg in messages
if download_stem(msg, thumbnails) if download_stem(msg, thumbnails)
not in current_stems] not in current_stems]
skip_count = len(messages) - len(new_messages) # skip_count = len(messages) - len(new_messages)
if skip_count: # if skip_count:
print(f"Skipping {skip_count} already-downloaded {noun}") # print(f"Skipping {skip_count} already-downloaded {noun}")
if new_messages: if new_messages:
print(f"Downloading {len(new_messages)} new {noun}...") print(f"Downloading {len(new_messages)} new {noun}...")
else: else:

@ -32,13 +32,12 @@ def replace_by_local_image(data):
data = data.copy() data = data.copy()
content = data['content'] content = data['content']
if content.get('msgtype') == 'm.image' and 'info' in content: if content.get('msgtype') == 'm.image' and 'info' in content:
url = content['file']['url'] if 'file' in content else content['url'] url, mimetype = content['url'], content.get('info', {}).get('mimetype', 'image/jpeg')
mimetype = content['info']['mimetype'] if 'thumbnail_url' in content.get('info', {}) :
if 'thumbnail_url' in content['info'] and content['info']['thumbnail_url'] != '':
url, mimetype = content['info']['thumbnail_url'], content['info']['thumbnail_info']['mimetype'] url, mimetype = content['info']['thumbnail_url'], content['info']['thumbnail_info']['mimetype']
_, subtype = mimetype.split('/', 2) _, subtype = mimetype.split('/', 2)
url = urlparse(url) url = urlparse(url)
content['url'] = 'thumbnails/' + url.path.strip('/') + '.' + subtype content['url'] = 'images/' + os.path.basename(url.path.strip('/')) + '.' + subtype
return data return data

@ -21,5 +21,4 @@ class Message(Document):
@property @property
def thumbnail_url(self): def thumbnail_url(self):
return (self.content['info'].get('thumbnail_url') return self.content['url'] if self.is_image() else None
if self.is_image() else None)

Loading…
Cancel
Save