master
spike 4 years ago
parent c68c0e138f
commit f5fcb21d73

3
.gitignore vendored

@ -1,3 +1,4 @@
thumbnails/
images/
messages.html
*.html
__pycache__/

Binary file not shown.

@ -0,0 +1,62 @@
#!/usr/bin/env python
import json
import unpaddedbase64
from Crypto import Random
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto.Util import Counter
class EncryptionError(Exception):
    """Raised when an attachment fails its integrity check or cannot be decrypted."""
def decrypt(ciphertext: bytes, key: str, hash: str, iv: str):
    """Decrypt an encrypted attachment.

    Args:
        ciphertext (bytes): The data to decrypt.
        key (str): Unpadded base64 encoded AES-CTR key (callers pass the
            ``k`` field of the attachment's key object).
        hash (str): Base64 encoded SHA-256 hash of the ciphertext.
        iv (str): Base64 encoded 16 byte AES-CTR IV.

    Returns:
        The plaintext bytes.

    Raises:
        EncryptionError: if the integrity check fails, if the key or IV
            cannot be base64-decoded, or if the cipher rejects the key.
    """
    # Imported here so the except clauses below can actually resolve the
    # name; the module's import block does not provide it.
    from binascii import Error as BinAsciiError

    # Verify integrity before touching the key material.
    expected_hash = unpaddedbase64.decode_base64(hash)
    h = SHA256.new()
    h.update(ciphertext)

    if h.digest() != expected_hash:
        raise EncryptionError("Mismatched SHA-256 digest.")

    try:
        byte_key: bytes = unpaddedbase64.decode_base64(key)
    except (BinAsciiError, TypeError) as err:
        raise EncryptionError("Error decoding key.") from err

    try:
        # Drop last 8 bytes, which are 0
        byte_iv: bytes = unpaddedbase64.decode_base64(iv)[:8]
    except (BinAsciiError, TypeError) as err:
        raise EncryptionError("Error decoding initial values.") from err

    # The 8 IV bytes kept above form the counter block prefix; the remaining
    # 64 bits are the running counter, starting at zero.
    ctr = Counter.new(64, prefix=byte_iv, initial_value=0)

    try:
        cipher = AES.new(byte_key, AES.MODE_CTR, counter=ctr)
    except ValueError as e:
        raise EncryptionError(e) from e

    return cipher.decrypt(ciphertext)
# Example: decrypt a downloaded attachment using its saved metadata.
# if __name__ == "__main__":
#     with open('images/output', 'wb') as output:
#         with open('images/LUJAssHxtTWsnYPbSlTcMdvl.octet-stream', 'rb') as cipher:
#             with open('images/LUJAssHxtTWsnYPbSlTcMdvl.metadata', 'r') as rawmeta:
#                 meta = json.load(rawmeta)
#                 key = meta['file']['key']
#                 decrypted = decrypt(cipher.read(), key['k'], meta['file']['hashes']['sha256'], meta['file']['iv'])
#                 output.write(decrypted)

@ -1,11 +1,15 @@
# Requires PyCrypto and python-magic
import sys
from pathlib import Path
from urllib.parse import urlparse
import json
import magic
import click
import requests
import database_connection # noqa: F401
import decrypt_attachment
from matrix_connection import get_download_url
from schema import Message
@ -19,20 +23,44 @@ def download_stem(message, prefer_thumbnails):
def run_downloads(messages, download_dir, prefer_thumbnails):
    """Download each message's image, decrypt it if needed, and save it.

    Args:
        messages: Iterable of message objects exposing ``thumbnail_url``,
            ``image_url`` and ``content``.
        download_dir: Directory path object (joined with ``/``) that
            receives the files.
        prefer_thumbnails: When true, use a message's thumbnail URL if it
            has one, falling back to the full image URL.

    A failure on one message is reported on stdout and does not stop the
    remaining downloads.
    """
    for msg in messages:
        image_url = (msg.thumbnail_url if prefer_thumbnails else None) or msg.image_url
        # Media is fetched from the matrix. host on port 8448 instead of the
        # bare domain that get_download_url() produces.
        download_url = get_download_url(image_url).replace(
            '//bitcointechweekly.com', '//matrix.bitcointechweekly.com:8448')
        try:
            # NOTE(review): verify=False disables TLS certificate checking
            # for both requests; confirm this is still required.
            res = requests.head(download_url, verify=False)
            if res.status_code != 200:
                raise RuntimeError(
                    f"HEAD returned HTTP {res.status_code}")

            res = requests.get(download_url, verify=False)
            if res.status_code != 200:
                raise RuntimeError(
                    f"GET returned HTTP {res.status_code}")

            # Encryption details
            file_meta = msg.content.get('file')
            if file_meta is None:
                # No encryption metadata on this message; presumably the
                # body is the image itself, so store it unchanged.
                payload = res.content
            else:
                payload = decrypt_attachment.decrypt(
                    res.content,
                    file_meta['key']['k'],
                    file_meta['hashes']['sha256'],
                    file_meta['iv'],
                )

            # The response content type is not consulted, so the file
            # extension is sniffed from the (decrypted) bytes.
            _, subtype = magic.from_buffer(payload, mime=True).split('/', 1)
            filename = (download_dir / download_stem(msg, prefer_thumbnails)
                        ).with_suffix('.' + subtype)
            print('Downloading', download_url, '->', filename)

            with open(filename, 'wb') as fp:
                fp.write(payload)
        except Exception as err:
            # Best effort: report and move on to the next message, while
            # letting KeyboardInterrupt / SystemExit propagate.
            print("<p>Error downloading '%s' : %s</p>" % (download_url, repr(err)))
@click.command()
@ -49,9 +77,9 @@ def download_images(thumbnails, output):
new_messages = [msg for msg in messages
if download_stem(msg, thumbnails)
not in current_stems]
skip_count = len(messages) - len(new_messages)
if skip_count:
print(f"Skipping {skip_count} already-downloaded {noun}")
# skip_count = len(messages) - len(new_messages)
# if skip_count:
# print(f"Skipping {skip_count} already-downloaded {noun}")
if new_messages:
print(f"Downloading {len(new_messages)} new {noun}...")
else:

@ -32,13 +32,12 @@ def replace_by_local_image(data):
data = data.copy()
content = data['content']
if content.get('msgtype') == 'm.image' and 'info' in content:
url = content['file']['url'] if 'file' in content else content['url']
mimetype = content['info']['mimetype']
if 'thumbnail_url' in content['info'] and content['info']['thumbnail_url'] != '':
url, mimetype = content['url'], content.get('info', {}).get('mimetype', 'image/jpeg')
if 'thumbnail_url' in content.get('info', {}) :
url, mimetype = content['info']['thumbnail_url'], content['info']['thumbnail_info']['mimetype']
_, subtype = mimetype.split('/', 2)
url = urlparse(url)
content['url'] = 'thumbnails/' + url.path.strip('/') + '.' + subtype
content['url'] = 'images/' + os.path.basename(url.path.strip('/')) + '.' + subtype
return data

@ -21,5 +21,4 @@ class Message(Document):
@property
def thumbnail_url(self):
    """Return ``content['url']`` for image messages, otherwise ``None``."""
    return self.content['url'] if self.is_image() else None

Loading…
Cancel
Save