You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
262 lines
10 KiB
Python
262 lines
10 KiB
Python
4 years ago
|
import re
|
||
|
import os
|
||
|
import sys
|
||
|
import json
|
||
|
import time
|
||
|
import random
|
||
|
import traceback
|
||
|
import subprocess
|
||
|
from uuid import uuid4
|
||
|
from pathlib import Path
|
||
|
from datetime import datetime as dt
|
||
|
from json.decoder import JSONDecodeError
|
||
|
from concurrent.futures import ProcessPoolExecutor
|
||
|
from threading import Timer
|
||
|
|
||
|
import requests
|
||
|
|
||
|
from stem.control import Controller
|
||
|
from stem import Signal
|
||
|
|
||
|
from selenium import webdriver
|
||
|
from selenium.webdriver.firefox.options import Options
|
||
|
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
|
||
|
|
||
|
from onionscraper.operators import Operator
|
||
|
|
||
|
class Plugin(Operator):
    """OnionScraper main work logic.

    Handles reading the config file, calling sources, maintaining state and
    sending artifacts to operators.

    Every public worker method reports its outcome through ``response()``,
    i.e. a dict with the keys ``status``, ``data`` and ``onion``.
    """

    # geckodriver location used when no ``geckodriver_path`` option is given.
    DEFAULT_GECKODRIVER = '/home/tony/Projects/OnionScraper/geckodriver'

    # Seconds the onionscan subprocess may run before it is killed.
    # NOTE(review): ``self.timeout`` is read from the config but never used;
    # it looks like it was meant to drive this limit - confirm its unit
    # before wiring it in.
    ONIONSCAN_TIMEOUT = 300

    def __init__(self, logger, **kwargs):
        """Read the plugin options and open the first Tor-proxied session.

        logger: logger object used for all diagnostics
        kwargs: plugin options. Required keys: ``binpath``, ``timeout``,
            ``socks5``, ``TorController``, ``retries``, ``blacklist`` and
            ``interestingKeywords`` (the last two are comma-separated
            strings). Optional keys: ``screenshots_path`` and
            ``geckodriver_path``.
        """
        self.logger = logger
        self.logger.info('Initializing OnionScanner')

        screenshots = kwargs.pop('screenshots_path', None)
        if screenshots:
            self.screenshots = Path(screenshots)
        else:
            self.screenshots = Path(__file__).parents[1] / 'screenshots'

        self.geckodriver = kwargs.get('geckodriver_path',
                                      self.DEFAULT_GECKODRIVER)
        self.onionscan = kwargs['binpath']
        self.timeout = int(kwargs['timeout'])
        self.proxy = kwargs['socks5']
        self.torControl = kwargs['TorController']
        self.retries = int(kwargs['retries'])
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }

        self.blacklist = self._compile_keywords(kwargs['blacklist'])
        self.keywords = self._compile_keywords(kwargs['interestingKeywords'])
        self.session = self.get_tor_session()

    @staticmethod
    def _compile_keywords(csv_words):
        """Compile a comma-separated keyword list into one case-insensitive regex.

        Blank entries are dropped: an empty alternative matches every string,
        so an empty option (or a trailing comma) would otherwise flag
        everything. With no usable keyword the returned pattern never matches.
        """
        words = [re.escape(word) for word in csv_words.split(',') if word.strip()]
        if not words:
            return re.compile(r'(?!)')
        return re.compile('|'.join(words), re.IGNORECASE)

    @staticmethod
    def _web_detected(report_text):
        """Return True when an onionscan JSON report flags a web service.

        Anything that is not a JSON object counts as "not detected".
        """
        try:
            report = json.loads(report_text)
        except (TypeError, ValueError):
            return False
        if not isinstance(report, dict):
            return False
        # Accept the JSON boolean as well as the string form the original
        # comparison was written against.
        return report.get('webDetected') in (True, 'true')

    def response(self, status, content, onion):
        """Wrap a result in the common response envelope.

        status: success/failure
        content: dict
        onion: str
        return: dict
        """
        return {'status': status, 'data': content, 'onion': onion}

    def parseDoc(self, data):
        """Reshape a document in place before it is handed on.

        Drops ``simpleReport``, replaces the ``crawls`` entry of
        ``data['onionscan']`` by the list of its keys and moves
        ``hiddenService`` to the top level of ``data``.

        data: dict holding an ``onionscan`` dict
        return: the same dict, modified
        """
        scan = data['onionscan']
        scan.pop('simpleReport', None)
        # A report without crawls is treated as an empty crawl set instead of
        # failing on None.
        crawls = scan.pop('crawls', None) or {}
        hidden_service = scan.pop('hiddenService', None)
        scan['crawls'] = list(crawls)
        data['hiddenService'] = hidden_service
        for onion in crawls:
            self.logger.debug(onion)
        return data

    def format_directory(self, directory):
        """Return ``directory/YYYY/MM/DD`` for today, creating it if needed.

        directory: base directory (Path or str)
        return: Path of the dated directory
        """
        today = dt.now()
        save_path = Path(directory) / str(today.year) / f'{today:%m}' / f'{today:%d}'
        if not save_path.is_dir():
            self.logger.info("[*] Creating directory to save screenshots")
            # exist_ok: another worker may create the directory between the
            # check above and this call.
            save_path.mkdir(parents=True, exist_ok=True)
        return save_path

    def take_screenshot(self, save_path, onion):
        """Load ``http://<onion>`` in headless Firefox and save a PNG.

        save_path: directory the screenshot is written to
        onion: str, address without scheme
        return: success response holding ``dateScreenshoted`` and ``filename``,
            or a failure response when no file was produced
        """
        profile = webdriver.FirefoxProfile()
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.socks', '127.0.0.1')
        profile.set_preference('network.proxy.socks_port', 9050)
        profile.set_preference('network.proxy.socks_remote_dns', True)

        options = Options()
        options.headless = True

        uid = str(uuid4()).split('-')[0]
        filename = f"{onion}_screenshot_{uid}.png"
        f_name = f"{save_path}/{filename}"

        driver = webdriver.Firefox(
            executable_path=self.geckodriver,
            options=options,
            firefox_profile=profile)
        try:
            driver.get('http://' + onion)
            driver.save_screenshot(f_name)
        finally:
            # Always shut the browser down, even when the page load fails.
            driver.quit()

        if os.path.isfile(f_name):
            self.logger.info(f'[*] Screenshot was taken. {f_name}')
            dateScreenshoted = dt.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f') + 'Z'
            result = {'dateScreenshoted': dateScreenshoted, 'filename': filename}
            return self.response("success", result, onion)

        self.logger.error('[x] Unable to take screenshot')
        return self.response("failure", None, onion)

    def get_tor_session(self):
        """Build a requests session using the configured proxy and headers.

        return: requests session
        raises: whatever the session set-up raised, after logging it
        """
        try:
            session = requests.session()
            session.proxies = self.proxy
            session.headers.update(self.headers)
        except Exception as e:
            self.logger.error(e)
            self.logger.debug(traceback.format_exc())
            raise
        return session

    # signal TOR for a new connection
    def renew_connection(self):
        """Ask the Tor controller for a new identity and switch sessions."""
        with Controller.from_port(port=self.torControl['port']) as controller:
            # Now we switch TOR identities to make sure we have a good connection
            self.logger.info('Getting new Tor IP')
            # authenticate to our local TOR controller
            controller.authenticate(self.torControl['password'])
            # send the signal for a new identity
            controller.signal(Signal.NEWNYM)
            # wait for the new identity to be initialized
            time.sleep(controller.get_newnym_wait())

            # Replace the working session: connections pooled by the old one
            # would keep travelling over the previous circuit.
            session = self.get_tor_session()
            previous = getattr(self, 'session', None)
            self.session = session
            if previous is not None:
                previous.close()

            # The IP lookup is purely informational, so its failure must not
            # abort the caller.
            try:
                origin = session.get('http://httpbin.org/ip').json()['origin']
            except (requests.RequestException, ValueError, KeyError, TypeError) as e:
                self.logger.debug(f'Unable to determine new Tor IP: {e}')
            else:
                self.logger.info(f"IP is {origin}")

    def handle_timeout(self, process, onion):
        """Handle a timeout from the onionscan process.

        Kills the process (best effort) and requests a new Tor identity.

        process: the running onionscan subprocess
        onion: str, address that was being scanned
        """
        try:
            # kill the onionscan process
            process.kill()
            self.logger.info("[!!!] Killed the onionscan process.")
        except Exception as e:
            # Best effort: the process may already be gone.
            self.logger.debug(f'Unable to kill onionscan process for {onion}: {e}')
        self.renew_connection()
        return

    def run_sessions(self, onion):
        """Fetch ``http://<onion>`` through Tor, retrying on a new identity.

        A 200 reply is returned as parsed JSON when possible, otherwise as
        text. At most ``self.retries + 1`` attempts are made.

        onion: str, address without scheme
        return: success response with the content, or a failure response
        """
        url = 'http://' + onion
        retry = 0
        while True:
            result = None
            self.logger.info(url)
            try:
                reply = self.session.get(url)
            except Exception as e:
                self.logger.error(e)
                self.logger.debug(traceback.format_exc())
            else:
                if reply.status_code == 200:
                    try:
                        result = reply.json()
                    except ValueError as e:
                        # Every JSON decode error is a ValueError, whichever
                        # JSON backend requests uses.
                        self.logger.debug(f'JSONDecodeError {e}')
                        result = reply.text

            if result:
                return self.response("success", result, onion)

            self.logger.info('[x] No results found retrying ...')
            retry += 1
            if retry > self.retries:
                self.logger.error('[x] Max retries exceeded')
                return self.response("failure", None, onion)
            # Only pay for a new circuit when another attempt follows.
            self.renew_connection()

    def run_onionscan(self, onion):
        """Run the onionscan binary against ``onion``.

        onion: str, address to scan
        return: success response whose data is the raw JSON report text, or a
            failure response when the scan exceeded ``ONIONSCAN_TIMEOUT``
        """
        self.logger.info("[*] Running onionscan on %s", onion)

        # fire up onionscan
        process = subprocess.Popen(
            [self.onionscan, "--webport=0", "--jsonReport", "--simpleReport=false", onion],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

        # Waiting with a timeout (rather than a Timer thread) makes the
        # outcome unambiguous: a killed scan can never be reported as success.
        try:
            stdout = process.communicate(timeout=self.ONIONSCAN_TIMEOUT)[0]
        except subprocess.TimeoutExpired:
            self.logger.info("[!!!] Process timed out for %s", onion)
            try:
                self.handle_timeout(process, onion)
            except Exception as e:
                self.logger.error(e)
                self.logger.debug(traceback.format_exc())
            # Reap the killed child and close its pipes.
            process.communicate()
            return self.response("failure", None, onion)

        return self.response("success", stdout.decode(), onion)

    def handle_onion(self, onion_tuple):
        """Process one onion: blacklist check, onionscan, then content fetch.

        Errors are logged, never raised.

        onion_tuple: object exposing the address as ``.url``
        return: None
        """
        onion = onion_tuple.url
        self.logger.info(f'Processing {onion} with onionscan')
        try:
            blacklist_URL = self.blacklist.search(onion)
            if blacklist_URL:
                self.logger.info(f"[X] Blocked by blacklist => matched keyword {blacklist_URL.group()}")
                return

            self.logger.debug("[*] URL blacklist test: PASSED")
            results = self.run_onionscan(onion)
            # results['data'] is the report as text, so it has to be parsed
            # before the webDetected flag can be read.
            if results['status'] == 'success' and self._web_detected(results['data']):
                content = self.run_sessions(onion)
                self.logger.debug(content)
                # TODO: finish the pipeline - run the blacklist over the
                # fetched content, take a screenshot via
                # take_screenshot(format_directory(self.screenshots), onion),
                # build the document (onionscan report, html, screenshots,
                # self.keywords.findall(...)) and return self.parseDoc(doc).
            else:
                self.logger.info(f"[x] hidden service {onion} is not active")
        except Exception as e:
            self.logger.error(e)
            self.logger.error(traceback.format_exc())
|
||
|
|
||
|
|
||
|
|