You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
117 lines
4.0 KiB
Python
117 lines
4.0 KiB
Python
import json
import re
import subprocess
import time
import traceback
from concurrent.futures import ProcessPoolExecutor
from json.decoder import JSONDecodeError
from threading import Timer

import requests

from onioningestor.operators import Operator
|
|
|
|
|
|
class Plugin(Operator):
    """OnionScraper main work logic.

    Handles reading the config file, calling sources, maintaining state and
    sending artifacts to operators.
    """

    def __init__(self, logger, denylist, **kwargs):
        """Set up the onionscan binary, scan timeout and Tor control settings.

        Args:
            logger: shared logger instance used for all plugin output.
            denylist: denylist handed through to the Operator base class.
            **kwargs: plugin configuration; requires 'name' and 'binpath'
                (path to the onionscan binary), optional 'timeout' in
                seconds (default 300).
        """
        super(Plugin, self).__init__(logger, denylist)
        self.name = kwargs['name']
        self.logger = logger
        self.logger.info(f'Initializing {self.name}')
        self.onionscan = kwargs['binpath']
        self.timeout = int(kwargs.get('timeout', 300))
        # BUG FIX: the original assigned the control port to self.torControl
        # and then immediately overwrote it with the password string.
        # renew_connection() indexes self.torControl['port'] and
        # self.torControl['password'], so both values must live in one dict.
        self.torControl = {
            'port': 9051,
            'password': "Zue5a29v4xE6FciWpPF93rR2M2T",
        }

    def parseDoc(self, data):
        """Trim an onionscan JSON report and queue newly discovered onions.

        Drops the bulky 'simpleReport' section, flattens 'crawls' to a list
        of URLs, and pushes every crawled .onion address (other than the
        scanned hidden service itself) onto the crawl queue.

        Args:
            data: parsed onionscan report (dict).

        Returns:
            The trimmed report dict.
        """
        data.pop('simpleReport', None)
        # NOTE(review): crawls/hiddenService default to None when absent,
        # but the code below assumes crawls is a dict -- an empty report
        # would raise here; confirm reports always carry these keys.
        crawls = data.pop('crawls', None)
        hiddenService = data.pop('hiddenService', None)
        data['crawls'] = [*crawls]
        crawl = set()
        # BUG FIX: escaped the dot before "onion"; the original unescaped
        # '.' matched any character.
        for onion in re.findall(r'\s?(\w+\.onion)', str(crawls.keys())):
            if onion != hiddenService:
                crawl.add(onion)
        # BUG FIX: the loop variable was named 'items' while the body used
        # 'item', which raised NameError on the first crawled onion.
        for item in crawl:
            print(f'crawling queue added: {item}')
            self.queueCrawl.put((
                3,
                self.onion(
                    url=item,
                    source='crawled',
                    type='domain',
                    status='offline',
                    monitor=False,
                    denylist=False)))
        return data

    # signal TOR for a new connection
    def renew_connection(self):
        """Ask the local Tor controller for a fresh circuit (new exit IP).

        NOTE(review): Controller and Signal come from the 'stem' package,
        but this file never imports them -- calling this method raises
        NameError until 'from stem import Signal' and
        'from stem.control import Controller' are added.
        """
        with Controller.from_port(port=self.torControl['port']) as controller:
            # Now we switch TOR identities to make sure we have a good connection
            self.logger.info('Getting new Tor IP')
            # authenticate to our local TOR controller
            controller.authenticate(self.torControl['password'])
            # send the signal for a new identity
            controller.signal(Signal.NEWNYM)
            # wait for the new identity to be initialized
            time.sleep(controller.get_newnym_wait())
        self.logger.info(f"IP is {requests.get('http://httpbin.org/ip').json()['origin']}")

    def handle_timeout(self, process, onion):
        """Kill a hung onionscan process and rotate the Tor identity.

        Invoked by the threading.Timer started in run_onionscan once the
        scan exceeds self.timeout seconds.

        Args:
            process: the subprocess.Popen running onionscan.
            onion: the onion address being scanned (kept for the Timer
                callback signature; not used here).
        """
        try:
            # kill the onionscan process -- it may already have exited
            process.kill()
            self.logger.info("[!!!] Killed the onionscan process.")
        except Exception:
            # best effort: nothing to do if the process is already gone
            pass
        self.renew_connection()
        return

    def run_onionscan(self, onion):
        """Run the onionscan binary against one onion and parse its output.

        Args:
            onion: onion address to scan.

        Returns:
            self.response("success", parsed_report) when the scan finished
            before the timeout, else self.response("failed", raw_output).
        """
        self.logger.info("[*] Running onionscan on %s", onion)

        # fire up onionscan
        process = subprocess.Popen(
            [self.onionscan, "--webport=0", "--jsonReport",
             "--simpleReport=false", onion],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

        # start the watchdog; it kills the process after self.timeout seconds
        process_timer = Timer(self.timeout, self.handle_timeout,
                              args=[process, onion])
        process_timer.start()

        # wait for the onion scan results
        stdout = process.communicate()[0]

        # timer still alive -> the scan finished on its own; cancel watchdog
        if process_timer.is_alive():
            process_timer.cancel()
            try:
                return self.response(
                    "success",
                    self.parseDoc(json.loads(stdout)))
            except JSONDecodeError:
                # NOTE(review): stdout is bytes here and parseDoc expects a
                # dict, so this fallback likely raises -- confirm intent.
                return self.response(
                    "success",
                    self.parseDoc(stdout))

        # timer fired: handle_timeout already killed the process
        self.logger.info("[!!!] Process timed out for %s", onion)
        print(stdout)
        return self.response("failed", stdout)

    def handle_onion(self, onion):
        """Scan one onion artifact and attach the scan results to it.

        Args:
            onion: artifact object exposing .url and .onionscan(results).
        """
        try:
            results = self.run_onionscan(onion.url)
            onion.onionscan(results)
        except Exception as e:
            self.logger.error(e)
            # BUG FIX: traceback.print_exc() returns None (it writes to
            # stderr); format_exc() returns the traceback string so it
            # actually reaches the logger.
            self.logger.error(traceback.format_exc())