First Commit

pull/1/head
danieleperera 4 years ago
commit 9986c99147

.gitignore vendored

@@ -0,0 +1,9 @@
OnionScraper.egg-info
screenshots
dump.rdb
onionscandb
config.ini
*.log
*.pyc
__pycache__
venv

@@ -0,0 +1,125 @@
<p align="center">
<img src="docs/img/logo.png">
</p>
<h1 align="center">OnionIngestor</h1>
<p align="center">
<a href="https://python.org/">
<img src="https://img.shields.io/pypi/pyversions/3.svg">
</a>
<a href="https://opensource.org">
<img src="https://img.shields.io/badge/Open%20Source-%E2%9D%A4-brightgreen.svg">
</a>
</p>
<p align="center">
An extendable tool to collect, crawl and monitor onion sites on the Tor network, and index the collected information in Elasticsearch
</p>
## Introduction
OnionIngestor is based on the structure of the ThreatIngestor tool. It gives Cyber Threat Intelligence teams modular, extendable access for monitoring and collecting information on hidden sites over the Tor network.
The project is at an early stage of development.
## To-do-list
- [ ] Add multiprocessing to improve analyzing speed
- [ ] Add more sources, such as Reddit, Gmail, Pastebin, Twitter and other hidden sites
- [ ] Add more operators, such as checking screenshots of monitored sites for changes, and adding YARA rules to eliminate false positives
- [ ] Add more notifiers, such as Slack, SMTP and Discord
## Basic Implementation Logic
OnionIngestor runs and manages three important types of classes:
Sources - These collect hidden sites from clear-net sources such as Pastebin, Twitter, Gist, and crawled links
Operators - These process each onion link; for example, they fetch the HTML, take screenshots and run other scanners like [onionscan](https://github.com/s-rah/onionscan)
Notifiers - These notify the user, both with a daily report and whenever a monitored hidden site changes
OnionIngestor is designed to run as a daemon: it collects hidden sites from the enabled sources and passes them to the operators;
when finished, it sleeps for a user-defined time and then restarts the process from the beginning, as the sketch below shows.
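A self-contained sketch of that loop (illustrative only; names like `db` and `sleep_seconds` are stand-ins, not the real OnionManager API):
```
import time

def ingest_loop(sources, operators, notifiers, db, sleep_seconds):
    # One pass of the OnionIngestor flow described above (a sketch, not the
    # real OnionManager): collect, process, notify, sleep, repeat.
    while True:
        for source in sources:              # gist, pastebin, twitter, ...
            onions = source.run()           # collect candidate .onion links
            for operator in operators:      # onionscan, screenshots, ...
                doc = operator.process(onions)
                db.save(doc)                # index results in Elasticsearch
        for notifier in notifiers:          # telegram, smtp, ... (planned)
            notifier.notify()               # hypothetical notify() API
        time.sleep(sleep_seconds)           # then start over
```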
<p align="center">
<img src="docs/img/workflow.png">
</p>
## Installation
Install the requirements:
pip install -r requirements.txt
After installing the Tor client and the required libraries, use the `--help` flag to get usage details.
```
python3 -m onionscraper --help
OnionScraper
A Python3 application for indexing and scraping hidden services into ElasticSearch
Installation:
This application assumes you have python3 and pip3 installed.
pip3 install -r requirements.txt
This software is provided subject to the MIT license stated below.
--------------------------------------------------
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------
optional arguments:
-h, --help show this help message and exit
-c CONFIGFILE, --config CONFIGFILE
Path to config file
--log {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Set the logging level, default is INFO
```
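For example, a typical invocation (assuming your config is saved as `config.yml`):
```
python3 -m onionscraper -c config.yml --log DEBUG
```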
The yaml config file contains all the information for OnionIngestor to work
### Operator [Onionscan](https://github.com/s-rah/onionscan)
To run the web app provided by onionscan:
onionscan --mode analysis -verbose -webport 8081
## Output
The output is JSON, and it is sent in the same format to the chosen syslog.
```
show output here
```
## Authors
Daniele Perera
## Acknowledgments
Special thanks to:
andreyglauzer
InQuest
s-rah
Their code was used to implement this project.
Feel free to fork or open an issue to collaborate with the project.
## License
This project is licensed under the [MIT](https://choosealicense.com/licenses/mit/) License - see the LICENSE.md file for details.

Binary file not shown (image added, 638 KiB)

Binary file not shown (image added, 32 KiB)

@@ -0,0 +1,76 @@
# This is an example OnionIngestor config file with a preconfigured gist
# source, feeding collected onions through onionscan into Elasticsearch.
general:
# Run once, or forever; when daemonized, check feeds once an hour.
daemon: False
sleep: 3600
elasticsearch:
index: darkweb
port : 9200
host : 127.0.0.1
sources:
# A few sources to get you started!
- name: source-gist
module: gist
url: https://gist.github.com/search?l=Text&q=.onion
# - name: source-reddit
# module: reddit
# url: https://api.pushshift.io/reddit/search/comment/?subreddit=onions&limit=1000000
# feed_type: messy
#
# - name: pastebin
# module: pastebin-account
# url: https://gist.github.com/search?l=Text&q=.onion
# feed_type: messy
#
# - name: hunchly-report
# module: gmail-hunchly
# url: https://gist.github.com/search?l=Text&q=.onion
# feed_type: messy
#
# - name: onionland-search
# module: collect-onions
# url: http://3bbaaaccczcbdddz.onion/discover
# feed_type: messy
#
# - name: torch
# module: collect-onions
# url: http://xmh57jrzrnw6insl.onion
# feed_type: messy
operators:
- name: onionscan-go
module: onionscan
binpath: /home/tony/go/bin/onionscan
socks5:
http: 'socks5h://127.0.0.1:9050'
https: 'socks5h://127.0.0.1:9050'
TorController:
port: 9051
password: Xk5QP2haFMh8Y8D1060F1D7xaWEFG
timeout: 300
retries: 2
screenshots_path: null
blacklist: pedo,xxx,infant,loli,porn,child,abuse,sex,drug,cocaine,dope,zoo,daddy,daughter,boy,girl,young,murder
interestingKeywords: t.me,feed,rss,xml,atom,dataleak,breach,blog,ransomware,source code,data breach
# - name: yara-rule
# module: yara
# filename: categories.yar
# base_score: 50
#
# - name: regex-match
# module: regex
# keywords: test,test2
# base_score: 20
notifiers:
# Simple telegram notifier
- name: telegram-notifier
module: telegram
chat_id:
token:
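# Sources and operators may also reference named credentials, which are
# resolved by the credentials() helper in config.py. A hypothetical example
# (the 'twitter' module shown here is illustrative, not shipped):
# credentials:
#   - name: twitter-auth
#     token: <your token>
# sources:
#   - name: source-twitter
#     module: twitter
#     credentials: twitter-auth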

Binary file not shown.

@@ -0,0 +1,131 @@
import sys
import time
import traceback
import collections
from . import config
from . import dbhandler
from . import loghandler
class OnionManager:
"""ThreatIngestor main work logic.
Handles reading the config file, calling sources, maintaining state, and
sending artifacts to operators.
"""
def __init__(self, args):
# Load logger
log = loghandler.LoggerHandler(args.logLevel)
self.logger = log.start_logging()
# Load config
self.config = config.Config(args.configFile, self.logger)
# Load Elasticsearch.
try:
self.es = dbhandler.DbHandlerElasticSearch(
self.config.elasticsearch(),
self.logger)
except Exception as e:
# Error loading elasticsearch.
self.logger.error(e)
self.logger.debug(traceback.print_exc())
sys.exit(1)
# Instantiate plugins.
try:
self.logger.info("Initializing sources")
self.sources = {name: source(self.logger, **kwargs)
for name, source, kwargs in self.config.sources()}
self.logger.info("initializing operators")
self.operators = {name: operator(self.logger, **kwargs)
for name, operator, kwargs in self.config.operators()}
self.logger.info("initializing notifiers")
#self.notifiers = {name: operator(**kwargs)
# for name, operator, kwargs in self.config.notifiers()}
except Exception as e:
# Error loading plugins.
self.logger.error(e)
self.logger.debug(traceback.print_exc())
sys.exit(1)
def run(self):
"""Run once, or forever, depending on config."""
if self.config.daemon():
self.logger.info("Running forever, in a loop")
self.run_forever()
else:
self.logger.info("Running once, to completion")
self.run_once()
def run_once(self):
"""Run each source once, passing artifacts to each operator."""
# Track some statistics about artifacts in a summary object.
summary = collections.Counter()
for source in self.sources:
# Run the source to collect artifacts.
self.logger.info(f"Running source '{source}'")
try:
onions = self.sources[source].run()
if onions:
self.logger.info('Found hidden links')
else:
self.logger.info('No links found')
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
continue
# Process artifacts with each operator.
for operator in self.operators:
self.logger.info(f"Processing found onions with operator '{operator}'")
try:
doc = self.operators[operator].process(onions)
# Save the source state.
self.es.save(doc)
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
continue
# # Record stats and update the summary.
# types = artifact_types(doc.get('interestingKeywords'))
# summary.update(types)
# for artifact_type in types:
# self.logger.info(f'types[artifact_type]')
# Log the summary.
self.logger.info(f"New artifacts: {dict(summary)}")
def run_forever(self):
"""Run forever, sleeping for the configured interval between each run."""
while True:
self.run_once()
self.logger.info(f"Sleeping for {self.config.sleep()} seconds")
time.sleep(self.config.sleep())
def artifact_types(artifact_list):
"""Return a dictionary with counts of each artifact type."""
types = {}
for artifact in artifact_list:
artifact_type = artifact.__class__.__name__.lower()
if artifact_type in types:
types[artifact_type] += 1
else:
types[artifact_type] = 1
return types
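# Example (hypothetical artifact objects): artifact_types([Domain(), Domain(), IPAddress()])
# would return {'domain': 2, 'ipaddress': 1}; the `summary` Counter in run_once()
# is updated from exactly this kind of mapping.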

@@ -0,0 +1,50 @@
"""OnionScraper
A Python3 application for indexing and scraping hidden services into ElasticSearch
Installation:
This application assumes you have python3 and pip3 installed.
pip3 install -r requirements.txt
This software is provided subject to the MIT license stated below.
--------------------------------------------------
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------
"""
import argparse
from onionscraper import OnionManager
# Load arguments from user
parser = argparse.ArgumentParser(
prog='onionscraper',
description=__doc__,formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-c', '--config',dest="configFile", required = True, help='Path to config file')
parser.add_argument("--log", dest="logLevel",default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help="Set the logging level, default is INFO")
args = parser.parse_args()
app = OnionManager(args)
app.run()

@@ -0,0 +1,170 @@
import io
import importlib
import traceback
import yaml
from pathlib import Path
SOURCE = 'onionscraper.sources'
OPERATOR = 'onionscraper.operators'
INTERNAL_OPTIONS = [
'saved_state',
'module',
'credentials',
]
ARTIFACT_TYPES = 'artifact_types'
FILTER_STRING = 'filter'
ALLOWED_SOURCES = 'allowed_sources'
NAME = 'name'
class Config:
"""Config read/write operations, and convenience methods."""
def __init__(self, filename, logger):
"""Read a config file."""
self.logger = logger
self.filename = filename
with io.open(self.filename, 'r') as f:
try:
self.logger.info("Loading config file")
self.config = yaml.safe_load(f.read())
except yaml.error.YAMLError:
self.logger.error("YAML error in config")
@staticmethod
def _load_plugin(plugin_type, plugin):
"""Returns plugin class or raises an exception.
:raises: threatingestor.exceptions.PluginError
"""
try:
module = importlib.import_module('.'.join([plugin_type, plugin]))
return module.Plugin
except Exception as e:
print(e)
print(traceback.print_exc())
def daemon(self):
"""Returns boolean, are we daemonizing?"""
return self.config['general']['daemon']
def elasticsearch(self):
"""Returns elasticsaerch config"""
return self.config['general']['elasticsearch']
def sleep(self):
"""Returns number of seconds to sleep between iterations, if daemonizing."""
return self.config['general']['sleep']
# def onionscanner(self):
# """Returns onionscanner config dict"""
# screenshots = self.config['onionscanner'].pop('screenshots_path', None)
# if screenshots:
# self.config['onionscanner']['screenshots_path'] = Path(screenshots)
# else:
# self.config['onionscanner']['screenshots_path'] = Path(__file__).parents[1]/'screenshots'
# blacklist = self.config['onionscanner'].pop('blacklist', None)
# if blacklist:
# self.config['onionscanner']['blacklist'] = blacklist.split(',')
# interestingKeywords = self.config['onionscanner'].pop('interestingKeywords', None)
# if interestingKeywords:
# self.config['onionscanner']['interestingKeywords'] = interestingKeywords.split(',')
# return self.config['onionscanner']
def notifiers(self):
"""Returns notifiers config dictionary."""
return self.config.get('notifiers', {})
def logging(self):
"""Returns logging config dictionary."""
return self.config.get('logging', {})
def credentials(self, credential_name):
"""Return a dictionary with the specified credentials."""
for credential in self.config['credentials']:
for key, value in credential.items():
if key == NAME and value == credential_name:
return credential
return {}
def sources(self):
"""Return a list of (name, Source class, {kwargs}) tuples.
:raises: threatingestor.exceptions.PluginError
"""
sources = []
for source in self.config['sources']:
kwargs = {}
for key, value in source.items():
if key not in INTERNAL_OPTIONS:
kwargs[key] = value
elif key == 'credentials':
# Grab these named credentials
credential_name = value
for credential_key, credential_value in self.credentials(credential_name).items():
if credential_key != NAME:
kwargs[credential_key] = credential_value
# load and initialize the plugin
self.logger.info(f"Found source '{source[NAME]}'")
sources.append((source[NAME], self._load_plugin(SOURCE, source['module']), kwargs))
self.logger.info(f"Found {len(sources)} total sources")
return sources
def operators(self):
"""Return a list of (name, Operator class, {kwargs}) tuples.
:raises: threatingestor.exceptions.PluginError
"""
operators = []
for operator in self.config['operators']:
kwargs = {}
for key, value in operator.items():
if key not in INTERNAL_OPTIONS:
if key == ARTIFACT_TYPES:
# parse out special artifact_types option
artifact_types = []
for artifact in value:
try:
artifact_types.append(threatingestor.artifacts.STRING_MAP[artifact.lower().strip()])
except KeyError:
# ignore invalid artifact types
pass
kwargs[key] = artifact_types
elif key == FILTER_STRING:
# pass in special filter_string option
kwargs['filter_string'] = value
elif key == NAME:
# exclude name key from operator kwargs, since it's not used
pass
else:
kwargs[key] = value
elif key == 'credentials':
# Grab these named credentials
credential_name = value
for credential_key, credential_value in self.credentials(credential_name).items():
if credential_key != NAME:
kwargs[credential_key] = credential_value
# load and initialize the plugin
self.logger.info(f"Found operator '{operator[NAME]}'")
operators.append((operator[NAME], self._load_plugin(OPERATOR, operator['module']), kwargs))
self.logger.info(f"Found {len(operators)} total operators")
return operators
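# Example: given the config entry
#   - name: source-gist
#     module: gist
#     url: https://gist.github.com/search?l=Text&q=.onion
# sources() imports onionscraper.sources.gist via _load_plugin and yields the
# tuple ('source-gist', gist.Plugin, {'url': 'https://gist.github.com/search?l=Text&q=.onion'}).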

@@ -0,0 +1,774 @@
import sys
import traceback
from elasticsearch import Elasticsearch, helpers
class DbHandlerElasticSearch:
def __init__(self, config, logger):
self.logger = logger
self.logger.info('Creating Elasticsearch mapping')
self.config = config
self.mapping = '''
{
"mappings": {
"_doc": {
"properties": {
"html": {
"type": "text"
},
"onionscan": {
"type": "nested",
"properties": {
"bitcoinDetected": {
"type": "boolean"
},
"bitcoinServices": {
"properties": {
"bitcoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"bitcoin_test": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"dogecoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"litecoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
},
"certificates": {
"type": "nested",
"properties": {
"AuthorityKeyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"BasicConstraintsValid": {
"type": "boolean"
},
"CRLDistributionPoints": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"DNSNames": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ExtKeyUsage": {
"type": "long"
},
"Extensions": {
"properties": {
"Critical": {
"type": "boolean"
},
"Id": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"IsCA": {
"type": "boolean"
},
"Issuer": {
"properties": {
"CommonName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Locality": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Names": {
"properties": {
"Type": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"Organization": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OrganizationalUnit": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Province": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"IssuingCertificateURL": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"KeyUsage": {
"type": "long"
},
"MaxPathLen": {
"type": "long"
},
"MaxPathLenZero": {
"type": "boolean"
},
"NotAfter": {
"type": "date"
},
"NotBefore": {
"type": "date"
},
"OCSPServer": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"PermittedDNSDomainsCritical": {
"type": "boolean"
},
"PolicyIdentifiers": {
"type": "long"
},
"PublicKey": {
"properties": {
"E": {
"type": "text"
},
"N": {
"type": "text"
}
}
},
"PublicKeyAlgorithm": {
"type": "long"
},
"Raw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawIssuer": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawSubject": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawSubjectPublicKeyInfo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawTBSCertificate": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text"
},
"Signature": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SignatureAlgorithm": {
"type": "long"
},
"Subject": {
"properties": {
"CommonName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Locality": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Names": {
"properties": {
"Type": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"Organization": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OrganizationalUnit": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Province": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"SubjectKeyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Version": {
"type": "long"
}
}
},
"crawls": {
"type": "nested",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dateScanned": {
"type": "date"
},
"f_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ftpBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ftpDetected": {
"type": "boolean"
},
"ftpFingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"hiddenService": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identifierReport": {
"properties": {
"analyticsIDs": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"bitcoinAddresses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"emailAddresses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"exifImages": {
"properties": {
"exifTags": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"location": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"foundApacheModStatus": {
"type": "boolean"
},
"linkedOnions": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"openDirectories": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"privateKeyDetected": {
"type": "boolean"
},
"serverVersion": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"ircDetected": {
"type": "boolean"
},
"lastAction": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"mongodbDetected": {
"type": "boolean"
},
"online": {
"type": "boolean"
},
"performedScans": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"pgpKeys": {
"properties": {
"armoredKey": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identity": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"ricochetDetected": {
"type": "boolean"
},
"skynetDetected": {
"type": "boolean"
},
"smtpBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"smtpDetected": {
"type": "boolean"
},
"smtpFingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sshBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sshDetected": {
"type": "boolean"
},
"sshKey": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timedOut": {
"type": "boolean"
},
"tlsDetected": {
"type": "boolean"
},
"vncDetected": {
"type": "boolean"
},
"webDetected": {
"type": "boolean"
},
"xmppDetected": {
"type": "boolean"
}
}
},
"screenshots": {
"type": "nested",
"properties": {
"dateScreenshoted": {
"type": "date"
},
"filename": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
'''
try:
self.es = Elasticsearch([{
'host':self.config['host'],
'port':self.config['port']}])
self.index = self.config['index']
self.es.indices.create(
index=self.index,
body=self.mapping,
ignore=400)
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.format_exc())
sys.exit(1)
def count(self):
self.es.indices.refresh(self.index)
status = self.es.count(index=self.index)
if status['_shards']['successful'] == 1:
self.logger.info('Successful')
self.logger.info('Count:%d',status['count'])
else:
self.logger.error(status)
def save(self, doc):
self.es.index(index=self.index,body=doc)
self.count()
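# Usage sketch, assuming the elasticsearch block from the example config:
#   es = DbHandlerElasticSearch({'index': 'darkweb', 'host': '127.0.0.1', 'port': 9200}, logger)
#   es.save({'hiddenService': 'xmh57jrzrnw6insl.onion', 'dateScanned': '2020-07-01T00:00:00Z'})
# save() indexes the document and count() logs the running document total.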

@@ -0,0 +1,33 @@
import os
import logging
from pathlib import Path
class LoggerHandler():
def __init__(self, level):
self.level = getattr(logging, level)
self.logger = logging.getLogger("OnionScraper")
self.logger.setLevel(self.level)
# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(self.level)
# create file logging
logFile = Path(__file__).parents[1]
logging_path = os.path.join(logFile, "info.log")
fh = logging.FileHandler(logging_path)
# create formatter
formatter = logging.Formatter('[%(asctime)s] - %(name)s - %(levelname)s - %(message)s',datefmt='%a, %d %b %Y %H:%M:%S')
formatter_console = logging.Formatter('[%(asctime)s] - %(levelname)s - %(message)s',datefmt='%d %b %Y %H:%M:%S')
# add formatter to ch
ch.setFormatter(formatter_console)
fh.setFormatter(formatter)
# add ch to logger
self.logger.addHandler(ch) #added logging into console
self.logger.addHandler(fh) #added logging into file
def start_logging(self):
self.logger.info('Starting OnionScraper')
return self.logger
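# Usage sketch:
#   logger = LoggerHandler('INFO').start_logging()
#   logger.info('hello')   # written to both the console and info.log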

@@ -0,0 +1,78 @@
import re
class Operator:
"""Base class for all Operator plugins.
Note: This is an abstract class. You must extend ``__init__`` and call
``super`` to ensure this class's constructor is called. You must override
``handle_artifact`` with the same signature. You may define additional
``handle_{artifact_type}`` methods as needed (see the threatkb operator for
an example) - these methods are purely convention, and are not required.
When adding additional methods to child classes, consider prefixing the
method name with an underscore to denote a ``_private_method``. Do not
override other existing methods from this class.
"""
def __init__(self, artifact_types=None, filter_string=None, allowed_sources=None):
"""Override this constructor in child classes.
The arguments above (artifact_types, filter_string, allowed_sources)
should be accepted explicitly, as above, in all child classes.
Additional arguments should be added: url, auth, etc, whatever is
needed to set up the object.
Each operator should default self.artifact_types to a list of Artifacts
supported by the plugin, and allow passing in artifact_types to
overwrite that default.
Example:
>>> self.artifact_types = artifact_types or [
... artifacts.IPAddress,
... artifacts.Domain,
... ]
It's recommended to call this __init__ method via super from all child
classes. Remember to do so *before* setting any default artifact_types.
"""
self.artifact_types = artifact_types or []
self.filter_string = filter_string or ''
self.allowed_sources = allowed_sources or []
def handle_onion(self, url):
"""Override with the same signature.
:param url: A single onion artifact (the namedtuple produced by sources).
:returns: None (always ignored)
"""
raise NotImplementedError()
def _artifact_is_allowed(self, artifact):
"""Returns True if this is allowed by this plugin's filters."""
# # Must be in allowed_types.
# if not any(isinstance(artifact, t) for t in self.artifact_types):
# return False
#
# # Must match the filter string.
# if not artifact.match(self.filter_string):
# return False
#
# # Must be in allowed_sources, if set.
# if self.allowed_sources and not any(
# [re.compile(p).search(artifact.source_name)
# for p in self.allowed_sources]):
# return False
#
return True
def process(self, onions):
"""Process all applicable onions."""
for onion in onions:
if self._artifact_is_allowed(onion.url):
self.handle_onion(onion)
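# A minimal sketch of a child plugin (hypothetical, for illustration), following
# the conventions documented above: call super().__init__ first, then override
# handle_onion with the same signature.
#
# class EchoPlugin(Operator):
#     def __init__(self, logger, artifact_types=None, filter_string=None, allowed_sources=None):
#         super().__init__(artifact_types, filter_string, allowed_sources)
#         self.logger = logger
#     def handle_onion(self, onion):
#         self.logger.info(onion.url)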

@@ -0,0 +1,261 @@
import re
import os
import sys
import json
import time
import random
import traceback
import subprocess
from uuid import uuid4
from pathlib import Path
from datetime import datetime as dt
from json.decoder import JSONDecodeError
from concurrent.futures import ProcessPoolExecutor
from threading import Timer
import requests
from stem.control import Controller
from stem import Signal
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from onionscraper.operators import Operator
class Plugin(Operator):
"""OnionScraper main work logic.
Handles reading the config file, calling sources, maintaining state and
sending artifacts to operators.
"""
def __init__(self, logger, **kwargs):
self.logger = logger
self.logger.info('Initializing OnionScanner')
screenshots = kwargs.pop('screenshots_path', None)
if screenshots:
self.screenshots = Path(screenshots)
else:
self.screenshots = Path(__file__).parents[1]/'screenshots'
self.onionscan = kwargs['binpath']
self.timeout = int(kwargs['timeout'])
self.proxy = kwargs['socks5']
self.torControl = kwargs['TorController']
self.retries = int(kwargs['retries'])
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'}
blacklist = kwargs['blacklist'].split(',')
self.blacklist = re.compile('|'.join([re.escape(word) for word in blacklist]), re.IGNORECASE)
keywords = kwargs['interestingKeywords'].split(',')
self.keywords = re.compile('|'.join([re.escape(word) for word in keywords]), re.IGNORECASE)
self.session = self.get_tor_session()
def response(self, status, content, onion):
"""
status: success/failure
content: dict
onion: str
return: dict
"""
return {'status': status, 'data': content, 'onion': onion}
def parseDoc(self, data):
data['onionscan'].pop('simpleReport', None)
crawls = data['onionscan'].pop('crawls', None) or {}
hiddenService = data['onionscan'].pop('hiddenService', None)
data['onionscan']['crawls'] = [*crawls]
data['hiddenService'] = hiddenService
for onion in crawls.keys():
print(onion)
#q.enqueue(self.crawl, onion)
#with open('test.json', 'w', encoding='utf-8') as f:
# json.dump(data, f, ensure_ascii=False, indent=4)
return data
def format_directory(self, directory):
d = dt.now()
year = str(d.year)
month = str(d.month)
# prefix month and day with "0" if it is only one digit
if len(month) < 2:
month = "0" + month
day = str(d.day)
if len(day) < 2:
day = "0" + day
save_path = directory/year/month/day
if not os.path.isdir(save_path):
self.logger.info("[*] Creating directory to save screenshots")
os.makedirs(save_path)
return save_path
def take_screenshot(self, save_path, onion):
binary = FirefoxBinary('/home/tony/Projects/OnionScraper/geckodriver')
fp = webdriver.FirefoxProfile()
fp.set_preference('network.proxy.type', 1)
fp.set_preference('network.proxy.socks', '127.0.0.1')
fp.set_preference('network.proxy.socks_port', 9050)
fp.set_preference('network.proxy.socks_remote_dns', True)
options = Options()
options.headless = True
driver = webdriver.Firefox(
executable_path='/home/tony/Projects/OnionScraper/geckodriver',
options=options,
firefox_profile=fp)
url = 'http://' + onion
driver.get(url)
uid = str(uuid4()).split('-')[0]
filename = f"{onion}_screenshot_{uid}.png"
f_name = f"{save_path}/{filename}"
driver.save_screenshot(f_name)
driver.quit()
if os.path.isfile(f_name):
self.logger.info(f'[*] Screenshot was taken. {f_name}')
dateScreenshoted = dt.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f')+ 'Z'
result = {'dateScreenshoted':dateScreenshoted,'filename':filename}
return self.response("success",result,onion)
else:
self.logger.error('[x] Unable to take screenshot')
return self.response("failure",None,onion)
def get_tor_session(self):
try:
s = requests.session()
s.proxies = self.proxy
s.headers.update(self.headers)
except Exception as e:
self.logger.error(e)
self.logger.debug(traceback.print_exc())
return s
# signal TOR for a new connection
def renew_connection(self):
with Controller.from_port(port = self.torControl['port']) as controller:
# Now we switch TOR identities to make sure we have a good connection
self.logger.info('Getting new Tor IP')
# authenticate to our local TOR controller
controller.authenticate(self.torControl['password'])
# send the signal for a new identity
controller.signal(Signal.NEWNYM)
# wait for the new identity to be initialized
time.sleep(controller.get_newnym_wait())
session = self.get_tor_session()
self.logger.info(f"IP is {session.get('http://httpbin.org/ip').json()['origin']}")
def handle_timeout(self, process, onion):
#
# Handle a timeout from the onionscan process.
#
try:
# kill the onionscan process
process.kill()
self.logger.info("[!!!] Killed the onionscan process.")
except Exception:
pass
self.renew_connection()
return
def run_sessions(self, onion):
retry = 0
result = None
while True:
try:
url = 'http://'+onion
self.logger.info(url)
content = self.session.get(url)
if content.status_code == 200:
result = content.json()
except JSONDecodeError as e:
self.logger.debug(f'JSONDecodeError {e}')
result = content.text
except Exception as e:
self.logger.error(e)
self.logger.debug(traceback.print_exc())
finally:
if result:
return self.response("success",result,onion)
else:
self.logger.info('[x] No results found retrying ...')
retry += 1
self.renew_connection()
if retry > self.retries:
self.logger.error('[x] Max retries exceeded')
return self.response("failure",None, onion)
def run_onionscan(self, onion):
self.logger.info("[*] Running onionscan on %s", onion)
# fire up onionscan
process = subprocess.Popen([self.onionscan,"--webport=0","--jsonReport","--simpleReport=false",onion],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
# start the timer and let it run until the configured timeout
process_timer = Timer(self.timeout,self.handle_timeout,args=[process,onion])
process_timer.start()
# wait for the onion scan results
stdout = process.communicate()[0]
# we have received valid results so we can kill the timer
if process_timer.is_alive():
process_timer.cancel()
return self.response("success",stdout.decode(),onion)
self.logger.info("[!!!] Process timed out for %s", onion)
return self.response("failure",None, onion)
def handle_onion(self, onion_tuple):
onion = onion_tuple.url
self.logger.info(f'Processing {onion} with onionscan')
try:
blacklist_URL = self.blacklist.search(onion)
if blacklist_URL:
self.logger.info(f"[X] Blocked by blacklist => matched keyword {blacklist_URL.group()}")
else:
self.logger.debug("[*] URL blacklist test: PASSED")
results = self.run_onionscan(onion)
# the onionscan report is a JSON string; parse it before checking webDetected
if results['status'] == 'success' and json.loads(results['data']).get('webDetected'):
content = self.run_sessions(onion)
print(content)
#sys.exit(0)
#if content['status'] == 'success':
# blacklist_CONTENT = self.blacklist.search(content['data'])
# if blacklist_CONTENT:
# self.logger.info(f"[X] Blocked by blacklist content => matched keyword {blacklist_CONTENT.group()}")
# else:
# self.logger.debug("[*] CONTENT blacklist test: PASSED")
# screenshot = self.take_screenshot(self.format_directory(self.screenshots), onion)
# self.logger.info("Indexing!")
# doc = {
# 'onionscan':json.loads(results['data']),
# 'html':content['data'],
# 'screenshots':screenshot['data'],
# 'interestingKeywords':self.interestingKeywords.findall(content['data'])
# }
# return self.parseDoc(doc)
else:
self.logger.info(f"[x] hidden service {onion} is not active")
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
finally:
pass
#sys.exit(0)

@@ -0,0 +1,15 @@
from onionscraper.operators import Operator
class Plugin(Operator):
"""Operator for output to flat CSV file."""
def __init__(self, filename, base_score):
"""CSV operator."""
self.filename = filename
#super(Plugin, self).__init__(artifact_types, filter_string, allowed_sources)
def handle_artifact(self, artifact):
"""Operate on a single artifact."""
pass

@@ -0,0 +1,41 @@
from collections import namedtuple
class Source(object):
"""Base class for all Source plugins.
Note: This is an abstract class. You must override ``__init__`` and ``run``
in child classes. You should not override ``process_element``. When adding
additional methods to child classes, consider prefixing the method name
with an underscore to denote a ``_private_method``.
"""
def __init__(self, name, *args, **kwargs):
"""Override this constructor in child classes.
The first argument must always be ``name``.
Other arguments should be url, auth, etc., whatever is needed to set
up the object.
"""
self.onion = namedtuple('onion', ['url','source','type'])
def run(self):
"""Run and return ``(saved_state, list(Artifact))``.
Override this method in child classes.
The method signature and return values must remain consistent.
The method should attempt to pick up where we left off using
``saved_state``, if supported. If ``saved_state`` is ``None``, you can
assume this is a first run. If state is maintained by the remote
resource (e.g. as it is with SQS), ``saved_state`` should always be
``None``.
"""
raise NotImplementedError()
def process_element(self, content, reference_link, include_nonobfuscated=False):
"""Take a single source content/url and return a list of Artifacts.
This is the main work block of Source plugins, which handles
IOC extraction and artifact creation.
:param content: String content to extract from.
:param reference_link: Reference link to attach to all artifacts.
:param include_nonobfuscated: Include non-defanged URLs in output?
"""
self.logger.debug(f"Processing in source '{self.name}'")
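# A minimal sketch of a child source (hypothetical, for illustration): set up
# name and logger, then return a list of onion namedtuples from run().
#
# class StaticPlugin(Source):
#     def __init__(self, logger, name, url):
#         self.logger = logger
#         self.name = name
#         self.url = url
#         super().__init__(name)
#     def run(self):
#         return [self.onion(url='xmh57jrzrnw6insl.onion', source=self.name, type='domain')]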

@@ -0,0 +1,153 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
__author__ = 'Andrey Glauzer'
__license__ = "MIT"
__version__ = "1.0.1"
__maintainer__ = "Andrey Glauzer"
__status__ = "Development"
import requests
import json
import re
import urllib.parse
from random import choice
import time
from bs4 import BeautifulSoup
from onionscraper.sources import Source
class Plugin(Source):
def __init__(self, logger, name, url):
self.logger = logger
self.name = name
self.url = url
self.desktop_agents = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0']
super().__init__(self)
def run(self):
self.logger.info('Starting Gist Scraper')
self.cookies()
self.pagination()
self.scraping()
return self.raw()
@property
def random_headers(self):
return {
'User-Agent': choice(self.desktop_agents),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
}
def cookies(self):
self.logger.info('Setting GIST cookies')
with requests.Session() as self.session:
self.headers = self.random_headers
request = self.session.get(self.url, headers=self.headers)
if request.status_code == 200:
pass
else:
self.logger.error('No Response from GIST')
def pagination(self):
request = self.session.get(
f"https://gist.github.com/search?l=Text&q={urllib.parse.quote('.onion')}", headers=self.headers)
self.soup = BeautifulSoup(request.content, features="lxml")
pages = []
self.urls = [self.url]
try:
for pagination in self.soup.find('div', {'class': 'pagination'}).findAll('a'):
pages.append(pagination.get_text())
except:
pages = False
if pages:
cont = 2
while cont <= 1: # int(pages[-2]):
cont += 1
full_url = f"https://gist.github.com/search?l=Text&p={cont-1}&q={urllib.parse.quote('.onion')}"
self.urls.append(full_url)
def scraping(self):
url = []
for inurl in self.urls:
self.logger.info(f"Connecting to {inurl}")
time.sleep(5)
request = self.session.get(inurl, headers=self.headers)
if request.status_code == 200:
soup = BeautifulSoup(request.content, features="lxml")
for code in soup.findAll('div', {'class': 'gist-snippet'}):
if '.onion' in code.get_text().lower():
for raw in code.findAll('a', {'class': 'link-overlay'}):
try:
url.append(raw['href'])
except:
pass
self.urls_raw = []
for get in url:
self.logger.info(f"Connecting to {get}")
time.sleep(5)
try:
request = self.session.get(get, headers=self.headers)
if request.status_code == 200:
soup = BeautifulSoup(request.content, features="lxml")
for raw in soup.findAll('a', {'class': 'btn btn-sm'}):
try:
gist_url = f"https://gist.githubusercontent.com{raw['href']}"
self.urls_raw.append(gist_url)
except:
pass
except(requests.exceptions.ConnectionError,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ReadTimeout,
requests.exceptions.InvalidURL) as e:
self.logger.error(
f"I was unable to connect to the url, because an error occurred.\n{e}")
pass
def raw(self):
self.logger.info('Performing replaces and regex. WAIT...')
itens = []
onions = []
for raw in self.urls_raw:
if '.txt' in raw.lower() \
or '.csv' in raw.lower():
time.sleep(5)
request = self.session.get(raw, headers=self.headers)
self.soup = BeautifulSoup(request.content, features="lxml")
for pre in self.soup.findAll('body'):
lines = pre.get_text().split('\n')
itens.extend(lines)
regex = re.compile(
r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
for lines in itens:
rurls = lines \
.replace('\xad', '') \
.replace('\n', '') \
.replace("http://", '') \
.replace("https://", '') \
.replace("www.", "")
url = regex.match(rurls)
if url is not None:
onions.append(self.onion(url=url.group(), source='gist', type='domain'))
return onions
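# Note: regex.match() is applied to each cleaned line, so only lines that
# start with a bare onion domain (scheme and 'www.' already stripped above)
# produce a match, e.g. 'xmh57jrzrnw6insl.onion' becomes
# onion(url='xmh57jrzrnw6insl.onion', source='gist', type='domain').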

@@ -0,0 +1,3 @@
def hola(ass):
print(ass)

@@ -0,0 +1,16 @@
beautifulsoup4==4.9.1
certifi==2020.6.20
chardet==3.0.4
click==7.1.2
elasticsearch==7.8.0
idna==2.10
lxml==4.5.1
# Editable Git install with no remote (OnionScraper==1.0.0)
-e /home/tony/Projects/OnionScraper
PySocks==1.7.1
PyYAML==5.3.1
requests==2.24.0
selenium==3.141.0
soupsieve==2.0.1
stem==1.8.0
urllib3==1.25.9

@@ -0,0 +1,22 @@
from setuptools import setup
def readme_file_contents():
with open('README.md') as readme_file:
data = readme_file.read()
return data
setup(
name='OnionScraper',
version='1.0.0',
description='Python app to scrape and index hidden services',
long_description=readme_file_contents(),
author='dan',
author_email='test@google.com',
license='MIT',
packages=['onionscraper'],
zip_safe=False,
install_requires=[]
)

Binary file not shown (image added, 650 B)

Binary file not shown (image added, 13 KiB)

Binary file not shown (image added, 9.7 KiB)

@@ -0,0 +1,328 @@
{{define "fields"}}
{{range .Fields}}<td><a href="/?search={{.}}">{{.}}</a></td>{{end}}<td><a href="/?search={{index .Fields 0}}" title="{{.Links}} Relationships Share an Identifier connection with this Identifier">{{.Links}}</a></td>
{{end}}
{{define "table"}}
<br/>
<div id="{{.Title}}" class="row">
<div class="col-lg-12">
<div class="panel panel-default">
<div class="panel-heading">{{.AltTitle}} linked to {{.SearchTerm}} ({{len .Rows}})</div>
{{ $length := len .RollupCounts }} {{ if ne $length 0 }}
<div class="panel-body text-center">
<canvas id="myChart{{.Title}}" style="max-width:300px;max-height:300px;margin:auto;" width="300px" height="300px"></canvas>
<script>
var ctx = document.getElementById("myChart{{.Title}}");
var myChart = new Chart(ctx, {
type: 'bar',
data: {
labels: [
{{ range $key, $value := .RollupCounts }}
"{{$key}}",
{{end}}
],
datasets: [{
data: [
{{ range $key, $value := .RollupCounts }}
{{$value}},
{{end}}
],
borderWidth: 1
}]
},
options: {
scales: {
yAxes: [{
ticks: {
beginAtZero:true
}
}]
},
title: {
text: "Breakdown of {{.Title}}s for {{.SearchTerm}}",
display:true
},
legend :{
display:false
},
}
});
</script>
{{ range $key, $value := .RollupCounts }}
{{if ne $key ""}}
<button class="btn btn-primary" style="margin: 5px;" type="button">
{{$key}} <span class="badge">{{$value}}</span>
</button>
{{end}}
{{end}}
</div>
{{end}}
<!-- Table -->
<table class="table table-bordered table-striped">
<tr>
<th>Tag</th>
{{range .Heading}}
<th>{{.}}</th>
{{end}}
<th>Other Links</th>
</tr>
{{range .Rows}}
<tr><td><span class="label label-default"><a href="/?search={{.Tag}}">{{.Tag}}</a></span></td>{{template "fields" .}}</tr>
{{end}}
</table>
</div>
</div>
</div>
<br/>
{{end}}
<!-- ############ SUMMARY ################# -->
{{define "summary"}}
<table class="table table-bordered table-striped">
{{range .Fields}}
<tr><th><a href="#{{.Key}}">{{.AltTitle}}</a></th>
<td>
<div class="progress">
<div class="progress-bar progress-bar-striped" style="width: {{.Total}}%">
{{.Value}}
</div>
</div>
</td></tr>
{{end}}
</table>
{{end}}
<!-- ############ Main Page ################# -->
<!DOCTYPE html>
<html lang="en"><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="">
<meta name="author" content="">
<link rel="icon" href="/images/favicon.ico">
<script src="/scripts/chart.bundle.js"></script>
<title>OnionScan Correlations Lab</title>
<link href="/style/bootstrap.css" rel="stylesheet">
<style>
@font-face {
font-family: 'Roboto Slab';
font-style: normal;
font-weight: 400;
src: local('Roboto Slab Regular'), local('RobotoSlab-Regular'), url(/fonts/RobotoSlab-Regular.woff) format('woff');
}
@font-face {
font-family: 'Roboto Slab';
font-style: normal;
font-weight: 700;
src: local('Roboto Slab Bold'), local('RobotoSlab-Bold'), url(/fonts/RobotoSlab-Bold.woff) format('woff');
}
body{
font-family: 'Roboto Slab';
}
.label a {
color:#fff;
}
.btn {
margin-bottom:5px;
}
</style>
</head>
<body role="document">
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/"><img style="margin-top: -16px;" width="75px" height="75px" src="/images/logo.png"/></a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li><a href="/" style="color:#fff;">Summary</a></li>
<li><a href="/saved" style="color:#fff;">Saved Searches</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>
</nav>
<br/><br/> <br/><br/>
<div class="container theme-showcase" role="main">
<form action="/">
<div class="row">
<div class="col-lg-12">
<div class="input-group">
<input name="search" type="text" class="form-control" placeholder="{{.SearchTerm}}" value="{{.SearchTerm}}"/>
<span class="input-group-btn">
<input class="btn btn-default" type="submit" value="Search!">
</span>
</div><!-- /input-group -->
</div><!-- /.col-lg-12 -->
</div><!-- /.row -->
</form>
<br/>
{{if ne .Error ""}}
<div class="alert alert-danger" role="alert">{{.Error}}</div>
{{end}}
{{if ne .Success ""}}
<div class="alert alert-success" role="alert">{{.Success}}</div>
{{end}}
{{if ne .SearchTerm ""}}
{{ $length := len .Tables }} {{ if ne $length 0 }}
<div class="row">
<div class="col-lg-3 text-center">
<h2>Options</h2>
<form action="/save" method="post">
<input type="hidden" name="search" value="{{.SearchTerm}}"/>
<input type="hidden" name="token" value="{{.Token}}"/>
<span class="input-group-btn">
<input class="btn btn-default" type="submit" value="Save Search">
</span>
</form>
{{ $lentags := len .UserTags }}
{{if ne 0 $lentags}}
<h2>Linked Tags</h2>
{{ $search := .SearchTerm }}
{{ $token := .Token }}
{{ range .UserTags }}
<form action="/delete-tag" method="post">
<input type="hidden" name="search" value="{{$search}}"/>
<input type="hidden" name="tag" value="{{.}}"/>
<input type="hidden" name="token" value="{{$token}}"/>
<div class="btn-group">
<button class="btn btn-default" type="button"><a href="/?search={{.}}">{{.}}</a></button>
{{if ne . $search}}
<button class="btn btn-default" type="submit"><img src="/images/remove.png" width="16px" height="16px" title="remove tag"/></button>
{{end}}
</div>
</form>
{{end}}
{{end}}
<h3>Tag Search Term</h3>
<form action="/tag" method="post">
<div class="input-group">
<input type="text" name="tag" class="form-control" placeholder="Enter Tag..."/>
<input type="hidden" name="search"value="{{.SearchTerm}}"/>
<input type="hidden" name="token" value="{{.Token}}"/>
<span class="input-group-btn">
<input class="btn btn-default" type="submit" value="Tag!">
</span>
</div>
</form>
</div>
<div class="col-lg-9">
<div class="panel panel-default">
<div class="panel-heading">Summary for {{.SearchTerm}} {{if ne "" .Summary.Title}}({{.Summary.Title}}){{end}}&nbsp;&nbsp;
{{range .Tags}}
<span class="label label-{{if eq . "mod_status"}}danger{{else}}primary{{end}}"><a href="/?search={{.}}">{{.}}</a></span>&nbsp;
{{end}}
</div>
{{template "summary" .Summary}}
</div>
{{range .Tables}}
{{template "table" .}}
{{end}}
</div>
</div>
{{else}}
<div class="alert alert-warning" role="alert">No Relationships Found for <strong>{{.SearchTerm}}</strong></div>
{{end}}
{{else}}
{{ $length := len .SearchResults }}
{{ if eq $length 0 }}
<div class="jumbotron">
<h1>Welcome to your OnionScan Correlation Lab!</h1>
<p>You have <strong>{{.RelationshipNum}}</strong> correlations to hunt through!</p>
</div>
{{else}}
<h2>Saved Searches</h2>
<ul>
{{ if eq $length 1}}
<div class="alert alert-warning" role="alert">You don't have any saved searches yet!</div>
{{else}}
{{range .SearchResults}}
{{if ne . "onionscan://dummy"}}
<li><a href="/?search={{.}}">{{.}}</a></li>
{{end}}
{{end}}
{{end}}
</ul>
{{end}}
{{end}}
</body>
</html>

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,459 @@
package webui
import (
"errors"
"fmt"
"github.com/s-rah/onionscan/config"
"github.com/s-rah/onionscan/crawldb"
"github.com/s-rah/onionscan/utils"
"html/template"
"log"
"net/http"
"strconv"
"strings"
)
type WebUI struct {
osc *config.OnionScanConfig
token string
Done chan bool
}
type SummaryField struct {
Key string
Value int
AltTitle string
Total int
}
type Summary struct {
Fields []SummaryField
Total int
Title string
}
type Content struct {
SearchTerm string
Error string
Summary Summary
Tables []Table
Tags []string
RelationshipNum int
Token string
Success string
UserTags []string
SearchResults []string
}
type Row struct {
Fields []string
Tag string
Links int
}
type Table struct {
Title string
SearchTerm string
Heading []string
Rows []Row
Rollups []int
RollupCounts map[string]int
AltTitle string
}
// GetUserDefinedTable returns, from an initial relationship, a complete user
// defined relationship table - in the order it is defined in the crawl config.
func (wui *WebUI) GetUserDefinedTable(rel crawldb.Relationship) (Table, error) {
log.Printf("Loading User Defined Relationship %s", rel.From)
config, ok := wui.osc.CrawlConfigs[rel.From]
if ok {
var table Table
crName := strings.SplitN(rel.Type, "/", 2)
if len(crName) == 2 {
table.Title = crName[0]
cr, err := config.GetRelationship(crName[0])
if err == nil {
for i, er := range cr.ExtraRelationships {
table.Heading = append(table.Heading, er.Name)
if er.Rollup {
table.Rollups = append(table.Rollups, i)
}
}
table.Heading = append(table.Heading, "Onion")
log.Printf("Returning User Table Relationship %v", table)
return table, nil
}
}
}
log.Printf("Could not make Table")
return Table{}, errors.New("Invalid Table")
}
// GetUserDefinedRow returns, from an initial relationship, a complete user
// defined relationship row - in the order it is defined in the crawl config.
func (wui *WebUI) GetUserDefinedRow(rel crawldb.Relationship) (string, []string) {
log.Printf("Loading User Defined Relationship %s", rel.From)
config, ok := wui.osc.CrawlConfigs[rel.From]
if ok {
userrel, err := wui.osc.Database.GetUserRelationshipFromOnion(rel.Onion, rel.From)
if err == nil {
// We can now construct the user
// relationship in the right order.
crName := strings.SplitN(rel.Type, "/", 2)
if len(crName) == 2 {
cr, err := config.GetRelationship(crName[0])
row := make([]string, 0)
if err == nil {
for _, er := range cr.ExtraRelationships {
log.Printf("Field Value: %v", userrel[crName[0]+"/"+er.Name].Identifier)
row = append(row, userrel[crName[0]+"/"+er.Name].Identifier)
}
row = append(row, rel.From)
log.Printf("Returning User Row Relationship %s %v %s", crName[0], row, rel.Onion)
return crName[0], row
}
} else {
log.Printf("Could not derive config relationship from type %s", rel.Type)
}
}
}
log.Printf("Invalid Row")
return "", []string{}
}
// Save implements the Saved Searches Feature
func (wui *WebUI) Save(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again.", http.StatusFound)
return
}
search := r.PostFormValue("search")
token := r.PostFormValue("token")
if token != wui.token {
path := fmt.Sprintf("/?search=%v&error=Invalid random token, Please try again.", search)
http.Redirect(w, r, path, http.StatusFound)
return
}
wui.osc.Database.InsertRelationship(search, "onionscan://user-data", "search", "")
path := fmt.Sprintf("/?search=%v&success=Successfully Saved Search", search)
http.Redirect(w, r, path, http.StatusFound)
}
// Tag implements the /tag endpoint.
func (wui *WebUI) Tag(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again.", http.StatusFound)
return
}
search := r.PostFormValue("search")
tag := r.PostFormValue("tag")
token := r.PostFormValue("token")
if token != wui.token {
path := fmt.Sprintf("/?search=%v&error=Invalid random token, Please try again.", search)
http.Redirect(w, r, path, http.StatusFound)
return
}
wui.osc.Database.InsertRelationship(search, "onionscan://user-data", "tag", tag)
path := fmt.Sprintf("/?search=%v&success=Successfully Added Tag %v to %v", search, tag, search)
http.Redirect(w, r, path, http.StatusFound)
}
// Delete tag implements the /delete-tag endpoint
func (wui *WebUI) DeleteTag(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again.", http.StatusFound)
return
}
search := r.PostFormValue("search")
tag := r.PostFormValue("tag")
token := r.PostFormValue("token")
if token != wui.token {
path := fmt.Sprintf("/?search=%v&error=Invalid random token, Could not delete tag. Please try again.", search)
http.Redirect(w, r, path, http.StatusFound)
return
}
err = wui.osc.Database.DeleteRelationship(search, "onionscan://user-data", "tag", tag)
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again: "+err.Error(), http.StatusFound)
return
}
path := fmt.Sprintf("/?search=%v&success=Successfully Deleted Tag %v from %v", search, tag, search)
http.Redirect(w, r, path, http.StatusFound)
}
// SavedSearches provides the user with a list of searches they have saved.
func (wui *WebUI) SavedSearches(w http.ResponseWriter, r *http.Request) {
results, _ := wui.osc.Database.GetRelationshipsWithIdentifier("onionscan://user-data")
var content Content
content.SearchResults = append(content.SearchResults, "onionscan://dummy")
for _, rel := range results {
if rel.Type == "search" {
content.SearchResults = append(content.SearchResults, rel.Onion)
}
}
var templates = template.Must(template.ParseFiles("templates/index.html"))
templates.ExecuteTemplate(w, "index.html", content)
}
// Index implements the main search functionality of the webui
func (wui *WebUI) Index(w http.ResponseWriter, r *http.Request) {
search := strings.TrimSpace(r.URL.Query().Get("search"))
errMsg := strings.TrimSpace(r.URL.Query().Get("error"))
success := strings.TrimSpace(r.URL.Query().Get("success"))
var content Content
mod_status := false
pgp := false
ssh := false
uriCount := 0
content.Token = wui.token
content.Error = errMsg
content.Success = success
if search != "" {
content.SearchTerm = search
var results []crawldb.Relationship
tables := make(map[string]Table)
results, _ = wui.osc.Database.GetRelationshipsWithOnion(search)
results_identifier, _ := wui.osc.Database.GetRelationshipsWithIdentifier(search)
results = append(results, results_identifier...)
for _, rel := range results {
if rel.Type == "page-info" {
content.Summary.Title = rel.Identifier
}
if rel.From == "onionscan://user-data" {
if rel.Type == "tag" {
content.UserTags = append(content.UserTags, rel.Identifier)
utils.RemoveDuplicates(&content.UserTags)
if rel.Identifier == search {
// We want to surface the onions *not* the tag
table, ok := tables["search-results"]
log.Printf("%v %v", search, ok)
if !ok {
var newTable Table
newTable.Title = rel.Type
newTable.Heading = []string{"Onion"}
tables["search-results"] = newTable
table = newTable
}
links := wui.osc.Database.GetRelationshipsCount(rel.Identifier) - 1
table.Rows = append(table.Rows, Row{Fields: []string{rel.Onion}, Tag: rel.Identifier, Links: links})
tables["search-results"] = table
} else {
table, ok := tables["search-results"]
if !ok {
var newTable Table
newTable.Title = rel.Type
newTable.Heading = []string{"Tags"}
tables[rel.Type] = newTable
table = newTable
}
links := wui.osc.Database.GetRelationshipsCount(rel.Identifier) - 1
table.Rows = append(table.Rows, Row{Fields: []string{rel.Identifier}, Tag: rel.Onion, Links: links})
tables[rel.Type] = table
}
}
} else if utils.IsOnion(rel.Onion) && rel.Type != "database-id" && rel.Type != "user-relationship" {
table, ok := tables[rel.Type]
if !ok {
var newTable Table
newTable.Title = rel.Type
newTable.Heading = []string{"Identifier", "Onion"}
tables[rel.Type] = newTable
table = newTable
}
links := wui.osc.Database.GetRelationshipsCount(rel.Identifier) - 1
table.Rows = append(table.Rows, Row{Fields: []string{rel.Identifier, rel.Onion}, Tag: rel.From, Links: links})
tables[rel.Type] = table
if rel.From == "mod_status" {
mod_status = true
}
if rel.From == "pgp" {
pgp = true
}
if rel.From == "ssh" {
ssh = true
}
} else if utils.IsOnion(rel.From) {
tableName, row := wui.GetUserDefinedRow(rel)
if len(row) > 0 {
table, exists := tables[tableName]
if !exists {
newTable, err := wui.GetUserDefinedTable(rel)
if err == nil {
tables[tableName] = newTable
table = newTable
}
}
table.Rows = append(table.Rows, Row{Fields: row})
tables[tableName] = table
}
} else if rel.Type == "user-relationship" {
userrel := rel
userrel.Onion = rel.Identifier
userrel.From = rel.Onion
userrel.Type = rel.From + "/parent"
tableName, row := wui.GetUserDefinedRow(userrel)
if len(row) > 0 {
table, exists := tables[tableName]
if !exists {
newTable, err := wui.GetUserDefinedTable(userrel)
if err == nil {
tables[tableName] = newTable
table = newTable
}
}
table.Rows = append(table.Rows, Row{Fields: row})
tables[tableName] = table
}
} else if rel.Type == "database-id" {
uriCount++
}
}
// AutoTag our content
if mod_status {
content.Tags = append(content.Tags, "mod_status")
}
if pgp {
content.Tags = append(content.Tags, "pgp")
}
if ssh {
content.Tags = append(content.Tags, "ssh")
}
// We now have a bunch of tables, keyed by type.
// Build a Summary and add the tables to the Content
for _, v := range tables {
content.Summary.Total += len(v.Rows)
}
for k, v := range tables {
log.Printf("Adding Table %s %v", k, v)
// Lazy Plural
alt := k + "s"
switch k {
case "ip":
alt = "IP Addresses"
case "clearnet-link":
alt = "Co-Hosted Clearnet Sites"
case "uri":
alt = "Links to External Sites"
case "email-address":
alt = "Email Addresses"
case "server-version":
alt = "Server Information"
case "identity":
alt = "PGP Identities"
case "bitcoin-address":
alt = "Bitcoin Addresses"
case "software-banner":
alt = "Software Banners"
case "analytics-id":
alt = "Analytics IDs"
case "tag":
alt = "Tag Relationships"
case "onion":
alt = "Co-Hosted Onion Sites"
case "search-results":
alt = "Search Results"
case "http-header":
alt = "HTTP Headers"
case "page-info":
alt = "Webpage Information"
}
total := (float32(len(v.Rows)) / float32(content.Summary.Total)) * float32(100)
if total < 1 {
total = 2 // For Visibility
}
field := SummaryField{k, len(v.Rows), alt, int(total)}
content.Summary.Fields = append(content.Summary.Fields, field)
rollups := make(map[string]int)
for _, c := range v.Rollups {
for _, rows := range v.Rows {
rollups[rows.Fields[c]]++
}
}
v.RollupCounts = rollups
v.SearchTerm = search
v.AltTitle = alt
content.Tables = append(content.Tables, v)
}
} else {
content.RelationshipNum = wui.osc.Database.GetAllRelationshipsCount()
}
var templates = template.Must(template.ParseFiles("templates/index.html"))
templates.ExecuteTemplate(w, "index.html", content)
}
func (wui *WebUI) Listen(osc *config.OnionScanConfig, port int) {
wui.osc = osc
// We generate a random token on startup to mitigate the threat
// against CSRF style attacks.
token, err := utils.GenerateRandomString(64)
if err != nil {
log.Fatalf("Error generating random bytes for CSRF token: %v", err)
}
wui.token = token
http.HandleFunc("/", wui.Index)
http.HandleFunc("/save", wui.Save)
http.HandleFunc("/tag", wui.Tag)
http.HandleFunc("/saved", wui.SavedSearches)
http.HandleFunc("/delete-tag", wui.DeleteTag)
fs := http.FileServer(http.Dir("./templates/style"))
http.Handle("/style/", http.StripPrefix("/style/", fs))
fs = http.FileServer(http.Dir("./templates/scripts"))
http.Handle("/scripts/", http.StripPrefix("/scripts/", fs))
fs = http.FileServer(http.Dir("./templates/images"))
http.Handle("/images/", http.StripPrefix("/images/", fs))
fs = http.FileServer(http.Dir("./templates/fonts"))
http.Handle("/fonts/", http.StripPrefix("/fonts/", fs))
portstr := strconv.Itoa(port)
log.Fatal(http.ListenAndServe("127.0.0.1:"+portstr, nil))
}