First Commit

pull/1/head
danieleperera 4 years ago
commit 9986c99147

.gitignore vendored

@@ -0,0 +1,9 @@
OnionScraper.egg-info
screenshots
dump.rdb
onionscandb
config.ini
*.log
*.pyc
__pycache__
venv

@@ -0,0 +1,125 @@
<p align="center">
<img src="docs/img/logo.png">
</p>
<h1 align="center">OnionIngestor</h1>
<p align="center">
<a href="https://python.org/">
<img src="https://img.shields.io/pypi/pyversions/3.svg">
</a>
<a href="https://opensource.org">
<img src="https://img.shields.io/badge/Open%20Source-%E2%9D%A4-brightgreen.svg">
</a>
</p>
<p align="center">
An extendable tool to collect, crawl and monitor onion sites on the Tor network, and index the collected information in Elasticsearch
</p>
## Introduction
OnionIngestor is based on the structure of the ThreatIngestor tool. It gives Cyber Threat Intelligence teams modular, extendable access for monitoring and collecting information on hidden sites over the Tor network.
The project is at an early stage of development.
## To-do-list
- [ ] Add multiprocessing to improve analyzing speed
- [ ] Add more sources, such as Reddit, Gmail, Pastebin, Twitter and other hidden sites
- [ ] Add more operators, such as checking screenshots of monitored sites for changes, and adding YARA rules to eliminate false positives
- [ ] Add more notifiers, such as Slack, SMTP and Discord
## Basic Implementation Logic
OnionIngestor runs and manages three important types of classes:
Sources - These collect hidden sites from clear-net sources such as Pastebin, Twitter, Gist, and crawled links
Operators - These process each onion link; for example, they fetch the HTML, take screenshots and run other scanners like [onionscan](https://github.com/s-rah/onionscan)
Notifiers - These notify the user, both with a daily report and whenever a monitored hidden site changes
OnionIngestor is designed to run as a daemon: it collects hidden sites from the enabled sources and passes them to the operators;
when finished, it sleeps for a user-defined time and then restarts the process from the beginning, as the sketch below shows.
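A self-contained sketch of that loop (illustrative only; names like `db` and `sleep_seconds` are stand-ins, not the real OnionManager API):
```
import time

def ingest_loop(sources, operators, notifiers, db, sleep_seconds):
    # One pass of the OnionIngestor flow described above (a sketch, not the
    # real OnionManager): collect, process, notify, sleep, repeat.
    while True:
        for source in sources:              # gist, pastebin, twitter, ...
            onions = source.run()           # collect candidate .onion links
            for operator in operators:      # onionscan, screenshots, ...
                doc = operator.process(onions)
                db.save(doc)                # index results in Elasticsearch
        for notifier in notifiers:          # telegram, smtp, ... (planned)
            notifier.notify()               # hypothetical notify() API
        time.sleep(sleep_seconds)           # then start over
```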
<p align="center">
<img src="docs/img/workflow.png">
</p>
## Installation
Install the requirements:
pip install -r requirements.txt
After installing the Tor client and the required libraries, use the `--help` flag to get usage details.
```
python3 -m onionscraper --help
OnionScraper
A Python3 application for indexing and scraping hidden services into ElasticSearch
Installation:
This application assumes you have python3 and pip3 installed.
pip3 install -r requirements.txt
This software is provided subject to the MIT license stated below.
--------------------------------------------------
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------
optional arguments:
-h, --help show this help message and exit
-c CONFIGFILE, --config CONFIGFILE
Path to config file
--log {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Set the logging level, default is INFO
```
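For example, a typical invocation (assuming your config is saved as `config.yml`):
```
python3 -m onionscraper -c config.yml --log DEBUG
```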
The yaml config file contains all the information for OnionIngestor to work
### Operator [Onionscan](https://github.com/s-rah/onionscan)
To run the web app provided by onionscan:
onionscan --mode analysis -verbose -webport 8081
## Output
The output is JSON, and it is sent in the same format to the chosen syslog.
```
show output here
```
## Authors
Daniele Perera
## Acknowledgments
Special thanks to:
andreyglauzer
InQuest
s-rah
Their code was used to implement this project.
Feel free to fork or open an issue to collaborate with the project.
## License
This project is licensed under the [MIT](https://choosealicense.com/licenses/mit/) License - see the LICENSE.md file for details.

Binary file not shown (image added, 638 KiB)

Binary file not shown (image added, 32 KiB)

@@ -0,0 +1,76 @@
# This is an example OnionIngestor config file with a preconfigured gist
# source, feeding collected onions through onionscan into Elasticsearch.
general:
# Run once, or forever; when daemonized, check feeds once an hour.
daemon: False
sleep: 3600
elasticsearch:
index: darkweb
port : 9200
host : 127.0.0.1
sources:
# A few sources to get you started!
- name: source-gist
module: gist
url: https://gist.github.com/search?l=Text&q=.onion
# - name: source-reddit
# module: reddit
# url: https://api.pushshift.io/reddit/search/comment/?subreddit=onions&limit=1000000
# feed_type: messy
#
# - name: pastebin
# module: pastebin-account
# url: https://gist.github.com/search?l=Text&q=.onion
# feed_type: messy
#
# - name: hunchly-report
# module: gmail-hunchly
# url: https://gist.github.com/search?l=Text&q=.onion
# feed_type: messy
#
# - name: onionland-search
# module: collect-onions
# url: http://3bbaaaccczcbdddz.onion/discover
# feed_type: messy
#
# - name: torch
# module: collect-onions
# url: http://xmh57jrzrnw6insl.onion
# feed_type: messy
operators:
- name: onionscan-go
module: onionscan
binpath: /home/tony/go/bin/onionscan
socks5:
http: 'socks5h://127.0.0.1:9050'
https: 'socks5h://127.0.0.1:9050'
TorController:
port: 9051
password: Xk5QP2haFMh8Y8D1060F1D7xaWEFG
timeout: 300
retries: 2
screenshots_path: null
blacklist: pedo,xxx,infant,loli,porn,child,abuse,sex,drug,cocaine,dope,zoo,daddy,daughter,boy,girl,young,murder
interestingKeywords: t.me,feed,rss,xml,atom,dataleak,breach,blog,ransomware,source code,data breach
# - name: yara-rule
# module: yara
# filename: categories.yar
# base_score: 50
#
# - name: regex-match
# module: regex
# keywords: test,test2
# base_score: 20
notifiers:
# Simple telegram notifier
- name: telegram-notifier
module: telegram
chat_id:
token:
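# Sources and operators may also reference named credentials, which are
# resolved by the credentials() helper in config.py. A hypothetical example
# (the 'twitter' module shown here is illustrative, not shipped):
# credentials:
#   - name: twitter-auth
#     token: <your token>
# sources:
#   - name: source-twitter
#     module: twitter
#     credentials: twitter-auth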

Binary file not shown.

@@ -0,0 +1,131 @@
import sys
import time
import traceback
import collections
from . import config
from . import dbhandler
from . import loghandler
class OnionManager:
"""ThreatIngestor main work logic.
Handles reading the config file, calling sources, maintaining state, and
sending artifacts to operators.
"""
def __init__(self, args):
# Load logger
log = loghandler.LoggerHandler(args.logLevel)
self.logger = log.start_logging()
# Load config
self.config = config.Config(args.configFile, self.logger)
# Load Elasticsearch.
try:
self.es = dbhandler.DbHandlerElasticSearch(
self.config.elasticsearch(),
self.logger)
except Exception as e:
# Error loading elasticsearch.
self.logger.error(e)
self.logger.debug(traceback.print_exc())
sys.exit(1)
# Instantiate plugins.
try:
self.logger.info("Initializing sources")
self.sources = {name: source(self.logger, **kwargs)
for name, source, kwargs in self.config.sources()}
self.logger.info("initializing operators")
self.operators = {name: operator(self.logger, **kwargs)
for name, operator, kwargs in self.config.operators()}
self.logger.info("initializing notifiers")
#self.notifiers = {name: operator(**kwargs)
# for name, operator, kwargs in self.config.notifiers()}
except Exception as e:
# Error loading plugins.
self.logger.error(e)
self.logger.debug(traceback.print_exc())
sys.exit(1)
def run(self):
"""Run once, or forever, depending on config."""
if self.config.daemon():
self.logger.info("Running forever, in a loop")
self.run_forever()
else:
self.logger.info("Running once, to completion")
self.run_once()
def run_once(self):
"""Run each source once, passing artifacts to each operator."""
# Track some statistics about artifacts in a summary object.
summary = collections.Counter()
for source in self.sources:
# Run the source to collect artifacts.
self.logger.info(f"Running source '{source}'")
try:
onions = self.sources[source].run()
if onions:
self.logger.info('Found hidden links')
else:
self.logger.info('No links found')
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
continue
# Process artifacts with each operator.
for operator in self.operators:
self.logger.info(f"Processing found onions with operator '{operator}'")
try:
doc = self.operators[operator].process(onions)
# Save the source state.
self.es.save(doc)
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
continue
# # Record stats and update the summary.
# types = artifact_types(doc.get('interestingKeywords'))
# summary.update(types)
# for artifact_type in types:
# self.logger.info(f'types[artifact_type]')
# Log the summary.
self.logger.info(f"New artifacts: {dict(summary)}")
def run_forever(self):
"""Run forever, sleeping for the configured interval between each run."""
while True:
self.run_once()
self.logger.info(f"Sleeping for {self.config.sleep()} seconds")
time.sleep(self.config.sleep())
def artifact_types(artifact_list):
"""Return a dictionary with counts of each artifact type."""
types = {}
for artifact in artifact_list:
artifact_type = artifact.__class__.__name__.lower()
if artifact_type in types:
types[artifact_type] += 1
else:
types[artifact_type] = 1
return types
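# Example (hypothetical artifact objects): artifact_types([Domain(), Domain(), IPAddress()])
# would return {'domain': 2, 'ipaddress': 1}; the `summary` Counter in run_once()
# is updated from exactly this kind of mapping.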

@@ -0,0 +1,50 @@
"""OnionScraper
A Python3 application for indexing and scraping hidden services into ElasticSearch
Installation:
This application assumes you have python3 and pip3 installed.
pip3 install -r requirements.txt
This software is provided subject to the MIT license stated below.
--------------------------------------------------
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------
"""
import argparse
from onionscraper import OnionManager
# Load arguments from user
parser = argparse.ArgumentParser(
prog='onionscraper',
description=__doc__,formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-c', '--config',dest="configFile", required = True, help='Path to config file')
parser.add_argument("--log", dest="logLevel",default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help="Set the logging level, default is INFO")
args = parser.parse_args()
app = OnionManager(args)
app.run()

@@ -0,0 +1,170 @@
import io
import importlib
import traceback
import yaml
from pathlib import Path
SOURCE = 'onionscraper.sources'
OPERATOR = 'onionscraper.operators'
INTERNAL_OPTIONS = [
'saved_state',
'module',
'credentials',
]
ARTIFACT_TYPES = 'artifact_types'
FILTER_STRING = 'filter'
ALLOWED_SOURCES = 'allowed_sources'
NAME = 'name'
class Config:
"""Config read/write operations, and convenience methods."""
def __init__(self, filename, logger):
"""Read a config file."""
self.logger = logger
self.filename = filename
with io.open(self.filename, 'r') as f:
try:
self.logger.info("Loading config file")
self.config = yaml.safe_load(f.read())
except yaml.error.YAMLError:
self.logger.error("YAML error in config")
@staticmethod
def _load_plugin(plugin_type, plugin):
"""Returns plugin class or raises an exception.
:raises: threatingestor.exceptions.PluginError
"""
try:
module = importlib.import_module('.'.join([plugin_type, plugin]))
return module.Plugin
except Exception as e:
print(e)
print(traceback.print_exc())
def daemon(self):
"""Returns boolean, are we daemonizing?"""
return self.config['general']['daemon']
def elasticsearch(self):
"""Returns elasticsaerch config"""
return self.config['general']['elasticsearch']
def sleep(self):
"""Returns number of seconds to sleep between iterations, if daemonizing."""
return self.config['general']['sleep']
# def onionscanner(self):
# """Returns onionscanner config dict"""
# screenshots = self.config['onionscanner'].pop('screenshots_path', None)
# if screenshots:
# self.config['onionscanner']['screenshots_path'] = Path(screenshots)
# else:
# self.config['onionscanner']['screenshots_path'] = Path(__file__).parents[1]/'screenshots'
# blacklist = self.config['onionscanner'].pop('blacklist', None)
# if blacklist:
# self.config['onionscanner']['blacklist'] = blacklist.split(',')
# interestingKeywords = self.config['onionscanner'].pop('interestingKeywords', None)
# if interestingKeywords:
# self.config['onionscanner']['interestingKeywords'] = interestingKeywords.split(',')
# return self.config['onionscanner']
def notifiers(self):
"""Returns notifiers config dictionary."""
return self.config.get('notifiers', {})
def logging(self):
"""Returns logging config dictionary."""
return self.config.get('logging', {})
def credentials(self, credential_name):
"""Return a dictionary with the specified credentials."""
for credential in self.config['credentials']:
for key, value in credential.items():
if key == NAME and value == credential_name:
return credential
return {}
def sources(self):
"""Return a list of (name, Source class, {kwargs}) tuples.
:raises: threatingestor.exceptions.PluginError
"""
sources = []
for source in self.config['sources']:
kwargs = {}
for key, value in source.items():
if key not in INTERNAL_OPTIONS:
kwargs[key] = value
elif key == 'credentials':
# Grab these named credentials
credential_name = value
for credential_key, credential_value in self.credentials(credential_name).items():
if credential_key != NAME:
kwargs[credential_key] = credential_value
# load and initialize the plugin
self.logger.info(f"Found source '{source[NAME]}'")
sources.append((source[NAME], self._load_plugin(SOURCE, source['module']), kwargs))
self.logger.info(f"Found {len(sources)} total sources")
return sources
def operators(self):
"""Return a list of (name, Operator class, {kwargs}) tuples.
:raises: threatingestor.exceptions.PluginError
"""
operators = []
for operator in self.config['operators']:
kwargs = {}
for key, value in operator.items():
if key not in INTERNAL_OPTIONS:
if key == ARTIFACT_TYPES:
# parse out special artifact_types option
artifact_types = []
for artifact in value:
try:
artifact_types.append(threatingestor.artifacts.STRING_MAP[artifact.lower().strip()])
except KeyError:
# ignore invalid artifact types
pass
kwargs[key] = artifact_types
elif key == FILTER_STRING:
# pass in special filter_string option
kwargs['filter_string'] = value
elif key == NAME:
# exclude name key from operator kwargs, since it's not used
pass
else:
kwargs[key] = value
elif key == 'credentials':
# Grab these named credentials
credential_name = value
for credential_key, credential_value in self.credentials(credential_name).items():
if credential_key != NAME:
kwargs[credential_key] = credential_value
# load and initialize the plugin
self.logger.info(f"Found operator '{operator[NAME]}'")
operators.append((operator[NAME], self._load_plugin(OPERATOR, operator['module']), kwargs))
self.logger.info(f"Found {len(operators)} total operators")
return operators
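# Example: given the config entry
#   - name: source-gist
#     module: gist
#     url: https://gist.github.com/search?l=Text&q=.onion
# sources() imports onionscraper.sources.gist via _load_plugin and yields the
# tuple ('source-gist', gist.Plugin, {'url': 'https://gist.github.com/search?l=Text&q=.onion'}).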

@@ -0,0 +1,774 @@
import sys
import traceback
from elasticsearch import Elasticsearch, helpers
class DbHandlerElasticSearch:
def __init__(self, config, logger):
self.logger = logger
self.logger.info('Creating Elasticsearch mapping')
self.config = config
self.mapping = '''
{
"mappings": {
"_doc": {
"properties": {
"html": {
"type": "text"
},
"onionscan": {
"type": "nested",
"properties": {
"bitcoinDetected": {
"type": "boolean"
},
"bitcoinServices": {
"properties": {
"bitcoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"bitcoin_test": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"dogecoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"litecoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
},
"certificates": {
"type": "nested",
"properties": {
"AuthorityKeyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"BasicConstraintsValid": {
"type": "boolean"
},
"CRLDistributionPoints": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"DNSNames": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ExtKeyUsage": {
"type": "long"
},
"Extensions": {
"properties": {
"Critical": {
"type": "boolean"
},
"Id": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"IsCA": {
"type": "boolean"
},
"Issuer": {
"properties": {
"CommonName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Locality": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Names": {
"properties": {
"Type": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"Organization": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OrganizationalUnit": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Province": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"IssuingCertificateURL": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"KeyUsage": {
"type": "long"
},
"MaxPathLen": {
"type": "long"
},
"MaxPathLenZero": {
"type": "boolean"
},
"NotAfter": {
"type": "date"
},
"NotBefore": {
"type": "date"
},
"OCSPServer": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"PermittedDNSDomainsCritical": {
"type": "boolean"
},
"PolicyIdentifiers": {
"type": "long"
},
"PublicKey": {
"properties": {
"E": {
"type": "text"
},
"N": {
"type": "text"
}
}
},
"PublicKeyAlgorithm": {
"type": "long"
},
"Raw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawIssuer": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawSubject": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawSubjectPublicKeyInfo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawTBSCertificate": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text"
},
"Signature": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SignatureAlgorithm": {
"type": "long"
},
"Subject": {
"properties": {
"CommonName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Locality": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Names": {
"properties": {
"Type": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"Organization": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OrganizationalUnit": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Province": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"SubjectKeyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Version": {
"type": "long"
}
}
},
"crawls": {
"type": "nested",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dateScanned": {
"type": "date"
},
"f_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ftpBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ftpDetected": {
"type": "boolean"
},
"ftpFingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"hiddenService": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identifierReport": {
"properties": {
"analyticsIDs": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"bitcoinAddresses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"emailAddresses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"exifImages": {
"properties": {
"exifTags": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"location": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"foundApacheModStatus": {
"type": "boolean"
},
"linkedOnions": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"openDirectories": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"privateKeyDetected": {
"type": "boolean"
},
"serverVersion": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"ircDetected": {
"type": "boolean"
},
"lastAction": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"mongodbDetected": {
"type": "boolean"
},
"online": {
"type": "boolean"
},
"performedScans": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"pgpKeys": {
"properties": {
"armoredKey": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identity": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"ricochetDetected": {
"type": "boolean"
},
"skynetDetected": {
"type": "boolean"
},
"smtpBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"smtpDetected": {
"type": "boolean"
},
"smtpFingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sshBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sshDetected": {
"type": "boolean"
},
"sshKey": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timedOut": {
"type": "boolean"
},
"tlsDetected": {
"type": "boolean"
},
"vncDetected": {
"type": "boolean"
},
"webDetected": {
"type": "boolean"
},
"xmppDetected": {
"type": "boolean"
}
}
},
"screenshots": {
"type": "nested",
"properties": {
"dateScreenshoted": {
"type": "date"
},
"filename": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
'''
try:
self.es = Elasticsearch([{
'host':self.config['host'],
'port':self.config['port']}])
self.index = self.config['index']
self.es.indices.create(
index=self.index,
body=self.mapping,
ignore=400)
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.format_exc())
sys.exit(1)
def count(self):
self.es.indices.refresh(self.index)
status = self.es.count(index=self.index)
if status['_shards']['successful'] == 1:
self.logger.info('Successful')
self.logger.info('Count:%d',status['count'])
else:
self.logger.error(status)
def save(self, doc):
self.es.index(index=self.index,body=doc)
self.count()
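# Usage sketch, assuming the elasticsearch block from the example config:
#   es = DbHandlerElasticSearch({'index': 'darkweb', 'host': '127.0.0.1', 'port': 9200}, logger)
#   es.save({'hiddenService': 'xmh57jrzrnw6insl.onion', 'dateScanned': '2020-07-01T00:00:00Z'})
# save() indexes the document and count() logs the running document total.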

@@ -0,0 +1,33 @@
import os
import logging
from pathlib import Path
class LoggerHandler():
def __init__(self, level):
self.level = getattr(logging, level)
self.logger = logging.getLogger("OnionScraper")
self.logger.setLevel(self.level)
# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(self.level)
# create file logging
logFile = Path(__file__).parents[1]
logging_path = os.path.join(logFile, "info.log")
fh = logging.FileHandler(logging_path)
# create formatter
formatter = logging.Formatter('[%(asctime)s] - %(name)s - %(levelname)s - %(message)s',datefmt='%a, %d %b %Y %H:%M:%S')
formatter_console = logging.Formatter('[%(asctime)s] - %(levelname)s - %(message)s',datefmt='%d %b %Y %H:%M:%S')
# add formatter to ch
ch.setFormatter(formatter_console)
fh.setFormatter(formatter)
# add ch to logger
self.logger.addHandler(ch) #added logging into console
self.logger.addHandler(fh) #added logging into file
def start_logging(self):
self.logger.info('Starting OnionScraper')
return self.logger
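# Usage sketch:
#   logger = LoggerHandler('INFO').start_logging()
#   logger.info('hello')   # written to both the console and info.log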

@@ -0,0 +1,78 @@
import re
class Operator:
"""Base class for all Operator plugins.
Note: This is an abstract class. You must extend ``__init__`` and call
``super`` to ensure this class's constructor is called. You must override
``handle_artifact`` with the same signature. You may define additional
``handle_{artifact_type}`` methods as needed (see the threatkb operator for
an example) - these methods are purely convention, and are not required.
When adding additional methods to child classes, consider prefixing the
method name with an underscore to denote a ``_private_method``. Do not
override other existing methods from this class.
"""
def __init__(self, artifact_types=None, filter_string=None, allowed_sources=None):
"""Override this constructor in child classes.
The arguments above (artifact_types, filter_string, allowed_sources)
should be accepted explicitly, as above, in all child classes.
Additional arguments should be added: url, auth, etc, whatever is
needed to set up the object.
Each operator should default self.artifact_types to a list of Artifacts
supported by the plugin, and allow passing in artifact_types to
overwrite that default.
Example:
>>> self.artifact_types = artifact_types or [
... artifacts.IPAddress,
... artifacts.Domain,
... ]
It's recommended to call this __init__ method via super from all child
classes. Remember to do so *before* setting any default artifact_types.
"""
self.artifact_types = artifact_types or []
self.filter_string = filter_string or ''
self.allowed_sources = allowed_sources or []
def handle_onion(self, url):
"""Override with the same signature.
:param url: A single onion artifact (the namedtuple produced by sources).
:returns: None (always ignored)
"""
raise NotImplementedError()
def _artifact_is_allowed(self, artifact):
"""Returns True if this is allowed by this plugin's filters."""
# # Must be in allowed_types.
# if not any(isinstance(artifact, t) for t in self.artifact_types):
# return False
#
# # Must match the filter string.
# if not artifact.match(self.filter_string):
# return False
#
# # Must be in allowed_sources, if set.
# if self.allowed_sources and not any(
# [re.compile(p).search(artifact.source_name)
# for p in self.allowed_sources]):
# return False
#
return True
def process(self, onions):
"""Process all applicable onions."""
for onion in onions:
if self._artifact_is_allowed(onion.url):
self.handle_onion(onion)
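# A minimal sketch of a child plugin (hypothetical, for illustration), following
# the conventions documented above: call super().__init__ first, then override
# handle_onion with the same signature.
#
# class EchoPlugin(Operator):
#     def __init__(self, logger, artifact_types=None, filter_string=None, allowed_sources=None):
#         super().__init__(artifact_types, filter_string, allowed_sources)
#         self.logger = logger
#     def handle_onion(self, onion):
#         self.logger.info(onion.url)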

@@ -0,0 +1,261 @@
import re
import os
import sys
import json
import time
import random
import traceback
import subprocess
from uuid import uuid4
from pathlib import Path
from datetime import datetime as dt
from json.decoder import JSONDecodeError
from concurrent.futures import ProcessPoolExecutor
from threading import Timer
import requests
from stem.control import Controller
from stem import Signal
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from onionscraper.operators import Operator
class Plugin(Operator):
"""OnionScraper main work logic.
Handles reading the config file, calling sources, maintaining state and
sending artifacts to operators.
"""
def __init__(self, logger, **kwargs):
self.logger = logger
self.logger.info('Initializing OnionScanner')
screenshots = kwargs.pop('screenshots_path', None)
if screenshots:
self.screenshots = Path(screenshots)
else:
self.screenshots = Path(__file__).parents[1]/'screenshots'
self.onionscan = kwargs['binpath']
self.timeout = int(kwargs['timeout'])
self.proxy = kwargs['socks5']
self.torControl = kwargs['TorController']
self.retries = int(kwargs['retries'])
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'}
blacklist = kwargs['blacklist'].split(',')
self.blacklist = re.compile('|'.join([re.escape(word) for word in blacklist]), re.IGNORECASE)
keywords = kwargs['interestingKeywords'].split(',')
self.keywords = re.compile('|'.join([re.escape(word) for word in keywords]), re.IGNORECASE)
self.session = self.get_tor_session()
def response(self, status, content, onion):
"""
status: success/failure
content: dict
onion: str
return: dict
"""
return {'status': status, 'data': content, 'onion': onion}
def parseDoc(self, data):
data['onionscan'].pop('simpleReport', None)
crawls = data['onionscan'].pop('crawls', None) or {}
hiddenService = data['onionscan'].pop('hiddenService', None)
data['onionscan']['crawls'] = [*crawls]
data['hiddenService'] = hiddenService
for onion in crawls.keys():
print(onion)
#q.enqueue(self.crawl, onion)
#with open('test.json', 'w', encoding='utf-8') as f:
# json.dump(data, f, ensure_ascii=False, indent=4)
return data
def format_directory(self, directory):
d = dt.now()
year = str(d.year)
month = str(d.month)
# prefix month and day with "0" if it is only one digit
if len(month) < 2:
month = "0" + month
day = str(d.day)
if len(day) < 2:
day = "0" + day
save_path = directory/year/month/day
if not os.path.isdir(save_path):
self.logger.info("[*] Creating directory to save screenshots")
os.makedirs(save_path)
return save_path
def take_screenshot(self, save_path, onion):
binary = FirefoxBinary('/home/tony/Projects/OnionScraper/geckodriver')
fp = webdriver.FirefoxProfile()
fp.set_preference('network.proxy.type', 1)
fp.set_preference('network.proxy.socks', '127.0.0.1')
fp.set_preference('network.proxy.socks_port', 9050)
fp.set_preference('network.proxy.socks_remote_dns', True)
options = Options()
options.headless = True
driver = webdriver.Firefox(
executable_path='/home/tony/Projects/OnionScraper/geckodriver',
options=options,
firefox_profile=fp)
url = 'http://' + onion
driver.get(url)
uid = str(uuid4()).split('-')[0]
filename = f"{onion}_screenshot_{uid}.png"
f_name = f"{save_path}/{filename}"
driver.save_screenshot(f_name)
driver.quit()
if os.path.isfile(f_name):
self.logger.info(f'[*] Screenshot was taken. {f_name}')
dateScreenshoted = dt.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f')+ 'Z'
result = {'dateScreenshoted':dateScreenshoted,'filename':filename}
return self.response("success",result,onion)
else:
self.logger.error('[x] Unable to take screenshot')
return self.response("failure",None,onion)
def get_tor_session(self):
try:
s = requests.session()
s.proxies = self.proxy
s.headers.update(self.headers)
except Exception as e:
self.logger.error(e)
self.logger.debug(traceback.print_exc())
return s
# signal TOR for a new connection
def renew_connection(self):
with Controller.from_port(port = self.torControl['port']) as controller:
# Now we switch TOR identities to make sure we have a good connection
self.logger.info('Getting new Tor IP')
# authenticate to our local TOR controller
controller.authenticate(self.torControl['password'])
# send the signal for a new identity
controller.signal(Signal.NEWNYM)
# wait for the new identity to be initialized
time.sleep(controller.get_newnym_wait())
session = self.get_tor_session()
self.logger.info(f"IP is {session.get('http://httpbin.org/ip').json()['origin']}")
def handle_timeout(self, process, onion):
#
# Handle a timeout from the onionscan process.
#
try:
# kill the onionscan process
process.kill()
self.logger.info("[!!!] Killed the onionscan process.")
except Exception:
pass
self.renew_connection()
return
def run_sessions(self, onion):
retry = 0
result = None
while True:
try:
url = 'http://'+onion
self.logger.info(url)
content = self.session.get(url)
if content.status_code == 200:
result = content.json()
except JSONDecodeError as e:
self.logger.debug(f'JSONDecodeError {e}')
result = content.text
except Exception as e:
self.logger.error(e)
self.logger.debug(traceback.print_exc())
finally:
if result:
return self.response("success",result,onion)
else:
self.logger.info('[x] No results found retrying ...')
retry += 1
self.renew_connection()
if retry > self.retries:
self.logger.error('[x] Max retries exceeded')
return self.response("failure",None, onion)
def run_onionscan(self, onion):
self.logger.info("[*] Running onionscan on %s", onion)
# fire up onionscan
process = subprocess.Popen([self.onionscan,"--webport=0","--jsonReport","--simpleReport=false",onion],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
# start the timer and let it run until the configured timeout
process_timer = Timer(self.timeout,self.handle_timeout,args=[process,onion])
process_timer.start()
# wait for the onion scan results
stdout = process.communicate()[0]
# we have received valid results so we can kill the timer
if process_timer.is_alive():
process_timer.cancel()
return self.response("success",stdout.decode(),onion)
self.logger.info("[!!!] Process timed out for %s", onion)
return self.response("failure",None, onion)
def handle_onion(self, onion_tuple):
onion = onion_tuple.url
self.logger.info(f'Processing {onion} with onionscan')
try:
blacklist_URL = self.blacklist.search(onion)
if blacklist_URL:
self.logger.info(f"[X] Blocked by blacklist => matched keyword {blacklist_URL.group()}")
else:
self.logger.debug("[*] URL blacklist test: PASSED")
results = self.run_onionscan(onion)
# the onionscan report is a JSON string; parse it before checking webDetected
if results['status'] == 'success' and json.loads(results['data']).get('webDetected'):
content = self.run_sessions(onion)
print(content)
#sys.exit(0)
#if content['status'] == 'success':
# blacklist_CONTENT = self.blacklist.search(content['data'])
# if blacklist_CONTENT:
# self.logger.info(f"[X] Blocked by blacklist content => matched keyword {blacklist_CONTENT.group()}")
# else:
# self.logger.debug("[*] CONTENT blacklist test: PASSED")
# screenshot = self.take_screenshot(self.format_directory(self.screenshots), onion)
# self.logger.info("Indexing!")
# doc = {
# 'onionscan':json.loads(results['data']),
# 'html':content['data'],
# 'screenshots':screenshot['data'],
# 'interestingKeywords':self.interestingKeywords.findall(content['data'])
# }
# return self.parseDoc(doc)
else:
self.logger.info(f"[x] hidden service {onion} is not active")
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
finally:
pass
#sys.exit(0)

@@ -0,0 +1,15 @@
from onionscraper.operators import Operator
class Plugin(Operator):
"""Operator for output to flat CSV file."""
def __init__(self, filename, base_score):
"""CSV operator."""
self.filename = filename
#super(Plugin, self).__init__(artifact_types, filter_string, allowed_sources)
def handle_artifact(self, artifact):
"""Operate on a single artifact."""
pass

@@ -0,0 +1,41 @@
from collections import namedtuple
class Source(object):
"""Base class for all Source plugins.
Note: This is an abstract class. You must override ``__init__`` and ``run``
in child classes. You should not override ``process_element``. When adding
additional methods to child classes, consider prefixing the method name
with an underscore to denote a ``_private_method``.
"""
def __init__(self, name, *args, **kwargs):
"""Override this constructor in child classes.
The first argument must always be ``name``.
Other arguments should be url, auth, etc., whatever is needed to set
up the object.
"""
self.onion = namedtuple('onion', ['url','source','type'])
def run(self):
"""Run and return ``(saved_state, list(Artifact))``.
Override this method in child classes.
The method signature and return values must remain consistent.
The method should attempt to pick up where we left off using
``saved_state``, if supported. If ``saved_state`` is ``None``, you can
assume this is a first run. If state is maintained by the remote
resource (e.g. as it is with SQS), ``saved_state`` should always be
``None``.
"""
raise NotImplementedError()
def process_element(self, content, reference_link, include_nonobfuscated=False):
"""Take a single source content/url and return a list of Artifacts.
This is the main work block of Source plugins, which handles
IOC extraction and artifact creation.
:param content: String content to extract from.
:param reference_link: Reference link to attach to all artifacts.
:param include_nonobfuscated: Include non-defanged URLs in output?
"""
self.logger.debug(f"Processing in source '{self.name}'")
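# A minimal sketch of a child source (hypothetical, for illustration): set up
# name and logger, then return a list of onion namedtuples from run().
#
# class StaticPlugin(Source):
#     def __init__(self, logger, name, url):
#         self.logger = logger
#         self.name = name
#         self.url = url
#         super().__init__(name)
#     def run(self):
#         return [self.onion(url='xmh57jrzrnw6insl.onion', source=self.name, type='domain')]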

@@ -0,0 +1,153 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
__author__ = 'Andrey Glauzer'
__license__ = "MIT"
__version__ = "1.0.1"
__maintainer__ = "Andrey Glauzer"
__status__ = "Development"
import requests
import json
import re
import urllib.parse
from random import choice
import time
from bs4 import BeautifulSoup
from onionscraper.sources import Source
class Plugin(Source):
def __init__(self, logger, name, url):
self.logger = logger
self.name = name
self.url = url
self.desktop_agents = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0']
super().__init__(self)
def run(self):
self.logger.info('Starting Gist Scraper')
self.cookies()
self.pagination()
self.scraping()
return self.raw()
@property
def random_headers(self):
return {
'User-Agent': choice(self.desktop_agents),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
}
def cookies(self):
self.logger.info('Setting GIST cookies')
with requests.Session() as self.session:
self.headers = self.random_headers
request = self.session.get(self.url, headers=self.headers)
if request.status_code == 200:
pass
else:
self.logger.error('No Response from GIST')
def pagination(self):
request = self.session.get(
f"https://gist.github.com/search?l=Text&q={urllib.parse.quote('.onion')}", headers=self.headers)
self.soup = BeautifulSoup(request.content, features="lxml")
pages = []
self.urls = [self.url]
try:
for pagination in self.soup.find('div', {'class': 'pagination'}).findAll('a'):
pages.append(pagination.get_text())
except:
pages = False
if pages:
cont = 2
while cont <= 1: # int(pages[-2]):
cont += 1
full_url = f"https://gist.github.com/search?l=Text&p={cont-1}&q={urllib.parse.quote('.onion')}"
self.urls.append(full_url)
def scraping(self):
url = []
for inurl in self.urls:
self.logger.info(f"Connecting to {inurl}")
time.sleep(5)
request = self.session.get(inurl, headers=self.headers)
if request.status_code == 200:
soup = BeautifulSoup(request.content, features="lxml")
for code in soup.findAll('div', {'class': 'gist-snippet'}):
if '.onion' in code.get_text().lower():
for raw in code.findAll('a', {'class': 'link-overlay'}):
try:
url.append(raw['href'])
except:
pass
self.urls_raw = []
for get in url:
self.logger.info(f"Connecting to {get}")
time.sleep(5)
try:
request = self.session.get(get, headers=self.headers)
if request.status_code == 200:
soup = BeautifulSoup(request.content, features="lxml")
for raw in soup.findAll('a', {'class': 'btn btn-sm'}):
try:
gist_url = f"https://gist.githubusercontent.com{raw['href']}"
self.urls_raw.append(gist_url)
except:
pass
except(requests.exceptions.ConnectionError,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ReadTimeout,
requests.exceptions.InvalidURL) as e:
self.logger.error(
f"I was unable to connect to the url, because an error occurred.\n{e}")
pass
def raw(self):
self.logger.info('Performing replaces and regex. WAIT...')
itens = []
onions = []
for raw in self.urls_raw:
if '.txt' in raw.lower() \
or '.csv' in raw.lower():
time.sleep(5)
request = self.session.get(raw, headers=self.headers)
self.soup = BeautifulSoup(request.content, features="lxml")
for pre in self.soup.findAll('body'):
lines = pre.get_text().split('\n')
itens.extend(lines)
regex = re.compile(
r"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
for lines in itens:
rurls = lines \
.replace('\xad', '') \
.replace('\n', '') \
.replace("http://", '') \
.replace("https://", '') \
.replace("www.", "")
url = regex.match(rurls)
if url is not None:
onions.append(self.onion(url=url.group(), source='gist', type='domain'))
return onions
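# Note: regex.match() is applied to each cleaned line, so only lines that
# start with a bare onion domain (scheme and 'www.' already stripped above)
# produce a match, e.g. 'xmh57jrzrnw6insl.onion' becomes
# onion(url='xmh57jrzrnw6insl.onion', source='gist', type='domain').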

@@ -0,0 +1,3 @@
def hola(ass):
print(ass)

@@ -0,0 +1,16 @@
beautifulsoup4==4.9.1
certifi==2020.6.20
chardet==3.0.4
click==7.1.2
elasticsearch==7.8.0
idna==2.10
lxml==4.5.1
# Editable Git install with no remote (OnionScraper==1.0.0)
-e /home/tony/Projects/OnionScraper
PySocks==1.7.1
PyYAML==5.3.1
requests==2.24.0
selenium==3.141.0
soupsieve==2.0.1
stem==1.8.0
urllib3==1.25.9

@@ -0,0 +1,22 @@
from setuptools import setup
def readme_file_contents():
with open('README.md') as readme_file:
data = readme_file.read()
return data
setup(
name='OnionScraper',
version='1.0.0',
description='Python app to scrape and index hidden services',
long_description=readme_file_contents(),
author='dan',
author_email='test@google.com',
license='MIT',
packages=['onionscraper'],
zip_safe=False,
install_requires=[]
)

Binary file not shown (image added, 650 B)

Binary file not shown (image added, 13 KiB)

Binary file not shown (image added, 9.7 KiB)

@@ -0,0 +1,328 @@
{{define "fields"}}
{{range .Fields}}<td><a href="/?search={{.}}">{{.}}</a></td>{{end}}<td><a href="/?search={{index .Fields 0}}" title="{{.Links}} Relationships Share an Identifier connection with this Identifier">{{.Links}}</a></td>
{{end}}
{{define "table"}}
<br/>
<div id="{{.Title}}" class="row">
<div class="col-lg-12">
<div class="panel panel-default">
<div class="panel-heading">{{.AltTitle}} linked to {{.SearchTerm}} ({{len .Rows}})</div>
{{ $length := len .RollupCounts }} {{ if ne $length 0 }}
<div class="panel-body text-center">
<canvas id="myChart{{.Title}}" style="max-width:300px;max-height:300px;margin:auto;" width="300px" height="300px"></canvas>
<script>
var ctx = document.getElementById("myChart{{.Title}}");
var myChart = new Chart(ctx, {
type: 'bar',
data: {
labels: [
{{ range $key, $value := .RollupCounts }}
"{{$key}}",
{{end}}
],
datasets: [{
data: [
{{ range $key, $value := .RollupCounts }}
{{$value}},
{{end}}
],
borderWidth: 1
}]
},
options: {
scales: {
yAxes: [{
ticks: {
beginAtZero:true
}
}]
},
title: {
text: "Breakdown of {{.Title}}s for {{.SearchTerm}}",
display:true
},
legend :{
display:false
},
}
});
</script>
{{ range $key, $value := .RollupCounts }}
{{if ne $key ""}}
<button class="btn btn-primary" style="margin: 5px;" type="button">
{{$key}} <span class="badge">{{$value}}</span>
</button>
{{end}}
{{end}}
</div>
{{end}}
<!-- Table -->
<table class="table table-bordered table-striped">
<tr>
<th>Tag</th>
{{range .Heading}}
<th>{{.}}</th>
{{end}}
<th>Other Links</th>
</tr>
{{range .Rows}}
<tr><td><span class="label label-default"><a href="/?search={{.Tag}}">{{.Tag}}</a></span></td>{{template "fields" .}}</tr>
{{end}}
</table>
</div>
</div>
</div>
<br/>
{{end}}
<!-- ############ SUMMARY ################# -->
{{define "summary"}}
<table class="table table-bordered table-striped">
{{range .Fields}}
<tr><th><a href="#{{.Key}}">{{.AltTitle}}</a></th>
<td>
<div class="progress">
<div class="progress-bar progress-bar-striped" style="width: {{.Total}}%">
{{.Value}}
</div>
</div>
</td></tr>
{{end}}
</table>
{{end}}
<!-- ############ Main Page ################# -->
<!DOCTYPE html>
<html lang="en"><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="">
<meta name="author" content="">
<link rel="icon" href="/images/favicon.ico">
<script src="/scripts/chart.bundle.js"></script>
<title>OnionScan Correlations Lab</title>
<link href="/style/bootstrap.css" rel="stylesheet">
<style>
@font-face {
font-family: 'Roboto Slab';
font-style: normal;
font-weight: 400;
src: local('Roboto Slab Regular'), local('RobotoSlab-Regular'), url(/fonts/RobotoSlab-Regular.woff) format('woff');
}
@font-face {
font-family: 'Roboto Slab';
font-style: normal;
font-weight: 700;
src: local('Roboto Slab Bold'), local('RobotoSlab-Bold'), url(/fonts/RobotoSlab-Bold.woff) format('woff');
}
body{
font-family: 'Roboto Slab';
}
.label a {
color:#fff;
}
.btn {
margin-bottom:5px;
}
</style>
</head>
<body role="document">
<nav class="navbar navbar-default navbar-fixed-top">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/"><img style="margin-top: -16px;" width="75px" height="75px" src="/images/logo.png"/></a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li><a href="/" style="color:#fff;">Summary</a></li>
<li><a href="/saved" style="color:#fff;">Saved Searches</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>
</nav>
<br/><br/> <br/><br/>
<div class="container theme-showcase" role="main">
<form action="/">
<div class="row">
<div class="col-lg-12">
<div class="input-group">
<input name="search" type="text" class="form-control" placeholder="{{.SearchTerm}}" value="{{.SearchTerm}}"/>
<span class="input-group-btn">
<input class="btn btn-default" type="submit" value="Search!">
</span>
</div><!-- /input-group -->
</div><!-- /.col-lg-12 -->
</div><!-- /.row -->
</form>
<br/>
{{if ne .Error ""}}
<div class="alert alert-danger" role="alert">{{.Error}}</div>
{{end}}
{{if ne .Success ""}}
<div class="alert alert-success" role="alert">{{.Success}}</div>
{{end}}
{{if ne .SearchTerm ""}}
{{ $length := len .Tables }} {{ if ne $length 0 }}
<div class="row">
<div class="col-lg-3 text-center">
<h2>Options</h2>
<form action="/save" method="post">
<input type="hidden" name="search" value="{{.SearchTerm}}"/>
<input type="hidden" name="token" value="{{.Token}}"/>
<span class="input-group-btn">
<input class="btn btn-default" type="submit" value="Save Search">
</span>
</form>
{{ $lentags := len .UserTags }}
{{if ne 0 $lentags}}
<h2>Linked Tags</h2>
{{ $search := .SearchTerm }}
{{ $token := .Token }}
{{ range .UserTags }}
<form action="/delete-tag" method="post">
<input type="hidden" name="search" value="{{$search}}"/>
<input type="hidden" name="tag" value="{{.}}"/>
<input type="hidden" name="token" value="{{$token}}"/>
<div class="btn-group">
<button class="btn btn-default" type="button"><a href="/?search={{.}}">{{.}}</a></button>
{{if ne . $search}}
<button class="btn btn-default" type="submit"><img src="/images/remove.png" width="16px" height="16px" title="remove tag"/></button>
{{end}}
</div>
</form>
{{end}}
{{end}}
<h3>Tag Search Term</h3>
<form action="/tag" method="post">
<div class="input-group">
<input type="text" name="tag" class="form-control" placeholder="Enter Tag..."/>
<input type="hidden" name="search"value="{{.SearchTerm}}"/>
<input type="hidden" name="token" value="{{.Token}}"/>
<span class="input-group-btn">
<input class="btn btn-default" type="submit" value="Tag!">
</span>
</div>
</form>
</div>
<div class="col-lg-9">
<div class="panel panel-default">
<div class="panel-heading">Summary for {{.SearchTerm}} {{if ne "" .Summary.Title}}({{.Summary.Title}}){{end}}&nbsp;&nbsp;
{{range .Tags}}
<span class="label label-{{if eq . "mod_status"}}danger{{else}}primary{{end}}"><a href="/?search={{.}}">{{.}}</a></span>&nbsp;
{{end}}
</div>
{{template "summary" .Summary}}
</div>
{{range .Tables}}
{{template "table" .}}
{{end}}
</div>
</div>
{{else}}
<div class="alert alert-warning" role="alert">No Relationships Found for <strong>{{.SearchTerm}}</strong></div>
{{end}}
{{else}}
{{ $length := len .SearchResults }}
{{ if eq $length 0 }}
<div class="jumbotron">
<h1>Welcome to your OnionScan Correlation Lab!</h1>
<p>You have <strong>{{.RelationshipNum}}</strong> correlations to hunt through!</p>
</div>
{{else}}
<h2>Saved Searches</h2>
<ul>
{{ if eq $length 1}}
<div class="alert alert-warning" role="alert">You don't have any saved searches yet!</div>
{{else}}
{{range .SearchResults}}
{{if ne . "onionscan://dummy"}}
<li><a href="/?search={{.}}">{{.}}</a></li>
{{end}}
{{end}}
{{end}}
</ul>
{{end}}
{{end}}
</body>
</html>

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,459 @@
package webui
import (
"errors"
"fmt"
"github.com/s-rah/onionscan/config"
"github.com/s-rah/onionscan/crawldb"
"github.com/s-rah/onionscan/utils"
"html/template"
"log"
"net/http"
"strconv"
"strings"
)
type WebUI struct {
osc *config.OnionScanConfig
token string
Done chan bool
}
type SummaryField struct {
Key string
Value int
AltTitle string
Total int
}
type Summary struct {
Fields []SummaryField
Total int
Title string
}
type Content struct {
SearchTerm string
Error string
Summary Summary
Tables []Table
Tags []string
RelationshipNum int
Token string
Success string
UserTags []string
SearchResults []string
}
type Row struct {
Fields []string
Tag string
Links int
}
type Table struct {
Title string
SearchTerm string
Heading []string
Rows []Row
Rollups []int
RollupCounts map[string]int
AltTitle string
}
// GetUserDefinedTable returns, from an initial relationship, a complete user
// defined relationship table - in the order it is defined in the crawl config.
func (wui *WebUI) GetUserDefinedTable(rel crawldb.Relationship) (Table, error) {
log.Printf("Loading User Defined Relationship %s", rel.From)
config, ok := wui.osc.CrawlConfigs[rel.From]
if ok {
var table Table
crName := strings.SplitN(rel.Type, "/", 2)
if len(crName) == 2 {
table.Title = crName[0]
cr, err := config.GetRelationship(crName[0])
if err == nil {
for i, er := range cr.ExtraRelationships {
table.Heading = append(table.Heading, er.Name)
if er.Rollup {
table.Rollups = append(table.Rollups, i)
}
}
table.Heading = append(table.Heading, "Onion")
log.Printf("Returning User Table Relationship %v", table)
return table, nil
}
}
}
log.Printf("Could not make Table")
return Table{}, errors.New("Invalid Table")
}
// GetUserDefinedRow returns, from an initial relationship, a complete user
// defined relationship row - in the order it is defined in the crawl config.
func (wui *WebUI) GetUserDefinedRow(rel crawldb.Relationship) (string, []string) {
log.Printf("Loading User Defined Relationship %s", rel.From)
config, ok := wui.osc.CrawlConfigs[rel.From]
if ok {
userrel, err := wui.osc.Database.GetUserRelationshipFromOnion(rel.Onion, rel.From)
if err == nil {
// We can now construct the user
// relationship in the right order.
crName := strings.SplitN(rel.Type, "/", 2)
if len(crName) == 2 {
cr, err := config.GetRelationship(crName[0])
row := make([]string, 0)
if err == nil {
for _, er := range cr.ExtraRelationships {
log.Printf("Field Value: %v", userrel[crName[0]+"/"+er.Name].Identifier)
row = append(row, userrel[crName[0]+"/"+er.Name].Identifier)
}
row = append(row, rel.From)
log.Printf("Returning User Row Relationship %s %v %s", crName[0], row, rel.Onion)
return crName[0], row
}
} else {
log.Printf("Could not derive config relationship from type %s", rel.Type)
}
}
}
log.Printf("Invalid Row")
return "", []string{}
}
// Save implements the Saved Searches Feature
func (wui *WebUI) Save(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again.", http.StatusFound)
return
}
search := r.PostFormValue("search")
token := r.PostFormValue("token")
if token != wui.token {
path := fmt.Sprintf("/?search=%v&error=Invalid random token, Please try again.", search)
http.Redirect(w, r, path, http.StatusFound)
return
}
wui.osc.Database.InsertRelationship(search, "onionscan://user-data", "search", "")
path := fmt.Sprintf("/?search=%v&success=Successfully Saved Search", search)
http.Redirect(w, r, path, http.StatusFound)
}
// Tag implements the /tag endpoint.
func (wui *WebUI) Tag(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again.", http.StatusFound)
return
}
search := r.PostFormValue("search")
tag := r.PostFormValue("tag")
token := r.PostFormValue("token")
if token != wui.token {
path := fmt.Sprintf("/?search=%v&error=Invalid random token, Please try again.", search)
http.Redirect(w, r, path, http.StatusFound)
return
}
wui.osc.Database.InsertRelationship(search, "onionscan://user-data", "tag", tag)
path := fmt.Sprintf("/?search=%v&success=Successfully Added Tag %v to %v", search, tag, search)
http.Redirect(w, r, path, http.StatusFound)
}
// Delete tag implements the /delete-tag endpoint
func (wui *WebUI) DeleteTag(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again.", http.StatusFound)
return
}
search := r.PostFormValue("search")
tag := r.PostFormValue("tag")
token := r.PostFormValue("token")
if token != wui.token {
path := fmt.Sprintf("/?search=%v&error=Invalid random token, Could not delete tag. Please try again.", search)
http.Redirect(w, r, path, http.StatusFound)
return
}
err = wui.osc.Database.DeleteRelationship(search, "onionscan://user-data", "tag", tag)
if err != nil {
http.Redirect(w, r, "/?error=Something Went Very Wrong! Please try again: "+err.Error(), http.StatusFound)
return
}
path := fmt.Sprintf("/?search=%v&success=Successfully Deleted Tag %v from %v", search, tag, search)
http.Redirect(w, r, path, http.StatusFound)
}
// SavedSearches provides the user with a list of searches they have saved.
func (wui *WebUI) SavedSearches(w http.ResponseWriter, r *http.Request) {
results, _ := wui.osc.Database.GetRelationshipsWithIdentifier("onionscan://user-data")
var content Content
content.SearchResults = append(content.SearchResults, "onionscan://dummy")
for _, rel := range results {
if rel.Type == "search" {
content.SearchResults = append(content.SearchResults, rel.Onion)
}
}
var templates = template.Must(template.ParseFiles("templates/index.html"))
templates.ExecuteTemplate(w, "index.html", content)
}
// Index implements the main search functionality of the webui
func (wui *WebUI) Index(w http.ResponseWriter, r *http.Request) {
search := strings.TrimSpace(r.URL.Query().Get("search"))
errMsg := strings.TrimSpace(r.URL.Query().Get("error"))
success := strings.TrimSpace(r.URL.Query().Get("success"))
var content Content
mod_status := false
pgp := false
ssh := false
uriCount := 0
content.Token = wui.token
content.Error = errMsg
content.Success = success
if search != "" {
content.SearchTerm = search
var results []crawldb.Relationship
tables := make(map[string]Table)
results, _ = wui.osc.Database.GetRelationshipsWithOnion(search)
results_identifier, _ := wui.osc.Database.GetRelationshipsWithIdentifier(search)
results = append(results, results_identifier...)
for _, rel := range results {
if rel.Type == "page-info" {
content.Summary.Title = rel.Identifier
}
if rel.From == "onionscan://user-data" {
if rel.Type == "tag" {
content.UserTags = append(content.UserTags, rel.Identifier)
utils.RemoveDuplicates(&content.UserTags)
if rel.Identifier == search {
// We want to surface the onions *not* the tag
table, ok := tables["search-results"]
log.Printf("%v %v", search, ok)
if !ok {
var newTable Table
newTable.Title = rel.Type
newTable.Heading = []string{"Onion"}
tables["search-results"] = newTable
table = newTable
}
links := wui.osc.Database.GetRelationshipsCount(rel.Identifier) - 1
table.Rows = append(table.Rows, Row{Fields: []string{rel.Onion}, Tag: rel.Identifier, Links: links})
tables["search-results"] = table
} else {
table, ok := tables["search-results"]
if !ok {
var newTable Table
newTable.Title = rel.Type
newTable.Heading = []string{"Tags"}
tables[rel.Type] = newTable
table = newTable
}
links := wui.osc.Database.GetRelationshipsCount(rel.Identifier) - 1
table.Rows = append(table.Rows, Row{Fields: []string{rel.Identifier}, Tag: rel.Onion, Links: links})
tables[rel.Type] = table
}
}
} else if utils.IsOnion(rel.Onion) && rel.Type != "database-id" && rel.Type != "user-relationship" {
table, ok := tables[rel.Type]
if !ok {
var newTable Table
newTable.Title = rel.Type
newTable.Heading = []string{"Identifier", "Onion"}
tables[rel.Type] = newTable
table = newTable
}
links := wui.osc.Database.GetRelationshipsCount(rel.Identifier) - 1
table.Rows = append(table.Rows, Row{Fields: []string{rel.Identifier, rel.Onion}, Tag: rel.From, Links: links})
tables[rel.Type] = table
if rel.From == "mod_status" {
mod_status = true
}
if rel.From == "pgp" {
pgp = true
}
if rel.From == "ssh" {
ssh = true
}
} else if utils.IsOnion(rel.From) {
tableName, row := wui.GetUserDefinedRow(rel)
if len(row) > 0 {
table, exists := tables[tableName]
if !exists {
newTable, err := wui.GetUserDefinedTable(rel)
if err == nil {
tables[tableName] = newTable
table = newTable
}
}
table.Rows = append(table.Rows, Row{Fields: row})
tables[tableName] = table
}
} else if rel.Type == "user-relationship" {
userrel := rel
userrel.Onion = rel.Identifier
userrel.From = rel.Onion
userrel.Type = rel.From + "/parent"
tableName, row := wui.GetUserDefinedRow(userrel)
if len(row) > 0 {
table, exists := tables[tableName]
if !exists {
newTable, err := wui.GetUserDefinedTable(userrel)
if err == nil {
tables[tableName] = newTable
table = newTable
}
}
table.Rows = append(table.Rows, Row{Fields: row})
tables[tableName] = table
}
} else if rel.Type == "database-id" {
uriCount++
}
}
// AutoTag our content
if mod_status {
content.Tags = append(content.Tags, "mod_status")
}
if pgp {
content.Tags = append(content.Tags, "pgp")
}
if ssh {
content.Tags = append(content.Tags, "ssh")
}
// We now have a bunch of tables, keyed by type.
// Build a Summary and add the tables to the Content
for _, v := range tables {
content.Summary.Total += len(v.Rows)
}
for k, v := range tables {
log.Printf("Adding Table %s %v", k, v)
// Lazy Plural
alt := k + "s"
switch k {
case "ip":
alt = "IP Addresses"
case "clearnet-link":
alt = "Co-Hosted Clearnet Sites"
case "uri":
alt = "Links to External Sites"
case "email-address":
alt = "Email Addresses"
case "server-version":
alt = "Server Information"
case "identity":
alt = "PGP Identities"
case "bitcoin-address":
alt = "Bitcoin Addresses"
case "software-banner":
alt = "Software Banners"
case "analytics-id":
alt = "Analytics IDs"
case "tag":
alt = "Tag Relationships"
case "onion":
alt = "Co-Hosted Onion Sites"
case "search-results":
alt = "Search Results"
case "http-header":
alt = "HTTP Headers"
case "page-info":
alt = "Webpage Information"
}
total := (float32(len(v.Rows)) / float32(content.Summary.Total)) * float32(100)
if total < 1 {
total = 2 // For Visibility
}
field := SummaryField{k, len(v.Rows), alt, int(total)}
content.Summary.Fields = append(content.Summary.Fields, field)
rollups := make(map[string]int)
for _, c := range v.Rollups {
for _, rows := range v.Rows {
rollups[rows.Fields[c]]++
}
}
v.RollupCounts = rollups
v.SearchTerm = search
v.AltTitle = alt
content.Tables = append(content.Tables, v)
}
} else {
content.RelationshipNum = wui.osc.Database.GetAllRelationshipsCount()
}
var templates = template.Must(template.ParseFiles("templates/index.html"))
templates.ExecuteTemplate(w, "index.html", content)
}
func (wui *WebUI) Listen(osc *config.OnionScanConfig, port int) {
wui.osc = osc
// We generate a random token on startup to mitigate the threat
// against CSRF style attacks.
token, err := utils.GenerateRandomString(64)
if err != nil {
log.Fatalf("Error generating random bytes for CSRF token: %v", err)
}
wui.token = token
http.HandleFunc("/", wui.Index)
http.HandleFunc("/save", wui.Save)
http.HandleFunc("/tag", wui.Tag)
http.HandleFunc("/saved", wui.SavedSearches)
http.HandleFunc("/delete-tag", wui.DeleteTag)
fs := http.FileServer(http.Dir("./templates/style"))
http.Handle("/style/", http.StripPrefix("/style/", fs))
fs = http.FileServer(http.Dir("./templates/scripts"))
http.Handle("/scripts/", http.StripPrefix("/scripts/", fs))
fs = http.FileServer(http.Dir("./templates/images"))
http.Handle("/images/", http.StripPrefix("/images/", fs))
fs = http.FileServer(http.Dir("./templates/fonts"))
http.Handle("/fonts/", http.StripPrefix("/fonts/", fs))
portstr := strconv.Itoa(port)
log.Fatal(http.ListenAndServe("127.0.0.1:"+portstr, nil))
}