omitted emails on README

pull/1/head
danieleperera 4 years ago
parent 8258056bef
commit e31f149af0

.gitignore vendored

@@ -1,10 +1,11 @@
onion_master_list.*
webui
templates
OnionScraper.egg-info
OnionIngestor.egg-info
screenshots
dump.rdb
onionscandb
config.ini
config.yml
*.log
*.pyc
__pycache__

@@ -188,68 +188,7 @@ The output of the result is json, and in the same format it is sent to the chose
"relatedOnionDomains": null,
"ipAddresses": null,
"emailAddresses": [
"hitman001@torbox3uiot6wchz.onion",
"jimmym0reno@yahoo.com",
"aimeerene1977@gmail.com",
"jennabrown15.jb@gmail.com",
"S.thames129@gmail.com",
"munira025@gmail.com",
"luisadavid20@gmail.com",
"cameron.stewart3@yahoo.com",
"janisea2013@gmail.com",
"Carinavieyra598@gmail.com",
"adrianmcdonald49@gmail.com",
"aaronjeans1@gmail.com",
"nsorrentino11@aol.com",
"amber4189@outlook.com",
"holliekestner@gmail.com",
"nattyperks01@gmail.com",
"dinavasa29@hotmail.com",
"lydiac612@gmail.com",
"bmduke24@gmail.com",
"markigharmony@gmail.com",
"ohdannyboy03@icloud.com",
"dkoontz18@gmail.com",
"janese_young@yahoo.com",
"gabssstobsss@gmail.com",
"thelake02@sbcglobal.net",
"timmyboston01@gmail.com",
"carloscharters1996@gmail.com",
"djamila28@outlook.com",
"heathermaeb@gmail.com",
"canelo2080@gmail.com",
"pamsanta.ps@gmail.com",
"horeka.mash98@gmail.com",
"oeh@gondtc.com",
"ohmygod990227@hotmail.com",
"marieazme@yahoo.com",
"shirleyteuta@gmail.com",
"janetcoppedge@sbcglobal.net",
"dimashilov30@gmail.com",
"benavides.kam@gmail.com",
"sonyainsonora@yahoo.com",
"benl04123@outlook.com",
"cmculbreath@fedex.com",
"antmeb@gmail.com",
"jrlopez61@hotmail.com",
"jaimie.mudge@hotmail.com",
"dreamworld1980@secmail.pro",
"tinajones@sympatico.ca",
"nobby@secmail.pro",
"twistedsun@secmail.pro",
"slayermodsv3@gmail.com",
"beastmodsv1@gmail.com",
"prestonkonicek@gmail.com",
"fnbrleaksv2@gmail.com",
"fnbrleaks@gmail.com",
"pushingeverythingyt@gmail.com",
"rachelkonicek@gmail.com",
"vsfortune@hotmail.com",
"dannajoywhite@gmail.com",
"jensenjody@gmail.com",
"jenniferjbisschop@gmail.com",
"hkbergado@gmail.com",
"mummifiedbabies@secmail.pro"
OMITTED
],
"analyticsIDs": null,
"bitcoinAddresses": [

@@ -3,8 +3,11 @@
general:
# Run forever, check feeds once an hour.
daemon: False
sleep: 3600
daemon: True
sleep: 10
onion_validation: ([a-z2-7]{16,56}\.onion)
blacklist: pedo,xxx,infant,loli,porn,child,abuse,sex,drug,cocaine,dope,zoo,daddy,daughter,boy,girl,young,murder,cocks,year,old
interestingKeywords: t.me,feed,rss,xml,atom,dataleak,breach,blog,ransomware,source code,data breach
elasticsearch:
index: darkweb
port : 9200
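
For illustration, a minimal sketch of how the onion_validation pattern and the comma-separated blacklist above can be applied to a candidate URL (values are copied from this config, the accept() helper itself is hypothetical):

import re

# Values mirroring the config keys shown above (illustrative only).
onion_validation = r"([a-z2-7]{16,56}\.onion)"
blacklist = "pedo,xxx,infant,loli,porn,child,abuse,sex,drug"

onion_re = re.compile(onion_validation)
blacklist_re = re.compile("|".join(re.escape(w) for w in blacklist.split(",")), re.IGNORECASE)

def accept(candidate):
    """Return the matched onion domain, or None if invalid or blacklisted."""
    match = onion_re.search(candidate)
    if not match or blacklist_re.search(candidate):
        return None
    return match.group(1)

print(accept("http://expyuzz4wqqyqhjn.onion/about.html"))   # valid v2-style address
print(accept("http://drugsdrugsdrugsdrugs.onion"))          # rejected by the blacklist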
@@ -12,9 +15,13 @@ general:
sources:
# A few threat intel blogs to get you started!
- name: source-gist
module: gist
url: https://gist.github.com/search?l=Text&q=.onion
- name: simple-text-file
module: simplefile
filename: onion_master_list.txt
# - name: source-gist
# module: gist
# url: https://gist.github.com/search?l=Text&q=.onion
# - name: source-reddit
# module: reddit
@@ -43,20 +50,23 @@ sources:
operators:
- name: onionscan-go
module: onionscan
binpath: /home/tony/go/bin/onionscan
socks5:
http: 'socks5h://127.0.0.1:9050'
https: 'socks5h://127.0.0.1:9050'
TorController:
port: 9051
password: Xk5QP2haFMh8Y8D1060F1D7xaWEFG
timeout: 300
retries: 2
screenshots_path: null
blacklist: pedo,xxx,infant,loli,porn,child,abuse,sex,drug,cocaine,dope,zoo,daddy,daughter,boy,girl,young,murder
interestingKeywords: t.me,feed,rss,xml,atom,dataleak,breach,blog,ransomware,source code,data breach
- name: simple-html
module: html
socks5:
http: 'socks5h://127.0.0.1:9050'
https: 'socks5h://127.0.0.1:9050'
TorController:
port: 9051
password: your-torcontroller-password-here
- name: simple-screenshot
module: screenshot
screenshots_path: null
- name: onionscan-go
module: onionscan
binpath: /home/tony/go/bin/onionscan
# - name: yara-rule
# module: yara

@@ -1,131 +0,0 @@
import sys
import time
import traceback
import collections
from . import config
from . import dbhandler
from . import loghandler
class OnionManager:
"""ThreatIngestor main work logic.
Handles reading the config file, calling sources, maintaining state, and
sending artifacts to operators.
"""
def __init__(self, args):
# Load logger
log = loghandler.LoggerHandler(args.logLevel)
self.logger = log.start_logging()
# Load config
self.config = config.Config(args.configFile, self.logger)
# Load Elasticsearch.
try:
self.es = dbhandler.DbHandlerElasticSearch(
self.config.elasticsearch(),
self.logger)
except Exception as e:
# Error loading elasticsearch.
self.logger.error(e)
self.logger.debug(traceback.print_exc())
sys.exit(1)
# Instantiate plugins.
try:
self.logger.info("Initializing sources")
self.sources = {name: source(self.logger, **kwargs)
for name, source, kwargs in self.config.sources()}
self.logger.info("initializing operators")
self.operators = {name: operator(self.logger, **kwargs)
for name, operator, kwargs in self.config.operators()}
self.logger.info("initializing notifiers")
#self.notifiers = {name: operator(**kwargs)
# for name, operator, kwargs in self.config.notifiers()}
except Exception as e:
# Error loading elasticsearch.
self.logger.error(e)
self.logger.debug(traceback.print_exc())
sys.exit(1)
def run(self):
"""Run once, or forever, depending on config."""
if self.config.daemon():
self.logger.info("Running forever, in a loop")
self.run_forever()
else:
self.logger.info("Running once, to completion")
self.run_once()
def run_once(self):
"""Run each source once, passing artifacts to each operator."""
# Track some statistics about artifacts in a summary object.
summary = collections.Counter()
for source in self.sources:
# Run the source to collect artifacts.
self.logger.info(f"Running source '{source}'")
try:
onions = self.sources[source].run()
if onions:
self.logger.info(f'Found hidden links')
else:
self.logger.info('No links found')
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
continue
# Process artifacts with each operator.
for operator in self.operators:
self.logger.info(f"Processing found onions with operator '{operator}'")
try:
doc = self.operators[operator].process(onions)
# Save the source state.
self.es.save(doc)
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
continue
# # Record stats and update the summary.
# types = artifact_types(doc.get('interestingKeywords'))
# summary.update(types)
# for artifact_type in types:
# self.logger.info(f'types[artifact_type]')
# Log the summary.
self.logger.info(f"New artifacts: {dict(summary)}")
def run_forever(self):
"""Run forever, sleeping for the configured interval between each run."""
while True:
self.run_once()
self.logger.info(f"Sleeping for {self.config.sleep()} seconds")
time.sleep(self.config.sleep())
def artifact_types(artifact_list):
"""Return a dictionary with counts of each artifact type."""
types = {}
for artifact in artifact_list:
artifact_type = artifact.__class__.__name__.lower()
if artifact_type in types:
types[artifact_type] += 1
else:
types[artifact_type] = 1
return types
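
The counting loop above is equivalent to collections.Counter; a shorter sketch of the same helper (not part of the original file):

import collections

def artifact_types(artifact_list):
    """Count artifacts by class name, e.g. {'onion': 3, 'domain': 1}."""
    return dict(collections.Counter(
        artifact.__class__.__name__.lower() for artifact in artifact_list))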

@@ -1,50 +0,0 @@
"""OnionScraper
A Python3 application for indexing and scraping hidden services ElasticSearch
Installation:
This application assumes you have python3 and pip3 installed.
pip3 install -r requirements.txt
This software is provided subject to the MIT license stated below.
--------------------------------------------------
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------
"""
import argparse
from onionscraper import OnionManager
# Load arguments from user
parser = argparse.ArgumentParser(
prog='onionscraper',
description=__doc__,formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-c', '--config',dest="configFile", required = True, help='Path to config file')
parser.add_argument("--log", dest="logLevel",default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help="Set the logging level, default is INFO")
args = parser.parse_args()
app = OnionManager(args)
app.run()

@@ -1,170 +0,0 @@
import io
import importlib
import traceback
import yaml
from pathlib import Path
SOURCE = 'onionscraper.sources'
OPERATOR = 'onionscraper.operators'
INTERNAL_OPTIONS = [
'saved_state',
'module',
'credentials',
]
ARTIFACT_TYPES = 'artifact_types'
FILTER_STRING = 'filter'
ALLOWED_SOURCES = 'allowed_sources'
NAME = 'name'
class Config:
"""Config read/write operations, and convenience methods."""
def __init__(self, filename, logger):
"""Read a config file."""
self.logger = logger
self.filename = filename
with io.open(self.filename, 'r') as f:
try:
self.logger.info("Loading config file")
self.config = yaml.safe_load(f.read())
except yaml.error.YAMLError:
self.logger.error("YAML error in config")
@staticmethod
def _load_plugin(plugin_type, plugin):
"""Returns plugin class or raises an exception.
:raises: threatingestor.exceptions.PluginError
"""
try:
module = importlib.import_module('.'.join([plugin_type, plugin]))
return module.Plugin
except Exception as e:
print(e)
print(traceback.print_exc())
def daemon(self):
"""Returns boolean, are we daemonizing?"""
return self.config['general']['daemon']
def elasticsearch(self):
"""Returns elasticsaerch config"""
return self.config['general']['elasticsearch']
def sleep(self):
"""Returns number of seconds to sleep between iterations, if daemonizing."""
return self.config['general']['sleep']
# def onionscanner(self):
# """Returns onionscanner config dict"""
# screenshots = self.config['onionscanner'].pop('screenshots_path', None)
# if screenshots:
# self.config['onionscanner']['screenshots_path'] = Path(screenshots)
# else:
# self.config['onionscanner']['screenshots_path'] = Path(__file__).parents[1]/'screenshots'
# blacklist = self.config['onionscanner'].pop('blacklist', None)
# if blacklist:
# self.config['onionscanner']['blacklist'] = blacklist.split(',')
# interestingKeywords = self.config['onionscanner'].pop('interestingKeywords', None)
# if interestingKeywords:
# self.config['onionscanner']['interestingKeywords'] = interestingKeywords.split(',')
# return self.config['onionscanner']
def notifiers(self):
"""Returns notifiers config dictionary."""
return self.config.get('notifiers', {})
def logging(self):
"""Returns logging config dictionary."""
return self.config.get('logging', {})
def credentials(self, credential_name):
"""Return a dictionary with the specified credentials."""
for credential in self.config['credentials']:
for key, value in credential.items():
if key == NAME and value == credential_name:
return credential
return {}
def sources(self):
"""Return a list of (name, Source class, {kwargs}) tuples.
:raises: threatingestor.exceptions.PluginError
"""
sources = []
for source in self.config['sources']:
kwargs = {}
for key, value in source.items():
if key not in INTERNAL_OPTIONS:
kwargs[key] = value
elif key == 'credentials':
# Grab these named credentials
credential_name = value
for credential_key, credential_value in self.credentials(credential_name).items():
if credential_key != NAME:
kwargs[credential_key] = credential_value
# load and initialize the plugin
self.logger.info(f"Found source '{source[NAME]}'")
sources.append((source[NAME], self._load_plugin(SOURCE, source['module']), kwargs))
self.logger.info(f"Found {len(sources)} total sources")
return sources
def operators(self):
"""Return a list of (name, Operator class, {kwargs}) tuples.
:raises: threatingestor.exceptions.PluginError
"""
operators = []
for operator in self.config['operators']:
kwargs = {}
for key, value in operator.items():
if key not in INTERNAL_OPTIONS:
if key == ARTIFACT_TYPES:
# parse out special artifact_types option
artifact_types = []
for artifact in value:
try:
artifact_types.append(threatingestor.artifacts.STRING_MAP[artifact.lower().strip()])
except KeyError:
# ignore invalid artifact types
pass
kwargs[key] = artifact_types
elif key == FILTER_STRING:
# pass in special filter_string option
kwargs['filter_string'] = value
elif key == NAME:
# exclude name key from operator kwargs, since it's not used
pass
else:
kwargs[key] = value
elif key == 'credentials':
# Grab these named credentials
credential_name = value
for credential_key, credential_value in self.credentials(credential_name).items():
if credential_key != NAME:
kwargs[credential_key] = credential_value
# load and initialize the plugin
self.logger.info(f"Found operator '{operator[NAME]}'")
operators.append((operator[NAME], self._load_plugin(OPERATOR, operator['module']), kwargs))
self.logger.info(f"Found {len(operators)} total operators")
return operators
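
For context, the module: values in config.yml resolve through _load_plugin above: each maps to a module under onionscraper.sources or onionscraper.operators that must expose a Plugin class. A standalone sketch of that convention (function name here is illustrative):

import importlib

def load_plugin(plugin_type, plugin):
    """Import e.g. 'onionscraper.sources' + 'gist' and return its Plugin class."""
    module = importlib.import_module('.'.join([plugin_type, plugin]))
    return module.Plugin

# A source entry with "module: gist" therefore resolves to
# load_plugin('onionscraper.sources', 'gist') -> onionscraper.sources.gist.Plugin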

@@ -1,774 +0,0 @@
import sys
import traceback
from elasticsearch import Elasticsearch, helpers
class DbHandlerElasticSearch:
def __init__(self, config, logger):
self.logger = logger
self.logger.info('Creating Elasticsearch mapping')
self.config = config
self.mapping = '''
{
"mappings": {
"_doc": {
"properties": {
"html": {
"type": "text"
},
"onionscan": {
"type": "nested",
"properties": {
"bitcoinDetected": {
"type": "boolean"
},
"bitcoinServices": {
"properties": {
"bitcoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"bitcoin_test": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"dogecoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"litecoin": {
"properties": {
"detected": {
"type": "boolean"
},
"prototocolVersion": {
"type": "long"
},
"userAgent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
},
"certificates": {
"type": "nested",
"properties": {
"AuthorityKeyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"BasicConstraintsValid": {
"type": "boolean"
},
"CRLDistributionPoints": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"DNSNames": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ExtKeyUsage": {
"type": "long"
},
"Extensions": {
"properties": {
"Critical": {
"type": "boolean"
},
"Id": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"IsCA": {
"type": "boolean"
},
"Issuer": {
"properties": {
"CommonName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Locality": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Names": {
"properties": {
"Type": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"Organization": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OrganizationalUnit": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Province": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"IssuingCertificateURL": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"KeyUsage": {
"type": "long"
},
"MaxPathLen": {
"type": "long"
},
"MaxPathLenZero": {
"type": "boolean"
},
"NotAfter": {
"type": "date"
},
"NotBefore": {
"type": "date"
},
"OCSPServer": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"PermittedDNSDomainsCritical": {
"type": "boolean"
},
"PolicyIdentifiers": {
"type": "long"
},
"PublicKey": {
"properties": {
"E": {
"type": "text"
},
"N": {
"type": "text"
}
}
},
"PublicKeyAlgorithm": {
"type": "long"
},
"Raw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawIssuer": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawSubject": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawSubjectPublicKeyInfo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"RawTBSCertificate": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text"
},
"Signature": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SignatureAlgorithm": {
"type": "long"
},
"Subject": {
"properties": {
"CommonName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Locality": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Names": {
"properties": {
"Type": {
"type": "long"
},
"Value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"Organization": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OrganizationalUnit": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Province": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"SerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"SubjectKeyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Version": {
"type": "long"
}
}
},
"crawls": {
"type": "nested",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dateScanned": {
"type": "date"
},
"f_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ftpBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ftpDetected": {
"type": "boolean"
},
"ftpFingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"hiddenService": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identifierReport": {
"properties": {
"analyticsIDs": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"bitcoinAddresses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"emailAddresses": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"exifImages": {
"properties": {
"exifTags": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"location": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"foundApacheModStatus": {
"type": "boolean"
},
"linkedOnions": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"openDirectories": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"privateKeyDetected": {
"type": "boolean"
},
"serverVersion": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"ircDetected": {
"type": "boolean"
},
"lastAction": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"mongodbDetected": {
"type": "boolean"
},
"online": {
"type": "boolean"
},
"performedScans": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"pgpKeys": {
"properties": {
"armoredKey": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identity": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"ricochetDetected": {
"type": "boolean"
},
"skynetDetected": {
"type": "boolean"
},
"smtpBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"smtpDetected": {
"type": "boolean"
},
"smtpFingerprint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sshBanner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sshDetected": {
"type": "boolean"
},
"sshKey": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timedOut": {
"type": "boolean"
},
"tlsDetected": {
"type": "boolean"
},
"vncDetected": {
"type": "boolean"
},
"webDetected": {
"type": "boolean"
},
"xmppDetected": {
"type": "boolean"
}
}
},
"screenshots": {
"type": "nested",
"properties": {
"dateScreenshoted": {
"type": "date"
},
"filename": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
'''
try:
# keep the index name around; count() and save() below rely on self.index
self.index = self.config['index']
self.es = Elasticsearch([{
'host': self.config['host'],
'port': self.config['port']}])
self.es.indices.create(
index=self.index,
body=self.mapping,
ignore=400)
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.format_exc())
sys.exit(1)
def count(self):
self.es.indices.refresh(self.index)
status = self.es.count(index=self.index)
if status['_shards']['successful'] == 1:
self.logger.info('Successful')
self.logger.info('Count:%d',status['count'])
else:
self.logger.error(status)
def save(self, doc):
self.es.index(index=self.index,body=doc)
self.count()
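
Once save() has indexed documents, they can be read back with the same elasticsearch-py client; a hedged example query against the darkweb index from the config (host and port are assumptions):

from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])  # assumed host/port

# Full-text search over the crawled HTML for an interesting keyword.
resp = es.search(
    index='darkweb',
    body={'query': {'match': {'html': 'ransomware'}}},
    size=10)

for hit in resp['hits']['hits']:
    print(hit['_source'].get('hiddenService'), hit['_score'])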

@@ -1,33 +0,0 @@
import os
import logging
from pathlib import Path
class LoggerHandler():
def __init__(self, level):
self.level = getattr(logging, level)
self.logger = logging.getLogger("OnionScraper")
self.logger.setLevel(self.level)
# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(self.level)
# create file logging
logFile = Path(__file__).parents[1]
logging_path = os.path.join(logFile, "info.log")
fh = logging.FileHandler(logging_path)
# create formatter
formatter = logging.Formatter('[%(asctime)s] - %(name)s - %(levelname)s - %(message)s',datefmt='%a, %d %b %Y %H:%M:%S')
formatter_console = logging.Formatter('[%(asctime)s] - %(levelname)s - %(message)s',datefmt='%d %b %Y %H:%M:%S')
# add formatter to ch
ch.setFormatter(formatter_console)
fh.setFormatter(formatter)
# add ch to logger
self.logger.addHandler(ch) #added logging into console
self.logger.addHandler(fh) #added logging into file
def start_logging(self):
self.logger.info('Starting OnionScraper')
return self.logger

@@ -1,78 +0,0 @@
import re
class Operator:
"""Base class for all Operator plugins.
Note: This is an abstract class. You must extend ``__init__`` and call
``super`` to ensure this class's constructor is called. You must override
``handle_artifact`` with the same signature. You may define additional
``handle_{artifact_type}`` methods as needed (see the threatkb operator for
an example) - these methods are purely convention, and are not required.
When adding additional methods to child classes, consider prefixing the
method name with an underscore to denote a ``_private_method``. Do not
override other existing methods from this class.
"""
def __init__(self, artifact_types=None, filter_string=None, allowed_sources=None):
"""Override this constructor in child classes.
The arguments above (artifact_types, filter_string, allowed_sources)
should be accepted explicitly as above, in all child classes.
Additional arguments should be added: url, auth, etc, whatever is
needed to set up the object.
Each operator should default self.artifact_types to a list of Artifacts
supported by the plugin, and allow passing in artifact_types to
overwrite that default.
Example:
>>> self.artifact_types = artifact_types or [
... artifacts.IPAddress,
... artifacts.Domain,
... ]
It's recommended to call this __init__ method via super from all child
classes. Remember to do so *before* setting any default artifact_types.
"""
self.artifact_types = artifact_types or []
self.filter_string = filter_string or ''
self.allowed_sources = allowed_sources or []
def handle_onion(self, url):
"""Override with the same signature.
:param artifact: A single ``Artifact`` object.
:returns: None (always ignored)
"""
raise NotImplementedError()
def _artifact_is_allowed(self, artifact):
"""Returns True if this is allowed by this plugin's filters."""
# # Must be in allowed_types.
# if not any(isinstance(artifact, t) for t in self.artifact_types):
# return False
#
# # Must match the filter string.
# if not artifact.match(self.filter_string):
# return False
#
# # Must be in allowed_sources, if set.
# if self.allowed_sources and not any(
# [re.compile(p).search(artifact.source_name)
# for p in self.allowed_sources]):
# return False
#
return True
def process(self, onions):
"""Process all applicable onions."""
for onion in onions:
if self._artifact_is_allowed(onion.url):
self.handle_onion(onion)
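
In practice a child operator only needs to call this constructor and implement handle_onion; a minimal hypothetical plugin following the convention described above (not one of the shipped operators):

from onionscraper.operators import Operator

class Plugin(Operator):
    """Toy operator that only logs each onion it is given (illustrative)."""

    def __init__(self, logger, **kwargs):
        self.logger = logger
        # Call the base constructor before relying on its defaults.
        super().__init__(artifact_types=kwargs.get('artifact_types'),
                         filter_string=kwargs.get('filter_string'),
                         allowed_sources=kwargs.get('allowed_sources'))

    def handle_onion(self, onion):
        # onion is the namedtuple produced by a Source: (url, source, type).
        self.logger.info(f"Would process {onion.url} (from {onion.source})")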

@@ -1,259 +0,0 @@
import re
import os
import sys
import json
import time
import random
import traceback
import subprocess
from uuid import uuid4
from pathlib import Path
from datetime import datetime as dt
from json.decoder import JSONDecodeError
from concurrent.futures import ProcessPoolExecutor
from threading import Timer
import requests
from stem.control import Controller
from stem import Signal
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from onionscraper.operators import Operator
class Plugin(Operator):
"""OnionScraper main work logic.
Handles reading the config file, calling sources, maintaining state and
sending artifacts to operators.
"""
def __init__(self, logger, **kwargs):
self.logger = logger
self.logger.info('Initializing OnionScanner')
screenshots = kwargs.pop('screenshots_path', None)
if screenshots:
self.screenshots = Path(screenshots)
else:
self.screenshots = Path(__file__).parents[1]/'screenshots'
self.onionscan = kwargs['binpath']
self.timeout = int(kwargs['timeout'])
self.proxy = kwargs['socks5']
self.torControl = kwargs['TorController']
self.retries = int(kwargs['retries'])
self.headers ={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language':'en-US,en;q=0.5',
'DNT': '1', 'Connection':
'keep-alive',
'Upgrade-Insecure-Requests': '1'}
blacklist = kwargs['blacklist'].split(',')
self.blacklist = re.compile('|'.join([re.escape(word) for word in blacklist]), re.IGNORECASE)
keywords = kwargs['interestingKeywords'].split(',')
self.keywords = re.compile('|'.join([re.escape(word) for word in keywords]), re.IGNORECASE)
self.session = self.get_tor_session()
def response(self, status, content, onion):
"""
status: success/failure
content: dict
onion: str
return: dict
"""
return {'status': status, 'data': content, 'onion': onion}
def parseDoc(self, data):
data['onionscan'].pop('simpleReport', None)
crawls = data['onionscan'].pop('crawls', None)
hiddenService = data['onionscan'].pop('hiddenService', None)
data['onionscan']['crawls'] = [*crawls]
data['hiddenService'] = hiddenService
for onion in crawls.keys():
print(onion)
#q.enqueue(self.crawl, onion)
#with open('test.json', 'w', encoding='utf-8') as f:
# json.dump(data, f, ensure_ascii=False, indent=4)
return data
def format_directory(self, directory):
d = dt.now()
year = str(d.year)
month = str(d.month)
# prefix month and day with "0" if it is only one digit
if len(month) < 2:
month = "0" + month
day = str(d.day)
if len(day) < 2:
day = "0" + day
save_path = directory/year/month/day
if not os.path.isdir(save_path):
self.logger.info("[*] Creating directory to save screenshots")
os.makedirs(save_path)
return save_path
def take_screenshot(self, save_path, onion):
binary = FirefoxBinary('/home/tony/Projects/OnionScraper/geckodriver')
fp = webdriver.FirefoxProfile()
fp.set_preference('network.proxy.type', 1)
fp.set_preference('network.proxy.socks', '127.0.0.1')
fp.set_preference('network.proxy.socks_port', 9050)
fp.set_preference('network.proxy.socks_remote_dns', True)
options = Options()
options.headless = True
driver = webdriver.Firefox(
executable_path='/home/tony/Projects/OnionScraper/geckodriver',
options=options,
firefox_profile=fp)
url = 'http://' + onion
driver.get(url)
uid = str(uuid4()).split('-')[0]
filename = f"{onion}_screenshot_{uid}.png"
f_name = f"{save_path}/{filename}"
driver.save_screenshot(f_name)
driver.quit()
if os.path.isfile(f_name):
self.logger.info(f'[*] Screenshot was taken. {f_name}')
dateScreenshoted = dt.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f')+ 'Z'
result = {'dateScreenshoted':dateScreenshoted,'filename':filename}
return self.response("success",result,onion)
else:
self.logger.error('[x] Unable to take screenshot')
return self.response("failure",None,onion)
def get_tor_session(self):
try:
s = requests.session()
s.proxies = self.proxy
s.headers.update(self.headers)
except Exception as e:
self.logger.error(e)
self.logger.debug(traceback.print_exc())
return s
# signal TOR for a new connection
def renew_connection(self):
with Controller.from_port(port = self.torControl['port']) as controller:
# Now we switch TOR identities to make sure we have a good connection
self.logger.info('Getting new Tor IP')
# authenticate to our local TOR controller
controller.authenticate(self.torControl['password'])
# send the signal for a new identity
controller.signal(Signal.NEWNYM)
# wait for the new identity to be initialized
time.sleep(controller.get_newnym_wait())
session = self.get_tor_session()
self.logger.info(f"IP is {session.get('http://httpbin.org/ip').json()['origin']}")
def handle_timeout(self, process, onion):
#
# Handle a timeout from the onionscan process.
#
try:
# kill the onionscan process
process.kill()
self.logger.info("[!!!] Killed the onionscan process.")
except:
pass
self.renew_connection()
return
def run_sessions(self, onion):
retry = 0
result = None
while True:
try:
url = 'http://'+onion
self.logger.info(url)
content = self.session.get(url)
if content.status_code == 200:
result = content.json()
except JSONDecodeError as e:
self.logger.debug(f'JSONDecodeError {e}')
result = content.text
except Exception as e:
self.logger.error(e)
self.logger.debug(traceback.print_exc())
finally:
if result:
return self.response("success",result,onion)
else:
self.logger.info('[x] No results found retrying ...')
retry += 1
self.renew_connection()
if retry > self.retries:
self.logger.error('[x] Max retries exceeded')
return self.response("failure",None, onion)
def run_onionscan(self, onion):
self.logger.info("[*] Running onionscan on %s", onion)
# fire up onionscan
process = subprocess.Popen([self.onionscan,"--webport=0","--jsonReport","--simpleReport=false",onion],stdout=subprocess.PIPE,stderr=subprocess.PIPE)
# start the timer and let it run till timeout minutes
process_timer = Timer(300,self.handle_timeout,args=[process,onion])
process_timer.start()
# wait for the onion scan results
stdout = process.communicate()[0]
# we have received valid results so we can kill the timer
if process_timer.is_alive():
process_timer.cancel()
return self.response("success",json.loads(stdout),onion)
self.logger.info("[!!!] Process timed out for %s", onion)
return self.response("failure",None, onion)
def handle_onion(self, onion_tuple):
onion = onion_tuple.url
self.logger.info(f'Processing {onion} with onionscan')
try:
blacklist_URL = self.blacklist.search(onion)
if blacklist_URL:
self.logger.info(f"[X] Blocked by blacklist => matched keyword {blacklist_URL.group()}")
else:
self.logger.debug("[*] URL blacklist test: PASSED")
results = self.run_onionscan(onion)
if results['status'] == 'success' and results['data'].get('webDetected'):  # webDetected is a JSON boolean, not the string 'true'
content = self.run_sessions(onion)
if content['status'] == 'success':
blacklist_CONTENT = self.blacklist.search(content['data'])
if blacklist_CONTENT:
self.logger.info(f"[X] Blocked by blacklist content => matched keyword {blacklist_CONTENT.group()}")
else:
self.logger.debug("[*] CONTENT blacklist test: PASSED")
screenshot = self.take_screenshot(self.format_directory(self.screenshots), onion)
self.logger.info("Indexing!")
doc = {
'onionscan': results['data'],  # already parsed from JSON in run_onionscan
'html':content['data'],
'screenshots':screenshot['data'],
'interestingKeywords': self.keywords.findall(content['data'])  # compiled in __init__ as self.keywords
}
return self.parseDoc(doc)
else:
self.logger.info(f"[x] hidden service {onion} is not active")
except Exception as e:
self.logger.error(e)
self.logger.error(traceback.print_exc())
finally:
pass
#sys.exit(0)
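
The Timer/handle_timeout pattern in run_onionscan above can also be expressed with subprocess.run and its timeout argument; a compact alternative sketch (binary path and flags mirror the code above, the helper itself is hypothetical):

import json
import subprocess

def run_onionscan_once(binpath, onion, timeout=300):
    """Run onionscan with a hard timeout; return the parsed JSON report or None."""
    try:
        proc = subprocess.run(
            [binpath, "--webport=0", "--jsonReport", "--simpleReport=false", onion],
            capture_output=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return None  # caller can renew the Tor circuit and retry
    return json.loads(proc.stdout) if proc.stdout else None

# e.g. report = run_onionscan_once("/home/tony/go/bin/onionscan", "example.onion")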

@@ -1,15 +0,0 @@
from onionscraper.operators import Operator
class Plugin(Operator):
"""Operator for output to flat CSV file."""
def __init__(self, filename, base_score):
"""CSV operator."""
self.filename = filename
#super(Plugin, self).__init__(artifact_types, filter_string, allowed_sources)
def handle_artifact(self, artifact):
"""Operate on a single artifact."""
pass

@@ -1,41 +0,0 @@
from collections import namedtuple
class Source(object):
"""Base class for all Source plugins.
Note: This is an abstract class. You must override ``__init__`` and ``run``
in child classes. You should not override ``process_element``. When adding
additional methods to child classes, consider prefixing the method name
with an underscore to denote a ``_private_method``.
"""
def __init__(self, name, *args, **kwargs):
"""Override this constructor in child classes.
The first argument must always be ``name``.
Other arguments should be url, auth, etc., whatever is needed to set
up the object.
"""
self.onion = namedtuple('onion', ['url','source','type'])
def run(self):
"""Run and return ``(saved_state, list(Artifact))``.
Override this method in child classes.
The method signature and return values must remain consistent.
The method should attempt to pick up where we left off using
``saved_state``, if supported. If ``saved_state`` is ``None``, you can
assume this is a first run. If state is maintained by the remote
resource (e.g. as it is with SQS), ``saved_state`` should always be
``None``.
"""
raise NotImplementedError()
def process_element(self, content, reference_link, include_nonobfuscated=False):
"""Take a single source content/url and return a list of Artifacts.
This is the main work block of Source plugins, which handles
IOC extraction and artifact creation.
:param content: String content to extract from.
:param reference_link: Reference link to attach to all artifacts.
:param include_nonobfuscated: Include non-defanged URLs in output?
"""
self.logger.debug(f"Processing in source '{self.name}'")
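
A child source, then, sets up its own state, calls this constructor, and returns a list of onion namedtuples from run(); a minimal hypothetical file-based source in that style (similar in spirit to the simplefile source referenced in config.yml):

from onionscraper.sources import Source

class Plugin(Source):
    """Toy source that yields onions from a newline-delimited text file (illustrative)."""

    def __init__(self, logger, name, filename):
        self.logger = logger
        self.name = name
        self.filename = filename
        super().__init__(name)

    def run(self):
        with open(self.filename) as f:
            return [self.onion(url=line.strip(), source=self.name, type='domain')
                    for line in f if line.strip()]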

@@ -1,153 +0,0 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
__author__ = 'Andrey Glauzer'
__license__ = "MIT"
__version__ = "1.0.1"
__maintainer__ = "Andrey Glauzer"
__status__ = "Development"
import requests
import json
import re
import urllib.parse
from random import choice
import time
from bs4 import BeautifulSoup
from onionscraper.sources import Source
class Plugin(Source):
def __init__(self, logger, name, url):
self.logger = logger
self.name = name
self.url = url
self.desktop_agents = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0']
super().__init__(name)
def run(self):
self.logger.info('Starting Gist Scraper')
self.cookies()
self.pagination()
self.scraping()
return self.raw()
@property
def random_headers(self):
return {
'User-Agent': choice(self.desktop_agents),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
}
def cookies(self):
self.logger.info('Setting GIST cookies')
with requests.Session() as self.session:
self.headers = self.random_headers
request = self.session.get(self.url, headers=self.headers)
if request.status_code == 200:
pass
else:
self.logger.error('No Response from GIST')
def pagination(self):
request = self.session.get(
f"https://gist.github.com/search?l=Text&q={urllib.parse.quote('.onio')}", headers=self.headers)
self.soup = BeautifulSoup(request.content, features="lxml")
pages = []
self.urls = [self.url]
try:
for pagination in self.soup.find('div', {'class': 'pagination'}).findAll('a'):
pages.append(pagination.get_text())
except:
pages = False
if pages:
cont = 2
while cont <= 1: # int(pages[-2]):
cont += 1
full_url = f"https://gist.github.com/search?l=Text&p={cont-1}&q={urllib.parse.quote('.onio')}"
self.urls.append(full_url)
def scraping(self):
url = []
for inurl in self.urls:
self.logger.info(f"Connecting to {inurl}")
time.sleep(5)
request = self.session.get(inurl, headers=self.headers)
if request.status_code == 200:
soup = BeautifulSoup(request.content, features="lxml")
for code in soup.findAll('div', {'class': 'gist-snippet'}):
if '.onion' in code.get_text().lower():
for raw in code.findAll('a', {'class': 'link-overlay'}):
try:
url.append(raw['href'])
except:
pass
self.urls_raw = []
for get in url:
self.logger.info(f"Connecting to {get}")
time.sleep(5)
try:
request = self.session.get(get, headers=self.headers)
if request.status_code == 200:
soup = BeautifulSoup(request.content, features="lxml")
for raw in soup.findAll('a', {'class': 'btn btn-sm'}):
try:
gist_url = f"https://gist.githubusercontent.com{raw['href']}"
self.urls_raw.append(gist_url)
except:
pass
except(requests.exceptions.ConnectionError,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ReadTimeout,
requests.exceptions.InvalidURL) as e:
self.logger.error(
f"I was unable to connect to the url, because an error occurred.\n{e}")
pass
def raw(self):
self.logger.info('Performing replaces and regex. WAIT...')
itens = []
onions = []
for raw in self.urls_raw:
if '.txt' in raw.lower() \
or '.csv' in raw.lower():
time.sleep(5)
request = self.session.get(raw, headers=self.headers)
self.soup = BeautifulSoup(request.content, features="lxml")
for pre in self.soup.findAll('body'):
list = pre.get_text().split('\n')
itens.extend(list)
regex = re.compile(
"[A-Za-z0-9]{0,12}\.?[A-Za-z0-9]{12,50}\.onion")
for lines in itens:
rurls = lines \
.replace('\xad', '') \
.replace('\n', '') \
.replace("http://", '') \
.replace("https://", '') \
.replace("www.", "")
url = regex.match(rurls)
if url is not None:
onions.append(self.onion(url=url.group(), source='gist', type='domain'))
return onions

@@ -5,8 +5,6 @@ click==7.1.2
elasticsearch==7.8.0
idna==2.10
lxml==4.5.1
# Editable Git install with no remote (OnionScraper==1.0.0)
-e /home/tony/Projects/OnionScraper
PySocks==1.7.1
PyYAML==5.3.1
requests==2.24.0

@@ -8,14 +8,14 @@ def readme_file_contents():
setup(
name='OnionScraper',
name='OnionIngestor',
version='1.0.0',
description='Python app to scrape and index hidden websites',
long_description=readme_file_contents(),
author='dan',
author_email='test@google.com',
license='MIT',
packages=['onionscraper'],
packages=['onioningestor'],
zip_safe=False,
install_requires=[]
)
