From 8a30a7ececc873c4e06533e74f9e91e52c4f6fa8 Mon Sep 17 00:00:00 2001 From: danieleperera Date: Mon, 24 Aug 2020 19:39:42 +0200 Subject: [PATCH] updated gitignore and operators init --- .gitignore | 1 + onioningestor/onion.py | 6 +++--- onioningestor/operators/__init__.py | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 930836a..334aee6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +temp_crawl.txt monitoring.* onion_master_list.* webui diff --git a/onioningestor/onion.py b/onioningestor/onion.py index aa56a3e..70a1caa 100644 --- a/onioningestor/onion.py +++ b/onioningestor/onion.py @@ -19,10 +19,10 @@ class Onion(object): @url.setter def url(self, domain): onion_pattern = re.compile(r'([a-z2-7]{16,56}\.onion)') - if onion_pattern.match(domain): - self._url = domain + if onion_pattern.match(domain.lower()): + self._url = domain.lower() else: - raise Exception("Onion domain pattern does not match") + raise Exception(f"'{domain.lower()}' domain pattern does not match onion pattern") def set_operator(self, response): self.operators.update(response) diff --git a/onioningestor/operators/__init__.py b/onioningestor/operators/__init__.py index 6aa6201..931d947 100644 --- a/onioningestor/operators/__init__.py +++ b/onioningestor/operators/__init__.py @@ -100,11 +100,13 @@ class Operator: def findCrawls(self, content, hiddenService): crawl = set() + f1 = open("temp_crawl.txt", "a") for onion in re.findall(r'\s?(\w+.onion)', str(content)): if onion != hiddenService: crawl.add(onion) for item in crawl: self.logger.debug(f'crawling queue added: {item}') + f1.write(item+'\n') self.queueCrawl.put(( 3, self.onion(