# Patch sockets first so that HTTP requests issued from greenlets run cooperatively.
from gevent import monkey
monkey.patch_socket()

import datetime
import uuid
from pathlib import Path
from urllib.parse import urlparse, urlunparse

import gevent
import requests
from gevent import Timeout
from gevent.pool import Pool
from sqlite_utils import Database

def init_sites_db(dir="."):
    """Open (or create) sites.db in `dir` and make sure the sites table exists."""
    path = Path(dir) / "sites.db"
    db = Database(path)
    if "sites" not in db.table_names():
        db["sites"].create({
            "uuid": str,
            "url": str,
            "hostnames": str,
            "ports": str,
            "country": int,
            "isp": str,
            "status": str,
            "last_online": str,
            "last_check": str,
            "error": int,
            # "schema_version": 1
            # TODO: add the most common formats
        }, pk="uuid")
    db.table("sites", pk="uuid", batch_size=100, alter=True)
    return db
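# Note (my reading of sqlite_utils behaviour): list values such as hostnames and ports
# are serialized to JSON text on insert/upsert, which is why get_site_uuid_from_url()
# below matches a hostname with SQLite's instr() substring search.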
def save_site(db: Database, site):
    # TODO: check whether the site is already present instead of always generating a uuid
    # def save_sites(db, sites):
    #     db["sites"].insert_all(sites, alter=True, batch_size=100)
    if 'uuid' not in site:
        site['uuid'] = str(uuid.uuid4())
    print(site)
    db["sites"].upsert(site, pk='uuid')
def check_and_save_site(db, site):
    res = check_calibre_site(site)
    print(res)
    save_site(db, res)
# import pysnooper
# @pysnooper.snoop()
def check_calibre_site(site):
    """Probe a single Calibre server and return a dict of fields to upsert."""
    ret = {}
    ret['uuid'] = site["uuid"]
    now = str(datetime.datetime.now())
    ret['last_check'] = now
    api = site['url'] + '/ajax/'
    timeout = 15
    library = ""
    url = api + 'search' + library + '?num=0'
    print()
    print("Getting ebooks count:", site['url'])
    print(url)
    try:
        r = requests.get(url, verify=False, timeout=(timeout, 30))
        r.raise_for_status()
    except requests.exceptions.HTTPError:
        ret['error'] = r.status_code
        if r.status_code == 401:
            ret['status'] = "unauthorized"
        else:
            ret['status'] = "down"
        return ret
    except requests.RequestException as e:
        print("Unable to open site:", url)
        # print(getattr(e, 'message', repr(e)))
        print(e)
        ret['status'] = "down"
        return ret
    except Exception as e:
        print("Other issue:", e)
        ret['status'] = 'Unknown Error'
        return ret
    except BaseException as e:
        # last-resort catch-all for non-Exception errors (interrupts, exits)
        print("Wazza !!!!")
        ret['status'] = 'Critical Error'
        print(e)
        return ret
    try:
        print("Total count=", r.json()["total_num"])
    except (ValueError, KeyError):
        # body is not JSON or lacks total_num; the site is still considered online
        pass
    ret['status'] = 'online'
    ret['last_online'] = now
    return ret
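# Shape of the dict returned above (illustrative, derived from the code):
#   online:        {'uuid': ..., 'last_check': ..., 'status': 'online', 'last_online': ...}
#   HTTP error:    {'uuid': ..., 'last_check': ..., 'error': <status code>, 'status': 'unauthorized' or 'down'}
#   network error: {'uuid': ..., 'last_check': ..., 'status': 'down'}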
def get_site_uuid_from_url(db, url):
    site = urlparse(url)
    hostname = site.hostname
    site = site._replace(path='')
    url = urlunparse(site)
    # print(url)
    # print(hostname)
    # Parameterized query: the hostname comes from user input, so don't interpolate it.
    row = db.conn.execute(
        "select * from sites where instr(hostnames, ?)", (hostname,)
    ).fetchone()
    # print(row)
    if row:
        return row
def map_site_from_url(url):
    ret = {}
    site = urlparse(url)
    print(site)
    site = site._replace(path='')
    ret['url'] = urlunparse(site)
    ret['hostnames'] = [site.hostname]
    ret['ports'] = [str(site.port)]
    return ret
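# For example (illustrative call, not from the original file):
#   map_site_from_url("http://example.com:8080/some/path")
#   -> {'url': 'http://example.com:8080', 'hostnames': ['example.com'], 'ports': ['8080']}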
def import_urls_from_file(filepath, dir='.'):
    # TODO: skip malformed urls
    # TODO: use cache instead
    db = init_sites_db(dir)
    with open(filepath) as f:
        for url in f.readlines():
            url = url.rstrip()
            # url = 'http://' + url
            if get_site_uuid_from_url(db, url):
                print(f"'{url}' already present")
                continue
            print(f"'{url}' added")
            save_site(db, map_site_from_url(url))
def get_libs_from_site(site):
    """Return the library names exposed by a Calibre server, or None on failure."""
    server = site.rstrip('/')
    api = server + '/ajax/'
    timeout = 30
    print()
    print("Server:", server)
    url = api + 'library-info'
    print()
    print("Getting libraries from", server)
    # print(url)
    try:
        r = requests.get(url, verify=False, timeout=(timeout, 30))
        r.raise_for_status()
    except requests.RequestException as e:
        print("Unable to open site:", url)
        return
    except Exception as e:
        print("Other issue:", e)
        return
    libraries = r.json()["library_map"].keys()
    print("Libraries:", ", ".join(libraries))
    return libraries
def check_calibre_list(dir='.'):
    db = init_sites_db(dir)
    sites = []
    for row in db["sites"].rows:
        print(f"Queueing: {row['url']}")
        sites.append(row)
    print(sites)
    # Check up to 100 sites concurrently on gevent greenlets.
    pool = Pool(100)
    pool.map(lambda s: check_and_save_site(db, s), sites)

# example of an fts search: sqlite-utils index.db "select * from summary_fts where summary_fts match 'title:fre*'"
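# Minimal usage sketch (an assumption, not part of the original file): import a list of
# URLs from a text file, one per line, then probe every saved site concurrently.
# The file name "urls.txt" is a placeholder.
#
# if __name__ == "__main__":
#     import_urls_from_file("urls.txt")
#     check_calibre_list()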