mirror of https://github.com/Krazybug/calishot
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
206 lines · 4.8 KiB · Python
import requests
|
|
from pathlib import Path
|
|
from urllib.parse import *
|
|
import uuid
|
|
from sqlite_utils import Database
|
|
import datetime
|
|
|
|
|
|
import gevent
|
|
from gevent import monkey
|
|
from gevent import Timeout
|
|
from gevent.pool import Pool
|
|
monkey.patch_socket()
|
|
|
|
|
|
def init_sites_db(dir="."):
|
|
|
|
path = Path(dir) / "sites.db"
|
|
|
|
db = Database(path)
|
|
if not "sites" in db.table_names():
|
|
db["sites"].create({
|
|
"uuid": str,
|
|
"url": str,
|
|
"hostnames": str,
|
|
"ports": str,
|
|
"country": int,
|
|
"isp": str,
|
|
"status": str,
|
|
"last_online": str,
|
|
"last_check": str,
|
|
"error": int,
|
|
# "schema_version": 1
|
|
# # TODO: add the most common formats
|
|
}, pk="uuid")
|
|
# }, pk="uuid", not_null=True)
|
|
|
|
# if not "sites" in db.table_names():
|
|
# db["sites"].create({
|
|
# "uuid": str
|
|
# }, pk="uuid",)
|
|
|
|
db.table("sites", pk='uuid', batch_size=100, alter=True)
|
|
return db
|
|
|
|
|
|
def save_site(db: Database, site):
|
|
# # TODO: Check if the site is not alreday present
|
|
# def save_sites(db, sites):
|
|
# db["sites"].insert_all(sites, alter=True, batch_size=100)
|
|
if not 'uuid' in site:
|
|
site['uuid']=str(uuid.uuid4())
|
|
print(site)
|
|
db["sites"].upsert(site, pk='uuid')
|
|
|
|
|
|
def check_and_save_site(db, site):
|
|
res= check_calibre_site(site)
|
|
print(res)
|
|
save_site(db, res)
|
|
|
|
# import pysnooper
|
|
# @pysnooper.snoop()
|
|
def check_calibre_site(site):
|
|
ret={}
|
|
ret['uuid']=site["uuid"]
|
|
now=str(datetime.datetime.now())
|
|
ret['last_check']=now
|
|
|
|
api=site['url']+'/ajax/'
|
|
timeout=15
|
|
library=""
|
|
url=api+'search'+library+'?num=0'
|
|
print()
|
|
print("Getting ebooks count:", site['url'])
|
|
print(url)
|
|
|
|
try:
|
|
r=requests.get(url, verify=False, timeout=(timeout, 30))
|
|
r.raise_for_status()
|
|
except requests.exceptions.HTTPError as e:
|
|
r.status_code
|
|
ret['error']=r.status_code
|
|
if (r.status_code == 401):
|
|
ret['status']="unauthorized"
|
|
else:
|
|
ret['status']="down"
|
|
return ret
|
|
except requests.RequestException as e:
|
|
print("Unable to open site:", url)
|
|
# print (getattr(e, 'message', repr(e)))
|
|
print (e)
|
|
ret['status']="down"
|
|
return ret
|
|
except Exception as e:
|
|
print ("Other issue:", e)
|
|
ret['status']='Unknown Error'
|
|
print (e)
|
|
return ret
|
|
except :
|
|
print("Wazza !!!!")
|
|
ret['status']='Critical Error'
|
|
print (e)
|
|
return ret
|
|
|
|
try:
|
|
print("Total count=",r.json()["total_num"])
|
|
except:
|
|
pass
|
|
|
|
status=ret['status']='online'
|
|
if status=="online":
|
|
ret['last_online']=now
|
|
|
|
return ret
|
|
|
|
|
|
|
|
def get_site_uuid_from_url(db, url):
|
|
|
|
site=urlparse(url)
|
|
hostname=site.hostname
|
|
site=site._replace(path='')
|
|
|
|
url=urlunparse(site)
|
|
# print (url)
|
|
|
|
# print (hostname)
|
|
row=db.conn.execute(f"select * from sites where instr(hostnames, '{hostname}')").fetchone()
|
|
# print(row)
|
|
if row:
|
|
return row
|
|
|
|
def map_site_from_url(url):
|
|
ret={}
|
|
|
|
site=urlparse(url)
|
|
|
|
print(site)
|
|
site=site._replace(path='')
|
|
ret['url']=urlunparse(site)
|
|
ret['hostnames']=[site.hostname]
|
|
ret['ports']=[str(site.port)]
|
|
|
|
return ret
|
|
|
|
|
|
def import_urls_from_file(filepath, dir='.'):
|
|
|
|
#TODO skip malformed urls
|
|
#TODO use cache instead
|
|
|
|
db=init_sites_db(dir)
|
|
|
|
with open(filepath) as f:
|
|
for url in f.readlines():
|
|
url=url.rstrip()
|
|
# url='http://'+url
|
|
if get_site_uuid_from_url(db, url):
|
|
print(f"'{url}'' already present")
|
|
continue
|
|
print(f"'{url}'' added")
|
|
save_site(db, map_site_from_url(url))
|
|
|
|
|
|
|
|
def get_libs_from_site(site):
|
|
|
|
server=site.rstrip('/')
|
|
api=server+'/ajax/'
|
|
timeout=30
|
|
|
|
print()
|
|
print("Server:", server)
|
|
url=api+'library-info'
|
|
|
|
print()
|
|
print("Getting libraries from", server)
|
|
# print(url)
|
|
|
|
try:
|
|
r=requests.get(url, verify=False, timeout=(timeout, 30))
|
|
r.raise_for_status()
|
|
except requests.RequestException as e:
|
|
print("Unable to open site:", url)
|
|
# return
|
|
except Exception as e:
|
|
print ("Other issue:", e)
|
|
return
|
|
# pass
|
|
|
|
libraries = r.json()["library_map"].keys()
|
|
print("Libraries:", ", ".join(libraries))
|
|
return libraries
|
|
|
|
def check_calibre_list(dir='.'):
|
|
db=init_sites_db(dir)
|
|
sites=[]
|
|
for row in db["sites"].rows:
|
|
print(f"Queueing:{row['url']}")
|
|
sites.append(row)
|
|
print(sites)
|
|
pool = Pool(100)
|
|
pool.map(lambda s: check_and_save_site (db, s), sites)
|
|
|
|
# example of a fts search sqlite-utils index.db "select * from summary_fts where summary_fts match 'title:fre*'" |