Privacy respecting alternatives in results view (#106)
Full implementation of social media alt redirects (Twitter/YouTube/Instagram -> Nitter/Invidious/Bibliogram) depending on configuration. Verbatim search and an option to ignore search autocorrect are now supported as well. Also cleaned up the JavaScript side of the Whoogle config so that it now uses arrays of available fields for parsing config values instead of manually assigning each one to a variable. This doesn't include support for Google Maps -> OpenStreetMap; that seems a bit more involved than the social media redirects were, so it should likely be a separate effort.
pull/114/head
parent
3d7456f37b
commit
975ece8cd0
@@ -0,0 +1,79 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
# Query-argument names that are dropped when a link's query string is filtered
SKIP_ARGS = ['ref_src', 'utm']

# HTML snippet for a "Full Image" link; the '{}' placeholder takes the href
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'

# Path prefix of the Google search logo, and the '_desk' variant built from it
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = GOOG_IMG + '_desk'

# Base64 PNG data URI (note: the literal includes the surrounding newlines)
BLANK_B64 = '''
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
'''

# Labels (in many languages) that mark an element as an advertisement;
# compared case-insensitively by has_ad_content
BLACKLIST = [
    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige',
    '广告', '廣告',
    'Reklama', 'Реклама', 'Anunț',
    '광고',
    'annons', 'Annonse', 'Iklan',
    '広告',
    'Augl.', 'Mainos', 'Advertentie',
    'إعلان',
    'Գովազդ',
    'विज्ञापन',
    'Reklam',
    'آگهی',
    'Reklāma', 'Reklaam', 'Διαφήμιση',
    'מודעה',
    'Hirdetés',
]

# Substring -> replacement pairs used to rewrite links to alternative sites
SITE_ALTS = {
    'twitter.com': 'nitter.net',
    'youtube.com': 'invidio.us',
    'instagram.com': 'bibliogram.art/u',
}
|
||||
|
||||
|
||||
def has_ad_content(element: str):
    """Tell whether a piece of text marks an advertisement.

    Args:
        element: The text to inspect.

    Returns:
        True if the text equals any BLACKLIST entry (ignoring case) or
        contains the 'ⓘ' character; False otherwise.
    """
    candidate = element.upper()
    for ad_label in BLACKLIST:
        if ad_label.upper() == candidate:
            return True

    # No exact label match; fall back to the info-symbol check
    return 'ⓘ' in element
|
||||
|
||||
|
||||
def get_first_link(soup):
    """Return the first search result URL found in the parsed page.

    Args:
        soup: Parsed document exposing find_all (e.g. a BeautifulSoup object).

    Returns:
        The href of the first anchor whose href contains 'url?q=', passed
        through filter_link_args; None when no anchor matches.
    """
    anchor_hrefs = (anchor['href'] for anchor in soup.find_all('a', href=True))
    first_result = next(
        (href for href in anchor_hrefs if 'url?q=' in href), None)

    if first_result is None:
        return None

    return filter_link_args(first_result)
|
||||
|
||||
|
||||
def get_site_alt(link: str):
    """Rewrite a link to use the configured alternative site, if any.

    Only the first SITE_ALTS key (in dict order) found anywhere in the link
    is applied; every occurrence of that key in the link is replaced.

    Args:
        link: The link text to rewrite.

    Returns:
        The rewritten link, or the original link when no key matches.
    """
    matched_site = next((site for site in SITE_ALTS if site in link), None)
    if matched_site is None:
        return link

    return link.replace(matched_site, SITE_ALTS[matched_site])
|
||||
|
||||
|
||||
def filter_link_args(query_link):
    """Strip the query arguments named in SKIP_ARGS from a link.

    Args:
        query_link: The URL (absolute or relative) to clean up.

    Returns:
        The link unchanged when it has no parseable query arguments.
        Otherwise the link with its query string rebuilt from only the
        arguments whose names are not in SKIP_ARGS; when none survive, the
        query string and its '?' are dropped.
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter: hand the link back untouched
    if not link_args:
        return query_link

    # NOTE(review): names are matched exactly, so 'utm' does not cover
    # 'utm_source' and friends -- confirm whether prefix matching is wanted.
    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # Rebuild from the parsed components instead of using str.replace on the
    # raw link: the filtered query then lands before any '#fragment', and
    # text elsewhere in the link that happens to equal the query is kept.
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return parsed_link._replace(query=filtered_query).geturl()
|
||||
|
||||
|
||||
def gen_nojs(sibling):
    """Append a "NoJS Link" anchor to the given element, in place.

    The new anchor's href is '/window?location=' followed by the element's
    own href, and its text shows that href. A '<br><hr><br>' separator is
    appended before the anchor.

    Args:
        sibling: Element supporting item access for 'href' and append()
            (presumably a bs4 anchor tag -- confirm against callers).

    Returns:
        None; sibling is modified in place.
    """
    # Name the parser explicitly: a bare BeautifulSoup() makes bs4 guess one
    # (with a warning), and 'html.parser' matches the fragment parse below.
    nojs_link = BeautifulSoup('', 'html.parser').new_tag('a')
    nojs_link['href'] = '/window?location=' + sibling['href']
    nojs_link['style'] = 'display:block;width:100%;'
    nojs_link.string = 'NoJS Link: ' + nojs_link['href']

    sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
    sibling.append(nojs_link)
|
Loading…
Reference in New Issue