happybot/comics.py

260 lines
7.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
from subprocess import Popen, PIPE
from urllib.request import urlopen, quote, Request
from json import loads as dejson
from re import compile as regex
def cmd(args):
    """Run *args* as a subprocess and yield its stdout one line at a time.

    Each line is decoded as UTF-8 (undecodable bytes are ignored) and
    yielded without its trailing newline.  Iteration ends when the child
    closes its stdout.

    Args:
        args: Argument list handed to ``Popen``.

    Yields:
        str: One decoded output line per iteration.
    """
    proc = Popen(args, stdout=PIPE)
    try:
        # readline() returns b'' only at EOF, so it doubles as the sentinel.
        for raw in iter(proc.stdout.readline, b''):
            # Only strip an actual newline: the last line of the stream may
            # lack one, and blindly dropping a byte would eat real data.
            if raw.endswith(b'\n'):
                raw = raw[:-1]
            yield str(raw, 'utf-8', 'ignore')
    finally:
        # Release our end of the pipe whether we hit EOF or the consumer
        # abandoned the generator.
        proc.stdout.close()
# Imports are finished, as is the preparation for things later.
# Time to implement the searchers!
def relevantxkcd(query):
    """Ask the relevantxkcd service for a comic matching *query*.

    The response body is split on whitespace; the first token is parsed as
    a float (the code calls it ``sure``) and the third as the comic number.

    Args:
        query: Free-text search string; it is URL-quoted before sending.

    Returns:
        ``'https://xkcd.com/<num>'``, or ``None`` when the request fails,
        the status is not 200, the first token is below 0.07, or the
        response cannot be parsed.
    """
    try:
        url = "https://relevantxkcd.appspot.com/process?action=xkcd&query=" + quote(query)
        # The timeout keeps one unresponsive server from blocking the caller
        # forever; the context manager closes the connection in every case.
        with urlopen(url, timeout=10) as req:
            if req.code != 200:
                return None
            # Tokens stay as bytes: float() and int() both accept them.
            res = req.read().split()
        sure = float(res[0])
        if sure < 0.07:
            return None
        num = int(res[2])
        return 'https://xkcd.com/' + str(num)
    except Exception:
        # Best-effort lookup: any failure just means "no answer", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def explainxkcd(query):
    """Search the explainxkcd wiki for *query* and return a comic URL.

    Uses the wiki's ``api.php`` text search and walks the hits in order,
    returning the first one whose title starts with an integer before the
    first ``':'``.

    Args:
        query: Free-text search string; it is URL-quoted before sending.

    Returns:
        ``'https://xkcd.com/<num>'``, or ``None`` when the request fails,
        the status is not 200, or no hit has a numeric title prefix.
    """
    try:
        url = "https://explainxkcd.com/wiki/api.php?action=query&list=search&srwhat=text&format=json&srsearch=" + quote(query)
        # The request is sent with an explicit 'Mozilla' User-Agent header.
        request = Request(url, headers={'User-Agent': 'Mozilla'})
        # The timeout keeps one unresponsive server from blocking the caller
        # forever; the context manager closes the connection in every case.
        with urlopen(request, timeout=10) as req:
            if req.code != 200:
                return None
            res = dejson(req.read().decode())
        for item in res['query']['search']:
            try:
                num = int(item['title'].split(':')[0])
            except (AttributeError, KeyError, TypeError, ValueError):
                # This hit has no usable numeric title prefix; try the next.
                continue
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception:
        # Best-effort lookup: any failure just means "no answer", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
# Credentials and result pattern for the Google Custom Search lookup.
# NOTE(review): the API key is committed in source; consider loading it from
# the environment or a config file instead.
googleapi = 'AIzaSyDr1gkHH-18QheEJpdGwUMmhYYvtlIJ3bA'
googlecse = '017423361205507730360:p6-h8trjn5c'
# Captures the comic number from any link containing "xkcd.com/<digits>".
googlereg = regex(r'xkcd\.com/(\d+)')


def googlexkcd(query):
    """Search Google Custom Search for *query* and return a comic URL.

    Walks the ``items`` of the JSON response in order and returns the first
    whose ``link`` matches ``googlereg``.  Progress is printed to stdout.

    Args:
        query: Free-text search string; it is URL-quoted before sending.

    Returns:
        ``'https://xkcd.com/<num>'``, or ``None`` when the request fails,
        the status is not 200, the response has no usable ``items``, or no
        link matches.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1?key=' + googleapi + '&cx=' + googlecse + '&q=' + quote(query)
        # The timeout keeps one unresponsive server from blocking the caller
        # forever; the context manager closes the connection in every case.
        with urlopen(url, timeout=10) as req:
            if req.code != 200:
                return None
            print('| | Opened URL, status code 200.')
            res = dejson(req.read().decode())
        print('| | Decoded JSON.')
        for item in res['items']:
            link = item['link']
            print('| | Checking:', link)
            match = googlereg.search(link)
            if match is None:
                # Not an xkcd comic link; look at the next result.
                continue
            num = int(match.group(1))
            print('| | Found a match:', num)
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception:
        # Best-effort lookup: any failure just means "no answer", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
# Credentials for the TV Tropes Google Custom Search lookup.
# NOTE(review): the API key is committed in source; consider loading it from
# the environment or a config file instead.
tvtropesapi = 'AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY'
tvtropescse = '006443654034974345143:kc4pt9dnkle'


def googletvtropes(query):
    """Search the TV Tropes custom search engine and return the top hit.

    Progress is printed to stdout.

    Args:
        query: Free-text search string; it is URL-quoted before sending.

    Returns:
        The ``url`` field of the first entry in the response's ``results``
        list, or ``None`` when the request fails, the status is not 200, or
        there are no results.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1element?key=' + tvtropesapi + '&cx=' + tvtropescse + '&q=' + quote(query)
        # The timeout keeps one unresponsive server from blocking the caller
        # forever; the context manager closes the connection in every case.
        with urlopen(url, timeout=10) as req:
            if req.code != 200:
                return None
            print('| | Opened URL, status code 200.')
            res = dejson(req.read().decode())
        print('| | Decoded JSON.')
        if 'results' in res and res['results']:
            return res['results'][0]['url']
        print('| | No results.')
        return None
    except Exception:
        # Best-effort lookup: any failure just means "no answer", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def numberxkcd(segment):
    """Special-case handler: turn a leading comic number into an xkcd URL.

    Only ASCII digits ``0``-``9`` count.  ``str.isdigit()`` would also
    accept characters such as superscripts or non-Latin digits, which do
    not form a valid comic URL.

    Args:
        segment: Text immediately following the ``xkcd!`` marker.

    Returns:
        A ``(answer, query, segment)`` tuple.  When *segment* starts with
        digits, ``answer`` is ``'https://xkcd.com/<digits>'`` and the
        returned segment is the text after them.  Otherwise ``answer`` is
        ``''`` and *segment* is returned unchanged.  ``query`` is always
        ``''``.
    """
    if not segment:
        return '', '', segment
    end = 0
    # Scan by index rather than re-slicing the string once per character.
    while end < len(segment) and segment[end] in '0123456789':
        end += 1
    if end:
        return 'https://xkcd.com/' + segment[:end], '', segment[end:]
    return '', '', segment
# Righty, so all our searching mechanisms are above us. Let's table 'em up in a dict.
# Search back-ends per comic keyword.  For a given keyword the functions are
# tried in the order listed until one returns something other than None.
# Keywords that were stubbed out with no back-ends: smbc, satw, ssss.
methods = dict(
    xkcd=(relevantxkcd, googlexkcd, explainxkcd),
    tvtropes=(googletvtropes,),
)

# Per-keyword shortcut handlers, consulted before any search is attempted.
# Each takes the text after the marker and returns (answer, query, segment).
special = dict(
    xkcd=numberxkcd,
)
# Delimiter table used to pull a quoted query out of the text that follows a
# comic marker.  Each tuple is (start, end, ...): the first element is the
# opening delimiter and every remaining element is an accepted closing
# delimiter, tried in order.  Entries are checked top to bottom.
#
# NOTE(review): several entries below have empty-string delimiters as they
# appear here.  An empty start matches every segment and an empty end always
# yields an empty query, so those entries can never produce a query.  They
# look like quote characters that were lost in transcoding -- confirm against
# the original file and restore them.
matching = [
('"', '"'),
("'", "'"),
('', ''),
('', '', ''),
('<', '>'),
('«', '»'),
('»', '«'),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('(', ')'),
('[', ']'),
('{', '}'),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('', '', ''),
('lu', "li'u")
]
# And for an attempt to extract a comic.
def _next_comic(segment):
    """Find the earliest ``<keyword>!`` marker for any keyword in ``methods``.

    Returns ``(keyword, rest)`` where *rest* is the text after the marker,
    or ``('', segment)`` when no marker is present.
    """
    best_pos = len(segment)
    best = ''
    for comic in methods:
        pos = segment.find(comic + '!')
        if pos != -1 and pos < best_pos:
            best_pos, best = pos, comic
    if not best:
        return '', segment
    # Skip past the keyword and its '!'.
    return best, segment[best_pos + len(best) + 1:]


def _quoted_query(segment):
    """Try to read a delimited query from the start of *segment*.

    Returns ``(query, rest)``; ``query`` is ``''`` when no entry of
    ``matching`` produced a non-empty query.  An opening delimiter whose
    closing partner is missing is still consumed from *rest*.
    """
    query = ''
    for start, *ends in matching:
        if not segment.startswith(start):
            continue
        print('| Found matching initial quote:', start)
        segment = segment[len(start):]
        for end in ends:
            pos = segment.find(end)
            if pos == -1:
                continue
            query = segment[:pos]
            segment = segment[pos + len(end):]
            print('| Found matching end:', end)
            break
        if query:
            break
    return query, segment


def _bare_query(segment):
    """Read an unquoted query: everything up to the first whitespace.

    ``_`` stands in for spaces; if there is no ``_``, ``-`` does instead.
    Returns ``(query, rest)``.
    """
    end = 0
    while end < len(segment) and not segment[end].isspace():
        end += 1
    query, segment = segment[:end], segment[end:]
    if '_' in query:
        print('| Replacing "_" with " ".')
        query = query.replace('_', ' ')
    elif '-' in query:
        print('| Replacing "-" with " ".')
        query = query.replace('-', ' ')
    return query, segment


def attempt(line):
    """Resolve every ``<keyword>!<query>`` reference in *line* to a URL.

    For each marker, the keyword's ``special`` handler (if any) is tried
    first, then a quoted query, then a bare word.  The query is passed to
    the keyword's search methods in order until one answers.  Progress is
    printed to stdout.

    Args:
        line: The message text to scan.

    Returns:
        The answers joined by single spaces (possibly ``''``), or ``None``
        when the scan stops at text without a marker and nothing was found.
    """
    output = []
    segment = line
    while segment:
        comic, segment = _next_comic(segment)
        if not comic:
            # No further references.  Only report "nothing here" when no
            # answer was collected; returning None unconditionally used to
            # discard results whenever text followed the last reference.
            if not output:
                return None
            break
        print('Searching for ' + comic + ':')
        query = ''
        # Special cases.
        if comic in special:
            print("| There's a special case:")
            result, query, segment = special[comic](segment)
            if result:
                print("| | Result:", result)
                output.append(result)
                continue
            if not query:
                print("| | Special case did not match.")
        if not query:
            query, segment = _quoted_query(segment)
        if not query:
            query, segment = _bare_query(segment)
        if query:
            query = query.strip()
            print('| Searching for:', query)
            for method in methods[comic]:
                print('| Search using:', method.__name__)
                result = method(query)
                if result is not None:
                    output.append(result)
                    print('| Result:', result)
                    break
    return ' '.join(output)
# Connect it to happybot.
from sys import argv

# Script entry: follow the file named by argv[1] with `tail -f` and, for each
# line that yields an answer, overwrite the file named by argv[2] with it.
if len(argv) != 3:
    print('Usage: ./xkcd.py out in')
    # SystemExit works even when the `site` module's exit() helper is absent.
    raise SystemExit(1)
for line in cmd(['tail', '-f', argv[1]]):
    # Only the text after the first three space-separated fields is examined.
    fields = line.split(' ', 3)
    if len(fields) < 4:
        # Blank or short line: nothing to examine.  Indexing [3] directly
        # used to raise IndexError here and stop the whole loop.
        continue
    line = fields[3]
    # Unwrap '\x01ACTION ...\x01' framing so the inner text is scanned.
    if line[:8] == '\x01ACTION ' and line[-1] == '\x01':
        line = line[8:-1]
    # Skip lines marked as not to be looked at.  The zero-width-space prefix
    # is also what this script puts on its own output below.
    if line.startswith(('\u200b', 'nolog:', '[nolog]')):
        continue
    result = attempt(line)
    if result:
        # Explicit UTF-8 so writing U+200B cannot fail under a non-UTF-8 locale.
        with open(argv[2], 'w', encoding='utf-8') as fh:
            fh.write('\u200b' + result + '\n')