happybot/comics.py

260 lines
7.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
from subprocess import Popen, PIPE
from urllib.request import urlopen, quote, Request
from json import loads as dejson
from re import compile as regex
def cmd(args):
    """Run *args* as a subprocess and lazily yield its stdout line by line.

    Each yielded item is one output line, decoded as UTF-8 (undecodable
    bytes are dropped) with its trailing newline removed.  The generator
    ends when the child's stdout reaches end-of-file.

    args: argument list handed to ``Popen`` unchanged.
    """
    proc = Popen(args, stdout=PIPE)
    try:
        # readline() returns b'' only at EOF, so that is the stop sentinel.
        for raw in iter(proc.stdout.readline, b''):
            # Strip the newline only when it is really there: a final line
            # without one must not lose its last character.
            if raw.endswith(b'\n'):
                raw = raw[:-1]
            yield str(raw, 'utf-8', 'ignore')
    finally:
        # Runs on EOF and when the consumer abandons the generator early.
        proc.stdout.close()
# Imports are finished, as is the preparation for things later.
# Time to implement the searchers!
def relevantxkcd(query):
    """Look *query* up on the relevantxkcd search service.

    The response body is split on whitespace; the first token is read as a
    confidence score and the third as the comic number.

    Returns ``'https://xkcd.com/<number>'``, or ``None`` when the request
    fails, the status is not 200, the confidence is below 0.07, or the
    response cannot be parsed.
    """
    try:
        url = "https://relevantxkcd.appspot.com/process?action=xkcd&query=" + quote(query)
        # The context manager closes the connection on every exit path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            res = req.read().split()
        # The tokens are still bytes; float() and int() accept ASCII bytes.
        sure = float(res[0])
        if sure < 0.07:
            return None
        num = int(res[2])
        return 'https://xkcd.com/' + str(num)
    except Exception:
        # Deliberately best-effort: any lookup failure means "no answer".
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit through so the bot can still be stopped.
        return None
def explainxkcd(query):
    """Search the explainxkcd wiki for *query* via its MediaWiki search API.

    Walks the search hits in order and takes the first one whose title
    starts with an integer followed by ``:``; that integer is used as the
    comic number.

    Returns ``'https://xkcd.com/<number>'``, or ``None`` when the request
    fails, the status is not 200, or no hit has a numeric title prefix.
    """
    try:
        url = "https://explainxkcd.com/wiki/api.php?action=query&list=search&srwhat=text&format=json&srsearch=" + quote(query)
        url = Request(url, headers={'User-Agent': 'Mozilla'})
        # The context manager closes the connection on every exit path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            res = dejson(req.read().decode())
        for item in res['query']['search']:
            try:
                num = int(item['title'].split(':')[0])
            except (AttributeError, KeyError, TypeError, ValueError):
                # This hit's title has no leading number; try the next one.
                continue
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception:
        # Deliberately best-effort: any lookup failure means "no answer".
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
# Credentials for the Google Custom Search request made by googlexkcd().
# NOTE(review): the API key is committed in source; consider loading it from
# configuration or the environment instead.
googleapi = 'AIzaSyDr1gkHH-18QheEJpdGwUMmhYYvtlIJ3bA'
googlecse = '017423361205507730360:p6-h8trjn5c'
# Captures the comic number from any link containing "xkcd.com/<digits>".
googlereg = regex(r'xkcd\.com/(\d+)')


def googlexkcd(query):
    """Search for *query* through a Google Custom Search engine.

    Scans the returned items in order and takes the first whose link
    contains ``xkcd.com/<digits>``.  Progress is printed to stdout.

    Returns ``'https://xkcd.com/<number>'``, or ``None`` when the request
    fails, the status is not 200, or no item links to a numbered comic.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1?key=' + googleapi + '&cx=' + googlecse + '&q=' + quote(query)
        # The context manager closes the connection on every exit path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            print('| | Opened URL, status code 200.')
            res = dejson(req.read().decode())
        print('| | Decoded JSON.')
        for item in res['items']:
            print('| | Checking:', item['link'])
            match = googlereg.search(item['link'])
            if match is None:
                # Not a link to a numbered comic; look at the next item.
                continue
            num = int(match.group(1))
            print('| | Found a match:', num)
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception:
        # Deliberately best-effort: any lookup failure means "no answer".
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
# Credentials for the Google Custom Search request made by googletvtropes().
# NOTE(review): the API key is committed in source; consider loading it from
# configuration or the environment instead.
tvtropesapi = 'AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY'
tvtropescse = '006443654034974345143:kc4pt9dnkle'


def googletvtropes(query):
    """Search for *query* through a Google Custom Search "element" endpoint.

    Progress is printed to stdout.

    Returns the ``url`` field of the first entry in the response's
    ``results`` list, or ``None`` when the request fails, the status is not
    200, or there are no results.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1element?key=' + tvtropesapi + '&cx=' + tvtropescse + '&q=' + quote(query)
        # The context manager closes the connection on every exit path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            print('| | Opened URL, status code 200.')
            res = dejson(req.read().decode())
        print('| | Decoded JSON.')
        results = res.get('results')
        if results:
            return results[0]['url']
        print('| | No results.')
        return None
    except Exception:
        # Deliberately best-effort: any lookup failure means "no answer".
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
def numberxkcd(segment):
    """Handle the ``xkcd!<number>`` shortcut.

    Consumes the run of ASCII digits at the start of *segment* and turns it
    into a direct comic link, with no search involved.

    Returns an ``(answer, query, segment)`` tuple: the comic URL, an empty
    query and the text after the digits when *segment* starts with a digit;
    otherwise ``('', '', segment)`` with *segment* untouched.
    """
    end = 0
    # Compare against the ASCII digits explicitly: str.isdigit() is also true
    # for characters such as '²' or '٣', which do not form a valid comic URL.
    while end < len(segment) and segment[end] in '0123456789':
        end += 1
    if end:
        return 'https://xkcd.com/' + segment[:end], '', segment[end:]
    return '', '', segment
# Search back-ends per trigger word.  For a given trigger the functions are
# tried in the listed order until one returns something other than None.
methods = dict(
    xkcd=(relevantxkcd, googlexkcd, explainxkcd),
    # Placeholders with no back-end yet: 'smbc', 'satw', 'ssss'.
    tvtropes=(googletvtropes,),
)
# Per-trigger shortcuts consulted before any search.  Each one receives the
# text following the trigger and returns (answer, query, segment).
special = dict(
    xkcd=numberxkcd,
)
# Delimiter pairs that may enclose a query.  Each tuple is
# (opener, closer[, alternative closer, ...]); attempt() unpacks an entry as
# `start, *ends` and tries the closers in the order listed.
# NOTE(review): many entries read as empty strings here.  They presumably held
# Unicode quotation marks that were lost somewhere along the way - TODO
# confirm against the repository before editing this table.
matching = [
('"', '"'),
("'", "'"),
('', ''),
('', '', ''),
('<', '>'),
('«', '»'),
('»', '«'),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('(', ')'),
('[', ']'),
('{', '}'),
('', ''),
('', ''),
('', ''),
('', ''),
('', ''),
('', '', ''),
# Word delimiters rather than punctuation (presumably Lojban quote words).
('lu', "li'u")
]
# And for an attempt to extract a comic.
def _find_trigger(segment):
    """Return ``(index, name)`` of the earliest ``name!`` trigger, or ``(-1, '')``."""
    best_index, best_name = -1, ''
    for name in methods:
        index = segment.find(name + '!')
        if index != -1 and (best_index == -1 or index < best_index):
            best_index, best_name = index, name
    return best_index, best_name


def _take_quoted(segment):
    """Pull a delimited query off the front of *segment*; return ``(query, rest)``."""
    query = ''
    for start, *ends in matching:
        # An empty opener would "match" any text, so it is never a delimiter.
        if not start or not segment.startswith(start):
            continue
        print('| Found matching initial quote:', start)
        # The opener is consumed even when no closer turns up afterwards.
        segment = segment[len(start):]
        for end in ends:
            if not end:
                continue
            pos = segment.find(end)
            if pos == -1:
                continue
            query = segment[:pos]
            segment = segment[pos + len(end):]
            print('| Found matching end:', end)
            break
        if query:
            break
    return query, segment


def _take_word(segment):
    """Split the leading run of non-whitespace off *segment*; return ``(word, rest)``."""
    end = 0
    while end < len(segment) and not segment[end].isspace():
        end += 1
    return segment[:end], segment[end:]


def attempt(line):
    """Resolve every ``<name>!<query>`` trigger found in *line*.

    For each trigger (the names are the keys of ``methods``) the query is
    taken, in order of preference, from: the trigger's ``special`` handler,
    a delimited span listed in ``matching``, or the next whitespace-free
    word.  The trigger's search functions are then tried in order until one
    returns a result.  Progress is printed to stdout.

    Returns the results joined by single spaces.  Returns ``None`` when the
    scan runs out of triggers without having collected any result, and an
    empty string when the line is consumed completely without a result.
    """
    output = []
    segment = line
    while segment:
        index, comic = _find_trigger(segment)
        if not comic:
            # No further trigger in the remaining text.  Keep what has been
            # collected so far; only report "nothing" if there is nothing.
            if not output:
                return None
            break
        print('Searching for ' + comic + ':')
        # Skip past the trigger word and its '!'.
        segment = segment[index + len(comic) + 1:]
        query = ''
        handler = special.get(comic)
        if handler is not None:
            print("| There's a special case:")
            result, query, segment = handler(segment)
            if result:
                print("| | Result:", result)
                output.append(result)
                continue
            if not query:
                print("| | Special case did not match.")
        if not query:
            query, segment = _take_quoted(segment)
        if not query:
            query, segment = _take_word(segment)
            # Only a bare word gets its separators turned into spaces, and
            # only one kind of separator is replaced per query.
            if '_' in query:
                print('| Replacing "_" with " ".')
                query = query.replace('_', ' ')
            elif '-' in query:
                print('| Replacing "-" with " ".')
                query = query.replace('-', ' ')
        # Strip first so a whitespace-only query is not searched for.
        query = query.strip()
        if query:
            print('| Searching for:', query)
            for method in methods[comic]:
                print('| Search using:', method.__name__)
                result = method(query)
                if result is not None:
                    output.append(result)
                    print('| Result:', result)
                    break
    return ' '.join(output)
# Connect it to happybot.
# sys.exit is used instead of the site-provided exit(), which is not
# guaranteed to exist (for example under ``python -S``).
from sys import argv, exit

if len(argv) != 3:
    print('Usage: ./xkcd.py out in')
    exit(1)
# Follow the file named by the first argument and answer into the second.
for line in cmd(['tail', '-f', argv[1]]):
    # Keep only what follows the first three space-separated fields
    # (presumably date, time and nick - TODO confirm the log format).
    fields = line.split(' ', 3)
    if len(fields) < 4:
        # A short or blank line carries no message; skip it rather than
        # letting an IndexError stop the bot.
        continue
    line = fields[3]
    # Unwrap "\x01ACTION ...\x01" lines so their text is scanned as well.
    if line[:8] == '\x01ACTION ' and line[-1] == '\x01':
        line = line[8:-1]
    # Skip lines marked as not to be read.  The zero-width-space prefix is
    # also what this script puts in front of its own replies below.
    if line.startswith(('\u200b', 'nolog:', '[nolog]')):
        continue
    result = attempt(line)
    if result:
        with open(argv[2], 'w') as fh:
            fh.write('\u200b' + result + '\n')