#!/usr/bin/env python3
"""Watch a chat log for ``name!query`` markers and answer with matching links.

Run as ``./xkcd.py out in``: the first argument is followed with ``tail -f``
and, for every line in which :func:`attempt` finds something, the resulting
links are written to the second argument.
"""
from subprocess import Popen, PIPE
from urllib.parse import quote
from urllib.request import urlopen, Request
from json import loads as dejson
from re import compile as regex
from sys import argv

# Upper bound (seconds) for any single HTTP lookup, so that one unresponsive
# service cannot stall the whole line-processing loop.
HTTP_TIMEOUT = 10


def cmd(args):
    """Run *args* as a subprocess and lazily yield its stdout line by line.

    Each yielded item is a ``str`` decoded as UTF-8 (undecodable bytes are
    dropped) with the trailing newline removed.  Iteration ends when the
    process closes its stdout.
    """
    proc = Popen(args, stdout=PIPE)
    for raw in iter(proc.stdout.readline, b''):
        # Strip only the newline; a final unterminated line keeps all its bytes.
        yield str(raw.rstrip(b'\n'), 'utf-8', 'ignore')


# Imports are finished, as is the preparation for things later.
# Time to implement the searchers!

def _fetch(target):
    """Return the response body (bytes) for *target*, or None unless HTTP 200.

    *target* may be a URL string or a ``Request``.  Network errors propagate
    to the caller, which is expected to treat them as "no result".
    """
    with urlopen(target, timeout=HTTP_TIMEOUT) as response:
        if response.status != 200:
            return None
        return response.read()


def _search_failed(err):
    """Log a failed lookup in the same '| |' style as the other debug output."""
    print('| | Search failed:', repr(err))


def relevantxkcd(query):
    """Look *query* up on relevantxkcd.appspot.com.

    Returns an ``https://xkcd.com/<n>`` URL, or None when the request fails,
    the reply cannot be parsed, or the first field of the reply is below 0.07.
    """
    try:
        body = _fetch("https://relevantxkcd.appspot.com/process?action=xkcd&query=" + quote(query))
        if body is None:
            return None
        fields = body.split()
        # The first whitespace-separated field is compared against a fixed
        # threshold (presumably a confidence score -- confirm against the
        # service); the third field is parsed as the comic number.
        if float(fields[0]) < 0.07:
            return None
        return 'https://xkcd.com/' + str(int(fields[2]))
    except Exception as err:
        _search_failed(err)
        return None


def explainxkcd(query):
    """Full-text search the explainxkcd.com wiki for *query*.

    Returns the ``https://xkcd.com/<n>`` URL for the first search hit whose
    title starts with an integer followed by ``:``, or None if there is no
    such hit or the lookup fails.
    """
    try:
        url = "https://explainxkcd.com/wiki/api.php?action=query&list=search&srwhat=text&format=json&srsearch=" + quote(query)
        body = _fetch(Request(url, headers={'User-Agent': 'Mozilla'}))
        if body is None:
            return None
        res = dejson(body.decode())
        for item in res['query']['search']:
            try:
                num = int(item['title'].split(':')[0])
            except (KeyError, ValueError):
                # This hit's title does not start with a number; try the next.
                continue
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception as err:
        _search_failed(err)
        return None


# NOTE(review): API credentials are committed in source; consider loading
# them from the environment or a config file instead.
googleapi = 'AIzaSyDr1gkHH-18QheEJpdGwUMmhYYvtlIJ3bA'
googlecse = '017423361205507730360:p6-h8trjn5c'
googlereg = regex(r'xkcd\.com/(\d+)')


def googlexkcd(query):
    """Search for *query* through a Google Custom Search engine.

    Returns the ``https://xkcd.com/<n>`` URL for the first result link that
    matches ``xkcd.com/<digits>``, or None if nothing matches or the lookup
    fails.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1?key=' + googleapi + '&cx=' + googlecse + '&q=' + quote(query)
        body = _fetch(url)
        if body is None:
            return None
        print('| | Opened URL, status code 200.')
        res = dejson(body.decode())
        print('| | Decoded JSON.')
        for item in res['items']:
            print('| | Checking:', item['link'])
            match = googlereg.search(item['link'])
            if match is None:
                continue
            num = int(match.group(1))
            print('| | Found a match:', num)
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception as err:
        _search_failed(err)
        return None


# NOTE(review): same remark as above about committed credentials.
tvtropesapi = 'AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY'
tvtropescse = '006443654034974345143:kc4pt9dnkle'


def googletvtropes(query):
    """Search for *query* through the TV Tropes Google Custom Search engine.

    Returns the ``url`` of the first entry in the reply's ``results`` list,
    or None if there are no results or the lookup fails.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1element?key=' + tvtropesapi + '&cx=' + tvtropescse + '&q=' + quote(query)
        body = _fetch(url)
        if body is None:
            return None
        print('| | Opened URL, status code 200.')
        res = dejson(body.decode())
        print('| | Decoded JSON.')
        results = res.get('results')
        if results:
            return results[0]['url']
        print('| | No results.')
        return None
    except Exception as err:
        _search_failed(err)
        return None


# ASCII digits only: str.isdigit() also accepts characters such as
# superscripts, which would produce a URL that is not a comic number.
_LEADING_DIGITS = regex(r'[0-9]+')


def numberxkcd(segment):
    """Special case for ``xkcd!<number>``: link straight to that comic.

    Returns ``(answer, query, rest)``.  When *segment* starts with digits,
    *answer* is the comic URL and *rest* is the text after the digits;
    otherwise *answer* is empty and *rest* is *segment* unchanged.  *query*
    is always the empty string.
    """
    match = _LEADING_DIGITS.match(segment)
    if match:
        return 'https://xkcd.com/' + match.group(), '', segment[match.end():]
    return '', '', segment


# Righty, so all our searching mechanisms are above us. Let's table 'em up in a dict.
# Each entry maps a marker name to the searchers tried, in order, for it.
methods = {
    'xkcd': (relevantxkcd, googlexkcd, explainxkcd),
    # 'smbc': (),
    # 'satw': (),
    # 'ssss': (),
    'tvtropes': (googletvtropes,),
}

# They return answer, query, segment
special = {
    'xkcd': numberxkcd,
}

# Now for matching quotes: (opening, closing[, alternative closing...]).
matching = [
    ('"', '"'),
    ("'", "'"),
    ('“', '”'),
    ('„', '”', '“'),
    ('<', '>'),
    ('«', '»'),
    ('»', '«'),
    ('‹', '›'),
    ('《', '》'),
    ('〈', '〉'),
    ('「', '」'),
    ('﹁', '﹂'),
    ('『', '』'),
    ('﹃', '﹄'),
    ('(', ')'),
    ('[', ']'),
    ('{', '}'),
    ('【', '】'),
    ('〔', '〕'),
    ('⦗', '⦘'),
    ('〖', '〗'),
    ('〘', '〙'),
    ('‚', '’', '‘'),
    ('lu', "li'u"),
]


def _quoted_query(segment):
    """Try to read a quoted query from the start of *segment*.

    Returns ``(query, rest)``.  An opening quote is consumed as soon as it is
    recognised, even if no closing quote follows, so *rest* may be shorter
    than *segment* while *query* is still empty.
    """
    query = ''
    for start, *ends in matching:
        if not segment.startswith(start):
            continue
        print('| Found matching initial quote:', start)
        segment = segment[len(start):]
        for end in ends:
            pos = segment.find(end)
            if pos < 0:
                continue
            query = segment[:pos]
            segment = segment[pos + len(end):]
            print('| Found matching end:', end)
            break
        if query:
            break
    return query, segment


def _bare_query(segment):
    """Read an unquoted query: everything up to the first whitespace.

    Returns ``(query, rest)``.  Because a bare word cannot contain spaces,
    ``_`` (or, if there is none, ``-``) stands in for them.
    """
    length = 0
    while length < len(segment) and not segment[length].isspace():
        length += 1
    query, segment = segment[:length], segment[length:]
    if '_' in query:
        print('| Replacing "_" with " ".')
        query = query.replace('_', ' ')
    elif '-' in query:
        print('| Replacing "-" with " ".')
        query = query.replace('-', ' ')
    return query, segment


# And for an attempt to extract a comic.
def attempt(line):
    """Resolve every ``name!query`` marker in *line* to a link.

    ``name`` is a key of ``methods``.  The query is either handled by the
    marker's entry in ``special``, or taken from a quoted span (see
    ``matching``), or from the following bare word; it is then passed to the
    marker's searchers until one returns a result.

    Returns the links joined by single spaces.  Returns None when *line*
    contains a non-empty remainder without any marker and nothing has been
    found yet; returns an empty string when markers were present but none
    produced a link (or *line* is empty).
    """
    output = []
    segment = line
    while segment:
        # Locate the earliest marker of any known name.
        where = len(segment)
        name = ''
        for comic in methods:
            pos = segment.find(comic + '!')
            if 0 <= pos < where:
                where = pos
                name = comic
        if not name:
            # No further markers.  Keep whatever earlier markers produced
            # instead of throwing it away because of trailing text.
            if output:
                break
            # We have not found any comic-related things in this line. Bye!
            return None
        print('Searching for ' + name + ':')
        # Right, skippity skip past "name!".
        segment = segment[where + len(name) + 1:]
        query = ''
        # Special cases.
        if name in special:
            print("| There's a special case:")
            result, query, segment = special[name](segment)
            if result:
                print("| | Result:", result)
                output.append(result)
                continue
            if not query:
                print("| | Special case did not match.")
        if not query:
            # See if there's a quoted thing.
            query, segment = _quoted_query(segment)
        if not query:
            query, segment = _bare_query(segment)
        if query:
            query = query.strip()
            print('| Searching for:', query)
            for method in methods[name]:
                print('| Search using:', method.__name__)
                result = method(query)
                if result is not None:
                    output.append(result)
                    print('| Result:', result)
                    break
    return ' '.join(output)


# Connect it to happybot.
def main():
    """Follow ``argv[1]`` with ``tail -f`` and write answers to ``argv[2]``."""
    if len(argv) != 3:
        print('Usage: ./xkcd.py out in')
        raise SystemExit(1)
    for line in cmd(['tail', '-f', argv[1]]):
        # Only the text after the first three space-separated fields is
        # examined; lines too short to have any are skipped rather than
        # crashing the loop.
        fields = line.split(' ', 3)
        if len(fields) < 4:
            continue
        line = fields[3]
        # ACTIONs shmacktions, I don't give a char 'bout that.
        if line[:8] == '\x01ACTION ' and line[-1] == '\x01':
            line = line[8:-1]
        # Oh, oh noes! We can't look at these lines! HUMANS ARE EVIL!
        # (Replies written below also start with U+200B, so they are skipped
        # here if they show up in the followed file.)
        if line.startswith(('\u200b', 'nolog:', '[nolog]')):
            continue
        # I'm trying... trying...
        result = attempt(line)
        if result:
            with open(argv[2], 'w') as fh:
                fh.write('\u200b' + result + '\n')


if __name__ == '__main__':
    main()