# happybot/comics.py

#!/usr/bin/env python3

from subprocess import Popen, PIPE
from urllib.request import urlopen, quote, Request
from json import loads as dejson
from re import compile as regex

def cmd(args):
    """Run *args* as a subprocess and yield its standard output line by line.

    Each line is decoded as UTF-8 (undecodable bytes are dropped) and yielded
    without its trailing newline.  The generator finishes when the child
    closes its stdout.

    Args:
        args: Argument list handed to ``Popen`` unchanged.

    Yields:
        One ``str`` per line of output.
    """
    proc = Popen(args, stdout=PIPE)
    finished = False
    try:
        # readline() returns b'' only at end-of-file; a blank line is b'\n'.
        for raw in iter(proc.stdout.readline, b''):
            # Strip a newline only when there is one: the last line of the
            # output may end without it, and must not lose a real character.
            if raw.endswith(b'\n'):
                raw = raw[:-1]
            yield str(raw, 'utf-8', 'ignore')
        finished = True
    finally:
        proc.stdout.close()
        if not finished:
            # The consumer stopped early (or an error occurred), so the child
            # may run forever, e.g. ``tail -f``; stop it before reaping.
            proc.terminate()
        proc.wait()

# Imports are finished, as is the preparation for things later.
# Time to implement the searchers!

def relevantxkcd(query):
    """Look up *query* on the relevantxkcd search service.

    The response body is split on whitespace; its first field is read as a
    float score and its third field as the comic number.

    Args:
        query: Free-text search string.

    Returns:
        ``'https://xkcd.com/<num>'`` for the reported comic, or ``None`` when
        the request fails, the status is not 200, the score is below 0.07, or
        the response cannot be parsed.
    """
    try:
        url = "https://relevantxkcd.appspot.com/process?action=xkcd&query=" + quote(query)
        # Context manager so the connection is released on every path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            res = req.read().split()
        # float() and int() both accept ASCII bytes, so no decoding is needed.
        sure = float(res[0])
        if sure < 0.07:
            return None
        num = int(res[2])
        return 'https://xkcd.com/' + str(num)
    except Exception:
        # Best-effort lookup: any failure means "no answer".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        return None

def explainxkcd(query):
    """Search the explainxkcd wiki for *query* and return the first comic hit.

    A search hit counts when the part of its title before the first ``':'``
    parses as an integer; that integer is used as the comic number.

    Args:
        query: Free-text search string.

    Returns:
        ``'https://xkcd.com/<num>'`` for the first usable hit, or ``None``
        when the request fails, the status is not 200, the response cannot be
        parsed, or no hit has a numeric title prefix.
    """
    try:
        url = "https://explainxkcd.com/wiki/api.php?action=query&list=search&srwhat=text&format=json&srsearch=" + quote(query)
        # The request carries an explicit User-Agent header.
        url = Request(url, headers={'User-Agent': 'Mozilla'})
        # Context manager so the connection is released on every path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            res = dejson(req.read().decode())
        for item in res['query']['search']:
            try:
                num = int(item['title'].split(':')[0])
            except (AttributeError, KeyError, TypeError, ValueError):
                # Not a "<number>: <title>" hit; try the next one.
                continue
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception:
        # Best-effort lookup: any failure means "no answer".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        return None

# API key ("key=") and search engine id ("cx=") for the Google Custom Search
# request built in googlexkcd().
# NOTE(review): the API key is committed in plain text -- consider loading it
# from the environment or a config file instead.
googleapi = 'AIzaSyDr1gkHH-18QheEJpdGwUMmhYYvtlIJ3bA'
googlecse = '017423361205507730360:p6-h8trjn5c'
# Captures the comic number from a link containing "xkcd.com/<digits>".
googlereg = regex(r'xkcd\.com/(\d+)')

def googlexkcd(query):
    """Search for *query* through Google Custom Search and return an xkcd link.

    Result items are checked in order; the first whose ``link`` matches
    ``googlereg`` supplies the comic number.  Progress is printed to stdout.

    Args:
        query: Free-text search string.

    Returns:
        ``'https://xkcd.com/<num>'`` for the first matching result, or
        ``None`` when the request fails, the status is not 200, the response
        cannot be parsed, or no result links to a numbered comic.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1?key=' + googleapi + '&cx=' + googlecse + '&q=' + quote(query)
        # Context manager so the connection is released on every path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            print('| | Opened URL, status code 200.')
            res = dejson(req.read().decode())
        print('| | Decoded JSON.')
        for item in res['items']:
            print('| | Checking:', item['link'])
            match = googlereg.search(item['link'])
            if match is None:
                # Not a link to a numbered comic; look at the next result.
                continue
            num = int(match.group(1))
            print('| | Found a match:', num)
            return 'https://xkcd.com/' + str(num)
        return None
    except Exception:
        # Best-effort lookup: any failure means "no answer".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        return None

# API key ("key=") and search engine id ("cx=") for the Google Custom Search
# request built in googletvtropes().
# NOTE(review): the API key is committed in plain text -- consider loading it
# from the environment or a config file instead.
tvtropesapi = 'AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY'
tvtropescse = '006443654034974345143:kc4pt9dnkle'
def googletvtropes(query):
    """Search for *query* through the TV Tropes custom search engine.

    Progress is printed to stdout.

    Args:
        query: Free-text search string.

    Returns:
        The ``url`` of the first entry in the response's ``results`` list, or
        ``None`` when the request fails, the status is not 200, the response
        cannot be parsed, or there are no results.
    """
    try:
        url = 'https://www.googleapis.com/customsearch/v1element?key=' + tvtropesapi + '&cx=' + tvtropescse + '&q=' + quote(query)
        # Context manager so the connection is released on every path.
        with urlopen(url) as req:
            if req.code != 200:
                return None
            print('| | Opened URL, status code 200.')
            res = dejson(req.read().decode())
        print('| | Decoded JSON.')
        # A missing key and an empty list both mean "nothing found".
        results = res.get('results')
        if results:
            return results[0]['url']
        print('| | No results.')
        return None
    except Exception:
        # Best-effort lookup: any failure means "no answer".  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        return None

def numberxkcd(segment):
    """Turn a leading run of digits in *segment* into a direct comic link.

    Args:
        segment: Text to inspect.

    Returns:
        An ``(answer, query, segment)`` tuple.  When *segment* starts with
        digits, *answer* is the comic URL built from them and the returned
        segment is the text after them; otherwise *answer* is ``''`` and the
        segment comes back unchanged.  *query* is always ``''``.
    """
    if not segment:
        return '', '', segment
    # Position of the first non-digit character, or the end of the text.
    stop = next(
        (pos for pos, char in enumerate(segment) if not char.isdigit()),
        len(segment),
    )
    if stop == 0:
        return '', '', segment
    return 'https://xkcd.com/' + segment[:stop], '', segment[stop:]

# Righty, so all our searching mechanisms are above us. Let's table 'em up in a dict.

# Maps a trigger word (written as "<word>!" in a message) to the searchers
# that attempt() tries in order; the first one returning something other than
# None wins.
methods = {
    'xkcd': (relevantxkcd, googlexkcd, explainxkcd),
#    'smbc': (),
#    'satw': (),
#    'ssss': (),
    'tvtropes': (googletvtropes,),
    }

# Per-trigger shortcuts that attempt() runs before any searcher.  Each takes
# the text following the trigger and returns (answer, query, segment).
special = {
    'xkcd': numberxkcd,
    }

# Now for matching quotes.

# Quote pairs recognised around a query.  Each tuple is
# (opener, closer, [alternative closer, ...]): attempt() checks the openers in
# list order and, once one matches, looks for the closers in tuple order.
matching = [
        ('"', '"'),
        ("'", "'"),
        ('“', '”'),
        ('„', '”', '“'),
        ('<', '>'),
        ('«', '»'),
        ('»', '«'),
        ('‹', '›'),
        ('《', '》'),
        ('〈', '〉'),
        ('「', '」'),
        ('﹁', '﹂'),
        ('『', '』'),
        ('﹃', '﹄'),
        ('(', ')'),
        ('[', ']'),
        ('{', '}'),
        ('【', '】'),
        ('〔', '〕'),
        ('⦗', '⦘'),
        ('〖', '〗'),
        ('〘', '〙'),
        ('‚', '’', '‘'),
        # Word delimiters rather than punctuation (presumably Lojban quote
        # markers -- confirm).
        ('lu', "li'u")
        ]


# And for an attempt to extract a comic.

def _next_marker(segment):
    """Return (index, comic) for the earliest '<comic>!' in *segment*, or (len(segment), '')."""
    best_pos, best_comic = len(segment), ''
    for comic in methods:
        pos = segment.find(comic + '!')
        # Strict '<' keeps the first-listed comic when two start at one index.
        if pos != -1 and pos < best_pos:
            best_pos, best_comic = pos, comic
    return best_pos, best_comic


def _quoted_query(segment):
    """Take a quoted query off the front of *segment*; return (query, rest)."""
    query = ''
    for start, *ends in matching:
        if not segment.startswith(start):
            continue
        print('| Found matching initial quote:', start)
        # The opener is consumed even if no closer turns up afterwards.
        segment = segment[len(start):]
        for end in ends:
            pos = segment.find(end)
            if pos == -1:
                continue
            query = segment[:pos]
            segment = segment[pos + len(end):]
            print('| Found matching end:', end)
            break
        if query:
            break
    return query, segment


def _word_query(segment):
    """Take the leading non-whitespace run of *segment* as the query; return (query, rest)."""
    stop = 0
    while stop < len(segment) and not segment[stop].isspace():
        stop += 1
    query, segment = segment[:stop], segment[stop:]
    # Only one separator style is translated: '_' takes priority over '-'.
    if '_' in query:
        print('| Replacing "_" with " ".')
        query = query.replace('_', ' ')
    elif '-' in query:
        print('| Replacing "-" with " ".')
        query = query.replace('-', ' ')
    return query, segment


def attempt(line):
    """Expand every ``<comic>!<query>`` trigger in *line* into a link.

    For each trigger (a key of ``methods`` followed by ``'!'``) the query is
    taken, in this order, from: the comic's ``special`` handler, a quoted span
    listed in ``matching``, or the next whitespace-delimited word (with ``_``
    or else ``-`` turned into spaces).  The comic's searchers are then tried
    in order until one returns a result.  Progress is printed to stdout.

    Args:
        line: Message text to scan.

    Returns:
        The results joined by single spaces.  ``None`` when text remains but
        contains no trigger and nothing has been collected yet.  The joined
        string may be empty if no search produced a result.
    """
    output = []
    segment = line

    while segment:
        pos, comic = _next_marker(segment)
        if not comic:
            if not output:
                # We have not found any comic-related things in this line.
                return None
            # Trailing text without another trigger must not throw away the
            # results already collected.
            break
        print('Searching for ' + comic + ':')

        # Skip past the trigger itself: len(comic + '!').
        segment = segment[pos + len(comic) + 1:]
        query = ''

        # Special cases.
        if comic in special:
            print("| There's a special case:")
            result, query, segment = special[comic](segment)
            if result:
                print("| | Result:", result)
                output.append(result)
                continue
            if not query:
                print("| | Special case did not match.")

        if not query:
            # See if there's a quoted thing.
            query, segment = _quoted_query(segment)

        if not query:
            # Fall back to the next bare word.
            query, segment = _word_query(segment)

        if query:
            query = query.strip()
            print('| Searching for:', query)
            for method in methods[comic]:
                print('| Search using:', method.__name__)
                result = method(query)
                if result is not None:
                    output.append(result)
                    print('| Result:', result)
                    break

    return ' '.join(output)

# Connect it to happybot.

from sys import argv

# Exactly two arguments are required: the file to follow (argv[1]) and the
# file that replies are written to (argv[2]).
if len(argv) != 3:
    print('Usage: ./xkcd.py out in')
    exit(1)

for line in cmd(['tail', '-f', argv[1]]):
    # Keep only the text after the first three space-separated fields
    # (presumably timestamp/sender metadata -- confirm against the log
    # format).  Lines with fewer fields have no such text; skip them rather
    # than letting an IndexError kill the whole loop.
    fields = line.split(' ', 3)
    if len(fields) < 4:
        continue
    line = fields[3]
    # Unwrap CTCP ACTION messages so their text is scanned like any other.
    if line[:8] == '\x01ACTION ' and line[-1] == '\x01':
        line = line[8:-1]
    # Skip lines that start with a zero-width space (every reply written
    # below starts with one) and lines explicitly marked as not to be logged.
    if line.startswith(('\u200b', 'nolog:', '[nolog]')):
        continue
    result = attempt(line)
    if result:
        # Explicit UTF-8 so writing the zero-width space prefix does not
        # depend on the locale's default encoding.
        with open(argv[2], 'w', encoding='utf-8') as fh:
            fh.write('\u200b' + result + '\n')