# cockatric4/botcmd.py

# 138 lines
# 3.9 KiB
# Python

import html
import http.client
import re
import urllib.request
# initialize(*, config)
# Hook invoked to initialize the IRC bot.
# It runs before even the logger is brought up, and further bringup is blocked until it returns.
# config is a configparser.ConfigParser object containing the contents of bot.conf
def initialize(*, config):
    """No-op hook: nothing is done at initialization."""
    return None
# on_connect(*, irc)
# Hook invoked once the IRC bot has connected and sent the USER/NICK commands, before it attempts anything else.
# It is invoked again on every reconnect.
# The bot is blocked until this returns, and that includes PING/PONG handling.
# irc is the IRC API object
def on_connect(*, irc):
    """No-op hook: nothing is done on connect."""
    return None
# on_quit(*, irc)
# Hook invoked right before the IRC bot sends QUIT.
# The bot is blocked until this returns, and that includes PING/PONG handling.
# irc is the IRC API object
def on_quit(*, irc):
    """No-op hook: nothing is done before quitting."""
    return None
# Schemes recognised as the start of a URL.
_URL_SCHEMES = ('http://', 'https://')


def _scan_url_end(message, index):
    """Return the index just past the URL text that continues at *index*."""
    open_parens = 0
    length = len(message)
    while index < length:
        char = message[index]
        # Since browsers don't urlencode parens nowadays, keep balanced
        # parens inside the URL while still letting "(https://example.com)"
        # end at the unmatched closing paren.
        if char == '(':
            open_parens += 1
        elif char == ')':
            if open_parens == 0:
                break
            open_parens -= 1
        # Some people punctuate their URLs: a '.' or ',' that is followed by
        # whitespace, or that is the very last character of the message, is
        # sentence punctuation rather than part of the URL.
        elif char in '.,':
            if index + 1 == length or message[index + 1].isspace():
                break
        # Any whitespace (not only a plain space) or a closing '>' ends a URL.
        elif char.isspace() or char == '>':
            break
        index += 1
    return index


def find_urls(message):
    """Return every http:// or https:// URL found in *message*, in order.

    message is a str. A URL runs from its scheme up to (not including) the
    first whitespace character, '>', unbalanced ')', or a '.' / ',' that is
    followed by whitespace or ends the message. A scheme with nothing after
    it (e.g. a lone "http://") is not reported.

    Returns a list of str; the list is empty when no URL is present.
    """
    urls = []
    index = 0
    while True:
        # Scan for "http" as the common prefix of http:// and https://.
        # Searching from an offset avoids copying the tail of the message.
        start = message.find('http', index)
        if start == -1:
            break
        scheme = next(
            (s for s in _URL_SCHEMES if message.startswith(s, start)),
            None,
        )
        if scheme is None:
            # Just a word containing "http"; keep looking after it.
            index = start + 1
            continue
        body_start = start + len(scheme)
        end = _scan_url_end(message, body_start)
        # Skip scheme-only matches: there is nothing to fetch.
        if end > body_start:
            urls.append(message[start:end])
        index = end
    return urls
# Opening <title> tag, optionally carrying attributes (e.g. <title id="x">).
_TITLE_OPEN_RE = re.compile(rb'<title(?:\s[^>]*)?>', re.IGNORECASE)
# Closing </title> tag; whitespace before the '>' is tolerated.
_TITLE_CLOSE_RE = re.compile(rb'</title\s*>', re.IGNORECASE)


def extract_title(page_data):
    """Return the text of the first <title> element in *page_data*.

    page_data is a bytes object holding (a prefix of) an HTML document.

    Returns None when there is no opening <title> tag. Otherwise returns a
    str: the bytes between the opening tag and the first closing </title>
    after it (or the end of the data when the closing tag is missing, e.g.
    because the page was truncated), decoded as UTF-8 with undecodable bytes
    replaced, with HTML character references unescaped, and with runs of
    whitespace collapsed to single spaces and leading/trailing whitespace
    removed.
    """
    opening = _TITLE_OPEN_RE.search(page_data)
    if opening is None:
        return None
    title_start = opening.end()
    # Look for the closing tag only after the opening one, so an earlier
    # stray "</title>" cannot produce an empty title.
    closing = _TITLE_CLOSE_RE.search(page_data, title_start)
    title_end = closing.start() if closing is not None else len(page_data)
    text = html.unescape(
        page_data[title_start:title_end].decode('utf-8', errors='replace')
    )
    # Titles are often spread over several indented lines; collapse that so
    # the result is a single line.
    return ' '.join(text.split())
# handle_message(*, prefix, message, nick, channel, irc)
# Called for PRIVMSGs.
# prefix is the prefix at the start of the message, without the leading ':'
# message is the contents of the message
# nick is who sent the message
# channel is where you should send the response (note: in queries nick == channel)
# irc is the IRC API object
# All strings are bytestrings
def handle_message(*, prefix, message, nick, channel, irc):
    """Look up the page titles of URLs mentioned in *message*.

    Each URL found in the message is fetched with a one-second timeout; for
    responses served as text/html the first 4 KiB are searched for a <title>.
    At most three such pages are processed per message. Titles are currently
    only printed (debug output); nothing is sent back over IRC.

    URLs that cannot be fetched are skipped silently.
    """
    # Decode leniently: a message that is not valid UTF-8 must not raise out
    # of the handler.
    urls = find_urls(message.decode('utf-8', errors='replace'))

    # Don't titlebot >3 urls
    possible_titles_left = 3
    for url in urls:
        if possible_titles_left == 0:
            break
        # NOTE(review): the URL comes from an untrusted chat user and is
        # fetched from the bot's own host, so it can point at internal
        # services. Consider restricting the allowed targets.
        try:
            with urllib.request.urlopen(url, timeout=1) as response:
                if response.info().get_content_type() != 'text/html':
                    continue
                # First 4KB of a page should be enough for any <title>
                first_kb = response.read(4 * 1024)
        except (OSError, http.client.HTTPException, ValueError):
            # OSError covers URLError, HTTPError and socket timeouts.
            # HTTPException covers malformed URLs/responses (e.g. InvalidURL).
            # ValueError covers URLs that cannot be encoded into a request.
            continue
        title = extract_title(first_kb)
        print(title)  # debug output
        possible_titles_left -= 1
# handle_nonmessage(*, prefix, command, arguments, irc)
# Hook invoked for every command other than PINGs and PRIVMSGs.
# prefix is the prefix at the start of the message, without the leading ':'
# command is the command name or numeric code
# arguments is the rest of the command's arguments, as a list. ':'-arguments are handled automatically
# irc is the IRC API object
# All strings are bytestrings
def handle_nonmessage(*, prefix, command, arguments, irc):
    """No-op hook: other commands are ignored."""
    return None