Bot titles

This commit is contained in:
Juhani Krekelä 2018-10-12 07:48:57 +03:00
parent 5bd2ae5410
commit d936b9646e
1 changed file with 18 additions and 5 deletions

View File

@@ -1,4 +1,6 @@
import html
import urllib.error
import urllib.parse
import urllib.request
# initialize(*, config)
@@ -95,7 +97,14 @@ def extract_title(page_data):
if title_end is None:
title_end = len(page_data)
return html.unescape(page_data[title_start:title_end].decode('utf-8', errors = 'replace'))
title = html.unescape(page_data[title_start:title_end].decode('utf-8', errors = 'replace')).replace('\n', ' ').replace('\t', ' ').strip()
while ' ' in title:
title = title.replace(' ', ' ')
return title
def sanitize(title):
    """Return title with every control character replaced by U+FFFD.

    Covers the C0 range (below U+0020, which includes NUL, CR and LF),
    DEL (U+007F) and the C1 range (U+0080-U+009F). All other characters
    are passed through unchanged, so the result has the same length as
    the input.
    """
    return ''.join(
        # C0 controls, or DEL plus the C1 controls (contiguous 0x7F-0x9F).
        '\ufffd' if ord(c) < 0x20 or 0x7f <= ord(c) <= 0x9f else c
        for c in title
    )
# handle_message(*, prefix, message, nick, channel, irc)
# Called for PRIVMSGs.
@@ -118,14 +127,18 @@ def handle_message(*, prefix, message, nick, channel, irc):
if response.info().get_content_type() == 'text/html':
# First 4KB of a page should be enough for any <title>
first_kb = response.read(4 * 1024)
title = extract_title(first_kb)
print(title)#debg
title = sanitize(extract_title(first_kb))
domain = sanitize(urllib.parse.urlparse(url).netloc)
message = '%s: %s' % (domain, title)
irc.bot_response(channel, message)
possible_titles_left -= 1
except IOError:
except (IOError, urllib.error.URLError):
continue
# handle_nonmessage(*, prefix, command, arguments, irc)
# Called for all commands other than PINGs and PRIVMSGs.
# prefix is the prefix at the start of the message, without the leading ':'