Bot titles
This commit is contained in:
parent
5bd2ae5410
commit
d936b9646e
23
botcmd.py
23
botcmd.py
|
@ -1,4 +1,6 @@
|
|||
import html
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
# initialize(*, config)
|
||||
|
@ -95,7 +97,14 @@ def extract_title(page_data):
|
|||
if title_end is None:
|
||||
title_end = len(page_data)
|
||||
|
||||
return html.unescape(page_data[title_start:title_end].decode('utf-8', errors = 'replace'))
|
||||
title = html.unescape(page_data[title_start:title_end].decode('utf-8', errors = 'replace')).replace('\n', ' ').replace('\t', ' ').strip()
|
||||
while ' ' in title:
|
||||
title = title.replace(' ', ' ')
|
||||
|
||||
return title
|
||||
|
||||
def sanitize(title):
    """Return *title* with control characters replaced by U+FFFD.

    The result is embedded in a message sent back to the channel, and the
    input comes from fetched web pages and URLs, so it must be treated as
    untrusted.

    Replaced code points:
      * C0 controls (below 0x20) -- includes CR, LF and NUL.
      * DEL (0x7F) and the C1 controls (0x80-0x9F) -- some terminals
        interpret these (e.g. 0x9B as CSI) as the start of an escape
        sequence.

    All other characters are passed through unchanged.
    """
    def is_control(c):
        code = ord(c)
        return code < 0x20 or 0x7f <= code <= 0x9f

    return ''.join('\ufffd' if is_control(c) else c for c in title)
|
||||
|
||||
# handle_message(*, prefix, message, nick, channel, irc)
|
||||
# Called for PRIVMSGs.
|
||||
|
@ -118,14 +127,18 @@ def handle_message(*, prefix, message, nick, channel, irc):
|
|||
if response.info().get_content_type() == 'text/html':
|
||||
# First 4KB of a page should be enough for any <title>
|
||||
first_kb = response.read(4 * 1024)
|
||||
title = extract_title(first_kb)
|
||||
print(title)#debg
|
||||
title = sanitize(extract_title(first_kb))
|
||||
|
||||
domain = sanitize(urllib.parse.urlparse(url).netloc)
|
||||
|
||||
message = '%s: %s' % (domain, title)
|
||||
irc.bot_response(channel, message)
|
||||
|
||||
possible_titles_left -= 1
|
||||
|
||||
except IOError:
|
||||
except (IOError, urllib.error.URLError):
|
||||
continue
|
||||
|
||||
|
||||
# handle_nonmessage(*, prefix, command, arguments, irc)
|
||||
# Called for all commands other than PINGs and PRIVMSGs.
|
||||
# prefix is the prefix at the start of the message, without the leading ':'
|
||||
|
|
Loading…
Reference in New Issue