Bot titles
This commit is contained in:
parent
5bd2ae5410
commit
d936b9646e
23
botcmd.py
23
botcmd.py
|
@ -1,4 +1,6 @@
|
||||||
import html
|
import html
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
# initialize(*, config)
|
# initialize(*, config)
|
||||||
|
@ -95,7 +97,14 @@ def extract_title(page_data):
|
||||||
if title_end is None:
|
if title_end is None:
|
||||||
title_end = len(page_data)
|
title_end = len(page_data)
|
||||||
|
|
||||||
return html.unescape(page_data[title_start:title_end].decode('utf-8', errors = 'replace'))
|
title = html.unescape(page_data[title_start:title_end].decode('utf-8', errors = 'replace')).replace('\n', ' ').replace('\t', ' ').strip()
|
||||||
|
while ' ' in title:
|
||||||
|
title = title.replace(' ', ' ')
|
||||||
|
|
||||||
|
return title
|
||||||
|
|
||||||
|
# sanitize(title)
# Returns a copy of title in which every character whose code point is
# below 32 (the ASCII control range, including CR, LF, TAB and NUL) is
# replaced by U+FFFD REPLACEMENT CHARACTER. All other characters are
# kept unchanged.
def sanitize(title):
	cleaned = []
	for ch in title:
		if ord(ch) >= 32:
			cleaned.append(ch)
		else:
			# Control character: substitute the replacement character
			cleaned.append('\ufffd')
	return ''.join(cleaned)
|
||||||
|
|
||||||
# handle_message(*, prefix, message, nick, channel, irc)
|
# handle_message(*, prefix, message, nick, channel, irc)
|
||||||
# Called for PRIVMSGs.
|
# Called for PRIVMSGs.
|
||||||
|
@ -118,14 +127,18 @@ def handle_message(*, prefix, message, nick, channel, irc):
|
||||||
if response.info().get_content_type() == 'text/html':
|
if response.info().get_content_type() == 'text/html':
|
||||||
# First 4KB of a page should be enough for any <title>
|
# First 4KB of a page should be enough for any <title>
|
||||||
first_kb = response.read(4 * 1024)
|
first_kb = response.read(4 * 1024)
|
||||||
title = extract_title(first_kb)
|
title = sanitize(extract_title(first_kb))
|
||||||
print(title)#debg
|
|
||||||
|
domain = sanitize(urllib.parse.urlparse(url).netloc)
|
||||||
|
|
||||||
|
message = '%s: %s' % (domain, title)
|
||||||
|
irc.bot_response(channel, message)
|
||||||
|
|
||||||
possible_titles_left -= 1
|
possible_titles_left -= 1
|
||||||
|
|
||||||
except IOError:
|
except (IOError, urllib.error.URLError):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
# handle_nonmessage(*, prefix, command, arguments, irc)
|
# handle_nonmessage(*, prefix, command, arguments, irc)
|
||||||
# Called for all other commands than PINGs and PRIVMSGs.
|
# Called for all other commands than PINGs and PRIVMSGs.
|
||||||
# prefix is the prefix at the start of the message, without the leading ':'
|
# prefix is the prefix at the start of the message, without the leading ':'
|
||||||
|
|
Loading…
Reference in New Issue