4KiB was not enough for everyone

This commit is contained in:
Juhani Krekelä 2018-10-12 08:27:16 +03:00
parent d936b9646e
commit 179ff85f4c
1 changed file with 8 additions and 4 deletions

View File

@@ -125,13 +125,17 @@ def handle_message(*, prefix, message, nick, channel, irc):
try:
with urllib.request.urlopen(url, timeout = 1) as response:
if response.info().get_content_type() == 'text/html':
# First 4KB of a page should be enough for any <title>
first_kb = response.read(4 * 1024)
title = sanitize(extract_title(first_kb))
# First 4KiB of a page should be enough for any <title>
# Turns out it's not, so download 64KiB
page_source_fragment = response.read(64 * 1024)
title = sanitize(extract_title(page_source_fragment))
domain = sanitize(urllib.parse.urlparse(url).netloc)
message = '%s: %s' % (domain, title)
if title is not None:
message = '%s: %s' % (domain, title)
else:
message = '%s: <no title found>' % domain
irc.bot_response(channel, message)
possible_titles_left -= 1