Handle <title>s with key="value"s

This commit is contained in:
Juhani Krekelä 2019-01-26 02:21:12 +00:00
parent 6fff4f207c
commit 9b27bc728f
1 changed file with 10 additions and 3 deletions

View File

@@ -71,9 +71,15 @@ def extract_title(page_data):
if maybe_tag == -1: break
maybe_tag += index
if page_data[maybe_tag:maybe_tag+7].lower() == b'<title>':
title_start = maybe_tag + 7
break
if page_data[maybe_tag:maybe_tag+6].lower() == b'<title':
# Apparently <title> can have key="value" things
# Find the end of the tag
tag_closing = page_data[maybe_tag:].find(b'>')
if tag_closing != -1:
# It is relative to maybe_tag and we want
# the string after it
title_start = maybe_tag + tag_closing + 1
break
else:
index = maybe_tag + 1
@@ -104,6 +110,7 @@ def extract_title(page_data):
return title
def sanitize(title):
    """Return *title* with control characters replaced by U+FFFD.

    A ``None`` title is passed through as ``None``. Otherwise every
    character whose code point is below 32 is swapped for the Unicode
    replacement character; all other characters are kept as they are.
    """
    if title is None:
        return None

    cleaned = []
    for char in title:
        # Code points 0-31 are the ones replaced; 32 (space) and up pass.
        if ord(char) < 32:
            cleaned.append('\ufffd')
        else:
            cleaned.append(char)
    return ''.join(cleaned)
# handle_message(*, prefix, message, nick, channel, irc)