35 lines
947 B
Python
35 lines
947 B
Python
import urllib.parse
|
|
import urllib.request
|
|
|
|
# Default ``index.php`` entry point (English Wikipedia) queried by wikitext().
default_endpoint = 'https://en.wikipedia.org/w/index.php'

# Default timeout, in seconds, handed to urllib.request.urlopen().
default_timeout = 10

# Encodings tried, in order, when the declared charset is missing or unusable.
_FALLBACK_ENCODINGS = ('utf-8', 'windows-1252')


def _decode_body(contents, charset):
    """Decode *contents* with *charset* if it works, else the fallback chain.

    The order is: the declared charset (when given), utf-8, windows-1252 and
    finally iso-8859-1.
    """
    candidates = []
    if charset is not None:
        candidates.append(charset)
    candidates.extend(_FALLBACK_ENCODINGS)

    for encoding in candidates:
        try:
            return contents.decode(encoding)
        except (LookupError, UnicodeError):
            # LookupError: the codec name is unknown.
            # UnicodeError: the bytes are not valid in that encoding (for
            # example, the server declared a charset that does not match the
            # payload).  Either way, move on to the next candidate.
            continue

    # iso-8859-1 maps every byte value to a character, so this cannot fail.
    return contents.decode('iso-8859-1')


def wikitext(title, endpoint=default_endpoint, timeout=default_timeout):
    """Fetch the raw text of the page *title* and return it as a ``str``.

    ``action=raw`` and ``title=<title>`` are appended to whatever query
    string *endpoint* already carries, and the resulting URL is requested
    with urllib.request.urlopen().

    Args:
        title: Page title to request; it is URL-encoded here.
        endpoint: URL of the ``index.php`` script to query.
        timeout: Timeout in seconds passed to urllib.request.urlopen().

    Returns:
        The response body decoded to text.  The charset declared in the
        response's Content-Type header is tried first; if it is absent,
        unknown, or does not fit the bytes, utf-8, windows-1252 and
        iso-8859-1 are tried in that order.

    Raises:
        Whatever urllib.request.urlopen() raises for the request (for
        example urllib.error.URLError); nothing is caught here.
    """
    protocol, host, path, query, fragment = urllib.parse.urlsplit(endpoint)
    query = urllib.parse.urlencode(urllib.parse.parse_qsl(query) + [
        ('action', 'raw'),
        ('title', title),
    ])
    url = urllib.parse.urlunsplit((protocol, host, path, query, fragment))

    with urllib.request.urlopen(url, timeout=timeout) as r:
        contents = r.read()
        charset = r.headers.get_content_charset()

    return _decode_body(contents, charset)
|