wikisms/wikipedia_api.py

35 lines
947 B
Python

import urllib.parse
import urllib.request
# Script URL used when the caller does not supply an endpoint.
default_endpoint = 'https://en.wikipedia.org/w/index.php'
# Passed as ``timeout`` to urllib.request.urlopen (seconds).
default_timeout = 10


def wikitext(title, endpoint=default_endpoint, timeout=default_timeout):
    """Fetch the raw text of the page *title* and return it as a ``str``.

    The request URL is built from *endpoint* by appending ``action=raw`` and
    ``title=<title>`` to its query string.  Query parameters already present
    on *endpoint* are kept, except blank-valued ones, which
    ``urllib.parse.parse_qsl`` drops by default.

    Args:
        title: Page title to request; it is URL-encoded into the query.
        endpoint: URL the ``action``/``title`` parameters are appended to.
        timeout: Timeout handed to ``urllib.request.urlopen``.

    Returns:
        The response body decoded to text.  Encodings are tried in this
        order: the charset declared in the response's Content-Type header
        (if any), ``utf-8``, ``windows-1252``, and finally ``iso-8859-1``,
        which accepts every byte sequence.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by reading the
        response (for example ``urllib.error.URLError``) propagates unchanged.
    """
    protocol, host, path, query, fragment = urllib.parse.urlsplit(endpoint)
    params = urllib.parse.parse_qsl(query) + [
        ('action', 'raw'),
        ('title', title),
    ]
    url = urllib.parse.urlunsplit(
        (protocol, host, path, urllib.parse.urlencode(params), fragment)
    )

    with urllib.request.urlopen(url, timeout=timeout) as r:
        contents = r.read()
        charset = r.headers.get_content_charset()

    candidates = ['utf-8', 'windows-1252']
    if charset is not None:
        # The server's declared charset gets the first attempt.
        candidates.insert(0, charset)

    for encoding in candidates:
        try:
            return contents.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            # LookupError: the encoding name is unknown to Python.
            # UnicodeDecodeError: the bytes are not valid in that encoding
            # (including a wrongly declared charset).  Either way, move on
            # to the next candidate rather than failing the whole fetch.
            continue

    # iso-8859-1 maps every byte to a code point, so this cannot fail.
    return contents.decode('iso-8859-1')