# wikisms/wikipedia_api.py

import urllib.parse
import urllib.request

# URL used by wikitext() when the caller does not pass an explicit endpoint.
default_endpoint = 'https://en.wikipedia.org/w/index.php'
# Value passed as the `timeout` argument of urllib.request.urlopen() by
# wikitext() when the caller does not pass an explicit timeout.
default_timeout = 10

def wikitext(title, endpoint = default_endpoint, timeout = default_timeout):
	"""Fetch the raw source of the page `title` and return it as a str.

	The request URL is built from `endpoint` by appending `action=raw` and
	`title=<title>` to whatever query parameters the endpoint already has.

	Parameters:
		title: page title, sent as the `title` query parameter.
		endpoint: base URL to request; its existing query string and
			fragment are preserved.
		timeout: passed through to urllib.request.urlopen().

	Returns:
		The response body decoded to str. The charset declared in the
		response's Content-Type header is tried first; if it is missing,
		unknown, or does not fit the body, utf-8, windows-1252 and finally
		iso-8859-1 are tried in that order.

	Raises:
		Whatever urllib.request.urlopen() raises (e.g. urllib.error.URLError)
		is propagated unchanged.
	"""
	protocol, host, path, query, fragment = urllib.parse.urlsplit(endpoint)
	# keep_blank_values=True so that parameters such as `?foo=` that are
	# already present on the endpoint are not silently dropped.
	params = urllib.parse.parse_qsl(query, keep_blank_values=True)
	query = urllib.parse.urlencode(params + [
		('action', 'raw'),
		('title', title),
	])
	url = urllib.parse.urlunsplit((protocol, host, path, query, fragment))
	with urllib.request.urlopen(url, timeout=timeout) as r:
		contents = r.read()
		charset = r.headers.get_content_charset()

	if charset is not None:
		try:
			return contents.decode(charset)
		except (LookupError, UnicodeDecodeError):
			# LookupError: Python does not know the declared encoding.
			# UnicodeDecodeError: the body is not valid in the declared
			# encoding. Either way, fall through to the guesses below
			# instead of failing outright.
			pass

	# Default to trying utf-8, windows-1252, iso-8859-1
	for encoding in ('utf-8', 'windows-1252'):
		try:
			return contents.decode(encoding)
		except UnicodeDecodeError:
			pass
	# iso-8859-1 maps every byte value to a code point, so this last
	# resort cannot raise UnicodeDecodeError.
	return contents.decode('iso-8859-1')