#!/usr/bin/env python3
|
|
import http.server
|
|
import socket
|
|
import sys
|
|
import urllib.parse
|
|
import xml.sax.saxutils
|
|
from collections import namedtuple
|
|
|
|
# Mapping of feed name -> gopher URL, populated by load_feedfile() and
# read by the request handler and construct_rss().
feeds = {}
|
|
|
|
def download_gophermap(feedurl):
    """Fetch a gophermap (or search result menu) from a gopher URL.

    Parses *feedurl*, connects to the server, sends the selector (plus an
    optional tab-separated search query), and returns the response as a
    list of bytes lines with CR/LF stripped, dot-unquoting applied, and
    parsing stopped at the terminating "." line.

    Raises ValueError for non-gopher URLs, URLs whose itemtype is not a
    gophermap (or search, when a query is present), or queries/paths
    containing protocol-breaking characters.
    """
    split = urllib.parse.urlsplit(feedurl)

    if split.scheme != 'gopher':
        raise ValueError(f'Must be a gopher url, {feedurl} is not')

    host = split.hostname
    port = split.port if split.port is not None else 70  # 70 is the default gopher port

    itemtype = '1'  # Gophermap by default
    query = None
    if split.query != '':
        query = urllib.parse.unquote(split.query)
        # CR/LF would terminate the request line early and allow the query
        # to inject extra protocol lines.
        if '\n' in query or '\r' in query:
            raise ValueError('Newlines not allowed in the query')
        # Index-search servers are itemtype 7 per RFC 1436 (8 is Telnet,
        # which the previous code used by mistake).
        itemtype = '7'

    if split.path.lstrip('/') == '':
        # Special handling if the url doesn't include the path part
        path = ''
    elif split.path.lstrip('/')[0] != itemtype:
        # If the url specifies something else than a gophermap, or search
        # if we have a search query, explode
        raise ValueError(f'Must be a gophermap or search, {feedurl} is not')
    else:
        # Drop the leading itemtype character; the rest is the selector.
        path = urllib.parse.unquote(split.path.lstrip('/')[1:])
        # Tabs separate protocol fields and CR/LF terminates the request.
        if '\n' in path or '\r' in path or '\t' in path:
            raise ValueError('Newlines or tabs not allowed in the path')

    with socket.create_connection((host, port)) as sock:
        if query is None:
            sock.sendall(path.encode('utf-8') + b'\r\n')
        else:
            # Search request: selector TAB query CRLF
            sock.sendall(path.encode('utf-8') + b'\t' + query.encode('utf-8') + b'\r\n')

        lines = []
        buf = bytearray()
        stream_end = False
        while not stream_end:
            data = sock.recv(1024)
            if data == b'':
                break  # Server closed the connection without a final "."

            buf.extend(data)

            # Extract every complete line currently buffered.
            while True:
                newline_index = buf.find(b'\n')
                if newline_index == -1:
                    break

                line = buf[:newline_index]
                buf = buf[newline_index + 1:]
                if line[-1:] == b'\r':
                    line = line[:-1]

                if line == b'.':
                    # End of gophermap reached. Stop parsing here
                    stream_end = True
                    break
                elif line[:1] == b'.':
                    # Dot unquoting: a leading ".." encodes a literal "."
                    line = line[1:]

                lines.append(line)

    return lines
|
|
|
|
def construct_url(itemtype, path, host, port):
    """Build the URL string for one gophermap menu item.

    itemtype, path, host: bytes fields as parsed from a gophermap line.
    port: int port number.

    hURL items (selector "URL:<url>") return the embedded URL directly;
    everything else becomes a gopher:// URL with the itemtype prepended
    to the selector.
    """
    if path.lstrip(b'/').startswith(b'URL:'):
        # hURL
        # Copy the URL (after sanitizing) directly
        url = path.lstrip(b'/')[4:].decode('utf-8')
        return url

    # IDNA-encode internationalized hostnames so they are URL-safe
    host = host.decode('utf-8').encode('idna').decode()
    # URL path = itemtype character + selector, percent-encoded
    path = urllib.parse.quote(itemtype + path)

    if port == 70:
        # No need to add the port explicitly
        netloc = host
    else:
        # Do we have an IPv6 address that needs to be put in brackets?
        if ':' in host:
            # Bug fix: the port was previously the literal string "port"
            netloc = f'[{host}]:{port}'
        else:
            netloc = f'{host}:{port}'

    return urllib.parse.urlunsplit(('gopher', netloc, path, '', ''))
|
|
|
|
def get_links(gophermap):
    """Extract (name, url) pairs from parsed gophermap lines.

    Lines lacking the four mandatory tab-separated fields are skipped,
    info lines (itemtype "i") are ignored, and error lines (itemtype "3")
    raise with the server-provided message.
    """
    links = []
    for raw in gophermap:
        fields = raw.split(b'\t')
        if len(fields) < 4:
            # Not a well-formed menu line; ignore it
            continue

        itemtype_name, selector, server, portfield = fields[:4]

        itemtype = itemtype_name[:1]
        name = itemtype_name[1:].decode('utf-8', errors='replace')
        if itemtype == b'3':
            raise Exception(f'From gopher: {name}')
        if itemtype == b'i':
            continue  # Don't care about info text

        url = construct_url(itemtype, selector, server, int(portfield))
        links.append((name, url))

    return links
|
|
|
|
def construct_rss(links, feed):
    """Build an RSS 2.0 document (as a str) for the named feed.

    links: iterable of (title, url) pairs, as produced by get_links().
    feed:  key into the module-level `feeds` dict; used for the channel
           title and link.

    All interpolated values are XML-escaped.
    """
    # Channel header with title/link derived from the feed configuration.
    rss = [f"""<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
	<channel>
		<title>{xml.sax.saxutils.escape(feed)} (pheeder)</title>
		<link>{xml.sax.saxutils.escape(feeds[feed])}</link>
		<description></description>
"""]
    # One <item> element per gophermap link.
    for text, url in links:
        rss.append(f"""\t\t<item>
			<title>{xml.sax.saxutils.escape(text)}</title>
			<link>{xml.sax.saxutils.escape(url)}</link>
		</item>""")

    rss.append('\t</channel>\n</rss>\n')

    return '\n'.join(rss)
|
|
|
|
class Pheeder(http.server.BaseHTTPRequestHandler):
    """HTTP handler that serves configured gopher feeds as RSS documents.

    GET /<feed> looks up <feed> in the module-level `feeds` dict, fetches
    the gophermap, and responds with an RSS rendering of its links.
    """

    def send_404(self):
        """Send a plain-text 404 response naming the requested path."""
        content = f'{self.path} not found'.encode('utf-8')

        self.send_response(404)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', len(content))
        self.end_headers()

        self.wfile.write(content)

    def send_500(self):
        """Send a plain-text 500 response naming the requested path."""
        content = f'Internal server error while processing {self.path}'.encode('utf-8')

        self.send_response(500)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', len(content))
        self.end_headers()

        self.wfile.write(content)

    def send_rss(self, rss):
        """Send *rss* (a str) as a 200 response with the RSS MIME type."""
        rss = rss.encode('utf-8')

        self.send_response(200)
        self.send_header('Content-Type', 'application/rss+xml; charset=utf-8')
        self.send_header('Content-Length', len(rss))
        self.end_headers()

        self.wfile.write(rss)

    def do_GET(self):
        """Serve /<feed>: fetch its gophermap and respond with RSS."""
        # Only the last path component selects the feed.
        feed = self.path.strip('/').split('/')[-1]

        if feed not in feeds:
            self.send_404()
            return

        try:
            gophermap = download_gophermap(feeds[feed])
            links = get_links(gophermap)
            rss = construct_rss(links, feed)
        except Exception as e:
            # Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt and hid the failure entirely; narrow it
            # and log the cause before answering 500.
            self.log_error('Error generating feed %s: %r', feed, e)
            self.send_500()
        else:
            self.send_rss(rss)
|
|
|
|
def load_feedfile(feedfile):
    """Load feed definitions from *feedfile* into the global `feeds` dict.

    Each meaningful line has the form "<name> <gopher-url>". A '#' starts
    a comment that runs to the end of the line; blank and comment-only
    lines are skipped.

    Raises ValueError (with file name and line number) for lines that do
    not contain exactly a name and a URL.

    Bug fixes vs. the previous version: blank/comment-only lines crashed
    on tuple unpacking, and `split(' ', 2)` (maxsplit=2) made any line
    with two or more spaces — including trailing whitespace left by
    comment removal — raise an unpacking ValueError.
    """
    global feeds

    with open(feedfile, 'r') as f:
        for lineno, line in enumerate(f, 1):
            # Cut the comment first, then strip whitespace, so trailing
            # comments don't leave stray spaces behind.
            comment_start = line.find('#')
            if comment_start != -1:
                line = line[:comment_start]
            line = line.strip()

            # Skip blank lines and lines that were only a comment.
            if not line:
                continue

            parts = line.split(None, 1)
            if len(parts) != 2:
                raise ValueError(f'{feedfile}:{lineno}: expected "<name> <url>", got {line!r}')
            feed, url = parts

            feeds[feed] = url
|
|
|
|
if __name__ == '__main__':
    # Command line: first a port, then one or more feed files.
    args = sys.argv[1:]
    if len(args) < 2:
        print(f'Usage: {sys.argv[0]} port feedfile [feedfile …]', file=sys.stderr)
        sys.exit(1)

    port, *feedfiles = args
    for path in feedfiles:
        load_feedfile(path)

    # Listen on all interfaces and serve until interrupted.
    server = http.server.HTTPServer(('', int(port)), Pheeder)
    server.serve_forever()
|