#!/usr/bin/env python3
import http.server
import socket
import sys
import urllib.parse
import xml.sax.saxutils

# Maps feed names to gopher URLs, filled in by load_feedfile()
feeds = {}


def download_gophermap(feedurl):
    split = urllib.parse.urlsplit(feedurl)
    if split.scheme != 'gopher':
        raise ValueError(f'Must be a gopher url, {feedurl} is not')

    host = split.hostname
    port = split.port if split.port is not None else 70

    itemtype = '1'  # Gophermap by default
    query = None
    if split.query != '':
        query = urllib.parse.unquote(split.query)
        if '\n' in query or '\r' in query:
            raise ValueError('Newlines not allowed in the query')
        itemtype = '7'  # Search has its own itemtype

    if split.path.lstrip('/') == '':
        # Special handling if the url doesn't include the path part
        path = ''
    elif split.path.lstrip('/')[0] != itemtype:
        # If the url specifies something else than a gophermap, or search if
        # we have a search query, explode
        raise ValueError(f'Must be a gophermap or search, {feedurl} is not')
    else:
        path = urllib.parse.unquote(split.path.lstrip('/')[1:])
        if '\n' in path or '\r' in path or '\t' in path:
            raise ValueError('Newlines or tabs not allowed in the path')

    with socket.create_connection((host, port)) as sock:
        if query is None:
            sock.sendall(path.encode('utf-8') + b'\r\n')
        else:
            sock.sendall(path.encode('utf-8') + b'\t'
                         + query.encode('utf-8') + b'\r\n')

        lines = []
        buf = bytearray()
        stream_end = False
        while not stream_end:
            data = sock.recv(1024)
            if data == b'':
                break
            buf.extend(data)

            while True:
                newline_index = buf.find(b'\n')
                if newline_index == -1:
                    break
                line = buf[:newline_index]
                buf = buf[newline_index + 1:]

                if line[-1:] == b'\r':
                    line = line[:-1]

                if line == b'.':
                    # End of gophermap reached. Stop parsing here
                    stream_end = True
                    break
                elif line[:1] == b'.':
                    # Dot unquoting
                    line = line[1:]

                lines.append(line)

    return lines


def construct_url(itemtype, path, host, port):
    if path.lstrip(b'/').startswith(b'URL:'):
        # hURL: copy the URL (after sanitizing) directly
        url = path.lstrip(b'/')[4:].decode('utf-8')
        return url

    host = host.decode('utf-8').encode('idna').decode()
    path = urllib.parse.quote(itemtype + path)

    if port == 70:
        # No need to add the port explicitly
        netloc = host
    else:
        # Do we have an IPv6 address that needs to be put in brackets?
        if ':' in host:
            netloc = f'[{host}]:{port}'
        else:
            netloc = f'{host}:{port}'

    return urllib.parse.urlunsplit(('gopher', netloc, path, '', ''))


def get_links(gophermap):
    links = []
    for line in gophermap:
        line = line.split(b'\t')
        if len(line) < 4:
            continue

        itemtype_name, path, host, port, *_ = line
        itemtype = itemtype_name[:1]
        name = itemtype_name[1:].decode('utf-8', errors='replace')

        if itemtype == b'3':
            raise Exception(f'From gopher: {name}')
        if itemtype == b'i':
            continue  # Don't care about info text

        port = int(port)
        url = construct_url(itemtype, path, host, port)
        links.append((name, url))

    return links


def construct_rss(links, feed):
    rss = [f"""<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
\t<channel>
\t\t<title>{xml.sax.saxutils.escape(feed)} (pheeder)</title>
\t\t<link>{xml.sax.saxutils.escape(feeds[feed])}</link>"""]
    for text, url in links:
        rss.append(f"""\t\t<item>
\t\t\t<title>{xml.sax.saxutils.escape(text)}</title>
\t\t\t<link>{xml.sax.saxutils.escape(url)}</link>
\t\t</item>""")
    rss.append('\t</channel>\n</rss>\n')
    return '\n'.join(rss)


class Pheeder(http.server.BaseHTTPRequestHandler):
    def send_404(self):
        content = f'{self.path} not found'.encode('utf-8')
        self.send_response(404)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)

    def send_500(self):
        content = f'Internal server error while processing {self.path}'.encode('utf-8')
        self.send_response(500)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)

    def send_rss(self, rss):
        rss = rss.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'application/rss+xml; charset=utf-8')
        self.send_header('Content-Length', len(rss))
        self.end_headers()
        self.wfile.write(rss)

    def do_GET(self):
        feed = self.path.strip('/').split('/')[-1]
        if feed in feeds:
            try:
                gophermap = download_gophermap(feeds[feed])
                links = get_links(gophermap)
                rss = construct_rss(links, feed)
            except Exception:
                self.send_500()
            else:
                self.send_rss(rss)
        else:
            self.send_404()


def load_feedfile(feedfile):
    with open(feedfile, 'r') as f:
        for line in f:
            line = line.strip()
            comment_start = line.find('#')
            if comment_start != -1:
                line = line[:comment_start].rstrip()
            if line == '':
                continue
            feed, url = line.split(maxsplit=1)
            feeds[feed] = url


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print(f'Usage: {sys.argv[0]} port feedfile [feedfile …]', file=sys.stderr)
        sys.exit(1)

    for feedfile in sys.argv[2:]:
        load_feedfile(feedfile)

    http.server.HTTPServer(('', int(sys.argv[1])), Pheeder).serve_forever()
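
# Example usage, as a sketch: the feedfile format below follows what
# load_feedfile() parses (feed name, whitespace, URL, optional '#' comment).
# The feed names, the example.org hostname, and the pheeder.py filename are
# illustrative assumptions, not part of the source.
#
#     phlog gopher://example.org/1/phlog          # plain gophermap feed
#     finds gopher://example.org/7/search?gopher  # search item (type 7)
#
# Started as `./pheeder.py 8080 feeds.txt`, the server would then expose
# each feed as RSS 2.0 at http://localhost:8080/<feedname>, e.g. /phlog.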