pheeder/pheeder.py

#!/usr/bin/env python3
import http.server
import socket
import sys
import urllib.parse
import xml.sax.saxutils
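
# Maps each feed name to the gopher URL its RSS is generated from;
# populated by load_feedfile() at startup.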
feeds = {}

def download_gophermap(feedurl):
    split = urllib.parse.urlsplit(feedurl)
    if split.scheme != 'gopher':
        raise ValueError(f'Must be a gopher URL, {feedurl} is not')
    host = split.hostname
    port = split.port if split.port is not None else 70
    itemtype = '1'  # Gophermap by default
    query = None
    if split.query != '':
        query = urllib.parse.unquote(split.query)
        if '\n' in query or '\r' in query:
            raise ValueError('Newlines not allowed in the query')
        itemtype = '8'  # Search has its own item type
    if split.path.lstrip('/') == '':
        # Special handling if the URL doesn't include a path part
        path = ''
    elif split.path.lstrip('/')[0] != itemtype:
        # If the URL specifies something other than a gophermap (or a
        # search, when we have a search query), give up
        raise ValueError(f'Must be a gophermap or search, {feedurl} is not')
    else:
        path = urllib.parse.unquote(split.path.lstrip('/')[1:])
        if '\n' in path or '\r' in path or '\t' in path:
            raise ValueError('Newlines or tabs not allowed in the path')
    with socket.create_connection((host, port)) as sock:
        if query is None:
            sock.sendall(path.encode('utf-8') + b'\r\n')
        else:
            sock.sendall(path.encode('utf-8') + b'\t' + query.encode('utf-8') + b'\r\n')
        lines = []
        buf = bytearray()
        stream_end = False
        while not stream_end:
            data = sock.recv(1024)
            if data == b'':
                break
            buf.extend(data)
            while True:
                newline_index = buf.find(b'\n')
                if newline_index == -1:
                    break
                line = buf[:newline_index]
                buf = buf[newline_index + 1:]
                if line[-1:] == b'\r':
                    line = line[:-1]
                if line == b'.':
                    # End of gophermap reached. Stop parsing here
                    stream_end = True
                    break
                elif line[:1] == b'.':
                    # Dot unquoting
                    line = line[1:]
                lines.append(line)
    return lines
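
# Illustrative call, with a hypothetical host: for
# download_gophermap('gopher://example.org/1/phlog') the selector '/phlog'
# is sent to example.org:70, and the gophermap comes back as a list of byte
# strings, one per line, dot-unquoted and minus the terminating '.'.
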
def construct_url(itemtype, path, host, port):
    if path.lstrip(b'/').startswith(b'URL:'):
        # hURL: copy the target URL directly
        url = path.lstrip(b'/')[4:].decode('utf-8')
        return url
    host = host.decode('utf-8').encode('idna').decode()
    path = urllib.parse.quote(itemtype + path)
    if port == 70:
        # No need to add the default port explicitly
        netloc = host
    else:
        # An IPv6 address needs to be put in brackets
        if ':' in host:
            netloc = f'[{host}]:{port}'
        else:
            netloc = f'{host}:{port}'
    return urllib.parse.urlunsplit(('gopher', netloc, path, '', ''))
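
# Illustrative values: construct_url(b'0', b'/file.txt', b'example.org', 7070)
# yields 'gopher://example.org:7070/0/file.txt', while port 70 would drop
# the ':7070' from the netloc.
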
def get_links(gophermap):
    links = []
    for line in gophermap:
        # A gophermap entry is itemtype+name, selector, host and port,
        # separated by tabs; skip anything without all four fields
        line = line.split(b'\t')
        if len(line) < 4:
            continue
        itemtype_name, path, host, port, *_ = line
        itemtype = itemtype_name[:1]
        name = itemtype_name[1:].decode('utf-8', errors='replace')
        if itemtype == b'3':
            # Item type 3 is the server reporting an error
            raise Exception(f'From gopher: {name}')
        if itemtype == b'i':
            continue  # Don't care about info text
        port = int(port)
        url = construct_url(itemtype, path, host, port)
        links.append((name, url))
    return links
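
# For example, a gophermap line such as (tab-separated, hypothetical values)
#     1My phlog<TAB>/phlog<TAB>example.org<TAB>70
# becomes the link ('My phlog', 'gopher://example.org/1/phlog').
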
def construct_rss(links, feed):
    rss = [f"""<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
\t<channel>
\t\t<title>{xml.sax.saxutils.escape(feed)} (pheeder)</title>
\t\t<link>{xml.sax.saxutils.escape(feeds[feed])}</link>
\t\t<description></description>
"""]
    for text, url in links:
        rss.append(f"""\t\t<item>
\t\t\t<title>{xml.sax.saxutils.escape(text)}</title>
\t\t\t<link>{xml.sax.saxutils.escape(url)}</link>
\t\t</item>""")
    rss.append('\t</channel>\n</rss>\n')
    return '\n'.join(rss)
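
# The result is a minimal RSS 2.0 document: one <item> per gophermap link,
# with the link's display text as <title> and its gopher URL as <link>.
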
class Pheeder(http.server.BaseHTTPRequestHandler):
    def send_404(self):
        content = f'{self.path} not found'.encode('utf-8')
        self.send_response(404)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)

    def send_500(self):
        content = f'Internal server error while processing {self.path}'.encode('utf-8')
        self.send_response(500)
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)

    def send_rss(self, rss):
        rss = rss.encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'application/rss+xml; charset=utf-8')
        self.send_header('Content-Length', len(rss))
        self.end_headers()
        self.wfile.write(rss)

    def do_GET(self):
        # The last path component names the feed to serve
        feed = self.path.strip('/').split('/')[-1]
        if feed in feeds:
            try:
                gophermap = download_gophermap(feeds[feed])
                links = get_links(gophermap)
                rss = construct_rss(links, feed)
            except Exception:
                # Don't let a broken upstream gopher server kill the request
                self.send_500()
            else:
                self.send_rss(rss)
        else:
            self.send_404()
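
# A GET for any path whose final component is a configured feed name, e.g.
# a hypothetical http://localhost:8080/phlog, downloads that feed's
# gophermap and answers with the RSS rendering of it.
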
def load_feedfile(feedfile):
    global feeds
    with open(feedfile, 'r') as f:
        for line in f:
            line = line.strip()
            # Strip comments, then skip lines with nothing left on them
            comment_start = line.find('#')
            if comment_start != -1:
                line = line[:comment_start].rstrip()
            if line == '':
                continue
            feed, url = line.split(' ', 1)
            feeds[feed] = url
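
# A feedfile holds one feed per line: a name, a space, and a gopher URL,
# e.g. (hypothetical) "phlog gopher://example.org/1/phlog".
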
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print(f'Usage: {sys.argv[0]} port feedfile [feedfile …]', file=sys.stderr)
        sys.exit(1)
    for feedfile in sys.argv[2:]:
        load_feedfile(feedfile)
    http.server.HTTPServer(('', int(sys.argv[1])), Pheeder).serve_forever()
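
# Example invocation (hypothetical): ./pheeder.py 8080 feeds.txt
# serves every feed listed in feeds.txt on http://localhost:8080/<feedname>.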