From d5fed9641e2473048222f37402c903236fe0fc0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juhani=20Krekel=C3=A4?= <juhani@krekelä.fi>
Date: Sat, 3 Apr 2021 01:43:04 +0300
Subject: [PATCH] First commit

---
 CC0             | 116 ++++++++++++++++++++++++++++
 README.md       |  11 +++
 example.pheeder |   2 +
 pheeder.py      | 201 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 330 insertions(+)
 create mode 100644 CC0
 create mode 100644 README.md
 create mode 100644 example.pheeder
 create mode 100644 pheeder.py

diff --git a/CC0 b/CC0
new file mode 100644
index 0000000..670154e
--- /dev/null
+++ b/CC0
@@ -0,0 +1,116 @@
+CC0 1.0 Universal
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+
+  i. the right to reproduce, adapt, distribute, perform, display, communicate,
+  and translate a Work;
+
+  ii. moral rights retained by the original author(s) and/or performer(s);
+
+  iii. publicity and privacy rights pertaining to a person's image or likeness
+  depicted in a Work;
+
+  iv. rights protecting against unfair competition in regards to a Work,
+  subject to the limitations in paragraph 4(a), below;
+
+  v. rights protecting the extraction, dissemination, use and reuse of data in
+  a Work;
+
+  vi. database rights (such as those arising under Directive 96/9/EC of the
+  European Parliament and of the Council of 11 March 1996 on the legal
+  protection of databases, and under any national implementation thereof,
+  including any amended or successor version of such directive); and
+
+  vii. other similar, equivalent or corresponding rights throughout the world
+  based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+  a. No trademark or patent rights held by Affirmer are waived, abandoned,
+  surrendered, licensed or otherwise affected by this document.
+
+  b. Affirmer offers the Work as-is and makes no representations or warranties
+  of any kind concerning the Work, express, implied, statutory or otherwise,
+  including without limitation warranties of title, merchantability, fitness
+  for a particular purpose, non infringement, or the absence of latent or
+  other defects, accuracy, or the present or absence of errors, whether or not
+  discoverable, all to the greatest extent permissible under applicable law.
+
+  c. Affirmer disclaims responsibility for clearing rights of other persons
+  that may apply to the Work or any use thereof, including without limitation
+  any person's Copyright and Related Rights in the Work. Further, Affirmer
+  disclaims responsibility for obtaining any necessary consents, permissions
+  or other rights required for any use of the Work.
+
+  d. Affirmer understands and acknowledges that Creative Commons is not a
+  party to this document and has no duty or obligation with respect to this
+  CC0 or use of the Work.
+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c577042
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+Pheeder
+=======
+
+Pheeder is a proxy that translates gophermaps into RSS feeds. The allowed
+gopher URLs are defined in feedfiles (see e.g. example.pheeder), and each
+feed is requested by its short name, e.g. `http://host:port/shatranj`.
+
+Running
+=======
+
+`python3 pheeder.py port /path/to/feeds.pheeder [/path/to/more.pheeder …]`
diff --git a/example.pheeder b/example.pheeder
new file mode 100644
index 0000000..079dbe1
--- /dev/null
+++ b/example.pheeder
@@ -0,0 +1,2 @@
+shatranj gopher://ahti.space/1/~nortti/shatranj
+floodfeeds gopher://gopher.floodgap.com/1/feeds/today
diff --git a/pheeder.py b/pheeder.py
new file mode 100644
index 0000000..ce92201
--- /dev/null
+++ b/pheeder.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+import http.server
+import socket
+import sys
+import urllib.parse
+import xml.sax.saxutils
+from collections import namedtuple
+
+feeds = {} # Short feed name -> gopher URL, filled in by load_feedfile
+
+def download_gophermap(feedurl):
+	"""Fetch the gophermap (or search result) at feedurl; return its raw lines.
+
+	Raises ValueError unless feedurl is a gopher URL of a gophermap or search."""
+	split = urllib.parse.urlsplit(feedurl)
+	if split.scheme != 'gopher': raise ValueError(f'Must be a gopher url, {feedurl} is not')
+
+	host = split.hostname
+	port = split.port if split.port is not None else 70
+
+	itemtype = '1' # Gophermap by default
+	query = None
+	if split.query != '':
+		query = urllib.parse.unquote(split.query)
+		if '\n' in query or '\r' in query: raise ValueError('Newlines not allowed in the query')
+		itemtype = '7' # Search has its own itemtype ('8' would be a telnet session)
+
+	if split.path.lstrip('/') == '':
+		# Special handling if the url doesn't include the path part
+		path = ''
+	elif split.path.lstrip('/')[0] != itemtype:
+		# If the url specifies something else than a gophermap, or search if we have a search query, explode
+		raise ValueError(f'Must be a gophermap or search, {feedurl} is not')
+	else:
+		path = urllib.parse.unquote(split.path.lstrip('/')[1:])
+		if '\n' in path or '\r' in path or '\t' in path: raise ValueError('Newlines or tabs not allowed in the path')
+
+	with socket.create_connection((host, port)) as sock:
+		if query is None:
+			sock.sendall(path.encode('utf-8') + b'\r\n')
+		else:
+			sock.sendall(path.encode('utf-8') + b'\t' + query.encode('utf-8') + b'\r\n')
+
+		lines = []
+		buf = bytearray()
+		stream_end = False
+		while not stream_end:
+			data = sock.recv(1024)
+			if data == b'': break
+			buf.extend(data)
+
+			while True:
+				newline_index = buf.find(b'\n')
+				if newline_index == -1: break
+				line = buf[:newline_index]
+				buf = buf[newline_index + 1:]
+				if line[-1:] == b'\r': line = line[:-1]
+
+				if line == b'.':
+					# End of gophermap reached. Stop parsing here
+					stream_end = True
+					break
+				elif line[:1] == b'.':
+					# Dot unquoting
+					line = line[1:]
+
+				lines.append(line)
+
+		return lines
+
+def construct_url(itemtype, path, host, port):
+	"""Return the URL a gophermap entry points to.
+
+	itemtype, path and host are the entry's raw bytes fields, port is an int.
+	Selectors of the form 'URL:…' (hURLs) yield the embedded URL itself.
+	"""
+	if path.lstrip(b'/').startswith(b'URL:'):
+		# hURL: copy the embedded URL as-is (no sanitizing is done here)
+		return path.lstrip(b'/')[4:].decode('utf-8')
+
+	# Non-ASCII hostnames go into the URL in their IDNA form
+	host = host.decode('utf-8').encode('idna').decode()
+	path = urllib.parse.quote(itemtype + path)
+
+	# An IPv6 address has to be put in brackets, with or without a port
+	if ':' in host: host = f'[{host}]'
+
+	# No need to add the port explicitly if it is the default one
+	netloc = host if port == 70 else f'{host}:{port}'
+
+	return urllib.parse.urlunsplit(('gopher', netloc, path, '', ''))
+
+def get_links(gophermap):
+	"""Collect (name, url) pairs for the non-info entries of a gophermap.
+
+	Raises Exception carrying the server's message if an error entry is found."""
+	result = []
+	for entry in gophermap:
+		fields = entry.split(b'\t')
+		# Anything with fewer than four tab-separated fields is skipped
+		if len(fields) < 4: continue
+
+		head, selector, server, portfield = fields[:4]
+		kind = head[:1]
+		name = head[1:].decode('utf-8', errors='replace')
+		if kind == b'3': raise Exception(f'From gopher: {name}')
+		if kind == b'i': continue # Don't care about info text
+
+		target = construct_url(kind, selector, server, int(portfield))
+		result.append((name, target))
+
+	return result
+
+def construct_rss(links, feed):
+	"""Render (title, url) pairs as the RSS 2.0 document for the named feed."""
+	esc = xml.sax.saxutils.escape
+	head = f"""<?xml version="1.0" encoding="UTF-8" ?>
+<rss version="2.0">
+	<channel>
+		<title>{esc(feed)} (pheeder)</title>
+		<link>{esc(feeds[feed])}</link>
+		<description></description>
+"""
+	items = [f"""\t\t<item>
+			<title>{esc(title)}</title>
+			<link>{esc(target)}</link>
+		</item>""" for title, target in links]
+
+	# Pieces are newline-joined; the channel link comes from the global feeds table
+	return '\n'.join([head, *items, '\t</channel>\n</rss>\n'])
+
+class Pheeder(http.server.BaseHTTPRequestHandler):
+	"""HTTP request handler that serves each configured feed as RSS."""
+
+	def _send_body(self, status, content_type, content):
+		"""Send a complete response with the given status and bytes body."""
+		self.send_response(status)
+		self.send_header('Content-Type', content_type)
+		self.send_header('Content-Length', len(content))
+		self.end_headers()
+
+		self.wfile.write(content)
+
+	def send_404(self):
+		"""Report that the requested path does not name a known feed."""
+		content = f'{self.path} not found'.encode('utf-8')
+		self._send_body(404, 'text/plain; charset=utf-8', content)
+
+	def send_500(self):
+		"""Report that fetching or converting the feed failed."""
+		content = f'Internal server error while processing {self.path}'.encode('utf-8')
+		self._send_body(500, 'text/plain; charset=utf-8', content)
+
+	def send_rss(self, rss):
+		"""Send the RSS document rss (a str) as a successful response."""
+		self._send_body(200, 'application/rss+xml; charset=utf-8', rss.encode('utf-8'))
+
+	def do_GET(self):
+		"""Serve the feed named by the last component of the request path."""
+		feed = self.path.strip('/').split('/')[-1]
+
+		# Only the feeds listed in the feedfiles are proxied
+		if feed in feeds:
+			try:
+				gophermap = download_gophermap(feeds[feed])
+				links = get_links(gophermap)
+				rss = construct_rss(links, feed)
+			except Exception as err:
+				# A bare except would also swallow KeyboardInterrupt and SystemExit.
+				# Log the cause, the client only gets a generic message
+				self.log_error('Error while processing %s: %r', self.path, err)
+				self.send_500()
+			else:
+				self.send_rss(rss)
+		else:
+			self.send_404()
+
+def load_feedfile(feedfile):
+	"""Add the 'name url' entries of feedfile to the global feeds table.
+
+	Blank lines and '#' comments are ignored. Raises ValueError on a line
+	that has a name but no url."""
+	with open(feedfile, 'r') as f:
+		for line in f:
+			# Drop the comment first so that stripping also removes the space before it
+			line = line.partition('#')[0].strip()
+			if line == '': continue
+
+			feed, url = line.split(None, 1)
+
+			feeds[feed] = url
+
+if __name__ == '__main__':
+	# Command line: the port to listen on, then one or more feedfiles
+	if len(sys.argv) < 3:
+		print(f'Usage: {sys.argv[0]} port feedfile [feedfile …]', file=sys.stderr)
+		sys.exit(1)
+
+	_, port, *feedfiles = sys.argv
+	for feedfile in feedfiles: load_feedfile(feedfile)
+	http.server.HTTPServer(('', int(port)), Pheeder).serve_forever()