#!/usr/bin/env python3
# CC0 1.0 Universal
#
# Statement of Purpose
#
# The laws of most jurisdictions throughout the world automatically confer
# exclusive Copyright and Related Rights (defined below) upon the creator and
# subsequent owner(s) (each and all, an "owner") of an original work of
# authorship and/or a database (each, a "Work").
#
# Certain owners wish to permanently relinquish those rights to a Work for the
# purpose of contributing to a commons of creative, cultural and scientific
# works ("Commons") that the public can reliably and without fear of later
# claims of infringement build upon, modify, incorporate in other works, reuse
# and redistribute as freely as possible in any form whatsoever and for any
# purposes, including without limitation commercial purposes. These owners may
# contribute to the Commons to promote the ideal of a free culture and the
# further production of creative, cultural and scientific works, or to gain
# reputation or greater distribution for their Work in part through the use and
# efforts of others.
#
# For these and/or other purposes and motivations, and without any expectation
# of additional consideration or compensation, the person associating CC0 with a
# Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
# and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
# and publicly distribute the Work under its terms, with knowledge of his or her
# Copyright and Related Rights in the Work and the meaning and intended legal
# effect of CC0 on those rights.
#
# 1. Copyright and Related Rights. A Work made available under CC0 may be
# protected by copyright and related or neighboring rights ("Copyright and
# Related Rights"). Copyright and Related Rights include, but are not limited
# to, the following:
#
# i. the right to reproduce, adapt, distribute, perform, display, communicate,
# and translate a Work;
#
# ii. moral rights retained by the original author(s) and/or performer(s);
#
# iii. publicity and privacy rights pertaining to a person's image or likeness
# depicted in a Work;
#
# iv. rights protecting against unfair competition in regards to a Work,
# subject to the limitations in paragraph 4(a), below;
#
# v. rights protecting the extraction, dissemination, use and reuse of data in
# a Work;
#
# vi. database rights (such as those arising under Directive 96/9/EC of the
# European Parliament and of the Council of 11 March 1996 on the legal
# protection of databases, and under any national implementation thereof,
# including any amended or successor version of such directive); and
#
# vii. other similar, equivalent or corresponding rights throughout the world
# based on applicable law or treaty, and any national implementations thereof.
#
# 2. Waiver. To the greatest extent permitted by, but not in contravention of,
# applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
# unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
# and Related Rights and associated claims and causes of action, whether now
# known or unknown (including existing as well as future claims and causes of
# action), in the Work (i) in all territories worldwide, (ii) for the maximum
# duration provided by applicable law or treaty (including future time
# extensions), (iii) in any current or future medium and for any number of
# copies, and (iv) for any purpose whatsoever, including without limitation
# commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
# the Waiver for the benefit of each member of the public at large and to the
# detriment of Affirmer's heirs and successors, fully intending that such Waiver
# shall not be subject to revocation, rescission, cancellation, termination, or
# any other legal or equitable action to disrupt the quiet enjoyment of the Work
# by the public as contemplated by Affirmer's express Statement of Purpose.
#
# 3. Public License Fallback. Should any part of the Waiver for any reason be
# judged legally invalid or ineffective under applicable law, then the Waiver
# shall be preserved to the maximum extent permitted taking into account
# Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
# is so judged Affirmer hereby grants to each affected person a royalty-free,
# non transferable, non sublicensable, non exclusive, irrevocable and
# unconditional license to exercise Affirmer's Copyright and Related Rights in
# the Work (i) in all territories worldwide, (ii) for the maximum duration
# provided by applicable law or treaty (including future time extensions), (iii)
# in any current or future medium and for any number of copies, and (iv) for any
# purpose whatsoever, including without limitation commercial, advertising or
# promotional purposes (the "License"). The License shall be deemed effective as
# of the date CC0 was applied by Affirmer to the Work. Should any part of the
# License for any reason be judged legally invalid or ineffective under
# applicable law, such partial invalidity or ineffectiveness shall not
# invalidate the remainder of the License, and in such case Affirmer hereby
# affirms that he or she will not (i) exercise any of his or her remaining
# Copyright and Related Rights in the Work or (ii) assert any associated claims
# and causes of action with respect to the Work, in either case contrary to
# Affirmer's express Statement of Purpose.
#
# 4. Limitations and Disclaimers.
#
# a. No trademark or patent rights held by Affirmer are waived, abandoned,
# surrendered, licensed or otherwise affected by this document.
#
# b. Affirmer offers the Work as-is and makes no representations or warranties
# of any kind concerning the Work, express, implied, statutory or otherwise,
# including without limitation warranties of title, merchantability, fitness
# for a particular purpose, non infringement, or the absence of latent or
# other defects, accuracy, or the present or absence of errors, whether or not
# discoverable, all to the greatest extent permissible under applicable law.
#
# c. Affirmer disclaims responsibility for clearing rights of other persons
# that may apply to the Work or any use thereof, including without limitation
# any person's Copyright and Related Rights in the Work. Further, Affirmer
# disclaims responsibility for obtaining any necessary consents, permissions
# or other rights required for any use of the Work.
#
# d. Affirmer understands and acknowledges that Creative Commons is not a
# party to this document and has no duty or obligation with respect to this
# CC0 or use of the Work.
#
# For more information, please see
# <http://creativecommons.org/publicdomain/zero/1.0/>

import base64
import enum
import select
import socket
import ssl
import sys
import time
import threading


class contexts(enum.Enum):
    """Parser states of :class:`HtmlProcessor`."""

    text = 0                 # outside any tag
    tagname = 1              # reading the tag name after '<'
    attributename = 2        # reading an attribute name
    after_attributename = 3  # whitespace seen after an attribute name
    after_equals = 4         # '=' seen, value not started yet
    attributevalue = 5       # inside an unquoted value
    attributevalue_sq = 6    # inside a single-quoted value
    attributevalue_dq = 7    # inside a double-quoted value


class HtmlProcessor:
    """Streaming rewriter that downgrades ``https://`` links to ``http://``.

    Feed response-body chunks to :meth:`process` in order and call
    :meth:`finalize` once at end of stream.  Attribute values are buffered
    until they are complete, so a value split across chunks is still
    rewritten.  Only ``<a href>`` and ``<img src>`` are rewritten; every
    other byte is passed through unchanged.
    """

    # ASCII whitespace, the same set ``bytes.strip()`` uses.  Tested on the
    # raw byte so that UTF-8 continuation bytes such as 0x85 / 0xA0 are not
    # mistaken for separators (``chr(byte).isspace()`` is true for those).
    _WHITESPACE = b' \t\n\r\x0b\x0c'
    _TLS_SCHEME = b'https://'

    def __init__(self):
        self.context = contexts.text
        self.tag = None
        self.attribute = None
        self.value = None

    def process_attribute(self):
        """Return the replacement for the buffered attribute value.

        The result always has the same length as the buffered value: the
        ``s`` dropped from the scheme is compensated by one space placed in
        front of ``http://`` and any surrounding whitespace is kept, so the
        size of the response body does not change.
        """
        tag = self.tag.lower()
        attribute = self.attribute.lower()
        # TODO: handle more attributes
        rewritable = ((tag == b'a' and attribute == b'href')
                      or (tag == b'img' and attribute == b'src'))
        if not rewritable:
            return self.value

        # TODO: entities
        value = self.value
        body = value.lstrip()
        scheme_length = len(self._TLS_SCHEME)
        if body[:scheme_length].lower() != self._TLS_SCHEME:
            return value
        indent = value[:len(value) - len(body)]
        return bytes(indent) + b' http://' + bytes(body[scheme_length:])

    def process(self, data):
        """Consume one chunk of HTML and return the bytes ready to forward.

        Bytes belonging to a not-yet-finished attribute value are held back
        and emitted (possibly rewritten) once the value ends, either in a
        later call or by :meth:`finalize`.
        """
        lt, gt, equals = ord('<'), ord('>'), ord('=')
        single, double = ord("'"), ord('"')
        quoted = (contexts.attributevalue_sq, contexts.attributevalue_dq)
        processed = bytearray()

        for char in data:
            context = self.context
            is_space = char in self._WHITESPACE

            if context == contexts.text and char == lt:
                self.context = contexts.tagname
                self.tag = bytearray()
                self.attribute = None
                self.value = None
            elif context not in quoted and char == gt:
                # End of tag; an unquoted value ends here as well.
                if context == contexts.attributevalue:
                    processed.extend(self.process_attribute())
                self.context = contexts.text
                self.tag = None
                self.attribute = None
                self.value = None
            elif context in (contexts.tagname, contexts.attributevalue) and is_space:
                if context == contexts.attributevalue:
                    processed.extend(self.process_attribute())
                self.context = contexts.attributename
                self.attribute = bytearray()
                self.value = None
            elif context == contexts.attributename and is_space:
                self.context = contexts.after_attributename
            elif context == contexts.after_attributename and is_space:
                pass
            elif context in (contexts.attributename, contexts.after_attributename) and char == equals:
                self.context = contexts.after_equals
            elif context == contexts.after_equals and is_space:
                pass
            elif context == contexts.after_equals and char == single:
                self.context = contexts.attributevalue_sq
                self.value = bytearray()
            elif context == contexts.after_equals and char == double:
                self.context = contexts.attributevalue_dq
                self.value = bytearray()
            elif ((context == contexts.attributevalue_sq and char == single)
                  or (context == contexts.attributevalue_dq and char == double)):
                processed.extend(self.process_attribute())
                self.context = contexts.attributename
                # Start a fresh name so that an attribute following without
                # whitespace (alt="x"src="...") is not glued onto this one.
                self.attribute = bytearray()
                self.value = None
            elif context == contexts.tagname:
                self.tag.append(char)
            elif context == contexts.attributename:
                self.attribute.append(char)
            elif context == contexts.after_attributename:
                # A new attribute starts; the previous one had no value.
                self.context = contexts.attributename
                self.attribute = bytearray([char])
                self.value = None
            elif context == contexts.after_equals:
                self.context = contexts.attributevalue
                self.value = bytearray([char])
            elif context in (contexts.attributevalue,) + quoted:
                self.value.append(char)

            # Emit the byte unless it was buffered as part of a value.  The
            # opening quote of a quoted value is emitted immediately.
            now = self.context
            buffered = (
                now == contexts.attributevalue
                or (now == contexts.attributevalue_sq and char != single)
                or (now == contexts.attributevalue_dq and char != double)
            )
            if not buffered:
                processed.append(char)

        return processed

    def finalize(self):
        """Return whatever is still buffered when the stream ends."""
        if self.context in (contexts.attributevalue, contexts.attributevalue_sq, contexts.attributevalue_dq):
            return self.process_attribute()
        return b''


def connect(host, port):
    """Open a TCP connection to *host*:*port*.

    Every address returned by name resolution is tried in turn with a
    10 second connect timeout.  Returns a connected socket in blocking mode
    (no timeout), or ``None`` if the name cannot be resolved or no address
    accepts the connection.
    """
    try:
        candidates = socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM)
    except (socket.gaierror, UnicodeError):
        # UnicodeError: the name cannot be IDNA-encoded (empty or over-long
        # label such as "a..b"); treat it like an unknown host.
        return None

    for af, socktype, proto, _canonname, sa in candidates:
        try:
            s = socket.socket(af, socktype, proto)
        except OSError:
            continue
        s.settimeout(10)
        try:
            # Connect to the resolved address itself rather than resolving
            # (host, port) a second time.
            s.connect(sa)
        except OSError:  # includes socket.timeout
            s.close()
            continue
        s.settimeout(None)
        return s
    return None
def timestamp():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SSZ``."""
    return time.strftime('%Y-%m-%d %H:%M:%SZ', time.gmtime())


# Username -> password; filled from the users file when run as a script.
users = {}


def authorized(username, password):
    """Return True if *username*/*password* (bytes) match an entry in ``users``.

    Credentials that are not valid UTF-8 never match.
    """
    import hmac  # local import: keeps this function self-contained

    try:
        username = username.decode('utf-8')
    except UnicodeDecodeError:
        return False
    expected = users.get(username)
    if expected is None:
        return False
    # Constant-time comparison; the credentials come from the network.
    return hmac.compare_digest(expected.encode('utf-8'), bytes(password))


# Upper bound on the size of a request or response head we are willing to
# buffer while looking for the terminating blank line.
_MAX_HEAD_BYTES = 64 * 1024

_AUTH_REQUIRED = b'407 Proxy Authentication Required'
_AUTH_CHALLENGE = b'Proxy-Authenticate: Basic realm="Proxy service"\r\n'


def _read_head(sock, timeout, timeout_message, hangup_message):
    """Read from *sock* until a blank line; return ``(head, rest)`` or None.

    On failure the matching message is logged to stderr and ``None`` is
    returned.  On success the socket is switched back to blocking mode.
    """
    sock.settimeout(timeout)
    buffer = bytearray()
    while b'\r\n\r\n' not in buffer:
        if len(buffer) > _MAX_HEAD_BYTES:
            print('Headers too large', file=sys.stderr)
            return None
        try:
            data = sock.recv(1024)
        except socket.timeout:
            print(timeout_message, file=sys.stderr)
            return None
        except OSError:
            data = b''
        if not data:
            print(hangup_message, file=sys.stderr)
            return None
        buffer.extend(data)
    sock.settimeout(None)
    head, _, rest = buffer.partition(b'\r\n\r\n')
    return head, rest


def _reject(sock, status, reason, extra_headers=b''):
    """Log *reason* and send it to the client as an HTTP/1.0 error response."""
    print(reason, file=sys.stderr)
    sock.sendall(b'HTTP/1.0 ' + status + b'\r\n' + extra_headers + b'\r\n'
                 + reason.encode('ascii') + b'\n')


def _parse_target(url):
    """Split an absolute http(s) URL into ``(host, port, path)``.

    *port* is ``None`` when the URL does not name one.  Raises ``ValueError``
    whose message is the reason to report to the client.
    """
    scheme, separator, rest = url.partition(b'://')
    if not separator or scheme not in (b'http', b'https'):
        raise ValueError('Bad schema')

    remote_host, slash, path = rest.partition(b'/')
    # "http://host" has no path at all; the request line still needs one.
    path = (slash + path) or b'/'

    port = None
    # A trailing ']' means a bare IPv6 literal such as [::1] without a port.
    if remote_host[-1:] != b']' and b':' in remote_host:
        remote_host, port_text = remote_host.rsplit(b':', 1)
        try:
            port = int(port_text)
        except ValueError:
            port = 0
        if not 1 <= port <= 0xffff:
            raise ValueError('Bad port number')

    if remote_host[:1] == b'[' and remote_host[-1:] == b']':  # IPv6
        remote_host = remote_host[1:-1]
    try:
        host_text = remote_host.decode('ascii')
    except UnicodeDecodeError:
        raise ValueError('Bad host name') from None
    if not host_text:
        raise ValueError('Bad host name')
    return host_text, port, path


def proxy(sock, host):
    """Serve one proxied HTTP request arriving on the client socket *sock*.

    The request must carry valid ``Proxy-Authorization: Basic`` credentials
    and an absolute ``http://`` or ``https://`` URL.  The request is forwarded
    over plain HTTP; if the remote answers with a redirect to the same URL
    over HTTPS, the request is transparently re-sent over TLS.  ``https://``
    in ``Location`` headers and in ``<a href>`` / ``<img src>`` of HTML bodies
    is rewritten to ``http://``.  *host* is the client address, used only for
    the log line written to stderr.  *sock* is left open for the caller to
    close.
    """
    print(f'{timestamp()} {host}', file=sys.stderr, end=' ')

    received = _read_head(sock, 2, 'Timeout', 'Hung up')
    if received is None:
        return
    request_head, request_data = received
    lines = request_head.split(b'\r\n')

    # Get request method, URL, protocol
    fields = lines[0].split()
    if len(fields) != 3:
        _reject(sock, b'400 Bad Request', 'Malformed request line')
        return
    method, url, protocol = fields
    print(f'{method.decode(errors="replace")} {url.decode(errors="replace")}', file=sys.stderr, end=' ')

    # Parse headers
    headers = {}
    for line in lines[1:]:
        name, colon, value = line.partition(b':')
        if not colon:
            _reject(sock, b'400 Bad Request', 'Malformed headers')
            return
        headers[bytes(name.strip().lower())] = bytes(value.strip())

    # Check authentication
    if b'proxy-authorization' not in headers:
        _reject(sock, _AUTH_REQUIRED, 'Proxy authentication required', _AUTH_CHALLENGE)
        return
    fields = headers[b'proxy-authorization'].split()
    if len(fields) != 2 or fields[0].lower() != b'basic':
        _reject(sock, _AUTH_REQUIRED, 'Unrecognized authentication method', _AUTH_CHALLENGE)
        return
    try:
        username, password = base64.b64decode(fields[1], validate=True).split(b':', 1)
    except ValueError:  # bad base64 (binascii.Error is a ValueError) or no ':'
        _reject(sock, _AUTH_REQUIRED, 'Malformed credentials', _AUTH_CHALLENGE)
        return
    if not authorized(username, password):
        _reject(sock, _AUTH_REQUIRED, 'Unauthorized', _AUTH_CHALLENGE)
        return
    del username, password

    # Remove headers that don't need forwarding or are overwritten
    dropped = (b'connection', b'accept-encoding', b'keep-alive')
    headers = {key: value for key, value in headers.items()
               if not key.startswith(b'proxy-') and key not in dropped}
    headers[b'connection'] = b'close'
    # The body must arrive uncompressed for the HTML rewriting to work.
    headers[b'accept-encoding'] = b'identity'

    # Split url into its constituents
    try:
        remote_host, port, path = _parse_target(url)
    except ValueError as err:
        _reject(sock, b'400 Bad Request', str(err))
        return

    # Connect to remote host
    remote_sock = connect(remote_host, port if port is not None else 80)
    if remote_sock is None:
        _reject(sock, b'502 Bad Gateway', 'Host not found')
        return

    request_line = method + b' ' + path + b' ' + protocol + b'\r\n'
    header_block = b''.join(key + b': ' + value + b'\r\n' for key, value in headers.items())
    tls = False
    client_open = True  # False once the client has stopped sending

    try:
        while True:
            # Send request (including any body bytes received so far)
            remote_sock.settimeout(10)
            try:
                remote_sock.sendall(request_line + header_block + b'\r\n' + request_data)
            except socket.timeout:
                print('Remote timed out', file=sys.stderr)
                return
            except OSError:
                print('Remote hung up', file=sys.stderr)
                return
            remote_sock.settimeout(None)

            # Keep sending request body, if any, until we get a response from remote
            poll = select.poll()
            poll.register(remote_sock, select.POLLIN)
            if client_open:
                poll.register(sock, select.POLLIN)
            no_response = True
            while no_response:
                for fd, _ in poll.poll():
                    if fd == remote_sock.fileno():
                        no_response = False
                        break
                    try:
                        data = sock.recv(1024)
                    except OSError:
                        data = b''
                    if not data:
                        # Client is done sending.  Stop polling it, otherwise
                        # poll() keeps reporting it readable and this loop spins.
                        poll.unregister(sock)
                        client_open = False
                        continue
                    # Save the part we've sent already in case we need to re-send request
                    request_data.extend(data)
                    try:
                        remote_sock.sendall(data)
                    except OSError:
                        print('Remote hung up', file=sys.stderr)
                        return

            # Get response headers
            received = _read_head(remote_sock, 10, 'Remote timed out', 'Remote hung up')
            if received is None:
                return
            response_head, response_data = received

            # Process response headers
            # Figure out if this is a redirect to HTTPS and if so, rewrite to HTTP
            # Figure out whether response is html
            status_line, *header_lines = response_head.split(b'\r\n')
            tls_redirect = False
            is_html = True
            rewritten_response = bytearray(status_line)  # Include response line as-is
            rewritten_response.extend(b'\r\n')
            for line in header_lines:
                name, colon, value = line.partition(b':')
                name = name.lower() if colon else None
                if name == b'location':
                    destination_url = value.strip()
                    was_https = destination_url.startswith(b'https://')
                    if was_https:
                        destination_url = b'http://' + destination_url[len(b'https://'):]
                    if was_https and not tls and destination_url.split(b'#')[0] == url:
                        # This redirect is of the current URL but TLS
                        tls_redirect = True
                        continue
                    line = b'Location: ' + destination_url
                elif name == b'content-type':
                    is_html = value.split(b';')[0].strip().lower() == b'text/html'
                rewritten_response.extend(line)
                rewritten_response.extend(b'\r\n')

            if tls_redirect:
                # Do upgrade to TLS transparently to client
                print('TLS', file=sys.stderr, end=' ')
                remote_sock.close()
                remote_sock = connect(remote_host, port if port is not None else 443)
                if remote_sock is None:
                    _reject(sock, b'502 Bad Gateway', 'Host not found')
                    return
                try:
                    remote_sock.settimeout(10)  # bound the handshake
                    ctx = ssl.create_default_context()
                    remote_sock = ctx.wrap_socket(remote_sock, server_hostname=remote_host)
                except OSError:  # ssl.SSLError and timeouts are OSErrors
                    print('TLS handshake failed', file=sys.stderr)
                    return
                tls = True
                continue

            # Forward response to client
            sock.sendall(rewritten_response + b'\r\n')
            break

        del request_data
        htmlprocessor = HtmlProcessor() if is_html else None
        if is_html:
            sock.sendall(htmlprocessor.process(response_data))
        else:
            sock.sendall(response_data)
        print('', file=sys.stderr)

        # Relay the rest of the exchange until either side is done or the
        # connection has been idle for a minute.
        sock.settimeout(60)
        remote_sock.settimeout(60)
        last_transfer = time.monotonic()
        ending_connection = False
        while not ending_connection:
            events = poll.poll(60_000)
            if not events and time.monotonic() - last_transfer > 60:
                break
            for fd, _ in events:
                if fd == remote_sock.fileno():
                    try:
                        data = remote_sock.recv(1024)
                    except OSError:
                        data = b''
                    if not data:
                        ending_connection = True
                        break
                    if is_html:
                        data = htmlprocessor.process(data)
                    try:
                        sock.sendall(data)
                    except OSError:
                        ending_connection = True
                        break
                else:
                    try:
                        data = sock.recv(1024)
                    except OSError:
                        ending_connection = True
                        break
                    if data:
                        try:
                            remote_sock.sendall(data)
                        except OSError:
                            pass
                    else:
                        # Client finished sending; keep relaying the response
                        # but stop polling the client so the loop cannot spin.
                        poll.unregister(sock)
                last_transfer = time.monotonic()

        remote_sock.close()
        if is_html:
            # Flush an attribute value that was still buffered at end of stream.
            try:
                sock.sendall(htmlprocessor.finalize())
            except OSError:
                pass
    finally:
        # Covers every early return above; closing twice is harmless.
        if remote_sock is not None:
            remote_sock.close()


class ProxyThread(threading.Thread):
    """Thread that serves one accepted client connection and then closes it."""

    def __init__(self, sock, host):
        self.sock = sock
        self.host = host
        super().__init__()

    def run(self):
        try:
            proxy(self.sock, self.host)
        except OSError as err:
            # Typically the client vanished while we were writing to it.
            print(f'Connection error: {err}', file=sys.stderr)
        finally:
            self.sock.close()


def listen(port):
    """Listen on *port* on all available address families and serve forever.

    Each accepted connection is handled by its own :class:`ProxyThread`.
    Exits the process with status 1 if no listening socket can be bound.
    """
    sockets = []
    for af, socktype, proto, _canonname, sa in socket.getaddrinfo(
            None, port, socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE):
        try:
            s = socket.socket(af, socktype, proto)
        except OSError:
            continue

        # Make IPv6 socket only bind on IPv6 address, otherwise may clash with IPv4 and not get enabled
        if af == socket.AF_INET6:
            try:
                s.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
            except OSError:
                pass

        # Set SO_REUSEADDR for less painful server restarting
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

        try:
            s.bind(sa)
            # Default backlog: clients routinely open several connections at once.
            s.listen()
        except OSError:
            s.close()
            continue

        sockets.append(s)

    if not sockets:
        print(f'Could not bind to port {port}', file=sys.stderr)
        sys.exit(1)

    listening = select.poll()
    sock_by_fd = {}
    for s in sockets:
        listening.register(s, select.POLLIN)
        sock_by_fd[s.fileno()] = s
    del sockets

    while True:
        for fd, _ in listening.poll():
            try:
                conn, (host, *_) = sock_by_fd[fd].accept()
            except OSError:
                # e.g. the peer reset before accept(); keep serving.
                continue
            ProxyThread(conn, host).start()


if __name__ == '__main__':
    if len(sys.argv) != 3:
        print(f'Usage: {sys.argv[0]} users-file port', file=sys.stderr)
        sys.exit(1)

    # The users file holds one "username:password" entry per line.
    with open(sys.argv[1]) as f:
        for number, line in enumerate(f, start=1):
            line = line.rstrip('\n')
            if line.strip() == '':
                continue
            username, separator, password = line.partition(':')
            if not separator:
                print(f'{sys.argv[1]}:{number}: expected username:password', file=sys.stderr)
                sys.exit(1)
            users[username] = password

    try:
        port = int(sys.argv[2])
    except ValueError:
        print(f'Usage: {sys.argv[0]} users-file port', file=sys.stderr)
        sys.exit(1)

    listen(port)