#!/usr/bin/env python3
"""IRC helper that announces the size of images/videos linked in a channel.

The script follows ``<channel dir>/out`` with ``tail -f``, extracts every URL
from each new message, and writes a one-line summary to ``<channel dir>/in``:

* irccloud pastebin links are rewritten to their ``raw/`` form,
* imgur-style ``.gifv`` links are resolved to whichever of the ``.mp4`` /
  ``.gif`` variants is smaller, together with its size,
* any other link that serves an image or a video is answered with its size.

Usage: ``script.py '#channel'``
"""

import sys
from http.client import HTTPException
from re import compile as regex
from subprocess import PIPE, Popen
from urllib.request import Request, urlopen

# Matches http/https/ftp URLs; the IP alternative excludes private and
# loopback ranges.  The pattern has no capturing groups, so ``findall``
# returns whole URLs.
urls = regex(r'(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?')

irccloud_none = 'irccloud.com/pastebin/'
irccloud_with = irccloud_none + 'raw/'

# Directory holding one sub-directory per channel, each with `in`/`out` files.
CHANNEL_ROOT = '/home/zgrep/offtopiabday/irc.freenode.net/'

# Nicks whose messages are never inspected.
IGNORED_NICKS = ('happybot', 'hatebot')

# Seconds to wait on any single HTTP request, so that one unresponsive
# server cannot stall the bot indefinitely.
REQUEST_TIMEOUT = 10

# Read size used when a body has to be downloaded just to measure it.
_CHUNK_SIZE = 64 * 1024

# Everything a request can reasonably fail with: network and HTTP status
# errors (URLError/HTTPError/timeouts are OSError subclasses), malformed
# URLs (ValueError) and protocol-level failures (HTTPException).
_FETCH_ERRORS = (OSError, ValueError, HTTPException)


def sizeof_fmt(num, suffix='B'):
    """Return *num* as a human-readable size using binary (1024) prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then printed with one decimal, e.g. ``1536`` -> ``'1.5KiB'``.  Anything
    beyond the ``Zi`` range is expressed in ``Yi``.
    """
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)


def cmd(*args):
    """Run the command *args* and yield its stdout one decoded line at a time.

    Lines are decoded as UTF-8 with undecodable bytes dropped, and the
    trailing newline is removed.  When the consumer is done (or the output
    ends), the pipe is closed and the child is terminated if it is still
    running, then reaped.
    """
    proc = Popen(args, stdout=PIPE)
    try:
        for raw in proc.stdout:
            # Strip only the newline: the final line may lack one, and
            # blindly cutting the last byte would lose a real character.
            yield str(raw.rstrip(b'\n'), 'utf-8', 'ignore')
    finally:
        proc.stdout.close()
        if proc.poll() is None:
            proc.terminate()
        proc.wait()


def quote(url):
    """Percent-encode every non-ASCII character of *url* as UTF-8 bytes.

    ASCII characters are passed through untouched (including any existing
    ``%`` escapes); hex digits are emitted in lowercase.
    """
    return ''.join(
        ch if ord(ch) <= 127
        else ''.join('%%%02x' % byte for byte in ch.encode('utf-8'))
        for ch in url
    )


def _head(url):
    """Issue a HEAD request for *url*.

    Returns ``(content_type, content_length)`` where the content type is
    lowercased (``''`` when the header is missing) and the length is the raw
    header value or ``None``.  Raises one of ``_FETCH_ERRORS`` on failure.
    """
    with urlopen(Request(url, method='HEAD'), timeout=REQUEST_TIMEOUT) as resp:
        content_type = resp.getheader('content-type') or ''
        return content_type.lower(), resp.getheader('content-length')


def _download_size(url):
    """GET *url* and return the body size in bytes without buffering it all."""
    total = 0
    with urlopen(Request(url), timeout=REQUEST_TIMEOUT) as resp:
        while True:
            chunk = resp.read(_CHUNK_SIZE)
            if not chunk:
                break
            total += len(chunk)
    return total


def _gifv_summary(base):
    """Describe the smaller of ``base + '.mp4'`` and ``base + '.gif'``.

    Returns ``'<url> <size>'`` on success, ``'???'`` when either variant
    cannot be fetched or has an unexpected content type, and ``None`` when
    the sizes are unknown (nothing is reported in that case).
    """
    probes = (
        ('.mp4', 'video', '| Could not get mp4.', '| Video is not a video?'),
        ('.gif', 'image', '| Could not get gif.', '| Image is not an image?'),
    )
    lengths = []
    for ext, expected, fetch_msg, type_msg in probes:
        try:
            content_type, length = _head(base + ext)
        except _FETCH_ERRORS:
            # Bail out here: there is no content type to inspect.
            print(fetch_msg)
            return '???'
        if expected not in content_type:
            print(type_msg)
            return '???'
        lengths.append(length)

    try:
        mp4_size, gif_size = int(lengths[0]), int(lengths[1])
    except (TypeError, ValueError):
        # Header missing (None) or not numeric.
        print('| Lengths are not ints.')
        return None

    if mp4_size <= gif_size:
        return base + '.mp4' + ' ' + sizeof_fmt(mp4_size)
    return base + '.gif' + ' ' + sizeof_fmt(gif_size)


def _media_summary(url):
    """Return the formatted size of *url* if it serves an image or a video.

    Images fall back to a full download when the server sends no
    ``Content-Length``; videos rely on the HEAD response only.  Returns
    ``None`` for non-media URLs and for failed HEAD requests, and ``'???'``
    when the resource is media but its size cannot be determined.
    """
    print('| Got URL:', url)
    try:
        content_type, length = _head(url)
    except _FETCH_ERRORS:
        print('| | Failure.')
        return None
    print('| | HEAD request completed.')

    is_image = 'image' in content_type
    if not is_image and 'video' not in content_type:
        return None

    size = -1  # negative means "unknown"
    if length:
        try:
            size = int(length)
        except ValueError:
            size = -1
    elif is_image:
        # No Content-Length: measure the body with a real GET request.
        try:
            size = _download_size(url)
            print('| | Normal request required and complete.')
        except _FETCH_ERRORS:
            print('| | Normal request required and failed.')

    if size < 0:
        print('| | Failure.')
        return '???'
    print('| | Success.')
    return sizeof_fmt(size)


def _describe_line(message):
    """Return the list of summaries for every URL found in *message*."""
    print('Doing line:', message)
    result = []
    for url in urls.findall(message):
        url = quote(url)
        if irccloud_none in url and irccloud_with not in url:
            result.append(url.replace(irccloud_none, irccloud_with, 1))
            continue
        if url.endswith('.gifv'):  # hack for imgur gifv's
            base = url[:-5]
            print('| Got gifv:', base)
            summary = _gifv_summary(base)
        else:
            summary = _media_summary(url)
        if summary is not None:
            result.append(summary)
    return result


def main(argv=None):
    """Follow the channel log named on the command line and answer links."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print('Usage:', argv[0], '#channel')
        sys.exit(1)
    chan = CHANNEL_ROOT + argv[1]

    for entry in cmd('tail', '-n', '0', '-f', chan + '/out'):
        # Expected layout: "<date> <time> <nick> <message>", with the nick
        # wrapped in one delimiter character on each side.
        fields = entry.split(' ', 3)
        if len(fields) != 4:
            continue  # not a chat message; do not let it kill the bot
        nick = fields[2][1:-1]
        if nick in IGNORED_NICKS:
            continue
        result = _describe_line(fields[3])
        if result:
            with open(chan + '/in', 'w') as fh:
                fh.write('[' + '] ['.join(result) + ']\n')


if __name__ == '__main__':
    main()