happybot/urls.py

141 lines
4.9 KiB
Python

#!/usr/bin/env python3
def sizeof_fmt(num, suffix='B'):
    """Format a quantity using binary (1024-based) unit prefixes.

    Args:
        num: Quantity to format (int or float; may be negative).
        suffix: Unit text appended after the prefix, 'B' by default.

    Returns:
        A string with one decimal place, such as '1.5KiB'. Values too
        large for the 'Zi' prefix are expressed in 'Yi'.
    """
    for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        # Compare the value as it will be displayed: something like
        # 1023.97 would print as "1024.0", so it has to roll over to the
        # next unit instead of being emitted with this one.
        if round(abs(num), 1) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)
from re import compile as regex
# Compiled pattern for URLs embedded in free text; used via urls.findall().
#   - scheme: http, https or ftp, then an optional "user[:password]@" part
#   - host: either a dotted-quad IPv4 address, with negative lookaheads
#     rejecting 10.x.x.x, 127.x.x.x, 169.254.x.x, 192.168.x.x and
#     172.16-31.x.x, or a dotted host name ending in a label of 2+ letters
#     (optionally followed by a trailing dot)
#   - then an optional ":port" (2-5 digits) and an optional part starting
#     with '/', '?' or '#' that runs to the next whitespace
# Every group is non-capturing, so findall() returns whole-match strings.
# NOTE(review): no re.IGNORECASE flag is passed, so as written the ASCII
# letters of the scheme and host must be lower-case -- confirm that is intended.
urls = regex(r'(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?')
from subprocess import Popen, PIPE
def cmd(*args):
    """Run a command and lazily yield its standard output line by line.

    Args:
        *args: Program name followed by its arguments; passed to Popen as
            an argument list (no shell is involved).

    Yields:
        Each output line as a str, decoded as UTF-8 with undecodable bytes
        dropped, and without its trailing newline.

    The child's stdout pipe is closed and the child is terminated (if still
    running) and reaped when the generator is exhausted or closed early.
    """
    proc = Popen(args, stdout=PIPE)
    try:
        # readline() returns b'' only at end of output, which ends the loop.
        for raw in iter(proc.stdout.readline, b''):
            # Strip only a real line terminator, so a final line that lacks
            # one does not lose its last character.
            if raw.endswith(b'\n'):
                raw = raw[:-1]
            # No try/except around the yield: a bare except here would
            # swallow GeneratorExit and break generator.close().
            yield raw.decode('utf-8', 'ignore')
    finally:
        proc.stdout.close()
        if proc.poll() is None:
            proc.terminate()
        proc.wait()
from sys import argv
# Require exactly one command-line argument: the channel name.
if len(argv) != 2:
    print('Usage:', argv[0], '#channel')
    # Raise SystemExit directly rather than calling exit(): that helper is
    # injected by the site module for interactive use and is not available
    # when the interpreter is started with -S.
    raise SystemExit(1)
# Per-channel directory; the rest of this file follows chan + '/out' and
# writes replies to chan + '/in'.
chan = '/home/zgrep/offtopiabday/irc.freenode.net/' + argv[1]
from urllib.request import Request, urlopen
def quote(url):
    """Percent-encode every non-ASCII character of *url*.

    Characters with a code point of 127 or below are copied unchanged, so
    escapes and reserved characters already present in the URL are kept.

    Args:
        url: The URL text to encode.

    Returns:
        The URL with each character above code point 127 replaced by one
        lower-case '%xx' escape per byte of its UTF-8 encoding.
    """
    # Build all pieces and join once instead of growing a string with +=
    # for every character.
    return ''.join(
        c if ord(c) <= 127
        else ''.join('%%%02x' % b for b in c.encode('utf-8'))
        for c in url
    )
# Substrings used to rewrite irccloud pastebin links to their raw form.
irccloud_none = 'irccloud.com/pastebin/'
irccloud_with = irccloud_none + 'raw/'


def _head(url):
    """Send a HEAD request and return (content_type, content_length).

    content_type is lower-cased, or '' when the header is absent.
    content_length is the raw header value, or None when absent.
    Any error raised by the request propagates to the caller.
    """
    # NOTE(review): no timeout is passed to urlopen, so a stalled server
    # blocks processing of further lines -- consider adding one.
    with urlopen(Request(url, method='HEAD')) as resp:
        ctype = resp.getheader('content-type') or ''
        return ctype.lower(), resp.getheader('content-length')


def _gifv_summary(base):
    """Describe the smaller of base + '.mp4' and base + '.gif'.

    Returns 'url size' for the smaller variant, '???' when either variant
    cannot be fetched or has an unexpected content type, and None when the
    reported lengths are not integers (nothing is reported in that case).
    """
    try:
        ctype, mp4_len = _head(base + '.mp4')
    except Exception:
        # Previously execution fell through and read an unset or stale
        # content type; report the failure and stop instead.
        print('| Could not get mp4.')
        return '???'
    if 'video' not in ctype:
        print('| Video is not a video?')
        return '???'
    try:
        ctype, gif_len = _head(base + '.gif')
    except Exception:
        print('| Could not get gif.')
        return '???'
    if 'image' not in ctype:
        print('| Image is not an image?')
        return '???'
    try:
        mp4_len = int(mp4_len)
        gif_len = int(gif_len)
    except (TypeError, ValueError):
        print('| Lengths are not ints.')
        return None
    if mp4_len <= gif_len:
        return base + '.mp4' + ' ' + sizeof_fmt(mp4_len)
    return base + '.gif' + ' ' + sizeof_fmt(gif_len)


def _media_summary(url):
    """Return the formatted size of an image or video URL.

    Returns None when the HEAD request fails or the URL is neither an image
    nor a video, '???' when the size cannot be determined, and otherwise
    the size formatted by sizeof_fmt().
    """
    try:
        ctype, length = _head(url)
    except Exception:
        print('| | Failure.')
        return None
    print('| | HEAD request completed.')
    if 'image' in ctype:
        may_download = True   # images may be fetched to measure them
    elif 'video' in ctype:
        may_download = False  # videos are sized from the HEAD response only
    else:
        return None
    size = -1  # stays negative unless a size is actually obtained
    if length:
        try:
            size = int(length)
        except ValueError:
            size = -1
    elif may_download:
        try:
            # A real GET: repeating the HEAD request here would always
            # read an empty body and report a size of zero.
            with urlopen(Request(url)) as resp:
                size = len(resp.read())  # whole body is read into memory
            print('| | Normal request required and complete.')
        except Exception:
            size = -1
            print('| | Normal request required and failed.')
    if size < 0:
        print('| | Failure.')
        return '???'
    print('| | Success.')
    return sizeof_fmt(size)


# Follow the channel's 'out' file and, for each line containing URLs, write a
# bracketed summary of them to the channel's 'in' file.
for line in cmd('tail', '-n', '0', '-f', chan + '/out'):
    # Expected layout: two leading fields, a delimited nick, then the message.
    fields = line.split(' ', 3)
    if len(fields) != 4:
        continue  # malformed line: skip it instead of crashing on unpacking
    nick = fields[2][1:-1]  # drop the characters surrounding the nick
    line = fields[3]
    if nick in ('happybot', 'hatebot'):
        continue
    result = []
    print('Doing line:', line)
    for url in urls.findall(line):
        url = quote(url)
        if irccloud_none in url and irccloud_with not in url:
            result.append(url.replace(irccloud_none, irccloud_with, 1))
            continue
        if url.endswith('.gifv'):  # hack for imgur gifv's
            url = url[:-5]
            print('| Got gifv:', url)
            summary = _gifv_summary(url)
        else:
            print('| Got URL:', url)
            summary = _media_summary(url)
        if summary is not None:
            result.append(summary)
    if result:
        with open(chan + '/in', 'w') as fh:
            fh.write('[' + '] ['.join(result) + ']\n')