162 lines
3.9 KiB
Python
162 lines
3.9 KiB
Python
import html.parser
|
|
import json
|
|
import os
|
|
import time
|
|
import uuid
|
|
|
|
class UnknownTagError(Exception):
    """Signals an HTML tag that the converter has no translation for."""
|
|
|
|
class DeHTMLifier(html.parser.HTMLParser):
    """Flatten a small subset of HTML into plain text with control characters.

    Opening and closing ``<b>`` tags both emit the bold marker ^B (0x02) and
    opening and closing ``<i>`` tags both emit the italic marker ^] (0x1d).
    ``<small>`` tags are dropped (their contents are kept) and ``<br>``
    becomes a ``/`` separator.  Any other tag raises ``UnknownTagError``.

    Usage: call ``feed(text)``, then ``close()`` so buffered text is flushed,
    then read the converted string from ``processed()``.
    """

    _BOLD = '\x02'    # ^B
    _ITALIC = '\x1d'  # ^]

    def __init__(self):
        # convert_charrefs=True makes the base parser decode character and
        # entity references itself and hand them to handle_data().
        super().__init__(convert_charrefs=True)
        # Output fragments, concatenated by processed().
        self.result = []

    def processed(self):
        """Return everything converted so far as a single string."""
        return ''.join(self.result)

    def handle_starttag(self, tag, attrs):
        """Emit the replacement for *tag*; *attrs* is ignored.

        Raises:
            UnknownTagError: if *tag* is not one of b, i, small or br.
        """
        if tag == 'b':
            self.result.append(self._BOLD)
        elif tag == 'i':
            self.result.append(self._ITALIC)
        elif tag == 'small':
            # No plain-text equivalent; drop the tag, keep its contents.
            pass
        elif tag == 'br':
            # A slash reads more nicely than a newline symbol.  Skip the
            # leading space when the previous fragment already ends in one.
            if self.result and self.result[-1].endswith(' '):
                self.result.append('/ ')
            else:
                self.result.append(' / ')
        else:
            raise UnknownTagError(tag)

    def handle_endtag(self, tag):
        """Treat a closing tag exactly like its opening tag.

        The markers are symmetric, and people write ``</br>`` for a break.
        """
        self.handle_starttag(tag, None)

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closing tag once, as if it were an opening tag."""
        self.handle_starttag(tag, attrs)

    def handle_data(self, data):
        """Append literal text to the output."""
        self.result.append(data)

    def handle_entityref(self, name):
        """Guard: must never be called while convert_charrefs is True."""
        # Raised explicitly (instead of `assert False`) so the guard is not
        # stripped when Python runs with -O.
        raise AssertionError('unexpected entity reference: %r' % (name,))

    def handle_charref(self, name):
        """Guard: must never be called while convert_charrefs is True."""
        # Raised explicitly (instead of `assert False`) so the guard is not
        # stripped when Python runs with -O.
        raise AssertionError('unexpected character reference: %r' % (name,))
|
|
|
|
def dehtml(text):
    """Run *text* through a fresh ``DeHTMLifier`` and return its output."""
    converter = DeHTMLifier()
    converter.feed(text)
    # close() makes the parser flush any text it is still buffering.
    converter.close()
    converted = converter.processed()
    return converted
|
|
|
|
def decks(data):
    """Return the value stored under the ``'order'`` key of *data*."""
    deck_order = data['order']
    return deck_order
|
|
|
|
def deckname(data, deck):
    """Return the ``'name'`` field of the entry stored under *deck* in *data*."""
    deck_entry = data[deck]
    return deck_entry['name']
|
|
|
|
def cards(data, deck):
    """Resolve the card indices of *deck* into the cards themselves.

    The deck entry ``data[deck]`` holds index lists under ``'black'`` and
    ``'white'``; they index into ``data['blackCards']`` and
    ``data['whiteCards']`` respectively.

    Returns:
        A ``(black_cards, white_cards)`` tuple of lists.
    """
    black_indices = data[deck]['black']
    white_indices = data[deck]['white']

    # The card pools are looked up per index, so a pool is never touched
    # when the corresponding index list is empty.
    selected_black = []
    for index in black_indices:
        selected_black.append(data['blackCards'][index])

    selected_white = []
    for index in white_indices:
        selected_white.append(data['whiteCards'][index])

    return selected_black, selected_white
|
|
|
|
def geninfo(data, deck, official):
    """Build the deck-level metadata dictionary for *deck*.

    Args:
        data: Parsed deck collection, as consumed by ``cards`` and ``deckname``.
        deck: Key of the deck inside *data*; also used as the ``'code'``.
        official: Truthy selects the cardsagainsthumanity.com copyright URL,
            falsy the crhallberg.com one.

    Returns:
        A dict with the deck's name, code, timestamps (current UTC time),
        copyright information, card counts, author and rating.
    """
    timestamp = time.strftime('%Y-%m-%dT%H:%M:%S+00:00', time.gmtime())

    if official:
        holder_url = 'https://cardsagainsthumanity.com/'
    else:
        holder_url = 'https://crhallberg.com/cah/'

    calls, responses = cards(data, deck)

    info = {}
    info['name'] = deckname(data, deck)
    info['code'] = deck
    info['description'] = 'converted from https://crhallberg.com/cah/ deck %s' % deck
    info['unlisted'] = True
    info['created_at'] = timestamp
    info['updated_at'] = timestamp
    info['external_copyright'] = True
    info['copyright_holder_url'] = holder_url
    info['category'] = 'other'
    info['call_count'] = len(calls)
    info['response_count'] = len(responses)
    info['author'] = {
        'id': '84897553-35f4-40c8-b104-9dd770199cce',
        'username': 'jsonah2cardcast',
    }
    info['rating'] = '0.0'
    return info
|
|
|
|
def gencards(data, deck, nsfw):
    """Build the card listing for *deck*.

    Black cards have their text de-HTML-ified and split into segments around
    their ``_`` blanks; white cards become a single de-HTML-ified segment.
    Every card gets a fresh random UUID, the current UTC timestamp and the
    given *nsfw* flag.

    Returns:
        ``{'calls': [...], 'responses': [...]}`` for black and white cards.
    """
    stamp = time.strftime('%Y-%m-%dT%H:%M:%S+00:00', time.gmtime())

    def split_on_blanks(text, pick_count):
        # Cut the text at one '_' per pick.  When the text runs out of
        # blanks, a trailing ' _' is added so each pick still gets a slot.
        pieces = []
        rest = text
        for _pick in range(pick_count):
            if '_' not in rest:
                rest += ' _'
            head, _sep, rest = rest.partition('_')
            pieces.append(head)
        pieces.append(rest)
        return pieces

    def wrap(segments):
        # Attach the per-card boilerplate fields to the text segments.
        card = {}
        card['id'] = str(uuid.uuid4())
        card['text'] = segments
        card['created_at'] = stamp
        card['nsfw'] = nsfw
        return card

    raw_black, raw_white = cards(data, deck)

    calls = []
    for black in raw_black:
        plain = dehtml(black['text'])
        picks = black['pick']
        calls.append(wrap(split_on_blanks(plain, picks)))

    responses = []
    for white in raw_white:
        responses.append(wrap([dehtml(white)]))

    return {
        'calls': calls,
        'responses': responses,
    }
|
|
|
|
def main():
    """Convert every deck in official.json and unofficial.json.

    For each deck code listed by ``decks()`` this prints the code and writes
    ``output/<deck>/index.json`` (from ``geninfo``) and ``output/<deck>/cards``
    (from ``gencards``), creating the directories as needed.
    """
    # Read the inputs as UTF-8 explicitly: relying on the locale's default
    # encoding can mis-decode or reject non-ASCII card text on some systems.
    with open('official.json', 'r', encoding='utf-8') as f:
        official_data = json.load(f)
    with open('unofficial.json', 'r', encoding='utf-8') as f:
        unofficial_data = json.load(f)

    os.makedirs('output', exist_ok=True)

    def writedeck(data, deck, official):
        # Write both output files for a single deck.
        os.makedirs('output/%s' % deck, exist_ok=True)

        # json.dumps escapes non-ASCII by default, so the output is ASCII;
        # the encoding is pinned anyway to keep it locale-independent.
        with open('output/%s/index.json' % deck, 'w', encoding='utf-8') as f:
            f.write(json.dumps(geninfo(data, deck, official)))
        with open('output/%s/cards' % deck, 'w', encoding='utf-8') as f:
            # Just default to everything being nsfw
            f.write(json.dumps(gencards(data, deck, nsfw=True)))

    # Official decks first, then unofficial ones.
    for data, official in ((official_data, True), (unofficial_data, False)):
        for deck in decks(data):
            print(deck)
            writedeck(data, deck, official=official)
|
|
|
|
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|