#!/usr/bin/env python
"""Pack a JSON word list into per-start-letter assembly dictionary tables.

Usage: SCRIPT SRC_JSON EXCLUDE_JSON RAREST_WORD TARGET

SRC_JSON and EXCLUDE_JSON are JSON arrays of words. Only 5-letter words are
considered. Entries of the exclude list are dropped from the dictionary, from
the start of the list up to and including RAREST_WORD (the whole list if
RAREST_WORD never appears among its 5-letter entries).

Each remaining word is stored under its first letter; the other four letters
are packed 5 bits apiece into three bytes (little-endian). TARGET receives one
`dictionary_<letter>` table of `db` rows per letter, followed by a
`dictionaries` table of `dw <label>, <word count>` rows.
"""

import json
import sys

alphabet = 'abcdefghijklmnopqrstuvwxyz'

WORD_LENGTH = 5
BITS_PER_LETTER = 5

# Letter -> 0..25 code, so packing does not rescan the alphabet per letter.
_LETTER_CODES = {letter: code for code, letter in enumerate(alphabet)}


def _load_words(path):
    """Return the WORD_LENGTH-long entries of the JSON array stored at *path*."""
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return [word for word in json.load(f) if len(word) == WORD_LENGTH]


def _exclusions_through(exclude_all, rarestword):
    """Return the set of words in *exclude_all* up to and including *rarestword*.

    If *rarestword* is not present, every word of *exclude_all* is returned.
    """
    exclude = set()
    for word in exclude_all:
        exclude.add(word)
        if word == rarestword:
            break
    return exclude


def _pack_word(word):
    """Return the 3-byte packed form of *word* (first letter is implicit).

    Raises:
        ValueError: if *word* contains a character outside a-z.
    """
    # Explicit check instead of `assert`, which disappears under `python -O`.
    if any(letter not in _LETTER_CODES for letter in word):
        raise ValueError(f'word {word!r} contains characters outside a-z')

    number = 0
    # The first letter selects the table, so only the remaining letters are
    # stored: 4 letters * 5 bits = 20 bits, which fits in three bytes.
    for index, letter in enumerate(word[1:]):
        number |= _LETTER_CODES[letter] << (BITS_PER_LETTER * index)
    return bytes([number & 0xff, (number >> 8) & 0xff, number >> 16])


def _build_arrays(words):
    """Group the packed form of each word under the word's first letter.

    Every letter of the alphabet gets an entry, even when it has no words.
    """
    arrays = {letter: [] for letter in alphabet}
    for word in words:
        packed = _pack_word(word)
        arrays[word[0]].append(packed)
    return arrays


def _render_tables(arrays):
    """Return the assembly source lines for the per-letter tables and index."""
    lines = []
    for startletter, array in arrays.items():
        lines.append(f'dictionary_{startletter}:\n')
        lines.extend(
            f'\tdb {", ".join(str(byte) for byte in packed)}\n'
            for packed in array
        )
        lines.append('\n')

    lines.append('dictionaries:\n')
    lines.extend(
        f'\tdw dictionary_{startletter}, {len(array)}\n'
        for startletter, array in arrays.items()
    )
    return lines


def main(argv=None):
    """Run the packer.

    Args:
        argv: full argument vector (program name first); defaults to
            ``sys.argv``. Arguments beyond the fourth are ignored.

    Raises:
        SystemExit: if fewer than four arguments are supplied.
        ValueError: if a selected word contains a character outside a-z.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 5:
        prog = argv[0] if argv else 'SCRIPT'
        sys.exit(f'usage: {prog} SRC_JSON EXCLUDE_JSON RAREST_WORD TARGET')
    srcpath, excludepath, rarestword, targetpath = argv[1:5]

    words = _load_words(srcpath)
    exclude = _exclusions_through(_load_words(excludepath), rarestword)

    # Don't include words in the exclude list
    words = [word for word in words if word not in exclude]

    # Build everything before opening the target so that invalid input
    # never truncates an existing output file.
    lines = _render_tables(_build_arrays(words))

    with open(targetpath, 'w', encoding='utf-8') as f:
        f.writelines(lines)


if __name__ == '__main__':
    main()