"""Generate lookup tables describing X11 compose sequences.

The script reads ``keysymdef.h`` and the ``en_US.UTF-8`` ``Compose.pre``
table (downloaded from cgit.freedesktop.org unless the caller supplies the
lines), merges the two, and writes ``comments.txt``, ``chars.txt`` and
``sequences.txt``.
"""
from urllib.request import urlopen
from collections import defaultdict
from copy import deepcopy
from os import chdir
import string
import re
import unicodedata

_KEYSYMDEF_URL = 'https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h'
_COMPOSE_URL = 'https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre'
_DEFAULT_OUTPUT_DIR = '/home/zgrep/offtopiabday/happybot/compose/'

# Symbols used when rendering key sequences.
_COMPOSE_SYMBOL = '\u2384'
_SPACE_SYMBOL = '\u2423'
_TAB_SYMBOL = '\u21e5'
_DOTTED_CIRCLE = '\u25cc'

_KEYSYM_RE = re.compile(
    r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*/\*[ (]U\+([0-9A-F]+) (.+?)\s*\*/\s*$'
)
_COMPOSE_LINE_RE = re.compile(
    r'^((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)'
)
_KEY_RE = re.compile(r'<([a-zA-Z_0-9]+)>')
_CODEPOINT_KEY_RE = re.compile(r'^U[0-9a-fA-F]+$')


def _to_text(raw, errors):
    """Decode *raw* as UTF-8 when it is bytes; pass ``str`` through unchanged."""
    if isinstance(raw, (bytes, bytearray)):
        return raw.decode('utf-8', errors)
    return raw


def _strip_block_comments(line, in_comment):
    """Remove ``/* ... */`` comment text from one line.

    *in_comment* says whether a comment opened on an earlier line is still
    open.  Returns ``(text_outside_comments, still_in_comment)``.  Comments
    that open and close on the same line, and several comments on one line,
    are all handled.
    """
    kept = []
    pos = 0
    while True:
        if in_comment:
            end = line.find('*/', pos)
            if end == -1:
                break
            pos = end + 2
            in_comment = False
        else:
            start = line.find('/*', pos)
            if start == -1:
                kept.append(line[pos:])
                break
            kept.append(line[pos:start])
            pos = start + 2
            in_comment = True
    return ''.join(kept), in_comment


def _record_consistent(table, key, value, what, context=''):
    """Store ``table[key] = value``, or fail if *key* already maps elsewhere.

    Raises AssertionError explicitly, so the check still runs under
    ``python -O`` (a plain ``assert`` would be stripped).
    """
    known = table.setdefault(key, value)
    if known != value:
        message = f'{what} conflict for {key!r}: had {known!r}, given {value!r}'
        if context:
            message += f' ({context})'
        raise AssertionError(message)


def _parse_keysyms(lines):
    """Parse keysymdef.h lines into ``{name: (char, comment)}``."""
    result = {}
    for raw in lines:
        line = _to_text(raw, 'ignore').strip()
        if m := _KEYSYM_RE.match(line):
            name, codepoint, comment = m.groups()
            result[name] = (chr(int(codepoint, 16)), comment)
    return result


def keysym_names(lines=None):
    """Return ``{keysym_name: (character, comment)}`` parsed from keysymdef.h.

    Only ``#define XK_...`` lines whose trailing comment starts with a
    ``U+XXXX`` code point are kept.

    Args:
        lines: optional iterable of lines (bytes or str) to parse.  When
            omitted, keysymdef.h is downloaded from cgit.freedesktop.org.
    """
    if lines is not None:
        return _parse_keysyms(lines)
    with urlopen(_KEYSYMDEF_URL) as web:
        return _parse_keysyms(web)


def _parse_compose(lines):
    """Parse Compose.pre lines; see :func:`compose_keys` for the result."""
    char_to_sequence = defaultdict(list)
    char_to_name = defaultdict(set)
    char_to_comment = {}
    name_to_char = {}
    in_comment = False
    for raw in lines:
        # 'strict' is the real name of the raising handler; the previous
        # 'error' is not a registered handler and produced a LookupError on
        # the first malformed byte.
        line = _to_text(raw, 'strict').strip()
        line, in_comment = _strip_block_comments(line, in_comment)
        m = _COMPOSE_LINE_RE.match(line)
        if not m:
            continue
        keys, char, name, comment = m.groups()
        # Pull out the key names directly so tabs or repeated blanks between
        # <keys> cannot yield bogus empty key names.
        sequence = _KEY_RE.findall(keys)
        # Undo backslash escapes inside the quoted result string.
        char = re.sub(r'\\(.)', r'\1', char)
        char_to_sequence[char].append(sequence)
        if name:
            char_to_name[char].add(name)
            _record_consistent(name_to_char, name, char, 'Name', 'Line: ' + line)
        _record_consistent(char_to_comment, char, comment, 'Comment', 'Line: ' + line)
    return char_to_sequence, char_to_name, name_to_char, char_to_comment


def compose_keys(lines=None):
    """Parse the compose table.

    Args:
        lines: optional iterable of lines (bytes or str) to parse.  When
            omitted, Compose.pre is downloaded from cgit.freedesktop.org.

    Returns:
        ``(char_to_sequence, char_to_name, name_to_char, char_to_comment)``:
        result string -> list of key-name sequences, result string -> set of
        keysym names, keysym name -> result string, and result string ->
        trailing ``#`` comment.

    Raises:
        AssertionError: a keysym name is given two different result strings,
            or a result string is given two different comments.
        UnicodeDecodeError: a bytes line is not valid UTF-8.
    """
    if lines is not None:
        return _parse_compose(lines)
    with urlopen(_COMPOSE_URL) as web:
        return _parse_compose(web)


def merged(compose=None, keysyms=None):
    """Merge keysym data into the compose tables and prepare them for display.

    Args:
        compose: optional 4-tuple as returned by :func:`compose_keys`; it is
            fetched when omitted.  The mappings are modified in place.
        keysyms: optional mapping as returned by :func:`keysym_names`; it is
            fetched when omitted.

    Returns:
        The same four mappings as :func:`compose_keys`, with keysym names and
        comments merged in, sequences containing undisplayable keys removed,
        space/Tab mapped to visible symbols, and combining characters
        prefixed with U+25CC.

    Raises:
        AssertionError: the compose table and keysymdef.h disagree about the
            character of a keysym name.
        ValueError: one of the display symbols collides with the input data.
    """
    if compose is None:
        compose = compose_keys()
    if keysyms is None:
        keysyms = keysym_names()
    char_to_sequence, char_to_name, name_to_char, char_to_comment = compose

    for name, (char, comment) in keysyms.items():
        _record_consistent(name_to_char, name, char, 'Compose/keysym name')
        char_to_name.setdefault(char, set()).add(name)
        if char in char_to_comment:
            if char_to_comment[char] != comment:
                char_to_comment[char] += ';' + comment
        else:
            char_to_comment[char] = comment

    # Compose symbol: it is prepended to every rendered sequence, so it must
    # not already be a character known to the tables.
    if _COMPOSE_SYMBOL in char_to_name:
        raise ValueError('Please use different symbol for compose key.')

    # Space symbol: every keysym name for ' ' is displayed as U+2423.
    space_names = char_to_name.get(_SPACE_SYMBOL, ())
    for name in char_to_name.get(' ', ()):
        name_to_char[name] = _SPACE_SYMBOL

    # Tab symbol:
    tab_sym_names = char_to_name.get(_TAB_SYMBOL, ())
    name_to_char['Tab'] = _TAB_SYMBOL

    # Filter out sequences that have keys we don't know how to display nicely.
    # Includes stuff like deadkeys.
    # NOTE(review): sequences keep their leading Multi_key entry, and nothing
    # visible here adds 'Multi_key' to name_to_char, so such sequences appear
    # to be dropped by this filter -- confirm that this is intended.
    for char in list(char_to_sequence):
        new_sequences = []
        for sequence in char_to_sequence[char]:
            if any(key in space_names for key in sequence):
                raise ValueError('Please switch to using \\u2420 (\u2420) for space.')
            if any(key in tab_sym_names for key in sequence):
                raise ValueError('Please switch to using \\u2409 (\u2409) for tab.')
            for key in sequence:
                # Keys spelled U<hex> denote the code point itself.
                if key not in name_to_char and _CODEPOINT_KEY_RE.match(key):
                    name_to_char[key] = chr(int(key[1:], 16))
            if all(key in name_to_char for key in sequence):
                new_sequences.append(sequence)
        if new_sequences:
            char_to_sequence[char] = new_sequences
        else:
            del char_to_sequence[char]

    # Prefix combining characters with a dotted circle so they render as
    # visibly combining keys.  unicodedata.combining() only accepts a single
    # character, hence the length guard.
    for name, value in list(name_to_char.items()):
        if len(value) == 1 and unicodedata.combining(value):
            name_to_char[name] = _DOTTED_CIRCLE + value

    return char_to_sequence, char_to_name, name_to_char, char_to_comment


def uni(char, zf=4):
    """Return the code point of *char* as upper-case hex, zero-filled to *zf*."""
    return format(ord(char), 'X').zfill(zf)


def _render(char_to_sequence, char_to_name, name_to_char, char_to_comment):
    """Build the text of comments.txt, chars.txt and sequences.txt."""
    comment_lines = []
    char_lines = []
    sequence_lines = []
    for char, sequences in char_to_sequence.items():
        # Names are sorted so repeated runs produce identical files (set
        # iteration order varies between interpreter runs).
        entry = ';'.join(sorted(char_to_name.get(char, ()))) + ';' + char_to_comment[char]
        if len(char) == 1:
            entry += ';U+' + uni(char) + ';U' + uni(char)
        comment_lines.append(entry + '\n')
        char_lines.append(char + '\n')
        rendered = ' '.join(
            _COMPOSE_SYMBOL + ''.join(name_to_char[name] for name in sequence)
            for sequence in sequences
        )
        sequence_lines.append(char + ' ← ' + rendered + '\n')
    return ''.join(comment_lines), ''.join(char_lines), ''.join(sequence_lines)


def main(output_dir=_DEFAULT_OUTPUT_DIR):
    """Fetch, merge and write the three output files into *output_dir*.

    The process changes its working directory to *output_dir* before
    writing.  Files are written as UTF-8 regardless of the locale, because
    the content is largely non-ASCII.
    """
    comments_str, chars_str, sequences_str = _render(*merged())

    chdir(output_dir)
    with open('comments.txt', 'w', encoding='utf-8') as fh:
        fh.write(comments_str)
    with open('chars.txt', 'w', encoding='utf-8') as fh:
        fh.write(chars_str)
    with open('sequences.txt', 'w', encoding='utf-8') as fh:
        fh.write(sequences_str)

    print('Success!')


# TODO: Deadkey combinations decoding?
# TODO: Allow deadkeys in compose-key combinations?

if __name__ == '__main__':
    main()