# Forked from zgrep/happybot (184 lines, 6.5 KiB, Python).
from urllib.request import urlopen
|
|
from collections import defaultdict
|
|
from copy import deepcopy
|
|
from os import chdir
|
|
import string
|
|
import re
|
|
import unicodedata
|
|
|
|
def _parse_keysym_lines(lines):
    """Collect ``XK_*`` definitions that carry a ``U+XXXX`` comment from *lines*."""
    keysym = re.compile(r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*/\*[ (]U\+([0-9A-F]+) (.+?)\s*\*/\s*$')
    result = {}
    for line in lines:
        if isinstance(line, bytes):
            # Undecodable bytes are dropped rather than treated as fatal.
            line = line.decode('utf-8', 'ignore')
        if m := keysym.match(line.strip()):
            name, codepoint, comment = m.groups()
            # NOTE(review): for comments opened with "/*(" the captured
            # description keeps its trailing ")" -- confirm whether consumers
            # of the description care.
            result[name] = (chr(int(codepoint, 16)), comment)
    return result


def keysym_names(lines=None):
    """Map X11 keysym names to the character and description they stand for.

    Only lines of the form
    ``#define XK_<name> 0x<hex> /* U+<HEX> <description> */`` (the comment may
    also open with ``/*(``) are used; every other line is ignored. When the
    same name appears more than once, the last definition wins.

    Args:
        lines: Optional iterable of ``str`` or ``bytes`` lines in
            ``keysymdef.h`` format. When omitted, ``keysymdef.h`` is
            downloaded from cgit.freedesktop.org.

    Returns:
        A dict mapping each keysym name (without the ``XK_`` prefix) to a
        ``(character, description)`` tuple.
    """
    if lines is not None:
        return _parse_keysym_lines(lines)
    with urlopen('https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h') as web:
        # Parse while the response is still open; it yields bytes lines.
        return _parse_keysym_lines(web)
|
|
|
|
def _strip_c_comments(text, in_comment):
    """Return *text* without ``/* ... */`` parts, plus the new open-comment flag.

    *in_comment* says whether a comment opened on an earlier line is still
    open. Any number of comments may start and end within one line.
    """
    kept = []
    while True:
        if in_comment:
            end = text.find('*/')
            if end < 0:
                # The comment continues on the next line.
                break
            text = text[end + 2:]
            in_comment = False
        else:
            start = text.find('/*')
            if start < 0:
                kept.append(text)
                break
            kept.append(text[:start])
            text = text[start + 2:]
            in_comment = True
    return ''.join(kept), in_comment


def _parse_compose_lines(lines):
    """Build the four compose lookup tables from Compose-format *lines*."""
    compose_line = re.compile(r'^<Multi_key>((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)')
    key_name = re.compile(r'<([a-zA-Z_0-9]+)>')

    char_to_sequence = defaultdict(list)
    char_to_name = defaultdict(set)
    char_to_comment = dict()
    name_to_char = dict()

    in_comment = False
    for raw in lines:
        if isinstance(raw, bytes):
            # 'strict' is the registered raise-on-bad-input handler; the
            # previously used name 'error' is not a valid error handler.
            raw = raw.decode('utf-8', 'strict')
        text, in_comment = _strip_c_comments(raw, in_comment)

        m = compose_line.match(text.strip())
        if not m:
            continue
        keys, char, name, comment = m.groups()

        # Pull the key names out directly so any whitespace between keys
        # (tabs, several spaces) is tolerated, as the line regex allows.
        sequence = key_name.findall(keys)
        # Undo backslash escapes inside the quoted result string.
        char = re.sub(r'\\(.)', r'\1', char)

        char_to_sequence[char].append(sequence)

        if name:
            char_to_name[char].add(name)
            known_char = name_to_char.setdefault(name, char)
            if known_char != char:
                raise ValueError(
                    f'Keysym name {name!r} is given for both {known_char!r} '
                    f'and {char!r} (line: {text!r})'
                )

        known_comment = char_to_comment.setdefault(char, comment)
        if known_comment != comment:
            raise ValueError(
                f'Character {char!r} is described as both {known_comment!r} '
                f'and {comment!r} (line: {text!r})'
            )

    return char_to_sequence, char_to_name, name_to_char, char_to_comment


def compose_keys(lines=None):
    """Parse ``<Multi_key>`` compose sequences from an X11 Compose file.

    Only lines that start with ``<Multi_key>`` and have the shape
    ``<Multi_key> <key>... : "result" [name] # comment`` are used. Text inside
    ``/* ... */`` comments is removed first, including comments that span
    several lines or open and close on the same line.

    Args:
        lines: Optional iterable of ``str`` or ``bytes`` lines in Compose
            format. When omitted, ``Compose.pre`` for ``en_US.UTF-8`` is
            downloaded from cgit.freedesktop.org.

    Returns:
        A tuple ``(char_to_sequence, char_to_name, name_to_char,
        char_to_comment)``:

        * ``char_to_sequence`` -- ``defaultdict(list)``: result string to the
          list of key-name sequences (each a list of names without ``<>``).
        * ``char_to_name`` -- ``defaultdict(set)``: result string to the names
          given after the quoted result.
        * ``name_to_char`` -- dict: name to result string.
        * ``char_to_comment`` -- dict: result string to its ``#`` comment.

    Raises:
        ValueError: If one name is attached to two different results, or one
            result appears with two different comments.
        UnicodeDecodeError: If a ``bytes`` line is not valid UTF-8.
    """
    if lines is not None:
        return _parse_compose_lines(lines)
    with urlopen('https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre') as web:
        # Parse while the response is still open; it yields bytes lines.
        return _parse_compose_lines(web)
|
|
|
|
def merged(compose=None, keysyms=None):
    """Merge compose-sequence tables with keysym names and make them displayable.

    Steps, in order:

    1. Add every keysym name/character/description to the compose tables;
       differing descriptions for one character are joined with ``;``.
    2. Map the names of the space character to ``\\u2423`` and ``Tab`` to
       ``\\u21e5`` so those keys are visible when printed.
    3. Give keys named ``U<hex>`` the character with that code point, then
       drop every sequence that still contains a key without a known
       character (for example dead keys), and drop results left with no
       sequence at all.
    4. Prefix combining characters in ``name_to_char`` with ``\\u25cc`` so
       they have something to attach to.

    Args:
        compose: Optional ``(char_to_sequence, char_to_name, name_to_char,
            char_to_comment)`` tuple as returned by ``compose_keys()``. The
            mappings are modified in place. Defaults to ``compose_keys()``.
        keysyms: Optional dict of name to ``(character, description)`` as
            returned by ``keysym_names()``. Defaults to ``keysym_names()``.

    Returns:
        The same four mappings, updated:
        ``(char_to_sequence, char_to_name, name_to_char, char_to_comment)``.

    Raises:
        ValueError: If a keysym name maps to different characters in the two
            sources, or if one of the display symbols (``\\u2384`` for
            compose, ``\\u2423`` for space, ``\\u21e5`` for tab) is itself
            something the input can refer to.
    """
    if compose is None:
        compose = compose_keys()
    if keysyms is None:
        keysyms = keysym_names()
    char_to_sequence, char_to_name, name_to_char, char_to_comment = compose

    for name, (char, comment) in keysyms.items():
        known_char = name_to_char.setdefault(name, char)
        if known_char != char:
            raise ValueError(
                f'Keysym name {name!r} is {known_char!r} in the compose data '
                f'but {char!r} in the keysym data'
            )

        char_to_name.setdefault(char, set()).add(name)

        if char not in char_to_comment:
            char_to_comment[char] = comment
        elif char_to_comment[char] != comment:
            char_to_comment[char] += ';' + comment

    # Compose symbol: it is written in front of each sequence by the caller,
    # so it must not also be a character that has a name here.
    if '\u2384' in char_to_name:
        raise ValueError('Please use different symbol for compose key.')

    # Space symbol: .get() avoids inserting empty entries into the mapping.
    space_names = char_to_name.get('\u2423', ())
    for name in char_to_name.get(' ', ()):
        name_to_char[name] = '\u2423'
    # Tab symbol:
    tab_sym_names = char_to_name.get('\u21e5', ())
    name_to_char['Tab'] = '\u21e5'

    # Filter out sequences that have keys we don't know how to display nicely.
    # Includes stuff like deadkeys.
    unicode_key = re.compile(r'^U[0-9a-fA-F]+$')
    for char in list(char_to_sequence):
        displayable = []
        for sequence in char_to_sequence[char]:
            if any(key in space_names for key in sequence):
                raise ValueError('Please switch to using \\u2420 (\u2420) for space.')
            if any(key in tab_sym_names for key in sequence):
                raise ValueError('Please switch to using a different symbol than \\u21e5 (\u21e5) for tab.')
            for key in sequence:
                # Keys written as U<hex> name a code point directly.
                if key not in name_to_char and unicode_key.match(key):
                    name_to_char[key] = chr(int(key[1:], 16))
            if all(key in name_to_char for key in sequence):
                displayable.append(sequence)
        if displayable:
            char_to_sequence[char] = displayable
        else:
            del char_to_sequence[char]

    # Add spaces so that combining keys become more obvious as combining keys...
    # ...but this is still not obvious, but oh well.
    for name, char in list(name_to_char.items()):
        # unicodedata.combining() only accepts a single character; values
        # taken from compose results may be longer strings.
        if len(char) == 1 and unicodedata.combining(char):
            name_to_char[name] = '\u25cc' + char

    return char_to_sequence, char_to_name, name_to_char, char_to_comment
|
|
|
|
def uni(char, zf=4):
    """Return the code point of *char* in uppercase hex, left-padded with zeros.

    Args:
        char: A single character.
        zf: Minimum number of digits; longer values are not truncated.
    """
    codepoint = ord(char)
    digits = format(codepoint, 'X')
    return digits.zfill(zf)
|
|
|
|
def main(output_dir='/home/zgrep/offtopiabday/happybot/compose/'):
    """Write the compose tables to ``comments.txt``, ``chars.txt`` and ``sequences.txt``.

    The three files are parallel: line *n* of each describes the same result
    string.

    * ``comments.txt`` -- the result's names, then its comment, then (for
      single-character results) ``U+XXXX`` and ``UXXXX``, joined by ``;``.
    * ``chars.txt`` -- the result string itself.
    * ``sequences.txt`` -- ``<result> <- <sequences>``, each sequence written
      as ``\\u2384`` followed by the display characters of its keys.

    Args:
        output_dir: Directory that becomes the working directory and receives
            the three files.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = merged()

    comment_lines = []
    char_lines = []
    sequence_lines = []

    for char, sequences in char_to_sequence.items():
        # Names come from a set; sort them so reruns produce identical files.
        description = ';'.join(sorted(char_to_name[char])) + ';' + char_to_comment[char]
        if len(char) == 1:
            description += ';U+' + uni(char) + ';U' + uni(char)
        comment_lines.append(description)

        char_lines.append(char)

        sequence_lines.append(char + ' ← ' + ' '.join(
            '\u2384' + ''.join(name_to_char[name] for name in sequence)
            for sequence in sequences
        ))

    chdir(output_dir)
    outputs = (
        ('comments.txt', comment_lines),
        ('chars.txt', char_lines),
        ('sequences.txt', sequence_lines),
    )
    for filename, rows in outputs:
        # The data is mostly non-ASCII, so do not depend on the locale's
        # default encoding.
        with open(filename, 'w', encoding='utf-8') as fh:
            fh.writelines(row + '\n' for row in rows)

    print('Success!')
|
|
|
|
# TODO: Deadkey combinations decoding?
|
|
# TODO: Allow deadkeys in compose-key combinations?
|
|
|
|
# Regenerate the output files only when run as a script, so importing this
# module has no side effects.
if __name__ == '__main__':
    main()
|