We shall compose a sonnet or two together!

This commit is contained in:
zgrep 2021-07-10 23:21:12 -04:00
parent 855375ea18
commit d58a32d07d
2 changed files with 207 additions and 0 deletions

24
happybot/compose/compose.sh Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env sh
# Compose-key lookup: for every "compose <query>" request read from `irc`,
# find the query in chars.txt (or, failing that, comments.txt) and reply with
# the same-numbered lines of sequences.txt.
# NOTE(review): irc, hreg, m, var, zwsp and say are not defined in this script;
# presumably they are provided by the sourced common.sh -- confirm there.
. /home/zgrep/offtopiabday/happybot/common.sh
# Relative path: resolved against the working directory in effect when the
# grep/sed commands below run.
dir="happybot/compose"
# Each line from `irc` is split into its first word (n) and the rest (m);
# n is reused below as the prefix of the reply.
irc | while read -r n m; do
# Pattern: optional "@", then "happybot" followed by ":" or "," and a space
# (or just a leading "!"), then "compose" or "composekey", a space, the query.
if hreg '^(?:@?happybot[:,] |!)compose(?:key)? (.+)$' "$m"; then
# presumably `m 1` yields capture group 1 (the query) -- verify in common.sh
query="$(m 1)"
echo "Got: $query"
# grep -Fin prints "N:line" for each line containing the query as a fixed,
# case-insensitive string; sed rewrites that to "Np" and tr joins the results
# with ";", producing a sed script such as "3p;17p;".
lines="$(grep -Fin -e "$query" "$dir/chars.txt" | sed 's/:.*$/p/' | tr '\n' ';')"
if [ -z "$lines" ]; then
echo "| Did not match chars.txt."
# Nothing found in chars.txt: run the same search against comments.txt.
lines="$(grep -Fin -e "$query" "$dir/comments.txt" | sed 's/:.*$/p/' | tr '\n' ';')"
fi
echo "| Lines: $lines"
if [ -z "$lines" ]; then
echo "| No results."
var "$n: No results." | zwsp | say
else
# Print the selected line numbers of sequences.txt, append " | " to every line,
# join them into one line, then replace the trailing " | " with "\n"
# (a newline under GNU sed -- NOTE(review): not portable to every sed).
var "$n: $(sed -n "$lines" "$dir/sequences.txt" | sed 's/$/ | /' | tr -d '\n' | sed 's/ | $/\n/')" | zwsp | say
fi
fi
done;

183
happybot/compose/update.py Normal file
View File

@ -0,0 +1,183 @@
from urllib.request import urlopen
from collections import defaultdict
from copy import deepcopy
from os import chdir
import string
import re
import unicodedata
def keysym_names(lines=None):
    """Map X11 keysym names to the character and description in keysymdef.h.

    Only lines of the form
    ``#define XK_<name> 0x<hex> /* U+<code> <description> */`` are used.
    The parenthesised variant ``/*(U+<code> <description>)*/`` is accepted
    too, and its closing parenthesis is not made part of the description.

    Args:
        lines: Optional iterable of raw ``bytes`` lines of a keysymdef.h
            file.  When omitted, the header is downloaded from
            cgit.freedesktop.org.

    Returns:
        A dict mapping each keysym name (without the ``XK_`` prefix) to a
        ``(character, description)`` tuple.
    """
    if lines is None:
        with urlopen('https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h') as web:
            return keysym_names(web)

    keysym = re.compile(
        r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*'
        r'/\*([ (])U\+([0-9A-F]+) (.+?)\s*\*/\s*$'
    )
    result = {}
    for raw in lines:
        m = keysym.match(raw.decode('utf-8', 'ignore').strip())
        if not m:
            continue
        name, opener, codepoint, comment = m.groups()
        # "/*(U+003C LESS-THAN SIGN)*/": the lazy description group swallows
        # the ")" that closes the parenthesised form, so drop it again.
        if opener == '(' and comment.endswith(')'):
            comment = comment[:-1].rstrip()
        result[name] = (chr(int(codepoint, 16)), comment)
    return result
def _strip_block_comments(line, in_comment):
    """Remove C-style ``/* ... */`` spans from one line.

    Args:
        line: The text of the line.
        in_comment: True when a comment opened on an earlier line is still
            open at the start of this line.

    Returns:
        ``(text, in_comment)``: the stripped text outside any comment, and
        whether a comment is still open at the end of the line.
    """
    kept = []
    rest = line
    while True:
        if in_comment:
            end = rest.find('*/')
            if end < 0:
                break
            rest = rest[end + 2:]
            in_comment = False
        else:
            start = rest.find('/*')
            if start < 0:
                kept.append(rest)
                break
            kept.append(rest[:start])
            rest = rest[start + 2:]
            in_comment = True
    return ''.join(kept).strip(), in_comment


def compose_keys(lines=None):
    """Parse the ``<Multi_key>`` sequences of an X11 Compose file.

    Lines are matched after ``/* ... */`` comments have been removed; every
    line of the form ``<Multi_key> <k1> <k2> ... : "result" [name] # comment``
    contributes one sequence.  Backslash escapes inside the quoted result are
    resolved.

    Args:
        lines: Optional iterable of raw ``bytes`` lines of a Compose file.
            When omitted, ``Compose.pre`` for en_US.UTF-8 is downloaded from
            cgit.freedesktop.org.

    Returns:
        A tuple ``(char_to_sequence, char_to_name, name_to_char,
        char_to_comment)``:

        * ``char_to_sequence``: defaultdict, result string -> list of key
          sequences, each a list of key names without the angle brackets.
        * ``char_to_name``: defaultdict, result string -> set of names.
        * ``name_to_char``: dict, name -> result string.
        * ``char_to_comment``: dict, result string -> trailing comment.

    Raises:
        AssertionError: If one name is given two different results, or one
            result is given two different comments.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    if lines is None:
        with urlopen('https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre') as web:
            return compose_keys(web)

    compose_line = re.compile(r'^<Multi_key>((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)')
    key_name = re.compile(r'<([a-zA-Z_0-9]+)>')
    char_to_sequence = defaultdict(list)
    char_to_name = defaultdict(set)
    char_to_comment = {}
    name_to_char = {}

    in_comment = False
    for raw in lines:
        # 'strict' is the real name of the raising error handler.
        text = raw.decode('utf-8', 'strict').strip()
        line, in_comment = _strip_block_comments(text, in_comment)
        m = compose_line.match(line)
        if not m:
            continue
        sequence, char, name, comment = m.groups()
        char = re.sub(r'\\(.)', r'\1', char)
        # findall copes with any amount of whitespace between the keys.
        char_to_sequence[char].append(key_name.findall(sequence))
        if name:
            char_to_name[char].add(name)
            known_char = name_to_char.setdefault(name, char)
            if known_char != char:
                raise AssertionError(
                    f'Line: {line}\nName: {name}\n'
                    f'Had char: {known_char}\nGiven char: {char}'
                )
        known_comment = char_to_comment.setdefault(char, comment)
        if known_comment != comment:
            raise AssertionError(
                f'Line: {line}\nChar: {char}\n'
                f'Had comment: {known_comment}\nGiven comment: {comment}'
            )
    return char_to_sequence, char_to_name, name_to_char, char_to_comment
def merged():
    """Merge the Compose table with keysym data and make it displayable.

    Steps, in order:

    1. Add every keysym from ``keysym_names()`` to the tables returned by
       ``compose_keys()``; differing descriptions for one character are
       joined with ``;``.
    2. Map the names of the space character to U+2423 and ``Tab`` to U+21E5
       so these keys are visible when printed.
    3. Resolve ``U<hex>`` key names to their code point and drop every
       sequence that still contains a key without a known character; results
       left without any sequence are removed entirely.
    4. Prefix combining characters with U+25CC (dotted circle).

    Returns:
        The tuple ``(char_to_sequence, char_to_name, name_to_char,
        char_to_comment)`` with the same structure as ``compose_keys()``.

    Raises:
        AssertionError: If a keysym name maps to different characters in the
            Compose data and in keysymdef.h.
        ValueError: If one of the display symbols (U+2384, U+2423, U+21E5)
            is itself a named character or occurs as a key in a sequence.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = compose_keys()

    for name, (char, comment) in keysym_names().items():
        if name in name_to_char:
            if name_to_char[name] != char:
                raise AssertionError(
                    f'Name: {name}\nCompose char: {name_to_char[name]}\n'
                    f'Keysym char: {char}'
                )
        else:
            name_to_char[name] = char
        char_to_name[char].add(name)
        if char not in char_to_comment:
            char_to_comment[char] = comment
        elif char_to_comment[char] != comment:
            char_to_comment[char] += ';' + comment

    # Compose symbol: U+2384 is printed for the compose key itself by main(),
    # so it must not also be a named character.
    if '\u2384' in char_to_name:
        raise ValueError('Please use different symbol for compose key.')

    # Space symbol:
    space_names = char_to_name['\u2423']
    for name in char_to_name[' ']:
        name_to_char[name] = '\u2423'

    # Tab symbol:
    tab_sym_names = char_to_name['\u21e5']
    name_to_char['Tab'] = '\u21e5'

    # Filter out sequences that have keys we don't know how to display nicely.
    # Includes stuff like deadkeys.
    codepoint_key = re.compile(r'U[0-9a-fA-F]+')
    for char in list(char_to_sequence):
        displayable = []
        for sequence in char_to_sequence[char]:
            if any(key in space_names for key in sequence):
                raise ValueError('Please switch to using \\u2420 (\u2420) for space.')
            if any(key in tab_sym_names for key in sequence):
                # This collision concerns the tab symbol, not space.
                raise ValueError('Please switch to using \\u2409 (\u2409) for tab.')
            for key in sequence:
                if key not in name_to_char and codepoint_key.fullmatch(key):
                    name_to_char[key] = chr(int(key[1:], 16))
            if all(key in name_to_char for key in sequence):
                displayable.append(sequence)
        if displayable:
            char_to_sequence[char] = displayable
        else:
            del char_to_sequence[char]

    # Add spaces so that combining keys become more obvious as combining keys...
    # ...but this is still not obvious, but oh well.
    for name, char in list(name_to_char.items()):
        # unicodedata.combining() only accepts a single character.
        if len(char) == 1 and unicodedata.combining(char):
            name_to_char[name] = '\u25cc' + char

    return char_to_sequence, char_to_name, name_to_char, char_to_comment
def uni(char, zf=4):
    """Return the code point of *char* as uppercase hex, zero-padded to *zf* digits."""
    digits = format(ord(char), 'X')
    return digits.zfill(zf)
def main():
    """Regenerate comments.txt, chars.txt and sequences.txt.

    The three files are written into
    ``/home/zgrep/offtopiabday/happybot/compose/`` and are line-aligned:
    line N of each file belongs to the same character.

    * comments.txt: the character's names (sorted, so the output is stable
      between runs), its comment and, for single characters, ``U+XXXX`` and
      ``UXXXX``, all joined with ``;``.
    * chars.txt: the character itself.
    * sequences.txt: the character followed by its compose sequences, each
      introduced by U+2384 and separated by spaces.

    The files are written as UTF-8 regardless of the locale, since they
    contain non-ASCII characters.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = merged()

    comment_lines = []
    char_lines = []
    sequence_lines = []
    for char, sequences in char_to_sequence.items():
        comment_line = ';'.join(sorted(char_to_name[char])) + ';' + char_to_comment[char]
        if len(char) == 1:
            comment_line += ';U+' + uni(char) + ';U' + uni(char)
        comment_lines.append(comment_line + '\n')
        char_lines.append(char + '\n')
        # NOTE(review): the empty-string separator after the character is kept
        # exactly as found -- confirm that no separator character was intended.
        sequence_lines.append(char + '' + ' '.join(
            '\u2384' + ''.join(name_to_char[name] for name in sequence)
            for sequence in sequences
        ) + '\n')

    chdir('/home/zgrep/offtopiabday/happybot/compose/')
    with open('comments.txt', 'w', encoding='utf-8') as fh:
        fh.write(''.join(comment_lines))
    with open('chars.txt', 'w', encoding='utf-8') as fh:
        fh.write(''.join(char_lines))
    with open('sequences.txt', 'w', encoding='utf-8') as fh:
        fh.write(''.join(sequence_lines))
    print('Success!')
# TODO: Deadkey combinations decoding?
# TODO: Allow deadkeys in compose-key combinations?
# Regenerate the data files only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()