gir/gir.js

957 lines
24 KiB
JavaScript

'use strict';
// ------------------------------------------------------------------
// Definitions
// ------------------------------------------------------------------
// Use symbols for the names of the instructions
// Unsure if this helps in any way over strings, but I feel it neater
// +++++ → {type: add, value: 5}
// Can have offset property
const add = Symbol('add');
// > → {type: moveHead, value: 1}
const moveHead = Symbol('moveHead');
// . → {type: writeByte}
// Can have offset property
const writeByte = Symbol('writeByte');
// , → {type: readByte}
// Can have offset property
const readByte = Symbol('readByte');
// [-] → {type: loop, contents: [{type: add, value: -1}]}
// Can have isBalanced property
const loop = Symbol('loop');
// [-] → {type: clear}
// Can have offset property
const clear = Symbol('clear');
// [>+>++<<-] → {type: move, changes: Map { 0 → -1, 1 → 1, 2 → 2 }}
const multiply = Symbol('multiply');
// {type: jumpIfZero, target: 5}
const jumpIfZero = Symbol('jumpIfZero');
// {type: jumpIfNonZero, target: 2}
const jumpIfNonZero = Symbol('jumpIfNonZero');
// : → {type: writeInt}
// Can have offset property
const writeInt = Symbol('writeInt');
// ; → {type: readInt}
// Can have offset property
const readInt = Symbol('readInt');
// # → {type: breakPoint}
const breakPoint = Symbol('breakPoint');
class ParsingError extends Error {}
class UnknownIRError extends Error {}
// ------------------------------------------------------------------
// Parsing
// ------------------------------------------------------------------
// (string, bool) → [commandObjects]
// enableExtensions contols whether commands :;# are recognized
// May throw ParsingError
function parse(program, enableExtensions = true) {
// (string, int, bool) → {parsed: [commandObjects], lastIndex: int}
// index is the index of the next character to consume
// inLoop tells whether we're parsing a loop or the top level
// lastIndex is the last index the function consumed
function constructTree(program, index = 0, inLoop = false) {
let commands = [];
// Move this out of the loop body since we need to return
// the index of the last character we parsed
let i = index;
for(;;) {
if(i >= program.length) {
// If we're parsing a loop, we have a
// missing ]
// If we're parsing the top level, this is
// where we should exit
if(inLoop) {
throw new ParsingError('Missing ]');
} else {
break;
}
i++;
} else if(program[i] == ']') {
// If we're parsing a loop, this is where we
// should exit
// If we're parsing the top level, we have a
// missing [
if(inLoop) {
break;
} else {
throw new ParsingError('Missing [');
}
i++;
} else if(program[i] == '+' || program[i] == '-') {
// Fold a run of +s and -s into one node
let value = 0;
for(; i < program.length; i++) {
if(program[i] == '+') {
value++;
} else if(program[i] == '-') {
value--;
} else {
// Reached end of the run
break;
}
}
// Only add the command is value is not 0
if(value != 0) {
commands.push({type: add, value: value});
}
// i is not incremented, since it already
// points to a location containig a char we
// have not yet handled
} else if(program[i] == '<' || program[i] == '>') {
// Fold a run of <s and >s into one node
let value = 0;
for(; i < program.length; i++) {
if(program[i] == '>') {
value++;
} else if(program[i] == '<') {
value--;
} else {
// Reached end of the run
break;
}
}
// Only add the command is value is not 0
if(value != 0) {
commands.push({type: moveHead, value: value});
}
// see +/- for why we don't increment i
} else if(program[i] == '.') {
commands.push({type: writeByte});
i++;
} else if(program[i] == ',') {
commands.push({type: readByte});
i++;
} else if(program[i] == '[') {
// Parse a loop. This is done by calling the
// same parser function recursively
// Due to this the loop appears as one node
// in the parsed result
let {parsed, lastIndex} = constructTree(
program, // Same program data
i + 1, // Start at the next char
true // We're parsing a loop
);
commands.push({type: loop, contents: parsed});
// Since lastIndex was consumed by the inner
// function, we don't want to consume it a
// second time
i = lastIndex + 1;
} else if(program[i] == ':' && enableExtensions) {
commands.push({type: writeInt});
i++;
} else if(program[i] == ';' && enableExtensions) {
commands.push({type: readInt});
i++;
} else if(program[i] == '#' && enableExtensions) {
commands.push({type: breakPoint});
i++;
} else {
// All others characters are comments,
// ignore them
i++;
}
}
return {parsed: commands, lastIndex: i};
}
// We only care about the parsed contents, since under normal
// operarion we only get out of the loop if we've reached the end
// of the program
let {parsed} = constructTree(program);
return parsed;
}
// ([commandObjects/offsetCommandObjects/flatCommandObjects]) → str
function prettifyIR(ir) {
// ([commandObjects/offsetCommandObjectsflatCommandObjects], string) → str
function worker(ir, indent = '') {
let lines = [];
for(let i = 0; i < ir.length; i++) {
let command = ir[i];
let line = `${indent}${i} `;
if(command.type == add) {
line += `add ${command.value}`;
if('offset' in command) {
line += ` (${command.offset})`;
}
lines.push(line);
} else if(command.type == moveHead) {
line += `moveHead ${command.value}`;
lines.push(line);
} else if(command.type == writeByte) {
line += 'writeByte';
if('offset' in command) {
line += ` (${command.offset})`;
}
lines.push(line);
} else if(command.type == readByte) {
line += 'readByte';
if('offset' in command) {
line += ` (${command.offset})`;
}
lines.push(line);
} else if(command.type == loop) {
line += 'loop';
if('isBalanced' in command) {
line += ` (balanced: ${command.isBalanced})`;
}
lines.push(line);
lines = lines.concat(worker(command.contents, indent + ' '));
} else if(command.type == clear) {
line += 'clear';
if('offset' in command) {
line += ` (${command.offset})`;
}
lines.push(line);
} else if(command.type == multiply) {
let changes = [];
for(let [offset, value] of command.changes.entries()) {
changes.push(`${offset}: ${value}`);
}
line += `multiply ${changes.join(', ')}`;
lines.push(line);
} else if(command.type == jumpIfZero) {
line += `jumpIfZero ${command.target}`;
lines.push(line);
} else if(command.type == jumpIfNonZero) {
line += `jumpIfNonZero ${command.target}`;
lines.push(line);
} else if(command.type == writeInt) {
line += 'writeInt';
if('offset' in command) {
line += ` (${command.offset})`;
}
lines.push(line);
} else if(command.type == readInt) {
line += 'readInt';
if('offset' in command) {
line += ` (${command.offset})`;
}
lines.push(line);
} else if(command.type == breakPoint) {
line += 'breakPoint';
lines.push(line);
} else {
line += `unknown ${command.type.toString()}`;
lines.push(line);
}
}
return lines
}
return worker(ir).join('\n');
}
// ------------------------------------------------------------------
// Optimization passes
// ------------------------------------------------------------------
// ([commandObjects]) → [commandObjects]
function joinAdjacentOps(parsed) {
// ([commandObjects], commandType) → [commandObjects]
function worker(parsed, type) {
let optimized = [];
let prevOfType = false, value = 0;
for(let command of parsed) {
if(prevOfType && command.type == type) {
// Update value, don't add to optimized yet
value += command.value;
} else if(!prevOfType && command.type == type) {
// Start of a possible run of commands
prevOfType = true;
value = command.value;
} else {
// If a run has ended, add it to optimized
// However, skip it if value is 0
if(prevOfType && value != 0) {
optimized.push({type: type, value: value});
}
// Add the command for this round
if(command.type == loop) {
// Recurse
optimized.push({type: loop,
contents: worker(
command.contents,
type)});
} else {
optimized.push(command);
}
prevOfType = false;
}
}
// Did we end with a command of given type
if(prevOfType) {
// Yes, add it to optimized (unless value is 0)
if(value != 0) {
optimized.push({type: type, value: value});
}
}
return optimized;
}
return worker(worker(parsed, moveHead), add);
}
// ([commandObjects]) → [commandObjects]
function transformClearLoops(parsed) {
let optimized = [];
for(let command of parsed) {
// Only match loops like [-] or [+]
let isClearLoop = command.type == loop &&
command.contents.length == 1 &&
command.contents[0].type == add &&
(command.contents[0].value == 1 ||
command.contents[0].value == -1);
if(isClearLoop) {
optimized.push({type: clear});
} else if(command.type == loop) {
// Run for inner loops
optimized.push({type: loop,
contents: transformClearLoops(command.contents)});
} else {
optimized.push(command);
}
}
return optimized;
}
// ([commandObjects]) → [offsetCommandObjects]
function addOffsetProperties(parsed) {
// ([commandObjects]) → {offsetted: [offsetCommandObjects], isBalanced: bool}
function worker(parsed) {
let offsetted = [];
let isBalanced = true;
let headChange = 0;
let offset = 0;
for(let command of parsed) {
if(command.type == add) {
offsetted.push({type: add,
value: command.value,
offset: offset});
} else if(command.type == moveHead) {
offset += command.value;
} else if(command.type == writeByte) {
offsetted.push({type: writeByte,
offset: offset});
} else if(command.type == readByte) {
offsetted.push({type: readByte,
offset: offset});
} else if(command.type == clear) {
offsetted.push({type: clear,
offset: offset});
} else if(command.type == loop) {
// A loop should be self-contained
// If offset is not 0, add a moveHead
if(offset != 0) {
offsetted.push({type: moveHead,
value: offset});
// Mark we've moved the head
headChange += offset;
}
offset = 0;
// Run optimization on the loop
let result = worker(command.contents);
// We're only balanced if our loops are
isBalanced = isBalanced && result.isBalanced;
offsetted.push({type: loop,
contents: result.offsetted,
isBalanced: result.isBalanced});
// headChange's value becomes invalid if the
// loop is not balanced. However, we only
// care about its value when figuring out
// our isBalanced, which will be forced to
// false if any inner loop is not balanced
} else if(command.type == writeInt) {
offsetted.push({type: writeInt,
offset: offset});
} else if(command.type == readInt) {
offsetted.push({type: readInt,
offset: offset});
} else if(command.type == breakPoint) {
// A breakpoint should have the tape head
// where it would be without the offsets
// If offset is not 0, add a moveHead
if(offset != 0) {
offsetted.push({type: moveHead,
value: offset});
// Mark we've moved the head
headChange += offset;
}
offset = 0;
// Add the breakpoint
offsetted.push({type: breakPoint});
} else {
throw new UnknownIRError(
`Unknown command ${command.type.toString()}`);
}
}
// We need to move the tape head in the end anyways, so
// generate moveHead is offseet is not 0
if(offset != 0) {
offsetted.push({type: moveHead,
value: offset});
}
// We're only balanced if relative to start of the loop ends
// up as 0
isBalanced = isBalanced && offset + headChange == 0;
return {offsetted, isBalanced};
}
return worker(parsed).offsetted;
}
// ([offsetCommandObjects]) → [offsetCommandObjects]
function transformMultiplyLoops(offsetted) {
let optimized = [];
for(let command of offsetted) {
let isMultiplyLoop = false;
// Not necessarily a multiply loop, since multiply loops,
// in addition to being balanced loops with only adds,
// also decrement / increment the cell under tape head
// by one. However, these are loops we can run through the
// next processing step, and we can drop unfit ones after
// that
let maybeMultiplyLoop = command.type == loop &&
command.isBalanced &&
command.contents.every(x => x.type == add);
// TODO: Change this to use a Proxy thingie
let changes = new Map();
if(maybeMultiplyLoop) {
for(let addition of command.contents) {
// We already know all of these are adds
if(!changes.has(addition.offset)) {
changes.set(addition.offset, 0);
}
let current = changes.get(addition.offset);
changes.set(addition.offset,
current + addition.value);
}
// Did we actually have a multiply loop?
isMultiplyLoop = changes.has(0) &&
(changes.get(0) == 1 ||
changes.get(0) == -1);
}
if(isMultiplyLoop) {
// If changes[0] is 1, we are dealing with
// a loop of the type [>-<+], which is
// (except for run time) same as [>+<-].
// Transform former into latter
if(changes.get(0) == 1) {
for(let [offset, value] of changes.entries()) {
changes.set(offset, -value);
}
}
optimized.push({type: multiply,
changes: changes});
} else if(command.type == loop) {
// Recurse
optimized.push({type: loop,
contents: transformMultiplyLoops(command.contents),
balanced: command.balanced});
} else {
// Pass through
optimized.push(command);
}
}
return optimized;
}
// ([offsetCommandObjects]) → [flatCommandObjects]
function flattenLoops(offsetted) {
// ([offsetCommandObjects], int) → [flatCommandObjects]
// prevLength tells length of the flattened program up until now
function worker(offsetted, prevLength = 0) {
let flattened = [];
for(let command of offsetted) {
if(command.type == loop) {
// flattened.length is the index of the next
// command in out flattened
// flattened.length + prevLength is the
// index of it in the resulting combined
// flattened
// Since this should be the index of the
// start of the loop body we want to point
// after the next command, which is going
// to be the jump
let startIndex = flattened.length +
prevLength + 1;
let loopBody = worker(command.contents,
startIndex // length = index of next
);
// startIndex + loopBody.length is the index
// of the next command after the loop body
// The command after it is going to be the
// jump back to the start of the body, so we
// want to point to the command after it
let endIndex = startIndex +
loopBody.length + 1;
// Add the first jump
flattened.push({type: jumpIfZero,
target: endIndex});
// Add the loop body
flattened = flattened.concat(loopBody);
// Add the second loop
flattened.push({type: jumpIfNonZero,
target: startIndex});
} else {
flattened.push(command);
}
}
return flattened;
}
return worker(offsetted);
}
// ([commandObjects]) → [flatCommandObjects]
function optimize(parsed) {
const optimizations = [
joinAdjacentOps,
transformClearLoops,
addOffsetProperties,
transformMultiplyLoops,
flattenLoops
]
return optimizations.reduce((IR, optimization) =>
optimization(IR), parsed);
}
// ------------------------------------------------------------------
// Virtual machine
// ------------------------------------------------------------------
// ([flatCommandObject], [int], int/null) → girVMState
// onEof tells what to set the cell to in case of EOF, or if it null to keep
// the same value
function newVM(program, input, onEof = 0) {
return {
// Initial state for the machine
program: program,
ip: 0,
memory: new Map(),
tapeHead: 0,
input: input,
output: [],
// Configuration
onEof: onEof
};
}
// (girVMState, int/null) → {state: girVMState, complete: bool, cycles: int,
// intParseFailed: bool, breakPointReached: bool,
// lastIndex: int/null}
// complete is set to true is the program completed its execution
// cycles is the number of cycles the VM ran
// intParseFailed is true is parseInt failed to read a valid number
// breakPointReached is true if a breakPoint command was executed
// lastIndex tells the last memory index accessed by the VM
// If maxCycles is null, the program runs until completion
function runVM(state, maxCycles = null) {
// (int) → int
function readMemory(index) {
// Return 0 if index doesn't exist
if(memory.has(index)) {
return memory.get(index);
} else {
return 0;
}
}
let program = state.program;
let ip = state.ip;
// Create a copy of the memory, since we're going to modify it
// TODO: Make memory into a Proxied thing that returns 0 if it
// doesn't have the requested cell
let memory = new Map(state.memory.entries());
let tapeHead = state.tapeHead;
// Create copies of input and output, since we might modify them
let input = state.input.slice();
let output = state.output.slice();
// Flags we want to return
let complete = false;
let intParseFailed = false;
let breakPointReached = false;
// Debug features
let lastIndex = null;
let cycle = 0;
for(; maxCycles === null || cycle < maxCycles; cycle++) {
// Exit the loop if we run to the end of the program
if(ip >= program.length) {
// Program completed
complete = true;
break;
}
let command = program[ip];
// Calculate the index into the array of the cell we're
// accessing
let index = tapeHead;
switch(command.type) {
case add:
case writeByte:
case readByte:
case clear:
case writeInt:
case readInt:
// These have an offset property, add it
index += command.offset;
}
lastIndex = index;
// Run the command
switch(command.type) {
case add:
let old = readMemory(index);
memory.set(index, (command.value + old) & 0xFF);
ip++;
break;
case moveHead:
tapeHead += command.value;
// Set lastIndex to the new tape head
// position. Technically we do not access
// the cell, but otherwise it will point
// to the cell we were in previously, so
// this allows better debugging
lastIndex = tapeHead;
ip++;
break;
case writeByte:
output.push(readMemory(index));
ip++;
break;
case readByte:
// Have we reached EOF?
if(input.length == 0) {
// Yes
// If state.onEof is null, don't
// change the value
// If it's something else, set the
// cell to that
if(state.onEof !== null) {
memory.set(index,
state.onEof & 0xff);
}
} else {
// No, return character
memory.set(index, input.shift());
}
ip++;
break;
case clear:
memory.set(index, 0);
ip++;
break;
case multiply:
if(command.changes.get(0) != -1) {
throw new UnknownIRError(
`multiply where change for 0 is ${command.changes.get(0)}`);
}
let multiplier = readMemory(index);
for(let [offset, change] of command.changes.entries()) {
let index = tapeHead + offset;
let old = readMemory(index);
let value = old + multiplier * change;
memory.set(index, value & 0xff);
}
ip++;
break;
case jumpIfZero:
if(readMemory(index) == 0) {
ip = command.target;
} else {
ip++;
}
break;
case jumpIfNonZero:
if(readMemory(index) != 0) {
ip = command.target;
} else {
ip++;
}
break;
case writeInt:
let outputStr = readMemory(index).toString();
output.push(...encodeUTF8(outputStr))
ip++;
break;
case readInt:
// Skip any spaces in front
while(input.length > 0 && input[0] == 0x20) {
input.shift();
}
let number = 0;
// Read digits
let consumedInput = false;
while(input.length > 0 &&
input[0] >= 0x30 &&
input[0] <= 0x39) {
let digit = input.shift() - 0x30;
number = number * 10 + digit;
consumedInput = true;
}
// Did we read anything
if(consumedInput) {
// Yes, clamp value to [0, 255] and
// set cell to it
number = Math.max(
Math.min(number, 255),
0);
memory.set(index, number);
} else if(input.length == 0) {
// No, but there was an EOF
// If state.onEof is null, don't
// change the value
// If it's something else, set the
// cell to that
if(state.onEof !== null) {
memory.set(index,
state.onEof & 0xff);
}
} else {
// No, and there wasn't an EOF, so
// signal an error
intParseFailed = true;
}
ip++;
break;
case breakPoint:
breakPointReached = true;
ip++;
break;
default:
// Unknown command type
throw new UnknownIRError(
`Unknown command ${command.type.toString()}`);
}
// Since can't use 'break' from a switch(), do it here
if(intParseFailed || breakPointReached) {
break;
}
}
let newState = {
program,
ip,
memory,
tapeHead,
input,
output
};
return {state: newState, complete: complete, cycles: cycle,
intParseFailed: intParseFailed,
breakPointReached: breakPointReached,
lastIndex: lastIndex};
}
// ------------------------------------------------------------------
// UTF-8
// ------------------------------------------------------------------
// string → [int]
function encodeUTF8(string) {
let encoded = [];
for(let character of string) {
let codepoint = character.codePointAt(0);
if(codepoint < 0x80) {
// 0xxxxxxx
encoded.push(codepoint);
} else if(codepoint < 0x0800) {
// 110xxxxx 10xxxxxx
let b1 = codepoint >> 6 | 0b11000000;
let b2 = codepoint & 0b00111111 | 0b10000000;
encoded.push(b1);
encoded.push(b2);
} else if(codepoint < 0x10000) {
// 1110xxxx 10xxxxxx 10xxxxxx
let b1 = codepoint >> 12 | 0b11100000;
let b2 = codepoint >> 6 & 0b00111111 | 0b10000000;
let b3 = codepoint & 0b00111111 | 0b10000000;
encoded.push(b1);
encoded.push(b2);
encoded.push(b3);
} else {
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
let b1 = codepoint >> 18 | 0b11110000;
let b2 = codepoint >> 12 & 0b00111111 | 0b10000000;
let b3 = codepoint >> 6 & 0b00111111 | 0b10000000;
let b4 = codepoint & 0b00111111 | 0b10000000;
encoded.push(b1);
encoded.push(b2);
encoded.push(b3);
encoded.push(b4);
}
}
return encoded;
}
// [int] → string
function decodeUTF8(encoded) {
let codePoints = [];
for(let i = 0; i < encoded.length;) {
let codePoint = 0;
let firstByte = encoded[i];
i++;
let toRead = null;
// Determine number of continuation bytes to read and
// decode the first byte into codePoint
// Since we'll do the shifts later, we just mask here
if(firstByte >> 7 == 0) {
// 0xxxxxxx
toRead = 0;
codePoint = firstByte;
} else if(firstByte >> 5 == 0b110) {
// 110xxxxx 10xxxxxx
toRead = 1;
codePoint = firstByte & 0b00011111;
} else if(firstByte >> 4 == 0b1110) {
// 1110xxxx 10xxxxxx 10xxxxxx
toRead = 2;
codePoint = firstByte & 0b00001111;
} else if(firstByte >> 3 == 0b11110) {
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
toRead = 3;
codePoint = firstByte & 0b00000111;
} else {
// Illegal sequence, push replacement char
codePoints.push(0xFFFD);
continue;
}
for(; toRead > 0 && i < encoded.length; toRead--) {
let continuationByte = encoded[i];
i++;
// Check that we have a valid continuation byte
if(continuationByte >> 6 == 0b10) {
// We do, add its contents to codePoint
codePoint = codePoint << 6 |
continuationByte & 0b00111111;
} else {
// We don't, break out of the loop
break;
}
}
// Did we read all required continuation bytes?
if(toRead == 0) {
// We did, add the codepoint to the array
codePoints.push(codePoint);
} else {
// We didn't, push replacement char
codePoints.push(0xFFFD);
}
}
// Convert to a string
let decoded = codePoints.map(x => String.fromCodePoint(x)).join('');
return decoded;
}
// ------------------------------------------------------------------
// User-facing functions
// ------------------------------------------------------------------
// (string, bool) → [flatCommandObjects]
function compile(program, enableExtensions = true) {
return optimize(parse(program, enableExtensions));
}
exports.compile = compile;
exports.prettifyIR = prettifyIR;
exports.newVM = newVM;
exports.runVM = runVM;
exports.encodeUTF8 = encodeUTF8;
exports.decodeUTF8 = decodeUTF8;