// cpu-simulator/js/assembler copy.js
//
// 295 lines
// 11 KiB
// JavaScript
// Executable File

/**
 * Array.prototype.findIndexes(where): returns the indexes of every element
 * for which `where(element, index)` is truthy, in ascending order.
 *
 * Defined as a non-enumerable property so that it does not leak into
 * `for...in` loops over arrays (a plain assignment would be enumerable).
 * `writable`/`configurable` stay true, matching what assignment would allow.
 *
 * @param {function(*, number): *} where - predicate called with (element, index).
 * @returns {number[]} indexes of the matching elements (empty when none match).
 */
Object.defineProperty(Array.prototype, 'findIndexes', {
  value: function findIndexes(where) {
    const matches = [];
    // forEach skips holes in sparse arrays, the same way reduce does.
    this.forEach((element, index) => {
      if (where(element, index)) {
        matches.push(index);
      }
    });
    return matches;
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * Assembler that turns source text into a flat array of byte values.
 * Every instruction is encoded as two bytes.
 *
 * Relies on a global `instructions` table defined elsewhere, from which it
 * reads `opcodes`, `flags`, `registers`, `arg_patterns` and `match`.
 *
 * Errors are reported by throwing plain strings (not Error objects); this is
 * kept so existing `catch` handlers that display the thrown value still work.
 *
 * Known limitations (unchanged):
 *  - Code labels are resolved in a single pass, so an instruction can only
 *    reference a code label attached to an EARLIER instruction line.
 *  - Label references are found by substring search inside the operand.
 *  - The data section start address is an estimate (see processData).
 */
class assembler {
  labels = []; // { label, line } entries collected by processLabels
  labelAddresses = []; // { label, address } entries resolved while assembling
  address = 0; // not read by any method of this class
  curLine = 0; // index of the line most recently processed
  offset = 0; // account for any instructions the assembler needs to add

  /**
   * @param {number} address
   * @returns {number} the address shifted right by 8 bits (the page number).
   */
  upper8(address) {
    return address >> 8;
  }

  /**
   * @param {number} address
   * @returns {number} the low 8 bits of the address.
   */
  lower8(address) {
    return address & 0x00FF;
  }

  /**
   * Parses a numeric operand into a byte value in the range 0..255.
   *
   * Accepts an optional `#`, an optional leading `~` (bitwise inversion of
   * the resulting byte), a `0b` binary literal, or anything `parseInt`
   * understands without an explicit radix (decimal and `0x` hexadecimal).
   * Negative values down to -128 are returned as their 8-bit two's
   * complement, e.g. "-1" -> 255.
   *
   * @param {string} n - operand text.
   * @returns {number} byte value, 0..255.
   * @throws {string} 'Invalid number' or 'Number will not fit in a byte!'.
   */
  intByte(n) {
    let text = n.replace("#", "");
    let invert = false;
    if (text[0] == "~") {
      invert = true;
      text = text.slice(1);
    }
    // No radix on the fallback parseInt on purpose: it keeps "0x.." working.
    const value = /^0b/i.test(text) ? parseInt(text.slice(2), 2) : parseInt(text);
    if (Number.isNaN(value)) {
      throw 'Invalid number';
    }
    if (value > 255 || value < -128) {
      throw 'Number will not fit in a byte!';
    }
    // Masking yields the two's complement byte for negatives and is a no-op
    // for 0..255.
    const byte = value & 0xFF;
    return invert ? (~byte & 0xFF) : byte;
  }

  /**
   * Encodes one tokenised instruction into its two bytes.
   *
   * Byte 0: (opcode << 4) + (mode << 3) + destination register index.
   * Byte 1: for mode 0 the second operand's register index shifted left by
   *         5; otherwise the last operand parsed as an immediate byte.
   *
   * For `bflag`/`bnoflag` the flag name in operand 1 is mapped to the entry
   * of `instructions.registers` at the same index before encoding.
   * The `line` array passed in is not modified.
   *
   * @param {string[]} line - [mnemonic, operand, ...].
   * @returns {number[]} two byte values.
   * @throws {string} 'Unknown opcode', 'Invalid flag.', 'Wrong number of
   *   operands', 'Ill-formatted instruction', or an intByte error.
   */
  encodeInstruction(line) {
    const instruction = instructions.opcodes.find(
      (oc) => oc.mnemonic == line[0].toLowerCase(),
    );
    // Must be checked before anything dereferences `instruction`.
    if (!instruction) throw 'Unknown opcode';

    const tokens = [...line];
    if (instruction.mnemonic == "bflag" || instruction.mnemonic == "bnoflag") {
      const flag = instructions.flags.indexOf(tokens[1]);
      if (flag < 0) throw 'Invalid flag.';
      tokens[1] = tokens[1].replace(instructions.flags[flag], instructions.registers[flag]);
    }

    const patterns = instructions.arg_patterns[instruction.patterns];
    let mode = -1;
    // Only the first pattern's length is consulted for the operand count.
    if (patterns[0].pattern.length != tokens.length - 1) {
      throw 'Wrong number of operands';
    } else if (patterns.length == 1) {
      mode = patterns[0].mode;
    } else {
      // When several patterns fit, the last one listed wins.
      for (const candidate of patterns) {
        const fits = candidate.pattern.every((expected, i) => tokens[i + 1].match(expected));
        if (fits) mode = candidate.mode;
      }
    }
    if (mode == -1) throw 'Ill-formatted instruction';

    // first byte - opcode, addressing mode, destination register
    const destreg = instructions.registers.indexOf(tokens[1]);
    const first = (instruction.opcode << 4) + (mode << 3) + Math.max(destreg, 0);

    let second;
    if (mode == 0) {
      // register address in first 3 bits, the rest is 0
      second = instructions.registers.indexOf(tokens[2]) << 5;
    } else {
      // immediate data (remove #)
      second = this.intByte(tokens[tokens.length - 1].replace("#", ""));
    }
    // An unknown register (index -1) ends up encoded as 0.
    return [first, Math.max(second, 0)];
  }

  /**
   * Encodes every non-`.data` line and records the address of each label
   * that points at an instruction line.
   *
   * For `jsr`/`jmp` (operand 1) and `stb`/`ldb`/`mov`/`bflag`/`bnoflag`
   * (operand 2) the text "SKIP" is replaced by `#(current address + 4)` and
   * a known label name is replaced by `#<low 8 bits of its address>`. When
   * the label's page (upper8) differs from the page last selected, a
   * `mov ip, #page` (jumps) or `mov dp, #page` (data) instruction is emitted
   * first, which adds two bytes.
   *
   * A label is recorded after its own line has been encoded, so only labels
   * of earlier lines (and data labels from processData) can be referenced.
   *
   * @param {string[][]} instructions - token lines as returned by processLabels.
   * @returns {number[]} the encoded bytes.
   * @throws {string} any error thrown by encodeInstruction.
   */
  processInstructions(instructions) {
    const bytecode = [];
    let dataPage = 0;
    let instPage = 0;
    let instAddr = 0;

    const emit = (tokens) => {
      bytecode.push(...this.encodeInstruction(tokens));
    };

    // QUICK AND DIRTY: substring-based label substitution; a label whose
    // name occurs inside another operand will also match.
    const substituteLabel = (tokens, index) => {
      // Missing operand: leave it to encodeInstruction to reject the line.
      if (tokens[index] === undefined) return undefined;
      tokens[index] = tokens[index].replace("SKIP", '#' + (instAddr + 4));
      const target = this.labelAddresses.find((la) => tokens[index].includes(la.label));
      if (target) {
        tokens[index] = tokens[index].replace(target.label, "#" + this.lower8(target.address));
      }
      return target;
    };

    // TODO: `hlt` (jump to same instruction) and `nop` (jump to next
    // instruction) pseudo-instructions were sketched here but are disabled.
    instructions.forEach((instruction, i) => {
      const mnemonic = instruction[0].toLowerCase();
      this.curLine = i;
      // Data lines are laid out by processData and take no code space.
      if (mnemonic == ".data") return;

      const tokens = [...instruction];
      let offset = 0; // bytes added by an inserted page-switch instruction

      if (mnemonic == "jsr" || mnemonic == "jmp") {
        const target = substituteLabel(tokens, 1);
        if (target && this.upper8(target.address) != instPage) {
          instPage = this.upper8(target.address);
          emit(["mov", "ip", "#" + instPage]);
          offset = 2;
        }
      } else if (
        mnemonic == "stb" || mnemonic == "ldb" || mnemonic == "mov" ||
        mnemonic == "bflag" || mnemonic == "bnoflag"
      ) {
        const target = substituteLabel(tokens, 2);
        if (target && this.upper8(target.address) != dataPage) {
          dataPage = this.upper8(target.address);
          emit(["mov", "dp", "#" + dataPage]);
          offset = 2;
        }
      }
      emit(tokens);

      // Several labels may point at the same line; record all of them. The
      // address is the start of this line's output, including any inserted
      // page-switch instruction.
      for (const entry of this.labels) {
        if (entry.line == i) {
          this.labelAddresses.push({ label: entry.label, address: instAddr });
        }
      }
      instAddr += 2 + offset;
    });
    return bytecode;
  }

  /**
   * Assembles source text into bytes: code first, then zero padding up to
   * the data start address, then the `.data` bytes. When there is no data
   * only the code bytes are returned.
   *
   * Label state is reset on every call so an instance can be reused.
   *
   * @param {string} code - assembly source.
   * @returns {number[]} assembled byte values.
   * @throws {string} on any assembly error, including when the generated
   *   code is longer than the estimated data start address.
   */
  assemble(code) {
    this.labels = [];
    this.labelAddresses = [];
    this.curLine = 0;

    const tokens = this.tokenize(code).filter((line) => line.length > 0 && line[0] != '');
    const withPseudo = this.processAndOr(tokens);
    const delabeled = this.processLabels(withPseudo);
    // Data is laid out first so data labels are known while encoding code.
    const data = this.processData(delabeled);
    const bytecode = this.processInstructions(delabeled);
    if (data.bytes.length == 0) {
      return bytecode;
    }
    const padLength = data.start - bytecode.length;
    if (padLength < 0) {
      // Data label addresses were computed from data.start, so the image
      // would be inconsistent; report it instead of failing in Array().
      throw 'Code overruns the data section';
    }
    return bytecode.concat(Array(padLength).fill(0), data.bytes);
  }

  /**
   * Expands the `and` / `or` pseudo-instructions into `nand` sequences.
   * Runs before labels are stripped, so the mnemonic may be token 0 or,
   * when the line starts with a label, token 1. A leading label is kept on
   * the first emitted instruction.
   *
   *   and a, b      -> nand a, b ; nand a, a
   *   or  a, reg    -> nand a, a ; nand b, b ; nand a, b ; nand b, b
   *   or  a, imm/label -> nand a, a ; nand a, ~b
   *
   * @param {string[][]} tokenLines
   * @returns {string[][]} token lines with and/or replaced.
   * @throws {string} "error converting or" when the second operand of `or`
   *   is missing or is neither a register, an immediate nor a label.
   */
  processAndOr(tokenLines) {
    const processed = [];

    // `head` is [] or [labelToken]; `registerPattern` decides whether b is a
    // register. NOTE(review): the two forms use different register patterns
    // (instructions.match.reg vs. a literal); kept as found.
    const expandOr = (head, a, b, registerPattern) => {
      if (a === undefined || b === undefined) throw "error converting or";
      processed.push([...head, 'nand', a, a]); // NAND a, a
      const operand = b.toLowerCase();
      if (operand.match(registerPattern)) {
        processed.push(['nand', b, b]); // NAND b, b
        processed.push(['nand', a, b]); // NAND a, b
        processed.push(['nand', b, b]); // NAND b, b (restores b)
      } else if (operand.match(instructions.match.imm8) || operand.match(instructions.match.label)) {
        processed.push(['nand', a, "~" + b]); // NAND a, ~b
      } else {
        throw "error converting or";
      }
    };

    tokenLines.forEach((line) => {
      if (line.length < 2) {
        processed.push(line);
      } else if (line[0].toLowerCase() == "and") {
        processed.push(['nand', ...line.slice(1)]); // NAND a, b
        processed.push(['nand', line[1], line[1]]); // NAND a, a
      } else if (line[1].toLowerCase() == "and") {
        processed.push([line[0], 'nand', ...line.slice(2)]); // label: NAND a, b
        processed.push(['nand', line[2], line[2]]); // NAND a, a
      } else if (line[0].toLowerCase() == "or") {
        expandOr([], line[1], line[2], instructions.match.reg);
      } else if (line[1].toLowerCase() == "or") {
        expandOr([line[0]], line[2], line[3], /(ip|dp|sp|r[0-3])/);
      } else {
        processed.push(line);
      }
    });
    return processed;
  }

  /**
   * Strips leading `name:` label tokens and records each label in
   * `this.labels` together with the index (in the returned array) of the
   * line it refers to. A line consisting only of a label is removed and the
   * label refers to the next line that gets added.
   *
   * Label names consist of letters, digits and underscores.
   *
   * @param {string[][]} tokenLines
   * @returns {string[][]} the lines without label tokens.
   */
  processLabels(tokenLines) {
    const processed = [];
    for (const curLine of tokenLines) {
      if (!/^[A-Za-z0-9_]+:$/.test(curLine[0])) {
        processed.push(curLine);
        continue;
      }
      // The label points at whatever line is appended next.
      const lineNum = processed.length;
      if (curLine.length > 1 && curLine[1] != '') {
        // first token was label; next token will be opcode or data
        processed.push(curLine.slice(1));
      }
      this.labels.push({ label: curLine[0].slice(0, -1), line: lineNum });
    }
    return processed;
  }

  /**
   * Collects the bytes of all `.data` lines and registers the addresses of
   * labels that point at them in `this.labelAddresses`.
   *
   * The data start address is an estimate, because the number of
   * page-switch instructions the assembler will insert is not known yet:
   * two bytes per non-data line plus two extra bytes for every line whose
   * mnemonic matches ldb/stb/jsr/jmp/bflag/bnoflag.
   *
   * Quoted tokens contribute one byte per character (non-ASCII characters
   * are dropped); every other token is parsed with intByte.
   *
   * @param {string[][]} tokenLines - lines as returned by processLabels.
   * @returns {{start: number, bytes: number[]}} estimated start address of
   *   the data section and its bytes.
   * @throws {string} an intByte error for an invalid numeric token.
   */
  processData(tokenLines) {
    const dataLines = [];
    tokenLines.forEach((line, i) => {
      if (line[0].toLowerCase() == '.data') dataLines.push(i);
    });
    const numMem = tokenLines.filter(
      (line) => line[0].toLowerCase().match(/(ldb|stb|jsr|jmp|bflag|bnoflag)/),
    ).length;
    const start = ((tokenLines.length - dataLines.length) + numMem) * 2;

    const bytes = [];
    for (const i of dataLines) {
      this.curLine = i;
      // bytes.length is the offset of this line within the data section.
      for (const entry of this.labels) {
        if (entry.line == i) {
          this.labelAddresses.push({ label: entry.label, address: bytes.length + start });
        }
      }
      for (const token of tokenLines[i].slice(1)) {
        if (token.match(/^".*"$/)) {
          // if string
          const ascii = token.slice(1, -1).replace(/[^\x00-\x7F]/g, "");
          for (const character of ascii.split('')) {
            bytes.push(character.charCodeAt(0));
          }
        } else {
          bytes.push(this.intByte(token));
        }
      }
    }
    return { start, bytes };
  }

  /**
   * Splits source text into lines of tokens.
   *
   * Lines are split on newlines and tokens on runs of spaces, tabs or
   * commas, in both cases only outside double quotes, so a quoted string
   * stays a single token and may span lines (escaped quotes are not
   * supported). Empty tokens produced by leading or trailing separators are
   * dropped. A `;` starts a comment that runs to the end of the line; it is
   * recognised at the start of a token or inside an unquoted token.
   *
   * @param {string} code
   * @returns {string[][]} one token array per source line (possibly empty).
   */
  tokenize(code) {
    // split code into lines - strings enclosed in quotes can span multiple lines
    // nb this breaks for escaped quotes
    const lines = code.split(/(?<=^[^\"]*(?:\"[^\"]*\"[^\"]*)*)\r?\n(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/);
    return lines.map((line) => {
      // split on whitespace or comma, unless quoted
      const tokens = line.split(/(?<=^[^\"]*(?:\"[^\"]*\"[^\"]*)*)[ \t,]+(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/);
      const tokenLine = [];
      for (const token of tokens) {
        if (token == '') continue; // from leading/trailing separators
        if (token[0] == '"') {
          // quoted string: a ';' inside it is data, not a comment
          tokenLine.push(token);
          continue;
        }
        const commentAt = token.indexOf(';');
        if (commentAt < 0) {
          tokenLine.push(token);
          continue;
        }
        // keep whatever precedes the ';' and ignore the rest of the line
        if (commentAt > 0) tokenLine.push(token.slice(0, commentAt));
        break;
      }
      return tokenLine;
    });
  }
}