// cpu-simulator/js/assembler copy.js

/**
 * Array#findIndexes(where): returns the indexes of every element for which
 * `where(element, index)` is truthy, in ascending order.
 *
 * Defined as a non-enumerable property so that it does not show up in
 * `for...in` loops over arrays (a plain assignment to Array.prototype would).
 *
 * @param {function(*, number): *} where - predicate called with (element, index)
 * @returns {number[]} matching indexes (empty array when nothing matches)
 */
Object.defineProperty(Array.prototype, 'findIndexes', {
  value: function findIndexes(where) {
    const hits = [];
    // forEach skips holes in sparse arrays, same as the reduce-based original
    this.forEach((e, i) => {
      if (where(e, i)) hits.push(i);
    });
    return hits;
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * Assembler for the CPU simulator: turns assembly source text into a flat
 * array of bytes (two bytes per instruction, followed by padding and the
 * contents of any `.data` lines).
 *
 * Relies on a global `instructions` table defined elsewhere, from which this
 * class reads: `opcodes` (entries with `mnemonic`, `opcode`, `patterns`),
 * `arg_patterns` (lists of `{ pattern, mode }`), `registers`, `flags` and
 * `match` (`reg`, `imm8`, `label` patterns).
 *
 * Errors are thrown as plain strings (not Error objects); callers that catch
 * them receive the message itself.
 */
class assembler {
  // labels found by processLabels: { label, line } where `line` indexes the de-labelled token lines
  labels = [];
  // labels resolved to addresses: { label, address }
  labelAddresses = [];
  address = 0;
  // index of the token line currently being processed (set by processData / processInstructions)
  curLine = 0;

  offset = 0; // account for any instructions the assembler needs to add

  /**
   * @param {number} address
   * @returns {number} the address shifted right by 8 bits (its page)
   */
  upper8(address){
    return address>>8;
  }

  /**
   * @param {number} address
   * @returns {number} the low 8 bits of the address
   */
  lower8(address){
    return address&0x00FF;
  }

  /**
   * Parses a numeric token into an unsigned byte value (0..255).
   *
   * Accepts an optional `#`, an optional leading `~` (bitwise invert), and
   * either a `0b` binary literal or anything `parseInt` understands.
   * Negative values down to -128 are returned as their 8-bit two's complement
   * (e.g. -1 -> 255).
   *
   * @param {string} n - the token text
   * @returns {number} byte value in 0..255
   * @throws {string} 'Invalid number' if the token does not parse;
   *   'Number will not fit in a byte!' if it is above 255 or below -128
   */
  intByte(n){
    n = n.replace("#","");
    let invert = false;
    if(n[0]=="~") { invert = true; n = n.slice(1); }
    // no explicit radix in the non-binary case, so 0x-prefixed hex keeps working
    const b = n.match(/^0b/) ? parseInt(n.slice(2),2) : parseInt(n);
    if(isNaN(b)) {
      throw 'Invalid number';
    } else if(b>255 || b<-128){
      throw 'Number will not fit in a byte!';
    }
    // masking gives the 8-bit two's complement for negatives and is a no-op for 0..255
    const r = b & 0xFF;
    return invert ? (~r & 0xFF) : r;
  }

  /**
   * Encodes one tokenised instruction into its two bytes.
   *
   * Byte 0 is `(opcode << 4) + (mode << 3) + destination register index`;
   * byte 1 is either the second operand's register index shifted left by 5
   * (mode 0) or the immediate value of the last operand (any other mode).
   *
   * Note: for bflag/bnoflag, `line[1]` is rewritten in place from the flag
   * name to the register name at the same index.
   *
   * @param {string[]} line - [mnemonic, ...operands]
   * @returns {number[]} the two encoded bytes
   * @throws {string} 'Unknown opcode', 'Invalid flag.', 'Wrong number of operands',
   *   'Ill-formatted instruction', or any error raised by intByte
   */
  encodeInstruction(line){
    const bytes = [0,0];
    const mnemonic = line[0].toLowerCase();
    const instruction = instructions.opcodes.find(oc=>oc.mnemonic==mnemonic);
    // must be checked before touching instruction.mnemonic below
    if(!instruction) throw 'Unknown opcode';
    if(instruction.mnemonic=="bflag"||instruction.mnemonic=="bnoflag") {
      const flag = instructions.flags.indexOf(line[1]);
      if(flag<0) throw 'Invalid flag.';
      // flags are encoded through the destination-register field: swap the
      // flag name for the register that shares its index
      line[1] = line[1].replace(instructions.flags[flag],instructions.registers[flag]);
    }
    const patterns = instructions.arg_patterns[instruction.patterns];
    let mode = -1;
    if(patterns[0].pattern.length!=line.length-1){
      throw 'Wrong number of operands';
    } else if(patterns.length==1){
      mode = patterns[0].mode;
    } else {
      // when several patterns fit, the last one listed wins
      for(const candidate of patterns){
        const fits = candidate.pattern.every((operandPattern,i)=>line[i+1].match(operandPattern));
        if(fits) mode = candidate.mode;
      }
    }
    if(mode==-1) { console.log(line); throw 'Ill-formatted instruction'; }
    // first byte - opcode, addressing mode, destination register
    const destreg = instructions.registers.indexOf(line[1]);
    bytes[0] = (instruction.opcode << 4) + (mode << 3) + Math.max(destreg,0);
    if(mode==0){
      // register address in first 3 bits, the rest is 0
      bytes[1] = instructions.registers.indexOf(line[2])<<5;
    } else {
      // immediate data (remove #)
      bytes[1] = this.intByte(line[line.length-1].replace("#",""));
    }
    // an unknown/missing source register (indexOf == -1) encodes as 0
    bytes[1] = Math.max(bytes[1],0);
    return bytes;
  }

  /**
   * Encodes every non-`.data` token line, substituting label references and
   * inserting a `mov ip|dp, #page` instruction whenever a referenced label
   * lives on a different page than the one last selected.
   *
   * Labels attached to instructions are added to `labelAddresses` only after
   * their instruction has been encoded, so an instruction can only resolve
   * labels defined on earlier lines (or `.data` labels registered by
   * processData). Label references are found by substring match.
   *
   * NOTE: expansion of hlt/nop pseudo-instructions was drafted here previously
   * but is disabled.
   *
   * @param {string[][]} instructions - de-labelled token lines (operands may be rewritten in place)
   * @returns {number[]} the encoded bytes
   * @throws {string} any error raised by encodeInstruction
   */
  processInstructions(instructions){
    const bytecode = [];
    // page last loaded into each page register by an inserted mov
    const currentPage = { ip: 0, dp: 0 };
    let instAddr = 0;
    console.log({labels:this.labels});
    instructions.forEach((instruction,i)=>{
      const mnemonic = instruction[0].toLowerCase();
      this.curLine = i;
      // data lines are emitted by processData, not here
      if(mnemonic==".data") return;
      let offset = 0;
      const isJump = mnemonic=="jsr"||mnemonic=="jmp";
      // QUICK AND DIRTY: WILL BREAK UNDER CERTAIN CONDITIONS
      const isMemory = mnemonic=="stb"||mnemonic=="ldb"||mnemonic=="mov"||mnemonic=="bflag"||mnemonic=="bnoflag";
      if(isJump||isMemory){
        // jumps carry the label in operand 1 and switch ip; the rest use operand 2 and dp
        const slot = isJump ? 1 : 2;
        const pageReg = isJump ? "ip" : "dp";
        // when the operand is missing, skip substitution and let encodeInstruction report the error
        if(typeof instruction[slot]=="string"){
          instruction[slot] = instruction[slot].replace("SKIP",'#'+(instAddr+4+offset));
          const target = this.labelAddresses.find(la=>instruction[slot].includes(la.label));
          if(target){ // we must replace
            instruction[slot] = instruction[slot].replace(target.label, "#"+this.lower8(target.address));
            const page = this.upper8(target.address);
            if(page!=currentPage[pageReg]){
              currentPage[pageReg] = page;
              bytecode.push(...this.encodeInstruction(["mov",pageReg,"#"+page]));
              offset = 2;
            }
          }
        }
        if(isJump) console.log(instruction);
      }
      bytecode.push(...this.encodeInstruction(instruction));
      console.log({i:i, l:this.labels});
      // a label on this line resolves to the address where this line's output begins
      const labelIndex = this.labels.findIndex(la=>la.line==i);
      if(labelIndex>-1){
        this.labelAddresses.push({label: this.labels[labelIndex].label, address: instAddr});
      }
      instAddr += 2 + offset;
    });
    return bytecode;
  }

  /**
   * Assembles source text into bytes: instructions first, then zero padding
   * up to the estimated data start, then the `.data` bytes.
   *
   * Label state from any previous call on this instance is discarded first.
   * If the generated code is longer than the data start estimated by
   * processData, building the padding array throws a RangeError.
   *
   * @param {string} code - assembly source
   * @returns {number[]} the assembled bytes
   */
  assemble(code) {
    // start from a clean slate so repeated calls do not see stale labels
    this.labels = [];
    this.labelAddresses = [];
    const tokens = this.tokenize(code).filter(line=>line.length>0 &&line[0]!='');
    console.log(tokens);
    const withPseudo = this.processAndOr(tokens);
    const delabeled = this.processLabels(withPseudo);
    const data = this.processData(delabeled);
    const bytecode = this.processInstructions(delabeled);
    const pad = data.bytes.length>0?Array(data.start - bytecode.length).fill(0):[];
    console.log({tokens,withPseudo,delabeled,data,bytecode})

    return bytecode.concat(pad).concat(data.bytes);
  }

  /**
   * Builds the NAND sequence for `dest = dest OR src` (De Morgan).
   * The per-line comments assume NAND writes its result to its first operand.
   *
   * @param {string[]} prefix - tokens to keep in front of the first emitted instruction (a label, or nothing)
   * @param {string} dest - first operand
   * @param {string} src - second operand: register, immediate or label
   * @param {RegExp} registerPattern - pattern that recognises a register operand
   * @returns {string[][]} the replacement token lines
   * @throws {string} 'error converting or' if src is not a register, immediate or label
   */
  expandOr(prefix, dest, src, registerPattern){
    const operand = src.toLowerCase();
    const expanded = [[...prefix,'nand',dest,dest]];   // NAND a, a   (a = ~a)
    if(operand.match(registerPattern)){
      expanded.push(['nand',src,src]);                 // NAND b, b   (b = ~b)
      expanded.push(['nand',dest,src]);                // NAND a, b   (a = a | b)
      expanded.push(['nand',src,src]);                 // NAND b, b   (restore b)
    } else if(operand.match(instructions.match.imm8) || operand.match(instructions.match.label)){
      // constants are inverted at assembly time via the "~" prefix understood by intByte
      expanded.push(['nand',dest,"~"+src]);            // NAND a, ~b
    } else throw "error converting or";
    return expanded;
  }

  /**
   * Rewrites the `and` / `or` pseudo-instructions into NAND sequences.
   * Both the bare form (`and a, b`) and the form with a leading token such as
   * a label (`label: and a, b`) are recognised; the leading token is kept on
   * the first emitted instruction. `and` lines are modified in place.
   *
   * @param {string[][]} tokenLines
   * @returns {string[][]} token lines with and/or expanded
   * @throws {string} 'error converting or' for an `or` operand of unknown kind
   */
  processAndOr(tokenLines) {
    const processed = [];
    tokenLines.forEach(line=>{
      if(line.length<2){
        processed.push(line);
        return;
      }
      const first = line[0].toLowerCase();
      const second = line[1].toLowerCase();
      if(first=="and") {
        line[0] = "nand";
        processed.push(line);                          // NAND a, b
        processed.push(['nand',line[1],line[1]]);      // NAND a, a
      } else if(second=="and") {
        line[1] = "nand";
        processed.push(line);                          // ibid
        processed.push(['nand',line[2],line[2]]);
      } else if(first=="or") {
        processed.push(...this.expandOr([],line[1],line[2],instructions.match.reg));
      } else if(second=="or") {
        processed.push(...this.expandOr([line[0]],line[2],line[3],/(ip|dp|sp|r[0-3])/));
      } else {
        processed.push(line);
      }
    });
    return processed;
  }

  /**
   * Strips leading `name:` label tokens from the token lines and records each
   * label in `this.labels` together with the index (in the returned array) of
   * the line it refers to. A line consisting only of a label is removed and
   * the label refers to the next line that gets added.
   *
   * @param {string[][]} tokenLines
   * @returns {string[][]} the token lines without label tokens
   */
  processLabels(tokenLines) {
    const processed = [];
    for(const curLine of tokenLines){
      // NOTE(review): [A-z] also admits the ASCII characters between 'Z' and 'a'
      // (including '_'); kept as-is since existing sources may rely on it
      if(!curLine[0].match(/^[A-z0-9]+:$/)){
        processed.push(curLine);
        continue;
      }
      // this label points at the next line added to the processed array
      const lineNum = processed.length;
      if(curLine.length > 1 && curLine[1]!=''){
        // first token was label; next token will be opcode or data
        processed.push(curLine.slice(1));
      }
      this.labels.push({ label: curLine[0].split(/:.*/)[0], line: lineNum });
    }
    return processed;
  }

  /**
   * Collects the bytes of all `.data` lines and registers the address of any
   * label attached to them in `labelAddresses`.
   *
   * The start of the data area is an estimate: two bytes per non-data line
   * plus two extra bytes for every line whose mnemonic contains one of
   * ldb/stb/jsr/jmp/bflag/bnoflag, to leave room for inserted page switches.
   *
   * @param {string[][]} tokenLines - de-labelled token lines
   * @returns {{start: number, bytes: number[]}} estimated data start address and the data bytes
   * @throws {string} any error raised by intByte for a non-string data token
   */
  processData(tokenLines) {
    // exact comparison, matching how processInstructions recognises data lines
    const dataInstructions = [];
    tokenLines.forEach((line,i)=>{
      if(line[0].toLowerCase()==".data") dataInstructions.push(i);
    });
    const numMem = tokenLines.filter(line => line[0].toLowerCase().match(/(ldb|stb|jsr|jmp|bflag|bnoflag)/)).length;
    const start = ( (tokenLines.length - dataInstructions.length) + numMem )* 2;
    let curAddr = 0;
    const bytes = [];
    dataInstructions.forEach(i=>{
      this.curLine = i;
      const thisLabel = this.labels.find(l => l.line== i);
      if(thisLabel){
        this.labelAddresses.push({ label: thisLabel.label, address: curAddr+start });
      }
      tokenLines[i].slice(1).forEach(token=>{
        if(token.match(/^".*"$/)) {
          // quoted string: one byte per character, non-ASCII characters are dropped
          token.slice(1,-1).replace(/[^\x00-\x7F]/g, "").split('').forEach(character=>{
            bytes.push(character.charCodeAt(0));
            curAddr += 1;
          });
        } else {
          bytes.push(this.intByte(token));
          curAddr += 1;
        }
      });
    });

    return { start, bytes };
  }

  /**
   * Splits source text into lines of tokens. Tokens are separated by spaces
   * and/or commas; double-quoted strings are kept as single tokens and may
   * span several lines. A token starting with `;` begins a comment that runs
   * to the end of the line. Empty tokens (from leading or trailing
   * separators) are discarded, so a blank or comment-only line yields `[]`.
   *
   * @param {string} code - assembly source
   * @returns {string[][]} one token array per source line
   */
  tokenize(code) {
    // split code into lines - strings enclosed in quotes can span multiple lines
    // nb this breaks for escaped quotes but who cares
    const lines = code.split(/(?<=^[^\"]*(?:\"[^\"]*\"[^\"]*)*)\r?\n(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/);
    return lines.map(line=>{
      // split on whitespace or comma, unless quoted
      const tokens = line.split(/(?<=^[^\"]*(?:\"[^\"]*\"[^\"]*)*)[ ,]+(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/);
      const tokenLine = [];
      for(const token of tokens){
        // everything from the first token starting with ; is a comment
        if(token[0]==";") break;
        // leading/trailing separators produce empty tokens; they carry no meaning
        if(token!=="") tokenLine.push(token);
      }
      return tokenLine;
    });
  }
}