/*
 * Compiler.java - Propeller LMM assembler
 *
 * Created on July 27, 2007, 12:30 PM
 *
 *  --------------------------------------------------------------------------------
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * --------------------------------------------------------------------------------
 *
 *
 * This compiler implements some "code" swapping to achieve a LMM
 *
 * There are two kinds of assembler primitives: those native to the Propeller
 * and the emulated ones. Every emulated instruction is either a call to a kernel
 * subroutine that performs the action, or a series of instructions.
 * All addresses are absolute. Symbols are of two kinds: absolute to the 512-long
 * memory space and absolute to the program.
 *
 * There are now 8 registers plus a stack pointer.
 * The stack can be manipulated directly with the new instructions.
 *
 * New mnemonics:
 *
 * push rx : pushes a long into the stack
 * pushib : pushes a byte into the stack (as long)
 * pushid : pushes a long into the stack (as long)
 * pop rx : pops a long from the stack
 * enter #number : reserves number longs in the stack, pushes base pointer 
 * leave : restores stack pointer, base pointer 
 * ldb/ldw/ldd rx,#memaddr loads a byte/word/long from memory (not from stack)
 * stb/stw/std #memaddr,rx : Stores a byte/word/long into memory
 * li : loads a long immediate, uses any form of mov/shl
 * ldstb ldstw ldstd rx,#offset : loads a byte, word, long from stack at offset baseptr+offset
 * ststb ststw ststd #offset,rx : stores a byte, word, long into stack
 *
 * rcall #dest : call to subroutine
 * rret : subroutine return
 * rjmp : jump
 *
 * Note on conditionals :
 *
 * New mnemonics do support conditions !!!
 * because second long is encoded with condition 0000 (IF_NEVER)
 *
 * rjmp is translated to:
 *
 * [conds] call #krnl_jump
 * long [conds=0]|[destination [18..0]]
 *
 * All addresses are aligned to long boundaries (for jumps/call) and in longs
 * Only 19 bits of the address are available
 *
 *
 *
 */

package org.pacito.plmmass;

/**
 * This class implements an assembler for the propeller
 * @author pacito
 */
public class LMMPreprocessor {
    /** Initial size, in longs, of the output buffer allocated by {@code compile}. */
    public final int MAX_COG_LONGS = 512;
    // NOTE(review): presumably the largest 9-bit immediate; not referenced in the visible code
    protected final int MAX_LITERAL = 0x1ff;
    // Bit fields of a 32-bit instruction word. compileTokenized builds the same
    // layout with shifts: opcode << 26, condition << 18, destination << 9, source in the low bits.
    protected final int MASK_INST = 0xfc000000;
    protected final int MASK_Z    = 0x02000000; // set when "wz" is present
    protected final int MASK_C    = 0x01000000; // set when "wc" is present
    protected final int MASK_R    = 0x00800000; // set when the opcode table's r column is 1
    protected final int MASK_I    = 0x00400000; // set when the source operand is a '#' literal
    protected final int MASK_COND = 0x003c0000;
    protected final int MASK_D    = 0x0003fe00;
    protected final int MASK_S    = 0x000001ff;
    
    // 4-bit condition codes, shifted left by 18 into the instruction word
    protected final int IF_NEVER     =  0;
    protected final int IF_NC_AND_NZ =  1;
    protected final int IF_NC_AND_Z  =  2;
    protected final int IF_NC        =  3;
    protected final int IF_C_AND_NZ  =  4;
    protected final int IF_NZ        =  5;
    protected final int IF_C_NE_Z    =  6;
    protected final int IF_NC_OR_NZ  =  7;
    protected final int IF_C_AND_Z   =  8;
    protected final int IF_C_EQ_Z    =  9;
    protected final int IF_Z         = 10;
    protected final int IF_NC_OR_Z   = 11;
    protected final int IF_C         = 12;
    protected final int IF_C_OR_NZ   = 13;
    protected final int IF_C_OR_Z    = 14;
    protected final int IF_ALWAYS    = 15; // 1111, the default when a line has no condition
    // read-only registers: a destination in [REG_FORB0, REG_FORB1] is rejected with ERR_RDONLY
    protected final int REG_FORB0 = 0x1f0;
    protected final int REG_FORB1 = 0x1f3;
    
    // address of the first special register; the default constructor registers regs[i] at REG_ADDR + i
    protected final int REG_ADDR  = 0x1f0;
    protected final int MASK_RAM  = 0x1ff;
    protected final int MASK_REGS = 0x00f;
    // Cog possible status
    
    /**
     * Mnemonics recognised by the tokenizer, matched case-insensitively.
     * The position of a mnemonic in this array is the row used to look it up in
     * {@code opcodes_mask}. Entries must not contain whitespace: the tokenizer
     * splits on blanks, so a padded entry could never match a token.
     */
    protected String[] opcodes = { "wrbyte",  "rdbyte",  "wrword",  "rdword",  "wrlong",  "rdlong",   "clkset",  "cogid",  
                                   "coginit", "cogstop", "locknew", "lockret", "lockset", "lockclr",  "mul",     "muls",   
                                   "enc",     "ones",    "ror",     "rol",     "shr",     "shl",      "rcr",     "rcl",
                                   "sar",     "rev",     "mins",    "maxs",    "min",     "max",      "movs",    "movd",   
                                   "movi",    "jmp",     "call",    "ret",     "test",    "and",      "testn",   "andn",   
                                   "or",      "xor",     "muxc",    "muxnc",   "muxz",    "muxnz",    "add",     "cmp",    
                                   "sub",     "addabs",  "subabs",  "sumc",    "sumnc",   "sumz",     "sumnz",   "mov", 
                                   "neg",     "abs",     "absneg",  "negc",    "negnc",   "negz",     "negnz",   "cmps",
                                   "cmpsx",   "addx",    "cmpx",    "subx",    "adds",    "subs",     "addsx",   "subsx",
                                   "cmpsub",  "djnz",    "tjnz",    "tjz",     "waitpeq", "waitpne",  "waitcnt", "waitvid", 
                                   "nop",     "=",       "long",    "byte",    "word",    "string",   "push",    "pop",
                                    "rcall",  "rjmp",    "enter",   "leave",   "rret",    "ldb",      "ldw",     "ldd",
                                    "stb",    "stw",     "std",     "ldstb",   "ldstw",   "ldstd",    "ststb",   "ststw", 
                                    "ststd",  "li",      "pushib",  "pushid",  "include"};
    
    // Column indices into one row of opcodes_mask
    protected final int OP_MASK_OP = 0; // 6-bit opcode, shifted left by 26
    protected final int OP_MASK_R = 1;  // 1 when the R bit is set
    protected final int OP_MASK_C = 2;  // operand/condition class, see the list below
    protected final int OP_MASK_S = 3;  // fixed source-field value, only used by class 3
    
    //                                 opcode r  c  s
    // c = 0 does not accept a condition, no args (nop)
    // c = 1 accepts a condition, 2 args
    // c = 2 accepts a condition, no args (ret)
    // c = 3 the s column is OR-ed into the source field, only field d is used
    // c = 4 accepts a condition, 1 arg used (call)
    // c = 5 accepts a condition, 1 arg used (jmp)
    // c = 6 does not accept a condition, 1 arg used (=)
    // c = 7 does not accept a condition, 1 arg used (long)
    //
    // Rows are in the same order as the opcodes array.
    // NOTE(review): this table stops at "long" while opcodes continues with
    // "byte", "word", "push", ... - confirm those mnemonics are handled elsewhere
    // before they reach a lookup by index in this table.
    protected byte[][] opcodes_mask = { {  0, 0, 1, 0 }, // wrbyte
                                        {  0, 1, 1, 0 }, // rdbyte
                                        {  1, 0, 1, 0 }, // wrword
                                        {  1, 1, 1, 0 }, // rdword
                                        {  2, 0, 1, 0 }, // wrlong
                                        {  2, 1, 1, 0 }, // rdlong
                                        {  3, 0, 3, 0 }, // clkset
                                        {  3, 1, 3, 1 }, // cogid
                                        {  3, 0, 3, 2 }, // coginit
                                        {  3, 0, 3, 3 }, // cogstop
                                        {  3, 0, 3, 4 }, // locknew
                                        {  3, 0, 3, 5 }, // lockret
                                        {  3, 0, 3, 6 }, // lockset
                                        {  3, 0, 3, 7 }, // lockclr
                                        {  4, 1, 1, 0 }, // mul
                                        {  5, 1, 1, 0 }, // muls
                                        {  6, 1, 1, 0 }, // enc
                                        {  7, 1, 1, 0 }, // ones
                                        {  8, 1, 1, 0 }, // ror
                                        {  9, 1, 1, 0 }, // rol
                                        { 10, 1, 1, 0 }, // shr
                                        { 11, 1, 1, 0 }, // shl
                                        { 12, 1, 1, 0 }, // rcr
                                        { 13, 1, 1, 0 }, // rcl
                                        { 14, 1, 1, 0 }, // sar
                                        { 15, 1, 1, 0 }, // rev
                                        { 16, 1, 1, 0 }, // mins
                                        { 17, 1, 1, 0 }, // maxs
                                        { 18, 1, 1, 0 }, // min
                                        { 19, 1, 1, 0 }, // max
                                        { 20, 1, 1, 0 }, // movs
                                        { 21, 1, 1, 0 }, // movd
                                        { 22, 1, 1, 0 }, // movi
                                        { 23, 0, 5, 0 }, // jmp
                                        { 23, 1, 4, 0 }, // call
                                        { 23, 0, 2, 0 }, // ret
                                        { 24, 0, 1, 0 }, // test
                                        { 24, 1, 1, 0 }, // and
                                        { 25, 0, 1, 0 }, // testn
                                        { 25, 1, 1, 0 }, // andn
                                        { 26, 1, 1, 0 }, // or
                                        { 27, 1, 1, 0 }, // xor
                                        { 28, 1, 1, 0 }, // muxc
                                        { 29, 1, 1, 0 }, // muxnc
                                        { 30, 1, 1, 0 }, // muxz
                                        { 31, 1, 1, 0 }, // muxnz
                                        { 32, 1, 1, 0 }, // add
                                        { 33, 0, 1, 0 }, // cmp
                                        { 33, 1, 1, 0 }, // sub
                                        { 34, 1, 1, 0 }, // addabs
                                        { 35, 1, 1, 0 }, // subabs
                                        { 36, 1, 1, 0 }, // sumc
                                        { 37, 1, 1, 0 }, // sumnc
                                        { 38, 1, 1, 0 }, // sumz
                                        { 39, 1, 1, 0 }, // sumnz
                                        { 40, 1, 1, 0 }, // mov
                                        { 41, 1, 1, 0 }, // neg
                                        { 42, 1, 1, 0 }, // abs
                                        { 43, 1, 1, 0 }, // absneg
                                        { 44, 1, 1, 0 }, // negc
                                        { 45, 1, 1, 0 }, // negnc
                                        { 46, 1, 1, 0 }, // negz
                                        { 47, 1, 1, 0 }, // negnz
                                        { 48, 0, 1, 0 }, // cmps
                                        { 49, 0, 1, 0 }, // cmpsx
                                        { 50, 1, 1, 0 }, // addx
                                        { 51, 0, 1, 0 }, // cmpx
                                        { 51, 1, 1, 0 }, // subx
                                        { 52, 1, 1, 0 }, // adds
                                        { 53, 1, 1, 0 }, // subs
                                        { 54, 1, 1, 0 }, // addsx
                                        { 55, 1, 1, 0 }, // subsx
                                        { 56, 1, 1, 0 }, // cmpsub
                                        { 57, 1, 1, 0 }, // djnz
                                        { 58, 0, 1, 0 }, // tjnz
                                        { 59, 0, 1, 0 }, // tjz
                                        { 60, 0, 1, 0 }, // waitpeq
                                        { 61, 0, 1, 0 }, // waitpne
                                        { 62, 1, 1, 0 }, // waitcnt
                                        { 63, 0, 1, 0 }, // waitvid
                                        {  0, 0, 0, 0 }, // nop
                                        {  0, 0, 6, 0 }, // =
                                        {  0, 0, 7, 0 }  // long
                                        
                                        
                                    };
    
    /** Names of the 16 special registers; the default constructor registers regs[i] at REG_ADDR + i. */
    protected String[] regs = { "PAR", "CNT", "INA", "INB", "OUTA", "OUTB", "DIRA", "DIRB", 
                                "CTRA", "CTRB", "FRQA", "FRQB", "PHSA", "PHSB", "VCFG", "VSCL" };
    
    /**
     * Condition prefixes recognised by the tokenizer (case-insensitive).
     * The first 16 names are the canonical ones and are listed in the order of
     * their 4-bit encoding (IF_NEVER = 0 ... IF_ALWAYS = 15); the remaining
     * names are aliases. condmap[i] gives the encoding for conds[i].
     */
    protected String[] conds = { "if_never",    "if_nc_and_nz", "if_nc_and_z", "if_nc", 
                                 "if_c_and_nz", "if_nz",        "if_c_ne_z",   "if_nc_or_nz", 
                                 "if_c_and_z",  "if_c_eq_z",    "if_z",        "if_nc_or_z", 
                                 "if_c",        "if_c_or_nz",   "if_c_or_z",   "if_always",
                                 "if_e",        "if_ne",        "if_a",        "if_b",
                                 "if_ae",       "if_be",        "if_z_eq_c",   "if_z_ne_c",
                                 "if_z_and_c",  "if_z_and_nc",  "if_nz_and_c", "if_nz_and_nc",
                                 "if_z_or_c",   "if_z_or_nc",   "if_nz_or_c",  "if_nz_or_nc"
                                };
    
    /** 4-bit condition encoding for the name at the same index in conds. */
    protected int[] condmap = {  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                10, 5, 1,12, 3,14, 9, 6, 8, 2,  4,  1, 14, 11, 13,  7 };
                                
    /** Known symbols (registers, labels, equates); searched linearly by symbolExists. */
    protected java.util.Vector <Symbol> symboltable;
    /** Output of the last compile() call, one entry per assembled long. */
    protected long[] compiledCode;
    /** Number of valid entries in compiledCode, and the source line index being processed. */
    protected int compiledCodeLength, errorLine;
    //protected String source;
    /** Fields of the line most recently passed to tokenize, indexed by the IDX_* constants. */
    protected String[] tokenizedline;
    /** Last error code, one of the ERR_* constants. */
    protected int lastError;
    /** Human-readable text built by reportError. */
    protected String lastErrorStr = "";
    /** Offending token for ERR_UKNINST / ERR_SYMNF messages. */
    protected String notFoundSymbol = "";
    
    // Message text for each error code (used by reportError)
    protected final String ERR_STR_NONE = "Compilation sucessful";
    protected final String ERR_STR_SYMEXISTS = "Duplicated symbol";
    protected final String ERR_STR_UKNCOND = "Condition unknown";
    protected final String ERR_STR_UKNINST = "Instruction Unknown";
    protected final String ERR_STR_SYMNF = "Symbol not found";
    protected final String ERR_STR_EMPTY = "Symbol only line";
    protected final String ERR_STR_DUPWX = "Duplicated wc or wz field";
    protected final String ERR_STR_ARGMISS = "Argument(s) missing";
    protected final String ERR_STR_RDONLY = "Destination is read-only";
    protected final String ERR_STR_GARBAGE = "Garbage at end of line";
    
    // Error codes: zero means success, every failure is negative
    protected final int ERR_NONE      = 0; // no error
    protected final int ERR_SYMEXISTS = -1; // symbol exists
    protected final int ERR_UKNCOND   = -2; // unknown condition
    protected final int ERR_UKNINST   = -3; // unknown instruction
    protected final int ERR_SYMNF     = -4; // symbol not found
    protected final int ERR_EMPTY     = -5; // empty line
    protected final int ERR_DUPWX     = -6; // duplicated WC or WZ field
    protected final int ERR_ARGMISS = -7; // Argument(s) missing
    protected final int ERR_RDONLY = -8; // memory position is read-only (INA/INB)
    protected final int ERR_GARBAGE = -9; // garbage at end of line
    
    // position of fields in the tokenized line 
    
    protected final int IDX_SYM  = 0;
    protected final int IDX_COND = 1;
    protected final int IDX_INST = 2;
    protected final int IDX_ARG1 = 3;
    protected final int IDX_ARG2 = 4;
    protected final int IDX_WC   = 5;
    protected final int IDX_WZ   = 6;
    
    // Effect-flag keywords, compared case-insensitively by tokenize
    protected final String WC = "wc";
    protected final String WZ = "wz";
    /** Creates a new instance of compiler */
    public LMMPreprocessor() {
        symboltable = new java.util.Vector<Symbol>();
        // Adds registers as symbols
        for (int i = 0; i < 16; i++)
            symboltable.add(new Symbol(regs[i], REG_ADDR + i));
    }
    
    /**
     * Creates an assembler that works on an existing symbol table.
     * The table is used as supplied; register symbols are not added to it here.
     *
     * @param st symbol table to use
     */
    public LMMPreprocessor(java.util.Vector <Symbol> st) {
        this.symboltable = st;
    }
    /**
     * Assembles a single source line into one instruction without creating symbols.
     * On failure the error text is prepared through reportError.
     *
     * @param line String source line to compile
     * @param addr int address assigned to the instruction
     * @return the opcode of the assembled instruction, or -1 if the line could
     *         not be tokenized or produced no instruction
     */
    public long compileLine(String line, int addr) {
        lastError = tokenize(line, false);
        Instruction assembled = null;
        if (lastError == ERR_NONE)
            assembled = compileTokenized(addr, false);
        
        if (assembled != null)
            return assembled.getOpcode();
        
        // tokenizer error, or nothing was assembled
        reportError();
        return -1L;
    }
    
    /**
     * Two pass assembler, code is one line per row.
     *
     * The first pass only assigns addresses and creates symbols from labels and
     * equates; errors raised while assembling an instruction are ignored there
     * and reported by the second pass, which generates the code.
     * Null and empty rows are skipped.
     *
     * The output buffer starts at MAX_COG_LONGS entries and is enlarged when a
     * program needs more, instead of failing with an index error.
     *
     * @param code source program, one line per element
     * @return ERR_NONE on success, otherwise the error code; getErrorLine()
     *         and getErrorString() describe the failure
     */
    
    public int compile(String[] code) {
        int l = code.length;
        int addr = 0;
        Instruction i;
        compiledCodeLength = 0;
        compiledCode = new long[MAX_COG_LONGS];
        errorLine = -1;
        // first pass, symbol creation, "=" silently ignored !!"
        for (int j = 0; j < l; j++) {
            errorLine = j;
            if (code[j] == null || code[j].length() < 1)
                continue;
            lastError = tokenize(code[j], false);
            if (lastError != ERR_NONE)
                return reportError(); // Line number with error
            i = compileTokenized(addr, true); // we allow it to create symbols from labels and equates
            if (i != null) addr++;
            
        }
        // second pass
        addr = 0;
        for (int j = 0; j < l; j++) {
            errorLine = j;
            if (code[j] == null || code[j].length() < 1)
                continue;
            lastError = tokenize(code[j], true); // labels already exist, do not flag them as duplicates
            if (lastError != ERR_NONE) 
                return reportError(); // Line number with error
            i = compileTokenized(addr, false);
            if (i != null) { // the line produced an instruction
                if (addr == compiledCode.length) {
                    // buffer full: double it rather than overflow
                    long[] grown = new long[compiledCode.length * 2];
                    System.arraycopy(compiledCode, 0, grown, 0, addr);
                    compiledCode = grown;
                }
                compiledCode[addr++] = i.getOpcode();
            }
            else if (lastError != ERR_NONE) // no instruction because of an error
                return reportError();
        }
        
        compiledCodeLength = addr;
        lastError = ERR_NONE;
        return reportError();
    }
    
    /**
     * Returns the compiled code as a long[].
     * The internal buffer itself is returned (not a copy); only the first
     * getCodeSize() entries hold generated code.
     *
     * @return the output buffer, or null if compile() has not been called
     */
    public long[] getCompiledCode() {
        return compiledCode;
    }
    
    /**
     * Returns the number of entries currently in the symbol table.
     *
     * @return size of the symbol table
     */
    public int getSymbolCount() {
        return symboltable.size();
    }
    
    /**
     * Gets the amount of code generated (in longs) by the last successful compile().
     *
     * @return number of longs generated
     */
    public int getCodeSize() {
        return compiledCodeLength;
    }
    
    /**
     * Returns the line where an error occurred.
     * This is the zero-based index into the array given to compile(); it is
     * the index of the last line processed, so it is only meaningful after a failure.
     *
     * @return index of the last processed source line
     */
    public int getErrorLine() {
        return errorLine;
    }
    
    /**
     * Returns the message built by the most recent reportError() call.
     *
     * @return the last error (or success) message
     */
    public String getErrorString() {
        return lastErrorStr;
    }
    
    /**
     * Replaces the symbol table with the given one.
     * The table is stored by reference, not copied.
     *
     * @param st symbol table to use from now on
     */
    
    public void setSymbolTable(java.util.Vector <Symbol> st) {
        symboltable = st;
    }
    
    
    /**
     * Returns the symbol table currently in use.
     * The internal table itself is returned, not a copy.
     *
     * @return the symbol table
     */
    
    public java.util.Vector <Symbol> getSymbolTable() {
        return symboltable;
    }
    /**
     * Generates the error string for lastError and returns lastError.
     *
     * For ERR_NONE the string is a success summary. For every other code the
     * matching message is stored and ", at line N" (N = errorLine) is appended.
     * A code without a case here leaves the current string in place and only
     * gets the line suffix appended.
     *
     * @return the current value of lastError
     */
    
    protected int reportError() {
        switch (lastError) {
            case ERR_NONE: 
                lastErrorStr = ERR_STR_NONE + ", " + getCodeSize() + " long(s) used, " + getSymbolCount() + " symbol(s) created";
                return lastError;
            case ERR_SYMEXISTS:
                lastErrorStr = ERR_STR_SYMEXISTS;
                break;
            case ERR_UKNCOND:
                lastErrorStr = ERR_STR_UKNCOND;
                break;
            case ERR_UKNINST:
                lastErrorStr = ERR_STR_UKNINST + "(" + notFoundSymbol + ")";
                break;
            case ERR_SYMNF:
                lastErrorStr = ERR_STR_SYMNF + "(" + notFoundSymbol + ")";
                break;
            case ERR_EMPTY: // previously unhandled: the stale message was reused
                lastErrorStr = ERR_STR_EMPTY;
                break;
            case ERR_DUPWX:
                lastErrorStr = ERR_STR_DUPWX;
                break;
            case ERR_ARGMISS:
                lastErrorStr = ERR_STR_ARGMISS;
                break;
            case ERR_RDONLY:
                lastErrorStr = ERR_STR_RDONLY;
                break;
            case ERR_GARBAGE:
                lastErrorStr = ERR_STR_GARBAGE;
                break;
        }
        
        lastErrorStr += ", at line " + errorLine;
        return lastError;
    }
    /**
     * Converts a line of code into a tokenized form
     *
     * [symbol] [condition] instruction [arg1[,arg2]] [wc] [wz]
     *
     * Tokenized form (Strings), stored in tokenizedline; absent fields are null:
     * tokenizedline[IDX_SYM]  = Symbol Name if any
     * tokenizedline[IDX_COND] = condition
     * tokenizedline[IDX_INST] = instruction name
     * tokenizedline[IDX_ARG1] = first argument
     * tokenizedline[IDX_ARG2] = second argument
     * tokenizedline[IDX_WC]   = "wc" flag token
     * tokenizedline[IDX_WZ]   = "wz" flag token
     *
     * A first token that is neither an instruction nor a condition is taken as
     * a label. A line holding only a label (optionally followed by a condition)
     * is accepted; a condition with no label and no instruction is reported as
     * ERR_UKNINST. Flags may directly follow the instruction or the first
     * argument; anything left after the arguments must be a flag.
     *
     * @param l source line
     * @param ignoreSyms when false, a first token that already exists in the
     *        symbol table is reported as ERR_SYMEXISTS
     * @return ERR_NONE, ERR_SYMEXISTS, ERR_UKNINST, ERR_DUPWX or ERR_GARBAGE
     */
    
    protected int tokenize(String l, boolean ignoreSyms) {
        String[] ptl = separate(l);
        
        // fresh array: every field not found on this line stays null
        tokenizedline = new String[IDX_WZ + 1];
        
        if (ptl == null)
            return ERR_NONE; // empty line, no error, tokenized line contains nulls
        
        int idx = 0, idx_sym = -1, idx_cond = -1, idx_inst = -1, idx_arg1 = -1, idx_arg2 = -1;
        int idx_wc = -1, idx_wz = -1;
        
        // ptl contains a separated list of things, we have to identify useful and in-order info
        
        if (!ignoreSyms && symbolExists(ptl[idx]))
            return ERR_SYMEXISTS;

        if (!isInstruction(ptl[idx]) && !isCondition(ptl[idx])) {
            idx_sym = idx; // label exists
            idx++;
        }
        
        if (isCondition(ptl[idx])) {
            idx_cond = idx; // condition exists
            idx++;
        }
        
        if (!isInstruction(ptl[idx])) {
            if (ptl[idx] != null) {
                notFoundSymbol = ptl[idx]; // not found instruction
                return ERR_UKNINST;
            }
            if (idx_sym == -1) {
                // only a condition on the line: there is no label to fall back on
                notFoundSymbol = ptl[idx_cond];
                return ERR_UKNINST;
            }
            tokenizedline[IDX_SYM] = ptl[idx_sym];
            return ERR_NONE; // label-only line
        }
        idx_inst = idx;
        idx++;
        
        // instruction is valid, now we check for arguments.
        // The flag constants are the receivers of equalsIgnoreCase so that a
        // null slot (end of line) simply compares false.
        
        if (ptl[idx] != null) {
            boolean args = true;
            if (WC.equalsIgnoreCase(ptl[idx])) {
                idx_wc = idx;
                idx++;
                args = false;
            }
            if (WZ.equalsIgnoreCase(ptl[idx])) {
                idx_wz = idx;
                idx++;
                args = false;
            }
            if (args) {
                idx_arg1 = idx;
                idx++;
                if (ptl[idx] != null) {
                    if (WC.equalsIgnoreCase(ptl[idx])) {
                        idx_wc = idx;
                        idx++;
                        args = false;
                    }
                    if (WZ.equalsIgnoreCase(ptl[idx])) {
                        idx_wz = idx;
                        idx++;
                        args = false;
                    }
                    if (args) {
                        idx_arg2 = idx;
                        idx++;
                    }
                }
            }
            
            // whatever is left must be flags, each at most once
            while (idx < ptl.length && ptl[idx] != null) {
                if (WC.equalsIgnoreCase(ptl[idx])) {
                    if (idx_wc != -1)
                        return ERR_DUPWX;
                    idx_wc = idx;
                }
                else if (WZ.equalsIgnoreCase(ptl[idx])) {
                    if (idx_wz != -1)
                        return ERR_DUPWX;
                    idx_wz = idx;
                }
                else
                    return ERR_GARBAGE;
                idx++;
            }
        }

        if (idx_sym != -1)  tokenizedline[IDX_SYM]  = ptl[idx_sym];
        if (idx_cond != -1) tokenizedline[IDX_COND] = ptl[idx_cond];
        tokenizedline[IDX_INST] = ptl[idx_inst];
        if (idx_arg1 != -1) tokenizedline[IDX_ARG1] = ptl[idx_arg1];
        if (idx_arg2 != -1) tokenizedline[IDX_ARG2] = ptl[idx_arg2];
        if (idx_wc != -1)   tokenizedline[IDX_WC]   = ptl[idx_wc];
        if (idx_wz != -1)   tokenizedline[IDX_WZ]   = ptl[idx_wz];

        return ERR_NONE;
    }
    
    
    /**
     * Separates a line by whitespace (and commas).
     *
     * Blanks, tabs and commas separate tokens and are dropped; '=' separates
     * tokens and is itself returned as a token. Everything from the first
     * ' (apostrophe), ; or { onwards is discarded as a comment. At most
     * IDX_WZ + 1 tokens are kept; any further text is dropped.
     *
     * @param l String line to separate
     * @return an array of IDX_WZ + 1 slots holding the tokens in order, with
     *         unused slots null; or null if the line holds no token (or is null)
     */
    protected String[] separate(String l) {
        if (l == null)
            return null;
        
        final int maxTokens = IDX_WZ + 1;
        String[] tl = new String[maxTokens];
        int ip = 0;  // start of the token being scanned
        int fp = 0;  // current scan position
        int idx = 0; // number of tokens stored so far
        String nl = l.trim();
        
        while (fp < nl.length() && idx < maxTokens) {
            char c = nl.charAt(fp);
            if (c == '\'' || c == ';' || c == '{')
                break; // comment: the rest of the line is ignored
            switch (c) {
                case '\t':
                case ' ':
                case ',':
                    if (ip < fp)
                        tl[idx++] = nl.substring(ip, fp);
                    fp++;
                    ip = fp;
                    break;
                case '=': // is also a separator, but is included
                    if (ip < fp)
                        tl[idx++] = nl.substring(ip, fp);
                    // storing the pending token may have filled the last slot
                    if (idx < maxTokens)
                        tl[idx++] = "=";
                    fp++;
                    ip = fp;
                    break;
                default:
                    fp++;
            }
        }
        
        // token still open when the scan stopped
        if (ip < fp && idx < maxTokens)
            tl[idx++] = nl.substring(ip, fp);
        
        if (idx == 0)
            return null;
        
        return tl;
    }
    
    /**
     * Compiles the instruction held in tokenizedline.
     * 
     * Returns an <code>Instruction</code>, if and only if the tokenized line contains a valid instruction.
     * A symbol can be created if it is allowed and if one exists: a label gets
     * the current address, an equate ("=") gets the value of its argument.
     * Equates never produce an instruction; an equate without a label is
     * reported as ERR_ARGMISS.
     *
     * The 6-bit opcode is widened to long before it is shifted into place, so
     * the assembled word is never sign-extended.
     *
     * On failure null is returned and lastError holds the reason. null with
     * lastError untouched is returned for label-only and empty lines.
     *
     * @param addr int Address of the current point
     * @param createSymbol boolean flags if a symbol can be created if one exists
     *        (first pass); when false, an error left in lastError suppresses the instruction
     * @return the assembled instruction, or null
     */
    
    protected Instruction compileTokenized(int addr, boolean createSymbol) {
        int cond = IF_ALWAYS;
        Symbol sym = null;
        
        if (tokenizedline[IDX_COND] != null)
            cond = getConditionMask(tokenizedline[IDX_COND]);
        
        if (tokenizedline[IDX_INST] == null) {// empty line, no instruction, we check if a symbol can be created
            if (tokenizedline[IDX_SYM] != null && createSymbol) {
                sym = new Symbol(tokenizedline[IDX_SYM], addr);
                symboltable.add(sym); // adds symbol to symbol table
            }
            return null;
        }
        
        lastError = ERR_NONE;
        
        byte[] op_mask = getOpcodeMask(tokenizedline[IDX_INST]);
        if (op_mask == null) { // mnemonic without a table entry
            notFoundSymbol = tokenizedline[IDX_INST];
            lastError = ERR_UKNINST;
            return null;
        }
        
        final int kind = op_mask[OP_MASK_C];
        final String arg1 = tokenizedline[IDX_ARG1];
        final String arg2 = tokenizedline[IDX_ARG2];
        
        // widen BEFORE shifting: an int shift would set bit 31 for opcodes >= 32
        // and sign-extend into the upper half of the long
        long op = ((long) op_mask[OP_MASK_OP]) << 26;
        
        if (arg1 == null && kind != 0 && kind != 2) {
            lastError = ERR_ARGMISS; // too few args
            return null;
        }
        
        if (kind >= 1 && kind <= 5) {
            // common part of every real instruction: condition, R bit, effect flags
            op |= cond << 18;
            if (op_mask[OP_MASK_R] == 1)        op |= MASK_R;
            if (tokenizedline[IDX_WC] != null)  op |= MASK_C;
            if (tokenizedline[IDX_WZ] != null)  op |= MASK_Z;
        }
        
        switch (kind) {
            case 0: // only used by nop, no condition accepted
                break;
            case 1: { // regular two operand instruction
                long dest = getSymbolAddr(arg1);
                if (dest >= REG_FORB0 && dest <= REG_FORB1) {
                    lastError = ERR_RDONLY; // read-only destination
                    return null;
                }
                op |= dest << 9; // d
                if (arg2 == null) {
                    lastError = ERR_ARGMISS; // too few args
                    return null;
                }
                if (arg2.charAt(0) == '#')
                    op |= getLiteral(arg2) | MASK_I;
                else
                    op |= getSymbolAddr(arg2);
                break;
            }
            case 2: // used by ret (is a jmp, written at compile time)
                op |= MASK_I;
                break;
            case 3: // field s contains part of the opcode mask, only field d is used
                op |= getSymbolAddr(arg1) << 9; // d
                op |= op_mask[OP_MASK_S];
                break;
            case 4: // call #label: s is the target, d is the symbol "<label>_ret"
                if (arg1.charAt(0) != '#') {
                    lastError = ERR_ARGMISS; // too few args
                    return null;
                }
                op |= getLiteral(arg1) | MASK_I;
                op |= getSymbolAddr(arg1.substring(1) + "_ret") << 9; // d
                break;
            case 5: // jmp
                if (arg1.charAt(0) == '#')
                    op |= getLiteral(arg1) | MASK_I;
                else
                    op |= getSymbolAddr(arg1); // s
                break;
            case 6: // equate, define, = just a symbol, no real instruction
                if (tokenizedline[IDX_SYM] == null) {
                    // nothing to name; also keeps both passes from disagreeing on addresses
                    lastError = ERR_ARGMISS;
                    return null;
                }
                if (createSymbol) { // equates are ignored in the second pass
                    sym = new Symbol(tokenizedline[IDX_SYM], (int) getSymbolAddr(arg1));
                    symboltable.add(sym);
                }
                return null; // no instruction
            case 7: // reserve space, long
                op = getSymbolAddr(arg1);
                break;
        }
        // now we can create the symbol, if it exists and is not an equate
        
        if (tokenizedline[IDX_SYM] != null && createSymbol) {
            sym = new Symbol(tokenizedline[IDX_SYM], addr);
            symboltable.add(sym); // adds symbol to symbol table
        }
        if (lastError != ERR_NONE && !createSymbol) { // symbol not found during second pass
            return null;
        }
        
        Instruction i = new Instruction();
        i.setOpcode(op);
        i.setAddr(addr);
        i.setSymbol(sym);
        i.setSize(4);
        
        return i;
    }    
    
    /**
     * Tells whether the symbol table already holds an entry matching the
     * given name.
     *
     * @param s name to look for, may be null
     * @return true when some table entry reports a match for s; false when
     *         s is null or no entry matches
     */
    protected boolean symbolExists(String s) {
        if (s == null) {
            return false;
        }

        boolean found = false;
        final int count = symboltable.size();
        int idx = 0;
        // stop scanning as soon as one entry matches
        while (!found && idx < count) {
            Symbol candidate = (Symbol) symboltable.elementAt(idx);
            found = candidate.match(s);
            idx++;
        }
        return found;
    }
    
    /**
     * Tells whether the given token is one of the names listed in conds,
     * ignoring letter case.
     *
     * @param s token to test, may be null
     * @return true when s equals an entry of conds (case-insensitive);
     *         false when s is null or nothing matches
     */
    protected boolean isCondition(String s) {
        final int count = conds.length;
        if (s != null) {
            int idx = 0;
            while (idx < count) {
                if (s.equalsIgnoreCase(conds[idx])) {
                    return true;
                }
                idx++;
            }
        }
        return false;
    }
    
    /**
     * Tells whether the given token is one of the mnemonics listed in
     * opcodes, ignoring letter case.
     *
     * @param s token to test, may be null
     * @return true when s equals an entry of opcodes (case-insensitive);
     *         false when s is null or nothing matches
     */
    protected boolean isInstruction(String s) {
        final int count = opcodes.length;
        boolean known = false;
        // the loop is skipped entirely for a null token
        for (int idx = 0; s != null && !known && idx < count; idx++) {
            known = s.equalsIgnoreCase(opcodes[idx]);
        }
        return known;
    }
    
    
    /**
     * Looks up the mask that belongs to a mnemonic. The entry of
     * opcodes_mask at the same index as the first case-insensitive match
     * in opcodes is returned.
     *
     * @param s mnemonic to look up, may be null
     * @return the matching opcodes_mask entry, or null when s is null or
     *         is not listed in opcodes
     */
    protected byte[] getOpcodeMask(String s) {
        final int count = opcodes.length;
        if (s != null) {
            int idx = 0;
            while (idx < count) {
                if (s.equalsIgnoreCase(opcodes[idx])) {
                    return opcodes_mask[idx];
                }
                idx++;
            }
        }
        return null;
    }
    
    /**
     * Translates a condition name into its condition value. The names in
     * conds and the values in condmap are paired by index.
     *
     * @param s condition name, may be null
     * @return the condmap entry paired with the first case-insensitive
     *         match in conds; IF_ALWAYS when s is null or not listed
     */
    protected int getConditionMask(String s) {
        final int count = conds.length;
        if (s != null) {
            for (int idx = 0; idx != count; idx++) {
                boolean sameName = s.equalsIgnoreCase(conds[idx]);
                if (sameName) {
                    return condmap[idx]; // value paired with this name
                }
            }
        }
        return IF_ALWAYS;
    }

    /**
     * Resolves an operand to a value.
     *
     * The text is first tried as a number; a valid number is returned
     * masked with MAX_LITERAL. Anything else is looked up in the symbol
     * table and the address of the first matching symbol is returned.
     * When neither works, the name is stored in notFoundSymbol, lastError
     * is set to ERR_SYMNF and zero is returned.
     *
     * @param s number or symbol name, may be null
     * @return the masked number, the symbol address, or zero when s is
     *         null or cannot be resolved
     */
    protected long getSymbolAddr(String s) {
        if (s == null) {
            return 0;
        }

        long number;
        try {
            number = parseNumber(s);
        } catch (java.lang.NumberFormatException notANumber) {
            // not numeric: fall back to a symbol table lookup
            final int count = symboltable.size();
            int idx = 0;
            while (idx < count) {
                Symbol candidate = (Symbol) symboltable.elementAt(idx);
                if (candidate.match(s)) {
                    return candidate.getAddr();
                }
                idx++;
            }
            // nothing matched: record which symbol is missing
            notFoundSymbol = s;
            lastError = ERR_SYMNF;
            return 0;
        }
        return number & MAX_LITERAL;
    }
    
    /**
     * Resolves an immediate operand. The first character of the operand
     * (the '#' marker in the callers of this method) is dropped and the
     * rest is taken either as a number or as a symbol name.
     *
     * @param s operand text including its leading marker character, may
     *          be null
     * @return 0 when s is null; the number or symbol address masked with
     *         MAX_LITERAL when the operand resolves; -1 when the operand
     *         is neither a number nor a known symbol (getSymbolAddr has
     *         then recorded the missing name and ERR_SYMNF)
     */
    protected long getLiteral(String s) {
        if (s == null)
            return 0;

        String ns = s.substring(1); // text after the marker character
        long value;

        try {
            value = parseNumber(ns);
        } catch (java.lang.NumberFormatException e) {
            value = getSymbolAddr(ns);
            // getSymbolAddr answers 0 both for an unknown symbol and for a
            // symbol whose address really is 0; only the former is an error,
            // so a zero result alone must not be reported as -1
            if (value == 0 && !symbolExists(ns))
                return -1;
        }
        return value & MAX_LITERAL;
    }
    
    /**
     * Parses a string as a number.
     *
     * Accepts:
     *
     * Hex ($ prefix), decimal (no prefix) and binary (% prefix) numbers.
     * Digits may be separated by '_', which is skipped. Letter case is
     * ignored. A bare prefix ("$" or "%") or a string made only of '_'
     * yields 0.
     *
     * @param n text to parse
     * @return the parsed value; overflow is not checked
     * @throws java.lang.NumberFormatException if n is null or empty, does
     *         not start with '$', '%', '_' or a decimal digit, or holds a
     *         character that is not a digit of the selected base
     */
    protected long parseNumber(String n) throws java.lang.NumberFormatException {
        // charAt(0) on an empty string would raise
        // StringIndexOutOfBoundsException, which callers that catch
        // NumberFormatException (e.g. for the operand "#") do not handle
        if (n == null || n.length() == 0)
            throw new java.lang.NumberFormatException("empty number");

        n = n.toLowerCase();

        final int radix;
        final int start; // index of the first digit, after any prefix
        char first = n.charAt(0);
        if (first == '$') {
            radix = 16;
            start = 1;
        } else if (first == '%') {
            radix = 2;
            start = 1;
        } else if ((first >= '0' && first <= '9') || first == '_') {
            radix = 10;
            start = 0;
        } else {
            throw new java.lang.NumberFormatException("not a number: " + n);
        }

        long value = 0;
        for (int p = start; p < n.length(); p++) {
            char c = n.charAt(p);
            if (c == '_')
                continue; // digit group separator

            int digit;
            if (c >= '0' && c <= '9')
                digit = c - '0';
            else if (c >= 'a' && c <= 'f')
                digit = c - 'a' + 10;
            else
                digit = radix; // forces the range check below to fail

            if (digit >= radix)
                throw new java.lang.NumberFormatException("invalid digit '" + c + "' in " + n);

            value = value * radix + digit;
        }
        return value;
    }
    
    /**
     * One assembled line: the encoded opcode together with the address it
     * was assembled at, its size and the symbol (label) defined on that
     * line, if any.
     */
    protected class Instruction {
        /** Encoded opcode value. */
        long opcode;
        /** Address the instruction was assembled at. */
        int addr;
        /** Size of the instruction; the assembler sets 4 for a regular instruction. */
        int size;
        /** Symbol defined on the same line, or null when the line has none. */
        Symbol sym;
        
        /**
         * Stores the encoded opcode.
         *
         * @param o encoded opcode value
         */
        public void setOpcode(long o) {
            opcode = o;
        }
        
        /**
         * Stores the address of the instruction.
         *
         * @param a address the instruction was assembled at
         */
        public void setAddr(int a) {
            addr = a;
        }
        
        /**
         * Stores the size of the instruction.
         *
         * @param s size of the instruction
         */
        public void setSize(int s) {
            size = s;
        }
        
        /**
         * Attaches the symbol defined on the instruction's line.
         *
         * @param s the symbol, or null when the line defines none
         */
        public void setSymbol(Symbol s) {
            sym = s;
        }
        
        /**
         * @return the encoded opcode value
         */
        public long getOpcode() {
            return opcode;
        }
        
        /**
         * @return the address the instruction was assembled at
         */
        public int getAddr() {
            return addr;
        }
        
        /**
         * @return the size of the instruction
         */
        public int getSize() {
            return size;
        }
        
        /**
         * @return the attached symbol, or null when there is none
         */
        public Symbol getSymbol() {
            return sym;
        }
        
        /**
         * Returns the name of the attached symbol.
         *
         * @return the symbol name, or null when no symbol is attached
         */
        public String getSymbolname() {
            // lines without a label carry no symbol; answer null instead of
            // failing with a NullPointerException
            return sym == null ? null : sym.getSymbolName();
        }
    }
}
