/* In other compilers, you generate a parse tree and a tree of code to be generated; usually, for convenience, the same tree data structure is used for both. Unfortunately because of the 'virtual compiling machine' nature of icode, we cannot use the stack directly, but have to go through the compilation process, which involves generating a tree of opcodes for code generation as in any other type of compiler. It would have been really nice if the descriptor format allowed opcode trees as part of its data, but unfortunately it only allows for terminals, so the data structure below could be seen as expanding the descriptor structure with extra fields to support code tree generation. Eventually in most of the places in my code where I'm handling tags, I'll be replacing the tags with these, which in terminal cases will merely be null wrappers for a tag. The trees built up thus will eventually be walked and code will be output. It's possible that the generated code could be a stack system and look very like the input icode, and if I were writing an icode interpreter that's probably what I'd do, but since I am generating C I will be building the short stack operations back up into as large trees as possible so that the output is reasonably human-readable C As an initial hack, I'm going to overload the cstack and use negative indexes to denote opcodes and positive indexes to denote descriptors. 0 is a descriptor. opcodes start at -1. To avoid getting signs inverted too often, the negation will be performed only at the point of the cpush(). Conceptually the opcodes below need not be the same as icodes, but then again there seems to be no reason to make them different. */ #define TAG 0 typedef struct opcode {/* can be either terminal tags, or more tree */ int op; /* icode byte - steal '0' for terminal tag */ int opdcount_or_tag; /* for convenience, how many operands */ // WOULD PREFER BELOW: int *opd; /* flex array of ints; ints are indexes into opcodes */ int opd[12]; /* array of indexes into opcodes for operands or params */ /* temp hack limits procedures to 12 parameters */ } opcode; opcode code[32*1024]; /* large during debugging, eventually small */ static int this_opcode = 0; /* pre-decrement stack index */ extern int getrecordformat(int tagnum); extern char *evalexpr(int tagnum); extern char *rawtag(int tagnum); extern char *cdecl(int tagnum); extern char *tag_rhs_value(int tagnum); extern char *tag_lhs_value(int tagnum); extern char *tag_rhs_addr(int tagnum); extern char *tag_lhs_addr(int tagnum); #include "dump.c" #define NOFUN 0 #define MONOFUN 1 #define BIFUN 2 #define PREFIX 3 #define INFIX 4 #define POSTFIX 5 /* imp used to have a postfix factorial! */ #define ARRAY 6 static int type[256]; /* code for how the icode is implemented in C, as above */ char *cop[256]; /* C operator as a string */ /* BOTH OF THE ABOVE MUST BE INITIALIASED, EITHER WITH ASSIGNMENTS IN A STARTUP ROUTINE, OR AS A STRING ARRAY! */ void good_cop(void) { cop['u'] = "+"; /* ADDA */ /* don't yet know what this does */ cop['w'] = "-"; /* SUBA */ /* don't yet know what this does */ cop['+'] = "+"; /* ADD */ cop['-'] = "-"; /* SUB */ cop['['] = "<<"; /* LSH */ cop[']'] = ">>"; /* RSH */ /* would be better to have two icodes, one for real div & one for int div */ cop['/'] = "/"; /* QUOT - remember reals!!! */ /* maybe steal reverse-divide for real divide, since makebinop will already have taken care of ordering the parameters... */ cop['Q'] = "/"; /* DIVIDE (reversed params) - ditto reals!!! */ // cop['v'] = "imp_mod"; /* MOD */ cop['v'] = "abs"; /* MOD */ cop['\\'] = "\\"; /* NOT */ /* (a bug in ctohtml here :-( ) */ cop['U'] = "-"; /* NEGATE */ cop['x'] = "imp_real_exp"; /* REXP */ cop['X'] = "imp_int_exp"; /* IEXP */ cop['!'] = "|"; /* OR */ cop['%'] = "^"; /* XOR */ cop['&'] = "&"; /* AND */ cop['*'] = "*"; /* MUL */ type['u'] = type['w'] = type['+'] = type['-'] = type['['] = type[']'] = type['v'] = type['/'] = type['Q'] = type['!'] = type['%'] = type['&'] = type['*'] = INFIX; type['\\'] = PREFIX; /* NOT */ type['U'] = PREFIX; /* NEGATE */ type['v'] = MONOFUN; /* MOD */ type['x'] = BIFUN; /* REXP */ type['X'] = BIFUN; /* IEXP */ } char *tag_rhs_value(int tagnum); char *evalexpr(int tagnum) { #define SLOP 32 /* making it a symbolic constant is no better programming practise than having it at all in the first place. Just a quick hack during development to be removed later. */ int op; char *opd1, *opd2, *result; //memprintf("Evalexpr: %d\n", tagnum); if (tagnum >= 0) return(tag_rhs_value(tagnum)); tagnum = -tagnum; /* now an index into code[] */ /* this next line may become redundant, with suitable code changes */ if ((op = code[tagnum].op) == TAG) return(tag_rhs_value(code[tagnum].opdcount_or_tag)); /* operators are already ordered left->right if sequential execution is important */ if (op == 'a') { /* ARRAY */ opd1 = evalexpr(code[tagnum].opd[0]); opd2 = evalexpr(code[tagnum].opd[1]); result = malloc(strlen(opd1)+strlen(opd2)+SLOP); sprintf(result, "%s[%s]", opd2, opd1); return strdup(result); } else if (op == 'n') { /* RECORD FIELD */ int recordformat, fieldtag, fieldno, celltag, recordobject; /* op == 'n' */ //memprintf("tagnum = %d\n", tagnum); recordobject = code[tagnum].opd[0]; fieldno = code[tagnum].opd[1]; /* the record expression */ opd1 = evalexpr(recordobject); //memprintf("recordobject = %d = %s\n", recordobject, evalexpr(recordobject)); //memprintf("fieldno = %d\n", fieldno); recordformat = getrecordformat(recordobject); //memprintf("recordformat = %d\n", recordformat); celltag = -tag[recordformat].b; //memprintf("celltag = -tag[recordformat].b = %d\n", celltag); fieldtag = code[celltag].opd[fieldno]; //memprintf("fieldtag = code[celltag].opd[fieldno] = %d\n", fieldtag); result = malloc(strlen(opd1)+20+SLOP); /* need to know if opd1 is a reference or object. A hacky test is just to look for "*" at the start of the string, but I'm not sure that this is robust enough (eg what if "*x + *y" - don't want to convert that to "(x + *y)->" */ if (*opd1 == '*') { sprintf(result, "%s->", opd1+1); } else { sprintf(result, "%s.", opd1); } localtag(result, fieldtag, (char *)tag[fieldtag].data); return strdup(result); } else if (op == '$') { int a, b, c, t, f, u, i, s, x; char *fname, *name; int fi, mlen; name = (char *)tag[code[tagnum].opd[0]].data; /* Don't know if this is valid yet */ a = tag[code[tagnum].opd[0]].a; b = tag[code[tagnum].opd[0]].b; c = tag[code[tagnum].opd[0]].c; t = a>>4; f = a&15; x = c&7; c >>= 3; s = c&1; c >>= 1; i = c&1; c >>= 1; u = c&1; fname = tag_rhs_value(code[tagnum].opd[0]); /* should be simple */ /* fname will be lower case (or mangled) and a trailing space; name will be uppercase raw */ mlen = strlen(fname); for (fi = 1; fi < code[tagnum].opdcount_or_tag; fi++) { /* oops. */ /* TODO: if opd is a name type, be sure to call _address rather than _value */ mlen += strlen(tag_rhs_value(code[tagnum].opd[fi])); if (fi+1 < code[tagnum].opdcount_or_tag) mlen += strlen(", "); } result = malloc(mlen+SLOP+100); *result = '\0'; if (debug_input) sprintf(result+strlen(result), "/* t=%d f=%d x=%d s=%d i=%d u=%d */", t,f,x,s,i,u); /* be careful here - fname already has the "*" in front of it */ sprintf(result+strlen(result), "%s%s(", (f == 9 ? ""/* so don't need * here too */ : ""), fname); /* MAP CALL */ for (fi = 1; fi < code[tagnum].opdcount_or_tag; fi++) { /* oops. */ strcat(result, tag_rhs_value(code[tagnum].opd[fi])); if (fi+1 < code[tagnum].opdcount_or_tag) strcat(result, ", "); } strcat(result, ")"); return strdup(result); } else if (type[op]==INFIX) switch (op) { case 'u': /* ADDA */ /* don't yet know what this does */ case 'w': /* SUBA */ /* don't yet know what this does */ case '+': /* ADD */ case '-': /* SUB */ case '[': /* LSH */ case ']': /* RSH */ case 'v': /* MOD */ case '/': /* QUOT */ case 'Q': /* DIVIDE (reversed params) */ case '!': /* OR */ case '%': /* XOR */ case '&': /* AND */ case '*': /* MUL */ opd1 = evalexpr(code[tagnum].opd[0]); opd2 = evalexpr(code[tagnum].opd[1]); result = malloc(strlen(opd1)+strlen(opd2)+SLOP); /* heap lossage */ sprintf(result, "(%s %s %s)", opd1, cop[op], opd2); return strdup(result); /* heap lossage */ default: return "***UNHANDLED INFIX***"; } else if (type[op]==PREFIX) switch (op) { case '\\': /* NOT */ case 'U': /* NEGATE */ opd1 = evalexpr(code[tagnum].opd[0]); result = malloc(strlen(opd1)+SLOP); sprintf(result, "(%s%s)", cop[op], opd1); return strdup(result); default: return "***UNHANDLED PREFIX***"; } else if (type[op]==MONOFUN) { opd1 = evalexpr(code[tagnum].opd[0]); result = malloc(strlen(opd1)+SLOP); sprintf(result, "%s(%s)", cop[op], opd1); return strdup(result); } else if (type[op]==BIFUN) switch (op) { /* these are implemented as function calls in C */ case 'x': /* REXP */ case 'X': /* IEXP */ opd1 = evalexpr(code[tagnum].opd[0]); opd2 = evalexpr(code[tagnum].opd[1]); result = malloc(strlen(opd1)+strlen(opd2)+SLOP); sprintf(result, "%s(%s, %s)", cop[op], opd1, opd2); return strdup(result); default: return "***UNHANDLED BIFUN***"; } else { memprintf("ERROR: Unhandled op in evalexpr '%c'\n", op); *(int *)0 = 0; return "***UNHANDLED OP***"; } } /* function param stuff still needs a little research into icode structure */ int newopcodecell(int icode) /* result is a positive number */ { --this_opcode; code[-this_opcode].op = icode; return -this_opcode; } int makeopcodeterminal(int tag) { --this_opcode; code[-this_opcode].op = TAG; code[-this_opcode].opdcount_or_tag = tag; return -this_opcode; } int makemonop(int icode, int tosop) { int op = newopcodecell(icode); code[op].opdcount_or_tag = 1; code[op].opd[0] = tosop; return op; } int makebinop(int op, int sos, int tos) { int cell = newopcodecell(op); code[cell].opdcount_or_tag = 2; code[cell].opd[0] = sos; /* GET OPD OFF STACK!!! */ code[cell].opd[1] = tos; return cell; } int makeiconsttag(int iconst) { char tmp[32]; /* init value of thisitag is off array end */ /* ideally we should scan the array to see if the constant has already been seen */ /* currently not checking for overlap with tag array :-( */ thisitag -= 1; sprintf(tmp, "%d", iconst); tag[thisitag].data = iconst; tag[thisitag].a = (1<<4)+1 /* int value */; tag[thisitag].b = 1; tag[thisitag].c = 2 /* const */; return thisitag; } int makenewinternaltag(void) { return(--thisitag); } int makenconsttag(char *sconst) { /* should we also add the 'indirect object' tag? */ thisitag -= 1; tag[thisitag].data = (int)strdup(sconst); tag[thisitag].a = (3<<4)+1 /* string value */; tag[thisitag].b = -2; /* setting length to -2 denotes variable name */ tag[thisitag].c = 2 /* const, with indir flag */; return thisitag; } int makesconsttag(char *sconst) { /* should we also add the 'indirect object' tag? */ thisitag -= 1; tag[thisitag].data = (int)strdup(sconst); tag[thisitag].a = (3<<4)+1 /* string value */; tag[thisitag].b = -1; /* setting length to -1 denotes literal string */ tag[thisitag].c = 2 /* const, with indir flag */; return thisitag; } void cpush(int tag) { *++cstackp = tag; } int cpop(void) { int val = *cstackp; if (cstackp == cstack) { memprintf("*** stack underflow\n"); fflush(stdout); *(int *)0 = 0; /* force backtrace */ } cstackp -= 1; return val; } void call_sos_tos(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); binop = -makebinop(icodeop, sos, tos); cpush(binop); } void tos_op_sos(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); binop = -makebinop(icodeop, tos, sos); cpush(binop); } void sos_op_tos(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); binop = -makebinop(icodeop, sos, tos); cpush(binop); } void monop(int op) { int tos, monop; tos = cpop(); monop = -makemonop(op, tos); cpush(monop); /*memprintf("%sPush(%s%s);\n", indent, op, tag_rhs_value(tos));*/ } void pushi(int litconst) { /*memprintf("%sPushI(0x%08x);\n", indent, litconst);*/ cpush(makeiconsttag(litconst)); } void pushs(char *litconst) { /*memprintf("%sPushS(@0x%08x);\n", indent, (int)litconst);*/ cpush(makesconsttag(litconst)); } void push(int tagnum) { /*memprintf("%sPush(%s)\n", indent, rawtag(tagnum));*/ cpush(tagnum); } void asspar(void) { int tos, sos, op; /*c("ASSPAR");*/ tos = cpop(); sos = cpop(); /* attach tos as the next parameter to function sos() */ if (sos >= 0) { /* sos is an unencapsulated procedure call. So wrap it in an opcode */ op = newopcodecell('$'); code[op].opdcount_or_tag = 1; code[op].opd[0] = sos; /* first param is the function itself */ } else op = -sos; /*memprintf("attaching opd #%d to opcode %d\n", code[op].opdcount_or_tag, op); fflush(stdout);*/ //fprintf(stderr, "setting code[%d].opd[%d] to %d\n", op, code[op].opdcount_or_tag, tos); code[op].opd[code[op].opdcount_or_tag++] = tos; cpush(-op); /* replace the modified function descriptor */ /* and remove the parameter */ } void call(void) { int tos, op; /* tos is a function/map/proc/pred call; either a tag with no params, or an opcode with the params in the attached array */ /*c("CALL");*/ tos = cpop(); if (tos >= 0) { /* tos is an unencapsulated procedure call. So wrap it in an opcode */ op = newopcodecell('$'); code[op].opdcount_or_tag = 1; code[op].opd[0] = tos; /* first param is the function itself */ tos = -op; } op = -tos; if (code[op].op != '$') { memprintf("TOS:\n"); memprintf(" %s\n", evalexpr(tos)); fflush(stdout); assert(code[op].op == '$'); } tos = code[op].opd[0]; assert(tos >= 0); /* if it is a function, push it back; but if it is a procedure, print out a call to it. */ if ((tag[tos].a&15) == 7) { memprintf("%s%s;\n", indent, tag_rhs_value(-op)/*, tag[tos].a&15*/); return; } cpush(-op); } void assval(void) { char *source, *dest; int code, tos, sos; tos = cpop(); sos = cpop(); /* leaves stack empty - so check for it, dummy! */ dest = tag_lhs_value(sos); source = tag_rhs_value(tos); code = dest[1]; if ((*dest == '?') && ((code == 'R') || (code == 'S'))) { /* this is the biggest hack in this whole project */ /* ?R denotes a Record and ?S a String */ /* followed by variable and size */ /* e.g. ?Rfred[0];sizeof struct BFM */ char *size = dest+2; dest = strchr(size, ';'); *dest++ = '\0'; if (code == 'R') { if (strcmp(source, "0") == 0) { memprintf("%smemset(%s,%s,%s);\n", indent, dest, source, size); } else { memprintf("%smemcpy(%s,%s,%s);\n", indent, dest, source, size); } } else if (code == 'S') { memprintf("%sstrncpy(%s,%s,%s+1);\n", indent, dest, source, size); } else { assert((code == 'R') || (code == 'S')); } } else { memprintf("%s%s = %s;\n", indent, dest, source); } } void assref(void) { int tos, sos; tos = cpop(); sos = cpop(); /* leaves stack empty - so check for it, dummy! */ memprintf("%s%s = %s;\n", indent, tag_lhs_addr(sos), tag_rhs_addr(tos)); } void arrayaccess(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); /* BUG: TODO: have to percolate the type of the array item up to the node */ binop = -makebinop(icodeop, tos, sos); cpush(binop); } void fieldselect(int icodeop) { int subtag, recordname, binop; subtag = cpop(); recordname = cpop(); //fprintf(stderr, "Select: field %d of record %s\n", subtag, tag_rhs_value(recordname)); /* BUG: TODO: have to percolate the type of the field up to the node */ binop = -makebinop(icodeop, recordname, subtag); cpush(binop); } void result(int opcode) { int tos; tos = cpop(); /* leaves stack empty - so check for it, dummy! */ memprintf("%sreturn(%s);\n", indent, (opcode == 'M' /* MAP result */ ? tag_rhs_addr(tos) : tag_rhs_value(tos))); } void ifcond(int opcode, int condition, int label) { int tos, sos; /* if (pop op pop) then goto label */ tos = cpop(); if (opcode == 'k') { memprintf("%sif (!(%s)) goto L_%04x;\n", indent, tag_rhs_value(tos), lineno[label]); } else if (opcode == 't') { memprintf("%sif (%s) goto L_%04x;\n", indent, tag_rhs_value(tos), lineno[label]); } else if (opcode == 'C') { /* compare address */ sos = cpop(); memprintf("%sif (%s %s %s) goto L_%04x;\n", indent, tag_rhs_addr(sos), cond[condition], tag_rhs_addr(tos), lineno[label]); } else { sos = cpop(); memprintf("%sif (%s %s %s) goto L_%04x;\n", indent, tag_rhs_value(sos), cond[condition], tag_rhs_value(tos), lineno[label]); if (opcode == '"') cpush(tos); /* JUMPIFD - double-sided */ } } void for_loop(int opcode, int nest_level, int contlab, int exitlab) { int co, ex; int initial, final, increment, control; initial = cpop(); final = cpop(); increment = cpop(); control = cpop(); //memprintf("// FOR CONT=%04x EXIT=%04x\n", contlab, exitlab); memprintf("%sfor (%s = %s; %s += %s; %s != %s) {\n", indent, tag_lhs_value(control), tag_rhs_value(initial), tag_lhs_value(control), tag_rhs_value(increment), tag_rhs_value(control), tag_rhs_value(final)); co = get_next_lineno(); //memprintf("//forloop: setting lineno[%04x] = %04x (continue)\n", contlab, co); lineno[contlab] = co; ex = get_next_lineno(); //memprintf("//forloop: setting lineno[%04x] = %04x (exit)\n", exitlab, ex); lineno[exitlab] = ex; // memprintf("L_%04x:\n", lineno[contlab]); //??? setbackwardlab (d, &linenod) /*memprintf("%s//TODO: change goto L_%04x and goto L_%04x into continue and break\n", indent, lineno[contlab], lineno[exitlab]);*/ } /* returns the TAG number of the format, NOT the CELL number */ int getrecordformat(int tagnum) { int next, codenum, op; //memprintf("getrecordformat entry: %d\n", tagnum); if (tagnum >= 0) { //memprintf("TAG: %d (%s) => %d\n", tagnum, rawtag(tagnum), tag[tagnum].b); return(tag[tagnum].b); /* assuming it was a record */ } codenum = -tagnum; /* now an index into code[] */ //memprintf("Cell: %d\n", codenum); /* this next line may become redundant, with suitable code changes */ if ((op = code[codenum].op) == TAG) { /* does this make sense? */ //memprintf("punting array access to TAG at %d (%s)\n",/* //memprintf*/ code[codenum].opdcount_or_tag, evalexpr(code[codenum].opdcount_or_tag)); return(getrecordformat(code[codenum].opdcount_or_tag)); } /* operators are already ordered left->right if sequential execution is important */ if (op == 'a') { /* ARRAY */ //memprintf("*** punting array access to tag or cell at %d (%s) <-- this is the bug\n",/* //memprintf*/code[codenum].opd[1/*WAS0*/],evalexpr(code[codenum].opd[1/*WAS0*/])); return(getrecordformat(code[codenum].opd[1/*WAS0*/])); } else if (op == 'n') { /* RECORD FIELD */ //memprintf("looking at subfield for %d\n", codenum); return(getrecordformat(code[codenum].opd[0])); } else if (op == '$') { /* This is a bottom-level tag of some description - anything higher has recursed down to here already. If it is a record map or record array name etc etc the 'b' field should point to an opcode cell, and that opcode cell in turn should contain *both* the object *and* the object's recordformat - however this structure may not yet be implemented for all tag types */ /* OR */ /* This is a bottom-level tag of some description - anything higher has recursed down to here already. If it is a record map or record array name etc etc the 'b' field should point to a recordformat descriptor: either it is negative, in which case it is a cell containing the recordformat and its fields, or it is positive in which case it is a recordformat tag itself */ /* actually... since it is known to be negative, it must be an object descriptor, and presumably a record or procedure/fn/map call */ //memprintf("Object: %s\n", rawcell(codenum)); if (code[codenum].op == TAG) { //memprintf("Tag: %d\n", code[tagnum].opdcount_or_tag); return(getrecordformat(code[tagnum].opdcount_or_tag)); } else { next = code[codenum].opd[0]; //memprintf("Link: %s\n", (next < 0 ? rawcell(-next): rawtag(next))); return(getrecordformat(next)); } exit(0); } else { memprintf("ERROR: Unexpected op in getrecordformat '%c'\n", op); *(int *)0 = 0; return 0; } }