/* In other compilers, you generate a parse tree and a tree of code to be generated; usually, for convenience, the same tree data structure is used for both. Unfortunately because of the 'virtual compiling machine' nature of icode, we cannot use the stack directly, but have to go through the compilation process, which involves generating a tree of opcodes for code generation as in any other type of compiler. It would have been really nice if the descriptor format allowed opcode trees as part of its data, but unfortunately it only allows for terminals, so the data structure below could be seen as expanding the descriptor structure with extra fields to support code tree generation. Eventually in most of the places in my code where I'm handling tags, I'll be replacing the tags with these, which in terminal cases will merely be null wrappers for a tag. The trees built up thus will eventually be walked and code will be output. It's possible that the generated code could be a stack system and look very like the input icode, and if I were writing an icode interpreter that's probably what I'd do, but since I am generating C I will be building the short stack operations back up into as large trees as possible so that the output is reasonably human-readable C As an initial hack, I'm going to overload the cstack and use negative indexes to denote opcodes and positive indexes to denote descriptors. 0 is a descriptor. opcodes start at -1. To avoid getting signs inverted too often, the negation will be performed only at the point of the cpush(). Conceptually the opcodes below need not be the same as icodes, but then again there seems to be no reason to make them different. */ #define TAG 0 typedef struct opcode {/* can be either terminal tags, or more tree */ int op; /* icode byte - steal '0' for terminal tag */ int opdcount_or_tag; /* for convenience, how many operands */ // WOULD PREFER BELOW: int *opd; /* flex array of ints; ints are indexes into opcodes */ int opd[12]; /* array of indexes into opcodes for operands or params */ /* temp hack limits procedures to 12 parameters */ } opcode; opcode code[32*1024]; /* large during debugging, eventually small */ static int this_opcode = 0; /* pre-decrement stack index */ extern char *evalexpr(int tagnum); extern char *rawtag(int tagnum); extern char *cdecl(int tagnum); extern char *tag_rhs_value(int tagnum); extern char *tag_lhs_value(int tagnum); extern char *tag_rhs_addr(int tagnum); extern char *tag_lhs_addr(int tagnum); #include "dump.c" #define NOFUN 0 #define MONOFUN 1 #define BIFUN 2 #define PREFIX 3 #define INFIX 4 #define POSTFIX 5 /* imp used to have a postfix factorial! */ #define ARRAY 6 static int type[256]; /* code for how the icode is implemented in C, as above */ char *cop[256]; /* C operator as a string */ /* BOTH OF THE ABOVE MUST BE INITIALIASED, EITHER WITH ASSIGNMENTS IN A STARTUP ROUTINE, OR AS A STRING ARRAY! */ void good_cop(void) { cop['u'] = "+"; /* ADDA */ /* don't yet know what this does */ cop['w'] = "-"; /* SUBA */ /* don't yet know what this does */ cop['+'] = "+"; /* ADD */ cop['-'] = "-"; /* SUB */ cop['['] = "<<"; /* LSH */ cop[']'] = ">>"; /* RSH */ cop['v'] = "%"; /* MOD */ /* would be better to have two icodes, one for real div & one for int div */ cop['/'] = "/"; /* QUOT - remember reals!!! */ /* maybe steal reverse-divide for real divide, since makebinop will already have taken care of ordering the parameters... */ cop['Q'] = "/"; /* DIVIDE (reversed params) - ditto reals!!! */ cop['\\'] = "\\"; /* NOT */ /* (a bug in ctohtml here :-( ) */ cop['U'] = "-"; /* NEGATE */ cop['x'] = "imp_real_exp"; /* REXP */ cop['X'] = "imp_int_exp"; /* IEXP */ cop['!'] = "|"; /* OR */ cop['%'] = "^"; /* XOR */ cop['&'] = "&"; /* AND */ cop['*'] = "*"; /* MUL */ type['u'] = type['w'] = type['+'] = type['-'] = type['['] = type[']'] = type['v'] = type['/'] = type['Q'] = type['!'] = type['%'] = type['&'] = type['*'] = INFIX; type['\\'] = PREFIX; /* NOT */ type['U'] = PREFIX; /* NEGATE */ type['x'] = BIFUN; /* REXP */ type['X'] = BIFUN; /* IEXP */ } char *tag_rhs_value(int tagnum); char *evalexpr(int tagnum) { #define SLOP 10 /* making it a symbolic constant is no better programming practise than having it at all in the first place. Just a quick hack during development to be removed later. */ int op; char *opd1, *opd2, *result; if (tagnum >= 0) return(tag_rhs_value(tagnum)); tagnum = -tagnum; /* now an index into code[] */ /* this next line may become redundant, with suitable code changes */ if ((op = code[tagnum].op) == TAG) return(tag_rhs_value(code[tagnum].opdcount_or_tag)); /* operators are already ordered left->right if sequential execution is important */ if (op == 'a') { /* ARRAY */ opd1 = evalexpr(code[tagnum].opd[0]); opd2 = evalexpr(code[tagnum].opd[1]); result = malloc(strlen(opd1)+strlen(opd2)+SLOP); sprintf(result, "%s[%s]", opd2, opd1); return strdup(result); } else if (op == 'n') { /* RECORD FIELD */ opd1 = evalexpr(code[tagnum].opd[0]); result = malloc(strlen(opd1)+20+SLOP); /* need to check pointer versus object for . vs -> */ sprintf(result, "%s.{field %d}", opd1, tag[code[tagnum].opd[1]].data); return strdup(result); } else if (op == '$') { int a, b, c, t, f, u, i, s, x; char *fname, *name; int fi, mlen; name = (char *)tag[code[tagnum].opd[0]].data; /* Don't know if this is valid yet */ a = tag[code[tagnum].opd[0]].a; b = tag[code[tagnum].opd[0]].b; c = tag[code[tagnum].opd[0]].c; t = a>>4; f = a&15; x = c&7; c >>= 3; s = c&1; c >>= 1; i = c&1; c >>= 1; u = c&1; fname = tag_rhs_value(code[tagnum].opd[0]); /* should be simple */ /* fname will be lower case (or mangled) and a trailing space; name will be uppercase raw */ mlen = strlen(fname); for (fi = 1; fi < code[tagnum].opdcount_or_tag; fi++) { /* oops. */ /* TODO: if opd is a name type, be sure to call _address rather than _value */ mlen += strlen(tag_rhs_value(code[tagnum].opd[fi])); if (fi+1 < code[tagnum].opdcount_or_tag) mlen += strlen(", "); } result = malloc(mlen+SLOP+100); *result = '\0'; if (debug_input) sprintf(result+strlen(result), "/* t=%d f=%d x=%d s=%d i=%d u=%d */", t,f,x,s,i,u); /* be careful here - fname already has the "*" in front of it */ sprintf(result+strlen(result), "%s%s(", (f == 9 ? ""/* so don't need * here too */ : ""), fname); /* MAP CALL */ for (fi = 1; fi < code[tagnum].opdcount_or_tag; fi++) { /* oops. */ strcat(result, tag_rhs_value(code[tagnum].opd[fi])); if (fi+1 < code[tagnum].opdcount_or_tag) strcat(result, ", "); } strcat(result, ")"); return strdup(result); } else if (type[op]==INFIX) switch (op) { case 'u': /* ADDA */ /* don't yet know what this does */ case 'w': /* SUBA */ /* don't yet know what this does */ case '+': /* ADD */ case '-': /* SUB */ case '[': /* LSH */ case ']': /* RSH */ case 'v': /* MOD */ case '/': /* QUOT */ case 'Q': /* DIVIDE (reversed params) */ case '!': /* OR */ case '%': /* XOR */ case '&': /* AND */ case '*': /* MUL */ opd1 = evalexpr(code[tagnum].opd[0]); opd2 = evalexpr(code[tagnum].opd[1]); result = malloc(strlen(opd1)+strlen(opd2)+SLOP); sprintf(result, "(%s %s %s)", opd1, cop[op], opd2); return strdup(result); default: return "***UNHANDLED INFIX***"; } else if (type[op]==PREFIX) switch (op) { case '\\': /* NOT */ case 'U': /* NEGATE */ opd1 = evalexpr(code[tagnum].opd[0]); result = malloc(strlen(opd1)+SLOP); sprintf(result, "(%s%s)", cop[op], opd1); return strdup(result); default: return "***UNHANDLED PREFIX***"; } else if (type[op]==MONOFUN) switch (op) { default: return "***UNHANDLED MONOFUN***"; opd1 = evalexpr(code[tagnum].opd[0]); result = malloc(strlen(opd1)+SLOP); sprintf(result, "%s(%s)", cop[op], opd1); return strdup(result); } else if (type[op]==BIFUN) switch (op) { /* these are implemented as function calls in C */ case 'x': /* REXP */ case 'X': /* IEXP */ opd1 = evalexpr(code[tagnum].opd[0]); opd2 = evalexpr(code[tagnum].opd[1]); result = malloc(strlen(opd1)+strlen(opd2)+SLOP); sprintf(result, "%s(%s, %s)", cop[op], opd1, opd2); return strdup(result); default: return "***UNHANDLED BIFUN***"; } else { printf("ERROR: Unhandled op in evalexpr '%c'\n", op); return "***UNHANDLED OP***"; } } /* function param stuff still needs a little research into icode structure */ int newopcodecell(int icode) /* result is a positive number */ { --this_opcode; code[-this_opcode].op = icode; return -this_opcode; } int makeopcodeterminal(int tag) { --this_opcode; code[-this_opcode].op = TAG; code[-this_opcode].opdcount_or_tag = tag; return -this_opcode; } int makemonop(int icode, int tosop) { int op = newopcodecell(icode); code[op].opdcount_or_tag = 1; code[op].opd[0] = tosop; return op; } int makeemptyfncall(void) { return 0; } void addfnparam(int tos) { } /* aka asspar */ int setfncall(int fn, int tos) { return fn; } int makebinop(int op, int sos, int tos) { int cell = newopcodecell(op); code[cell].opdcount_or_tag = 2; code[cell].opd[0] = sos; /* GET OPD OFF STACK!!! */ code[cell].opd[1] = tos; return cell; } int makeiconsttag(int iconst) { char tmp[32]; /* init value of thisitag is off array end */ /* ideally we should scan the array to see if the constant has already been seen */ /* currently not checking for overlap with tag array :-( */ thisitag -= 1; sprintf(tmp, "%d", iconst); tag[thisitag].data = iconst; tag[thisitag].a = (1<<4)+1 /* int value */; tag[thisitag].b = 1; tag[thisitag].c = 2 /* const */; return thisitag; } int makesconsttag(char *sconst) { /* should we also add the 'indirect object' tag? */ thisitag -= 1; tag[thisitag].data = (int)strdup(sconst); tag[thisitag].a = (3<<4)+1 /* string value */; tag[thisitag].b = 255; /* length? */ tag[thisitag].c = 2 /* const */; return thisitag; } void cpush(int tag) { *++cstackp = tag; } int cpop(void) { int val = *cstackp; if (cstackp == cstack) { fprintf(stdout, "*** stack underflow\n"); fflush(stdout); *(int *)0 = 0; /* force backtrace */ } cstackp -= 1; return val; } void call_tos_sos(char *fn) { int tos, sos; /*c("call_tos_sos");*/ printf("/* TODO call_tos_sos */\n"); tos = cpop(); sos = cpop(); cpush(tos); /* for now - needs to be operated on */ /*printf("%sPush(%s(%s,%s));\n", indent, fn, tag_rhs_value(tos), tag_rhs_value(sos));*/ } void tos_op_sos(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); binop = -makebinop(icodeop, tos, sos); cpush(binop); } void sos_op_tos(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); binop = -makebinop(icodeop, sos, tos); cpush(binop); } void monop(int op) { int tos, monop; tos = cpop(); monop = -makemonop(op, tos); cpush(monop); /*printf("%sPush(%s%s);\n", indent, op, tag_rhs_value(tos));*/ } void pushi(int litconst) { /*printf("%sPushI(0x%08x);\n", indent, litconst);*/ cpush(makeiconsttag(litconst)); } void pushs(char *litconst) { /*printf("%sPushS(@0x%08x);\n", indent, (int)litconst);*/ cpush(makesconsttag(litconst)); } void push(int tagnum) { /*printf("%sPush(%s)\n", indent, rawtag(tagnum));*/ cpush(tagnum); } void asspar(void) { int tos, sos, op; /*c("ASSPAR");*/ tos = cpop(); sos = cpop(); /* attach tos as the next parameter to function sos() */ if (sos >= 0) { /* sos is an unencapsulated procedure call. So wrap it in an opcode */ op = newopcodecell('$'); code[op].opdcount_or_tag = 1; code[op].opd[0] = sos; /* first param is the function itself */ } else op = -sos; /*fprintf(stdout, "attaching opd #%d to opcode %d\n", code[op].opdcount_or_tag, op); fflush(stdout);*/ code[op].opd[code[op].opdcount_or_tag++] = tos; cpush(-op); /* replace the modified function descriptor */ /* and remove the parameter */ } void call(void) { int tos, op, i; /* tos is a function/map/proc/pred call; either a tag with no params, or an opcode with the params in the attached array */ /*c("CALL");*/ tos = cpop(); if (tos >= 0) { /* tos is an unencapsulated procedure call. So wrap it in an opcode */ op = newopcodecell('$'); code[op].opdcount_or_tag = 1; code[op].opd[0] = tos; /* first param is the function itself */ tos = -op; } op = -tos; if (code[op].op != '$') { printf("TOS:\n"); printf(" %s\n", evalexpr(tos)); fflush(stdout); assert(code[op].op == '$'); } tos = code[op].opd[0]; assert(tos >= 0); /* if it is a function, push it back; but if it is a procedure, print out a call to it. */ if ((tag[tos].a&15) == 7) { fprintf(stdout, "%s%s;\n", indent, tag_rhs_value(-op)/*, tag[tos].a&15*/); return; } cpush(-op); } void assval(void) { int tos, sos; tos = cpop(); sos = cpop(); /* leaves stack empty - so check for it, dummy! */ printf("%s%s = %s;\n", indent, tag_lhs_value(sos), tag_rhs_value(tos)); } void assref(void) { int tos, sos; tos = cpop(); sos = cpop(); /* leaves stack empty - so check for it, dummy! */ printf("%s%s = %s;\n", indent, tag_lhs_addr(sos), tag_rhs_addr(tos)); } void arrayaccess(int icodeop) { int tos, sos, binop; tos = cpop(); sos = cpop(); binop = -makebinop(icodeop, tos, sos); cpush(binop); } void fieldselect(int icodeop) { int subtag, recordname, binop; subtag = cpop(); recordname = cpop(); subtag = makeiconsttag(subtag); binop = -makebinop(icodeop, recordname, subtag); cpush(binop); } void result(int opcode) { int tos; tos = cpop(); /* leaves stack empty - so check for it, dummy! */ printf("%sreturn(%s);\n", indent, (opcode == 'M' /* MAP result */ ? tag_rhs_addr(tos) : tag_rhs_value(tos))); } void ifcond(int opcode, int condition, int label) { int tos, sos; /* if (pop op pop) then goto label */ tos = cpop(); if (opcode == 'k') { printf("%sif (!(%s)) goto L_%04x;\n", indent, tag_rhs_value(tos), lineno[label]); } else if (opcode == 't') { printf("%sif (%s) goto L_%04x;\n", indent, tag_rhs_value(tos), lineno[label]); } else { sos = cpop(); printf("%sif (%s %s %s) goto L_%04x;\n", indent, tag_rhs_value(sos), cond[condition], tag_rhs_value(tos), lineno[label]); if (opcode == '"') cpush(tos); /* JUMPIFD - double-sided */ } } void for_loop(int opcode, int nest_level, int contlab, int exitlab) { int co, ex; int initial, final, increment, control; initial = cpop(); final = cpop(); increment = cpop(); control = cpop(); //printf("// FOR CONT=%04x EXIT=%04x\n", contlab, exitlab); printf("%sfor (%s = %s; %s += %s; %s != %s) {\n", indent, tag_lhs_value(control), tag_rhs_value(initial), tag_lhs_value(control), tag_rhs_value(increment), tag_rhs_value(control), tag_rhs_value(final)); co = get_next_lineno(); //printf("//forloop: setting lineno[%04x] = %04x (continue)\n", contlab, co); lineno[contlab] = co; ex = get_next_lineno(); //printf("//forloop: setting lineno[%04x] = %04x (exit)\n", exitlab, ex); lineno[exitlab] = ex; // fprintf (stdout, "L_%04x:\n", lineno[contlab]); //??? setbackwardlab (d, &linenod) /*printf("%s//TODO: change goto L_%04x and goto L_%04x into continue and break\n", indent, lineno[contlab], lineno[exitlab]);*/ }