#include <stdarg.h>
#include <stdio.h>
#include <assert.h>

/* This is a macro, ARRAY(), which creates space for a dynamically-bounded
   multi-dimensional array in C, for use in translating Imp's %array's to C.
   It creates the array off the stack using the non-portable alloca() call,
   and tweaks the lower bounds of each dimension so that arrays do not need
   to be based at 0.

   The complexity below is because one of the bounds parameters to
   an array declaration may be a function, and that function may
   itself declare a dynamically-bound array.  Unlikely but possible.

   We need to use alloca because otherwise a real malloc would
   have to be explicitly freed at the end of the procedure, which is
   not only an amazing amount of extra compiler complexity (eg to handle
   %signals) but is also an unnecessary run-time overhead as malloc is
   generally rather expensive.

   We can't use gcc's extended dynamically-bound arrays because
   they're always 0-based.  And you can't tweak the base by
   adjusting the pointer as in the code above.  I would also
   prefer the access to array elements to look natural, eg
   x = arr[1][n][-3];   ...   I don't want to have to make
   corrections to array indices at the point of usage, nor do
   I want to flatten the array into 1-D and simulate the dimensionality
   by scaling and adding the indices appropiately.

   Further complexity is caused by the possibility of the bounds
   of the array declaration being functions with side-effects, so
   care must be taken to evaluate them only once.

   push_evalonce takes the varargs and stores them in a stack,
   returning the base of the data.

   stacktop_evalonce returns the same data, leaving it on the stack

   pop_evalonce returns the same data, but then pops the stack.

   It uses a downward-growing stack so that the stacktop is always
   usable directly as the base of the lb/ub array, indexed by dim.

   Finally: data created by alloca() is lost on exit from the enclosing
   procedure, not the enclosing "{}" block, so we do have the option of
   wrapping the macro body in {} to take advantage of GCC's ability to use
   compound statements in macros, although we have not yet done so.
   The alloca() call must be made by the procedure which contains the
   declaration we're currently handling; it cannot be made in another
   procedure call.

 */

#define MAX_EVALONCE 1024

static int top_evalonce = MAX_EVALONCE; /* push then use, down-growing */

static int lb[MAX_EVALONCE]; /* lower bound */
static int ub[MAX_EVALONCE]; /* upper bound */
static int sz[MAX_EVALONCE]; /* size of object.  ptr size or final objsize */
static int dm[MAX_EVALONCE]; /* number of dimensions that this array has */

static int imp_datasize_inner(
                          int this,
                          int *dm, int *sz, int *lb, int *ub)
{
  return((ub[this]-lb[this]+1) * (sz[this] + (this == dm[this] ?
    0 : (imp_datasize_inner(this+1, dm, sz, lb, ub)))));
}

static void *imp_arrayalloc_inner(
                              int this,
                              int spacep,
                              int *dm, int *sz,
                              int *lb, int *ub)
{
  int i;
  int datap, p, ptr;

  if ((this == 0) && (dm[this] == 0)) {
    /* simple 1-D array */
    return (void *)(spacep - (lb[this] * sz[this]));
  }

  p = spacep; /* between spacep and datap are pointers to next level */
  datap = spacep + ((ub[this]-lb[this]+1) * sz[this]);
  for (i = lb[this]; i <= ub[this]; i++) {
    /* This code makes the assumption that sizeof(void *) <= sizeof(int) */
    /* and that sizeof(x **) == sizeof(x *)                              */

    if (this+1 == (dm[this])) { /* final bound, use contiguous array, don't recurse */
      ptr = datap - (lb[this+1] * sz[this+1]);
      datap += (ub[this+1] - lb[this+1] + 1) * sz[this+1];
    } else {
      ptr = (int)imp_arrayalloc_inner(this+1, datap, dm, sz, lb, ub);
      datap += imp_datasize_inner(this+1, dm, sz, lb, ub);
    }
    *(int *)p = ptr;
    p += sz[this];
  }
  return (void *)(spacep - (lb[this] * sz[this]));
}

void *imp_arrayalloc(void *space, int decl_base)
{
  return imp_arrayalloc_inner(0, (int)space, dm+decl_base, sz+decl_base, lb+decl_base, ub+decl_base);
}

int imp_datasize(int decl_base)
{
  return imp_datasize_inner(0, dm+decl_base, sz+decl_base, lb+decl_base, ub+decl_base);
}

/* convert from stdargs to regular data for other procedures */
int push_evalonce(int dims, int objsize, int lb1, ...)
{
  int i;
  va_list ap;
  int maxdim = dims - 1;
  top_evalonce -= dims;

  if (top_evalonce < 0) {
    fprintf(stderr,
      "dynamic array declaration: too many dynamically nested declarations\n");
    fflush(stderr);
    assert(top_evalonce >= 0);
  }

  for (i = 0; i < maxdim; i++) {
    dm[top_evalonce+i] = maxdim;
    sz[top_evalonce+i] = sizeof(void *);
  }

  dm[top_evalonce+maxdim] = maxdim;
  sz[top_evalonce+maxdim] = objsize;
  va_start(ap, lb1);
  lb[top_evalonce] = lb1; ub[top_evalonce] = va_arg(ap, int);
  for (i = 1; i < dims; i++) {
    lb[top_evalonce+i] = va_arg(ap, int);
    ub[top_evalonce+i] = va_arg(ap, int);
  }
  va_end(ap);
  return top_evalonce;
}

int pop_evalonce(int dims)
{
  int last = top_evalonce;
  top_evalonce += dims;
  return last; /* not interrupt-driven code so doesn't hurt to be outside */
               /* anyway, there will be no more pushing calls until done. */
}

/* extreme care needed here to evaluate stdarg params only once */
#define ARRAY(dims, type, lb, ub, ...) \
  (push_evalonce(dims, sizeof(type), lb, ub, ##__VA_ARGS__), \
   imp_arrayalloc(alloca(imp_datasize(top_evalonce)), pop_evalonce(dims)) \
  )

int main(int argc, char **argv) {
  int i, j, k, q;
  int *p1 = alloca(0);
  long long int ***la = ARRAY(3, long long int, -1, 1, -1, 1, -1, 1);
  int *ap1 = ARRAY(1, long long int, -2, 6);
  int **ap2 = ARRAY(2, int, 0, 1, 0, 1);
  int ***ap3 = ARRAY(3, int, 0, 1, 0, 1, 0, 1);
  int ***ap = ARRAY(3, int, 0, 5, 0, 16, 0, 20);
  long long int ***lap3 = ARRAY(3, long long int, -1, 1, -2, 2, -3, 3);
  int *p2 = alloca(0);
  int *p3;

  p3 = alloca(4);
  fprintf(stderr,
          "p1 = %08x,  p2 = %08x,  p3 = %08x,  la = %08x\n",
          p1, p2, p3, la);

  q = 42;
  for (k = -1; k <= 1; k++) {
    for (j = -1; j <= 1; j++) {
      for (i = -1; i <= 1; i++) {
        fprintf(stdout, "&(la[%d][%d][%d]) = %p, ", i,j,k, &(la[i][j][k]));
        la[i][j][k] = q++;
      }
      fprintf(stdout, "\n");
    }
    fprintf(stdout, "\n");
  }
  q = 42;
  for (k = -1; k <= 1; k++) {
    for (j = -1; j <= 1; j++) {
      for (i = -1; i <= 1; i++) {
        if (la[i][j][k] != q++) {
          fprintf(stderr, "error: cannot read back value.  item corrupt.\n");
        }
      }
    }
  }

  return(0);
}