// TO DO: use pthreads/* and dequeue-phreads.c

#include <stdio.h>
#include <stdlib.h>

/*

how to handle custom blocks  don't try to mirror the procedure structure like
I've been doing here - instead, make all the code flat, regardless of which sprite
or hat block or custom block it is in - then treat the whole shebang like an
emulator, or more accurately a static binary translation, with procedure calling
being just another emulated instruction - one which *doesn't* cause a real
procedure call on the CPU's stack, but which only uses an explicit soft
return address stack and possibly a parameter stack. Remembering that each
thread (clone) may need its own stack...  Maybe use a linked list instead
of an array for the stack, which would allow multiple stacks to be entwined.

(Use the old trick of keeping a freelist inside the stack cells.  Needs a single
atomic allocate and free.

Can we use omp for this?  Take advantage of multiple cores?

 */

// Experimenting with calling custom blocks as coroutines.  It may
// work because there are no local variables except the parameters.

// Also can extend this code to write a queue scheduler for simulations.

// TO DO: wait(0) waits for end of frame
//        wait(n) waits for abs time currenttime+n

// waiting process does not keep checking the clock.  instead they should go
// into an ordered queue, and only the top of the queue is checked, so that
// anything due to run later than the head of the queue doesn't need to be checked.
// (be careful with multiple events scheduled for the same time)

// do we have a run queue and a waiting queue, or should there only be one queue?

typedef void COROUTINE (int my_clone_id);

typedef struct {
  COROUTINE *proc;
  int clone_id;
} QUEUE;

static int next_clone_id = 0;
QUEUE queue[100]; // should be off the heap, and realloc (x*2)+1 units whenever it's full

void start(COROUTINE x)
{
  queue[next_clone_id].proc = x;
  queue[next_clone_id].clone_id = next_clone_id;
  next_clone_id++;
}

void run_a_little(int clone_id)
{
  queue[clone_id].proc(clone_id);
}

#define _enter_ \
  static int lastline = 0; \
  switch(lastline) { \
  case 0:

#define _yield_ \
  lastline = __LINE__; \
  goto done; \
  case __LINE__:

// return has to do something to cause the program flow to revert to the original caller.
// perhaps when making the call, take the caller out of the queue altogether, and when
// returning, put the caller back in at the head of the queue?

// clone IDs will be significant in this process to avoid fuck ups where two threads are
// calling the same block - don't want them to return to the wrong caller!

#define _return_(_results_) do {_results_; return;} while (0) /* TO DO: still need a way to return to caller on final exit */

#define _exit_(_results_)			\
  } \
done: \
 _return_(_results_)

static int proc1_i;
void proc1_gf_1(int my_clone_id)
{
  _enter_;
  printf("proc1 starting\n");
  proc1_i = 1;
  for (;;) {
    proc1_i += 1;
    printf("proc1 executing %d\n", proc1_i);
    _yield_; // placed at the foot of every loop
  }
  _exit_();
}

void push(int p1) { // TO DO
}

int pop(void) {
  return 42; // TO DO
}

static int fib_param_n;
static int fib_result;
static int fib_localvar_temp1, fib_localvar_temp2;
void fib_init(int n)
{
  push(fib_param_n); fib_param_n = n; // preserve procedure parameters for recursion
}

void fib(int n);
void fib_body(int my_clone_id) // pseudo-automatic translation of Scratch code
{
  _enter_;
  if ( fib_param_n == 0 ) {
    fib_result = 0;
    _return_(fib_param_n = pop());
  } else if (fib_param_n == 1 ) {
    fib_result = 1;
    _return_(fib_param_n = pop());
  } else {
    fib(fib_param_n-1);
    fib_localvar_temp1 = fib_result;
    _yield_;
    fib(fib_param_n-2);
    fib_localvar_temp2 = fib_result;
    _yield_;
    fib_result = ( fib_localvar_temp1 + fib_localvar_temp2 ); // will this actually timeslice or do I need to yield *after* each procedure call too?
    _return_(fib_param_n = pop());
  }
  _exit_(fib_param_n = pop()); // restore any procedure parameters
} 

// fib is not a good example but it's a good awkward test case!
void fib(int n)
{
  fib_init(n); // set up parameters
  start(fib_body);
  // TO DO: block until the coroutine exits
}

static int proc2_i;
void proc1_gf_2(int my_close_id)
{
  _enter_;
  printf("proc2 starting\n");
  proc2_i = 1;
  for (;;) {
    fib(38);
    _yield_;
    proc2_i += 1;
    printf("proc2 executing %d, fib(38) = %d\n", proc2_i, fib_result);
    _yield_; // placed at the foot of every loop (and after every procedure call?  Or before?)
  }
  _exit_();
}

// there's something wrong with this.  Surely we need a stack of state for recursive calls?
// Or is it all implicit?

int main(int argc, char **argv) {
  int clone;
  start(proc1_gf_1);
  start(proc1_gf_2);
  for (;;) { // main loop
    for (clone = 0; clone < next_clone_id; clone++) {
      run_a_little(clone);
    }
  }

  exit(0);
  return(0);
}