static char *rcsid="$Id: simple.c,v 1.9 2011/05/04 14:02:36 gtoal Exp gtoal $";

// The file you are reading is a 'folded' version of the source file, simple.c
// - you can click on any of the sections displayed in <<french style brackets>>
// and the underlying code will be expanded.  Clicking on that code will undo
// the expansion and replace it with the summary again.  This style of display
// is intended to make it easier to see the important parts of the program and
// hide the boring housekeeping details which distract from understanding the
// overall picture on first reading.

« 'to do' comments »
/* births are counted per day but deaths are not. fix... daylight model (sleep/warm up/eat => migrate triggers) some lead, some follow, some stay at home. Important that followers and stay-at-homes retain genetic diversity to accomodate changes in environment. */
« Include files. »
« Tweakable parameters »
« Table used in conjunction with movement genes (really just crude approximation to sin and cos!) »
« Definition of a butterfly actor/object/record »
#define MALE 0 #define FEMALE 1 #define CATERPILLAR 0 #define ADULT 1 typedef struct critter { long long health; int hunger; int food_in_stomach; int sex; int days_before_egg_laying; int body_type; // pupa, caterpillar, butterfly int body_temp; // for overheating/freezing to death int wake_sleep; long long age_in_days; « A short note on genetic algorithms (from Kent Paul Dolan's Travelling Salesman page) »
«
What a genetic algorithm does is to imitate that highly successful technique seen in nature, evolution by natural selection. Because computer algorithms' generations can run so much faster than the generation times of nature, this very inefficient process that takes millions of years in nature to create a higher brow line, finds solutions much more quickly in a computer. Typically genetic algorithms employ these parts.
  • A fitness function, some way of deciding that one candidate solution is better than another is. In nature, this function is defined as survival to spawn a next generation. In a computer genetic algorithm, the fitness function is used to determine whether a genome survives into the next generation, or is used for determining which genomes get to participate in reproduction in the current generation (or both), at the programmer's whim.
  • One or more mutation mechanisms, that create modified next generation candidates irrespective of fitness, and usually without involving other candidates. In nature, this might be radiation or chemical influence. In nature as in the computer, most mutations are less fit than their parents, so this mechanism is seemingly very wasteful, but programmers care even less than uncaring nature does about the culls.
  • One or more reproduction mechanisms, that create new candidates with combinations of characteristics of one or more old candidates, probably biasing which ones get to participate in reproduction based on their fitness. In nature, this competition for the right to reproduce occurs in many forms, and reproduction can be sexual or asexual or both, depending on species. Genetic algorithms use similar approaches.
  • Some filtering mechanism that selects fitter individuals with higher probability to survive to the next generation. In nature this is typically competition for food, territory, mates, or other scarce resources. In genetic algorithms, it is some application of the fitness function. Notice that this is not the same as a guarantee, either in nature or in a computer program, that the fittest do survive. Good and bad luck still have full scope of operation. It is sufficient for evolution to take place that the fittest have a better chance to survive and reproduce than do the less fit.
I just mention this here to point out that I do know what a genetic algorithm is, and in fact I even know what a neural net is -- and this code is strictly neither. It's a very crude mechanism for now. If this code shows the expected result, we don't need to go the whole hog to make it a 'proper' genetic algorithm...
»
int movement_gene[365]; // compass points: 0 to 31 (full circle), strength for each direction, 0..100(%) int follower_gene; // does he follow the guy near to him? 0..100 int lat, lon; // world coords // int tag; struct critter *next; } critter;
« Helper functions. Watch out for these when making code thread-safe »
int main(int argc, char **argv) { /* Start with a large existing population */ « globals: be careful using these in threads »
// LOCAL PROCEDURES. (I use gcc's extension of nested procedures - algol/pascal style */ « Graphics support: 'draw frame' - dumps the world as an image, which is later converted to jpg format »
void draw_frame(int seq) { // seq is absolute time stamp FILE *image; char map[480][480]; int x, y, ybar; static char fname[256], command[1024]; // first we'll write an ascii-format pnm file (trivially simple format) sprintf(fname, "frame%08d.pnm", seq); // then we'll convert the frame to jpeg. Later we'll join all the jpegs into an mp4. sprintf(command, "pnmtojpeg < frame%08d.pnm > frame%08d.jpg", seq, seq); image = fopen(fname, "w"); if (image == NULL) { fprintf(stderr, "Cannot open %s\n", fname); exit(1); } « Initialise the frame to black »
individual = population; while (individual != NULL) { « for each butterfly, increment the count for the square it occupies. (we max out at 15 for convenience in drawing) »
} « Output the trivial header of the netpbm format file. »
for (ybar = 0; ybar < 480; ybar++) { « Loop over each row of the virtual space »
} fclose(image); « Convert from netpbm format to jpg »
}
« 'dump population' - once every 1000 iterations or so, we output the current population to a file. (overwriting the same file each time). This will be used when we reach a stable model to seed the first generation, rather than starting from an arbitrary "god's view" of the world and evolving to here. Note that the dump file contains *everything* that is needed to restart a computation after a crash - it has all the internal state of the remaining butterflies, plus all the significant globals. Although we don't yet have a 'restart' procedure, if we ever get into very long production runs, we'll add the code to load these files back in. Note that the state is only ever dumped outside of the main simulation loop, so that thread safety is not an issue. »
« The initial population is created here. This is an unrealistic population but it is sufficient as a starting place for evolution. To be wholly realistic we would have to simulate the world going back to the first microbe! »
« This is a fairly realistic 'breed' procedure which mixes genes from both parents. (Although in the early stages of code development we cheat and simply clone the father.) »
« If there is any housekeeping we want to do at the start of a timeslice, do it here »
// In order to later convert this code to run multiple threads on a // multi-core processor, this procedure must be thread-safe. That // means (roughly) that no variables are accessed outside of those // within the 'individual' record/object. (read-only access to global // variables is OK, as long as those variables don't change at all during // the parallel for-loop which invokes this procedure) // currently this 'individual' shadows the global declaration, and really // it is all quite messy. Need to restructure it so that the thread-safe // code (not just 'simulate' but everything called in the parallelizable // loop, such as end_timeslice) uses a local, frozen, individual. void simulate(critter *individual) { int dir, day = time_elapsed/TICKS_PER_DAY; if (individual->health <= 0LL) return; if (individual->body_type == CATERPILLAR) { if (individual->age_in_days >= (long long)(50+posrand(10))) { individual->body_type = ADULT; individual->age_in_days = 1LL; } else if (individual->age_in_days >= 60) { // hard limit maximum caterpillar period individual->body_type = ADULT; individual->age_in_days = 1LL; } } individual->health -= 1LL; // just living expends some energy if (individual->health <= 0LL) { //if (day > 1900) fprintf(stderr, "Died from low health (1->0)!!!\n"); return; } if (individual->age_in_days >= ((long long) posrand(100)+365LL) * 5LL) { individual->health = 0LL; // died of old age at something over 5 years return; } // if we found our way to mexico (-200,-1200, 200, 1000) // during arbitrary breeding season, breed (temp crude hack) // To do: sexual coupling if ( (individual->body_type == ADULT) && (individual->days_before_egg_laying == 0) && (individual->age_in_days > 100LL) && date_range(day, MARCH, JULY) && within_geographical_area(individual->lat, individual->lon, MEXICO) ) { critter *child; int i; // to do: find nearest female and breed // usually we randomly choose a parent gene rather than averaging them // for now, just clone fprintf(stdout, "Breeding at %d %d\n", individual->lat, individual->lon); for (i = 0; i < 30; i++) { // replacement rate: 30 births - arbitrary for now child = breed(individual, individual); // children inserted into linked list immediately after parents // - hopefully for better locality in multi-cpu implementation? fprintf(stdout, "Insert %d\n", child->tag); // linked-list implementation probably not thread-safe. Need to rethink this. child->next = individual->next; individual->next = child; } // stop the mother breeding again tomorrow: individual->days_before_egg_laying = posrand(20)+190; // (the loop above plus the line immediately before ought to be done // within the 'breed' procedure.) } else if ( (individual->body_type == ADULT) && date_range(day, MARCH, JULY) && within_geographical_area(individual->lat, individual->lon, MEXICO) ) { // it's party time! //fprintf(stdout, "[%d, %d] Eating at %d %d", individual->health, individual->tag, individual->lat, individual->lon); individual->health += 2LL; dir = day%365; assert((0 <= dir) && (dir < 365)); if (individual->movement_gene[dir] > 50) { individual->lat += lat_movement[dir]; individual->lon += lon_movement[dir]; } // stay in feeding zone when food still available i = 0; while (!within_geographical_area(individual->lat, individual->lon, MEXICO)) { individual->lat -= lat_movement[dir]; individual->lon -= lon_movement[dir]; if (i == 100) break; dir = posrand(365); individual->lat += lat_movement[dir]; individual->lon += lon_movement[dir]; } } else if ( (individual->body_type == ADULT) && date_range(day, AUGUST, DECEMBER) && within_geographical_area(individual->lat, individual->lon, NORTHEAST) ) { // good eatin' //fprintf(stdout, "[%d, %d] Eating at %d %d", individual->health, individual->tag, individual->lat, individual->lon); individual->health += 2LL; // cue migration instinct when fully healthy? //if (individual->health >= 60000) fprintf(stdout, " to excess!"); //fprintf(stdout, "\n"); dir = day%365; assert((0 <= dir) && (dir < 365)); if (individual->movement_gene[dir] > 50) { individual->lat += lat_movement[dir]; individual->lon += lon_movement[dir]; } // stay in feeding zone when food still available i = 0; while (!within_geographical_area(individual->lat, individual->lon, NORTHEAST)) { individual->lat -= lat_movement[dir]; individual->lon -= lon_movement[dir]; if (i == 100) break; dir = posrand(365); individual->lat += lat_movement[dir]; individual->lon += lon_movement[dir]; } } else if (individual->body_type == ADULT) { // randomly move according to genes for now. (caterpillars don't move much) // Later trigger by some external cause. // To enable migration, this has to be idempotent, not random // so this is probably not thread safe. We can work around this // by precomputing the randoms outside the loop, or by storing and // updating the random state inside each butterfly record // if ((0 <= (day%365) && (day%365) <= 100)) dir = posrand(16) * 2; else dir = posrand(32); assert(0 <= dir && dir < 32); dir = day%365; assert((0 <= dir) && (dir < 365)); if (individual->movement_gene[dir] > 50) { individual->lat += lat_movement[dir]; individual->lon += lon_movement[dir]; } // or if a follower and someone nearby just moved..? } if (abs(individual->lat) >= 2000 || abs(individual->lon) >= 2000) { // lost at sea, freezing in the arctic, burning in the desert //if (day > 1900) fprintf(stderr, "Off the map!\n"); individual->health = 0LL; return; } } « If there is any housekeeping we want to do at the end of a timeslice, do it here »
« one-off global initialisation »
// CODE SECTION « A little bit of system code to ensure the graphics library is pre-loaded. »
initialise_simulation(); // RUN THE SIMULATION // This is the loop we will eventually parallelise in order to // run on OMP (multi-core). Calls to simulate() should be possible // to execute in parallel as all the data these calls use is stored // within each butterfly object. // Note that this linked-list implementation is not parallelizable // or thread-safe. We need a better way of slicing up our universe // so that separate cores can take an equal slice of work. for (;;) { /* for each 15 minute time frame until all dead */ /* for each member, advance time forward one unit */ individual = population; // first member in list assert(individual != NULL); start_timeslice(); // SIMULATE ONE TIME SLICE // This loop can be parallelized with OMP; anything called within // this loop must be thread safe. The current linked list does not // lend itself well to parallelization. /* PARALLEL FOR */ for (;;) { // handle every butterfly in list for this one timeslice simulate(individual); // includes breeding and dying individual = individual->next; // new births are inserted after the parent and will therefore be found OK. if (individual == NULL) break; } « cull deaths »
assert(alive > 0); // fprintf(stderr, "Tick %d: %d births, %d deaths, %d remaining\n", // time_elapsed, births, deaths, alive); time_elapsed += 1; // 1 15-minute 'tick' day = time_elapsed/TICKS_PER_DAY; assert (population != NULL); end_timeslice(); if (day == 8000) { fprintf(stderr, "Simulation cut off after 8000 days - see population.txt\n"); break; } } // next time slice « on exit, run "ffmpeg -y -i frame%08d.jpg output.mp4" »
exit(0); }