static char *rcsid="$Id: simple.c,v 1.9 2011/05/04 14:02:36 gtoal Exp gtoal $";
// The file you are reading is a 'folded' version of the source file, simple.c
// - you can click on any of the sections displayed in <<french style brackets>>
// and the underlying code will be expanded. Clicking on that code will undo
// the expansion and replace it with the summary again. This style of display
// is intended to make it easier to see the important parts of the program and
// hide the boring housekeeping details which distract from understanding the
// overall picture on first reading.
« 'to do' comments »
« Include files. »
« Tweakable parameters »
« Table used in conjunction with movement genes (really just crude approximation
to sin and cos!) »// 0
// 31 1
// 30 2
//
// 29 3
//
// 28 4
//
// 27 5
//
// 26 6
//
// 25 7
//
// 24 . 8
//
// 23 9
//
// 22 10
//
// 21 11
//
// 20 12
//
// 19 13
//
// 18 14
//
// 17 15
// 16
//
/*
const int lat_movement[32] = { 3,3,2,2,1,1,0,0,
0,0,-1,-1,-2,-2,-3,-3,
-3,-3,-2,-2,-1,-1,0,0,
0,0,1,1,2,2,3,3
};
const int lon_movement[32] = {0,0,1,1,2,2,3,3,
3,3,2,2,1,1,0,0,
0,0,-1,-1,-2,-2,-3,-3,
-3,-3,-2,-2,-1,-1,0,0
};
*/
#define JANUARY 0
#define FEBRUARY 1
#define MARCH 2
#define APRIL 3
#define MAY 4
#define JUNE 5
#define JULY 6
#define AUGUST 7
#define SEPTEMBER 8
#define OCTOBER 9
#define NOVEMBER 10
#define DECEMBER 11
int lat_movement[365];
int lon_movement[365];
« Definition of a butterfly actor/object/record »#define MALE 0
#define FEMALE 1
#define CATERPILLAR 0
#define ADULT 1
typedef struct critter {
long long health;
int hunger;
int food_in_stomach;
int sex;
int days_before_egg_laying;
int body_type; // pupa, caterpillar, butterfly
int body_temp; // for overheating/freezing to death
int wake_sleep;
long long age_in_days;
« A short note on genetic algorithms (from Kent Paul Dolan's Travelling Salesman page) » «
What a genetic algorithm does is to imitate that highly successful technique
seen in nature, evolution by natural selection. Because computer algorithms'
generations can run so much faster than the generation times of nature, this
very inefficient process that takes millions of years in nature to create a
higher brow line, finds solutions much more quickly in a computer. Typically
genetic algorithms employ these parts.
- A fitness function, some way of deciding that one candidate solution is
better than another is. In nature, this function is defined as survival to
spawn a next generation. In a computer genetic algorithm, the fitness
function is used to determine whether a genome survives into the next
generation, or is used for determining which genomes get to participate in
reproduction in the current generation (or both), at the programmer's whim.
- One or more mutation mechanisms, that create modified next generation
candidates irrespective of fitness, and usually without involving other
candidates. In nature, this might be radiation or chemical influence. In
nature as in the computer, most mutations are less fit than their parents, so
this mechanism is seemingly very wasteful, but programmers care even less
than uncaring nature does about the culls.
- One or more reproduction mechanisms, that create new candidates with
combinations of characteristics of one or more old candidates, probably
biasing which ones get to participate in reproduction based on their fitness.
In nature, this competition for the right to reproduce occurs in many forms,
and reproduction can be sexual or asexual or both, depending on species.
Genetic algorithms use similar approaches.
- Some filtering mechanism that selects fitter individuals with higher
probability to survive to the next generation. In nature this is typically
competition for food, territory, mates, or other scarce resources. In genetic
algorithms, it is some application of the fitness function. Notice that this
is not the same as a guarantee, either in nature or in a computer program,
that the fittest do survive. Good and bad luck still have full scope of
operation. It is sufficient for evolution to take place that the fittest have
a better chance to survive and reproduce than do the less fit.
I just mention this here to point out that I do know what a genetic algorithm is,
and in fact I even know what a neural net is -- and this code is strictly neither.
It's a very crude mechanism for now. If this code shows the expected result, we don't
need to go the whole hog to make it a 'proper' genetic algorithm...
» int movement_gene[365]; // compass points: 0 to 31 (full circle), strength for each direction, 0..100(%)
int follower_gene; // does he follow the guy near to him? 0..100
int lat, lon; // world coords
//
int tag;
struct critter *next;
} critter;
« Helper functions. Watch out for these when making code thread-safe »
int main(int argc, char **argv)
{
/* Start with a large existing population */
« globals: be careful using these in threads »
// LOCAL PROCEDURES. (I use gcc's extension of nested procedures - algol/pascal style */
« Graphics support: 'draw frame' - dumps the world as an image, which
is later converted to jpg format » void draw_frame(int seq) { // seq is absolute time stamp
FILE *image;
char map[480][480];
int x, y, ybar;
static char fname[256], command[1024];
// first we'll write an ascii-format pnm file (trivially simple format)
sprintf(fname, "frame%08d.pnm", seq);
// then we'll convert the frame to jpeg. Later we'll join all the jpegs into an mp4.
sprintf(command, "pnmtojpeg < frame%08d.pnm > frame%08d.jpg", seq, seq);
image = fopen(fname, "w");
if (image == NULL) {
fprintf(stderr, "Cannot open %s\n", fname);
exit(1);
}
« Initialise the frame to black » individual = population;
while (individual != NULL) {
« for each butterfly, increment the count for the square it occupies.
(we max out at 15 for convenience in drawing) » }
« Output the trivial header of the netpbm format file. » for (ybar = 0; ybar < 480; ybar++) {
« Loop over each row of the virtual space » y = 479-ybar;
« Pad the left-hand side of the image out to 720x480 (DVD format) » for (x = 0; x < 480; x++) {
« draw a single row » int lat, lon;
lat = (x - 480/2) * 4000 / 480;
lon = (y - 480/2) * 4000 / 480;
// unless we're plotting individual critters, a density map
// would be a better style of display
if (map[x][y] == 0) {
« Draw the background. Two areas are coloured » if (date_range(day, MARCH, JULY) && within_geographical_area(lat, lon, MEXICO)) {
fprintf(image, "0 4 0 "); // mexico - green
} else if (date_range(day, AUGUST, DECEMBER) && within_geographical_area(lat, lon, NORTHEAST)) {
fprintf(image, "0 0 15 "); // feeding area - blue
} else {
fprintf(image, "2 2 2 "); // rest (we don't show off-land) - black
}
} else {
« Draw the butterflies » }
}
« Pad the right-hand side of the image out to 720x480 (DVD format) » fprintf(image, "\n");
}
fclose(image);
« Convert from netpbm format to jpg » }
« 'dump population' - once every 1000 iterations or so, we output the
current population to a file. (overwriting the same file each time).
This will be used when we reach a stable model to seed the first
generation, rather than starting from an arbitrary "god's view" of
the world and evolving to here.
Note that the dump file contains *everything* that is needed
to restart a computation after a crash - it has all the internal
state of the remaining butterflies, plus all the significant
globals. Although we don't yet have a 'restart' procedure, if
we ever get into very long production runs, we'll add the code to
load these files back in. Note that the state is only ever dumped
outside of the main simulation loop, so that thread safety is not
an issue.
» void dump_population(void) {
FILE *dna;
dna = fopen("population.txt", "w");
individual = population;
fprintf(dna, "source: %s\n", rcsid);
fprintf(dna, "ranseed: %u\n", ranseed);
fprintf(dna, "tick: %d\n\n", time_elapsed);
fprintf(dna, "ticks_per_day: %d\n\n", TICKS_PER_DAY);
fprintf(dna, "mexico: %d %d %d %d\n\n", MEXICO);
fprintf(dna, "northeast: %d %d %d %d\n\n", NORTHEAST);
while (individual != NULL) {
int i;
fprintf(dna, "tag: %d\n", individual->tag);
fprintf(dna, "health: %lld\n", individual->health);
fprintf(dna, "body_type: %lld\n", individual->body_type);
fprintf(dna, "days_before_egg_laying: %d\n", individual->days_before_egg_laying);
fprintf(dna, "age: %lld\n", individual->age_in_days);
fprintf(dna, "lat: %d\n", individual->lat);
fprintf(dna, "lon: %d\n", individual->lon);
for (i = 0; i < 365; i++) {
fprintf(dna, "movement_gene_%d: %d\n", i, individual->movement_gene[i]);
}
fprintf(dna, "\n");
individual = individual->next;
}
fclose(dna);
}
« The initial population is created here. This is an unrealistic
population but it is sufficient as a starting place for evolution.
To be wholly realistic we would have to simulate the world going
back to the first microbe! »
« This is a fairly realistic 'breed' procedure which mixes genes
from both parents. (Although in the early stages of code development
we cheat and simply clone the father.) »
« If there is any housekeeping we want to do at the start of a timeslice, do it here »
// In order to later convert this code to run multiple threads on a
// multi-core processor, this procedure must be thread-safe. That
// means (roughly) that no variables are accessed outside of those
// within the 'individual' record/object. (read-only access to global
// variables is OK, as long as those variables don't change at all during
// the parallel for-loop which invokes this procedure)
// currently this 'individual' shadows the global declaration, and really
// it is all quite messy. Need to restructure it so that the thread-safe
// code (not just 'simulate' but everything called in the parallelizable
// loop, such as end_timeslice) uses a local, frozen, individual.
void simulate(critter *individual)
{
int dir, day = time_elapsed/TICKS_PER_DAY;
if (individual->health <= 0LL) return;
if (individual->body_type == CATERPILLAR) {
if (individual->age_in_days >= (long long)(50+posrand(10))) {
individual->body_type = ADULT; individual->age_in_days = 1LL;
} else if (individual->age_in_days >= 60) { // hard limit maximum caterpillar period
individual->body_type = ADULT; individual->age_in_days = 1LL;
}
}
individual->health -= 1LL; // just living expends some energy
if (individual->health <= 0LL) {
//if (day > 1900) fprintf(stderr, "Died from low health (1->0)!!!\n");
return;
}
if (individual->age_in_days >= ((long long) posrand(100)+365LL) * 5LL) {
individual->health = 0LL; // died of old age at something over 5 years
return;
}
// if we found our way to mexico (-200,-1200, 200, 1000)
// during arbitrary breeding season, breed (temp crude hack)
// To do: sexual coupling
if (
(individual->body_type == ADULT) &&
(individual->days_before_egg_laying == 0) &&
(individual->age_in_days > 100LL) &&
date_range(day, MARCH, JULY) && within_geographical_area(individual->lat, individual->lon, MEXICO)
) {
critter *child;
int i;
// to do: find nearest female and breed
// usually we randomly choose a parent gene rather than averaging them
// for now, just clone
fprintf(stdout, "Breeding at %d %d\n", individual->lat, individual->lon);
for (i = 0; i < 30; i++) { // replacement rate: 30 births - arbitrary for now
child = breed(individual, individual);
// children inserted into linked list immediately after parents
// - hopefully for better locality in multi-cpu implementation?
fprintf(stdout, "Insert %d\n", child->tag);
// linked-list implementation probably not thread-safe. Need to rethink this.
child->next = individual->next;
individual->next = child;
}
// stop the mother breeding again tomorrow:
individual->days_before_egg_laying = posrand(20)+190;
// (the loop above plus the line immediately before ought to be done
// within the 'breed' procedure.)
} else if (
(individual->body_type == ADULT) &&
date_range(day, MARCH, JULY) &&
within_geographical_area(individual->lat, individual->lon, MEXICO)
) {
// it's party time!
//fprintf(stdout, "[%d, %d] Eating at %d %d", individual->health, individual->tag, individual->lat, individual->lon);
individual->health += 2LL;
dir = day%365; assert((0 <= dir) && (dir < 365));
if (individual->movement_gene[dir] > 50) {
individual->lat += lat_movement[dir];
individual->lon += lon_movement[dir];
}
// stay in feeding zone when food still available
i = 0;
while (!within_geographical_area(individual->lat, individual->lon, MEXICO)) {
individual->lat -= lat_movement[dir];
individual->lon -= lon_movement[dir];
if (i == 100) break;
dir = posrand(365);
individual->lat += lat_movement[dir];
individual->lon += lon_movement[dir];
}
} else if (
(individual->body_type == ADULT) &&
date_range(day, AUGUST, DECEMBER) &&
within_geographical_area(individual->lat, individual->lon, NORTHEAST)
) {
// good eatin'
//fprintf(stdout, "[%d, %d] Eating at %d %d", individual->health, individual->tag, individual->lat, individual->lon);
individual->health += 2LL; // cue migration instinct when fully healthy?
//if (individual->health >= 60000) fprintf(stdout, " to excess!");
//fprintf(stdout, "\n");
dir = day%365; assert((0 <= dir) && (dir < 365));
if (individual->movement_gene[dir] > 50) {
individual->lat += lat_movement[dir];
individual->lon += lon_movement[dir];
}
// stay in feeding zone when food still available
i = 0;
while (!within_geographical_area(individual->lat, individual->lon, NORTHEAST)) {
individual->lat -= lat_movement[dir];
individual->lon -= lon_movement[dir];
if (i == 100) break;
dir = posrand(365);
individual->lat += lat_movement[dir];
individual->lon += lon_movement[dir];
}
} else if (individual->body_type == ADULT) {
// randomly move according to genes for now. (caterpillars don't move much)
// Later trigger by some external cause.
// To enable migration, this has to be idempotent, not random
// so this is probably not thread safe. We can work around this
// by precomputing the randoms outside the loop, or by storing and
// updating the random state inside each butterfly record
// if ((0 <= (day%365) && (day%365) <= 100)) dir = posrand(16) * 2; else dir = posrand(32); assert(0 <= dir && dir < 32);
dir = day%365; assert((0 <= dir) && (dir < 365));
if (individual->movement_gene[dir] > 50) {
individual->lat += lat_movement[dir];
individual->lon += lon_movement[dir];
}
// or if a follower and someone nearby just moved..?
}
if (abs(individual->lat) >= 2000 || abs(individual->lon) >= 2000) {
// lost at sea, freezing in the arctic, burning in the desert
//if (day > 1900) fprintf(stderr, "Off the map!\n");
individual->health = 0LL; return;
}
}
« If there is any housekeeping we want to do at the end of a timeslice, do it here » void end_timeslice(void)
{
if ((time_elapsed % TICKS_PER_DAY) == 0) { // midnight. Do any daily housekeeping chores.
individual->age_in_days += 1LL;
if (individual->days_before_egg_laying > 0) individual->days_before_egg_laying -= 1;
//#ifdef NEVER
fprintf(stdout,
"Lifetime = %d days, population = %d, ave health = %lld/%lld first=%d last=%d [%lld (%lld days)] at %d,%d\n",
day, alive, overall_health, (long long)alive, population->tag, individual->tag, individual->health, individual->age_in_days, individual->lat, individual->lon);
//#endif
fprintf(stdout,
"Lifetime = %d days, population = %d, ave health = %lld, births=%d deaths=%d\n",
day, alive, overall_health/(long long)alive, births, deaths);
births = 0;
draw_frame(day);
if ((day % 100) == 0) {
dump_population();
assert(individual == NULL);
}
}
}
« one-off global initialisation »
// CODE SECTION
« A little bit of system code to ensure the graphics library is pre-loaded. »
initialise_simulation();
// RUN THE SIMULATION
// This is the loop we will eventually parallelise in order to
// run on OMP (multi-core). Calls to simulate() should be possible
// to execute in parallel as all the data these calls use is stored
// within each butterfly object.
// Note that this linked-list implementation is not parallelizable
// or thread-safe. We need a better way of slicing up our universe
// so that separate cores can take an equal slice of work.
for (;;) { /* for each 15 minute time frame until all dead */
/* for each member, advance time forward one unit */
individual = population; // first member in list
assert(individual != NULL);
start_timeslice();
// SIMULATE ONE TIME SLICE
// This loop can be parallelized with OMP; anything called within
// this loop must be thread safe. The current linked list does not
// lend itself well to parallelization.
/* PARALLEL FOR */ for (;;) { // handle every butterfly in list for this one timeslice
simulate(individual); // includes breeding and dying
individual = individual->next; // new births are inserted after the parent and will therefore be found OK.
if (individual == NULL) break;
}
« cull deaths »
assert(alive > 0);
// fprintf(stderr, "Tick %d: %d births, %d deaths, %d remaining\n",
// time_elapsed, births, deaths, alive);
time_elapsed += 1; // 1 15-minute 'tick'
day = time_elapsed/TICKS_PER_DAY;
assert (population != NULL);
end_timeslice();
if (day == 8000) {
fprintf(stderr, "Simulation cut off after 8000 days - see population.txt\n");
break;
}
} // next time slice
« on exit, run "ffmpeg -y -i frame%08d.jpg output.mp4" » exit(0);
}