From 6af927939228692305367f23fe810ac3558e4faf Mon Sep 17 00:00:00 2001 From: wgroeneveld Date: Fri, 30 Nov 2018 20:54:56 +0100 Subject: [PATCH] update naive impl conway, active tracking is broken --- .../src/ActiveTrackingConwayScene.cpp | 32 ++++++++++++------- demos/demo4-conway/src/ConwayScene.cpp | 21 ++---------- demos/demo4-conway/src/ConwayScene.h | 17 +++++----- demos/demo4-conway/src/ConwaySeeder.cpp | 4 +-- demos/demo4-conway/src/NaiveConwayScene.cpp | 16 +++++++--- 5 files changed, 44 insertions(+), 46 deletions(-) diff --git a/demos/demo4-conway/src/ActiveTrackingConwayScene.cpp b/demos/demo4-conway/src/ActiveTrackingConwayScene.cpp index e269015..a0021db 100644 --- a/demos/demo4-conway/src/ActiveTrackingConwayScene.cpp +++ b/demos/demo4-conway/src/ActiveTrackingConwayScene.cpp @@ -14,9 +14,12 @@ void ActiveTrackingConwayScene::tick(u16 keys) { int totalAmountAlive = 0; dma3_cpy(buffer, map, sizeof(buffer)); - // when starting slow (100 active), this totals to 100 * 9 = +900 instructions VS +30k in the naive impl - // when starting high, this will too slow down, worst case slightly slower than naive (SIZE * 9 instead of * 8) - // 1. loop through all "active" cells. + // "Active Tracking" implementation: + // 1. O(active*8*9) loop + // when starting low (100 active), this totals to 100 * 9 * 8 = +7200 instructions VS +30k in the naive impl + // when starting high, this will slow down too, worst case even slower than naive (SIZE*9*8 instead of only SIZE*8) + + // 1. loop through all "active" cells, initially gathered outside tick() for(int i = 0; i < MAP_SIZE; i++) { auto cell = activeCellIndex[i]; if(!cell.taken) { @@ -24,17 +27,20 @@ void ActiveTrackingConwayScene::tick(u16 keys) { } int x = cell.x, y = cell.y; - // 2. process those _AND_ the neighbours (could be dead ones becoming alive) - for(int x_i = x - 1; x_i <= x + 1; x_i++) { - for(int y_j = y - 1; y_j <= y + 1; y_j++) { - int toCheckPos = y_j * MAP_WIDTH + x_i; + // 2. 
process those _AND_ the neighbours (9 checks) (could be dead ones becoming alive) + for(int y_j = y - 1; y_j <= y + 1; y_j++) { + int y_jw = y_j * MAP_WIDTH; + + for(int x_i = x - 1; x_i <= x + 1; x_i++) { + int toCheckPos = y_jw + x_i; if(toCheckPos >= 0 && toCheckPos < MAP_SIZE - 1) { - u16 state = getNextState(x_i, y_j); + // 3. Remember the following statement expands into 8 neighbour-checks + u16 state = getNextState(x_i, y_j, toCheckPos); if(state == ALIVE) { // 3. Save the cell metadata if active for the next generation - activeCellIndexBuffer[totalAmountAlive].taken = true; - activeCellIndexBuffer[totalAmountAlive].x = x_i; - activeCellIndexBuffer[totalAmountAlive].y = y_j; + activeCellIndex[totalAmountAlive].taken = true; + activeCellIndex[totalAmountAlive].x = x_i; + activeCellIndex[totalAmountAlive].y = y_j; totalAmountAlive++; } buffer[toCheckPos] = state; @@ -46,7 +52,7 @@ void ActiveTrackingConwayScene::tick(u16 keys) { TextStream::instance().setText(std::string("amount alive: ") + std::to_string(totalAmountAlive) + std::string(" of ") + std::to_string(MAP_SIZE), 1, 1); TextStream::instance().setText(std::string("generation: ") + std::to_string(generation), 2, 1); - dma3_cpy(activeCellIndex, activeCellIndexBuffer, sizeof(activeCellIndex)); + //dma3_cpy(activeCellIndex, activeCellIndexBuffer, sizeof(activeCellIndex)); dma3_cpy(map, buffer, sizeof(map)); bg.get()->updateMap(map); @@ -54,10 +60,12 @@ void ActiveTrackingConwayScene::tick(u16 keys) { void ActiveTrackingConwayScene::postload() { + // speed optimization: save active cells in a separate array, to be reused (that's why the taken bool is there) int i = 0; for(int w = 0; w < MAP_WIDTH; w++) { for(int h = 0; h < MAP_HEIGHT; h++) { u8 index = h * MAP_WIDTH + w; + if(map[index] == ALIVE) { activeCellIndex[i].taken = true; activeCellIndex[i].x = w; diff --git a/demos/demo4-conway/src/ConwayScene.cpp b/demos/demo4-conway/src/ConwayScene.cpp index 6a0e609..d48579d 100644 --- 
a/demos/demo4-conway/src/ConwayScene.cpp +++ b/demos/demo4-conway/src/ConwayScene.cpp @@ -22,23 +22,12 @@ std::vector ConwayScene::backgrounds() { return { bg.get() }; } -u16 ConwayScene::getNextState(int x, int y) { - int pos = y * MAP_WIDTH + x; +u16 ConwayScene::getNextState(int x, int y, int pos) { int amountAlive = countAmountOfNeighbouringCellsAlive(pos, x, y); int currentState = map[pos]; - - if(currentState == DEAD) { - if(amountAlive == 3) { - return ALIVE; - } - return DEAD; - } else { - if (amountAlive < 2 || amountAlive > 3) { - return DEAD; - } - return ALIVE; - } + // speed optimization: skip the ifs. "? ALIVE : DEAD" can also be skipped if you're sure they are 1 and 0. + return (amountAlive == 3 || (amountAlive == 2 && (currentState == ALIVE))) ? ALIVE : DEAD; } int ConwayScene::countAmountOfNeighbouringCellsAlive(int pos, int x, int y) { @@ -68,10 +57,6 @@ void ConwayScene::load() { } void ConwayScene::seedRandomMap(int seedcount) { - for(int i = 0; i < MAP_SIZE; i++) { - map[i] = DEAD; - } - for(int i = 0; i < seedcount; i++) { int x = qran_range(0, MAP_WIDTH); int y = qran_range(0, MAP_HEIGHT); diff --git a/demos/demo4-conway/src/ConwayScene.h b/demos/demo4-conway/src/ConwayScene.h index 1968a11..9a4fafa 100644 --- a/demos/demo4-conway/src/ConwayScene.h +++ b/demos/demo4-conway/src/ConwayScene.h @@ -5,27 +5,26 @@ #ifndef GBA_SPRITE_ENGINE_PROJECT_CONWAYSCENE_H #define GBA_SPRITE_ENGINE_PROJECT_CONWAYSCENE_H +#include +#include -#define MAP_WIDTH 64 -#define MAP_HEIGHT 64 -#define MAP_SIZE 64 * 64 +#define MAP_WIDTH GBA_SCREEN_WIDTH / 8 +#define MAP_HEIGHT GBA_SCREEN_HEIGHT / 8 +#define MAP_SIZE MAP_WIDTH * MAP_HEIGHT #define ALIVE 0x0001 -#define DEAD 0x0002 - -#include - +#define DEAD 0x0000 class ConwayScene : public Scene { protected: u8 percentageSeed; u16 generation; std::unique_ptr bg; - u16 map[MAP_SIZE], buffer[MAP_SIZE]; + u16 map[MAP_SIZE] = {DEAD}, buffer[MAP_SIZE] = {DEAD}; void seedRandomMap(int seedcount); int 
countAmountOfNeighbouringCellsAlive(int pos, int x, int y); - u16 getNextState(int x, int y); + u16 getNextState(int x, int y, int pos); public: ConwayScene(const std::shared_ptr &engine, u8 percentageSeed) : Scene(engine), percentageSeed(percentageSeed) {} diff --git a/demos/demo4-conway/src/ConwaySeeder.cpp b/demos/demo4-conway/src/ConwaySeeder.cpp index 87c2d01..25f2dc0 100644 --- a/demos/demo4-conway/src/ConwaySeeder.cpp +++ b/demos/demo4-conway/src/ConwaySeeder.cpp @@ -57,11 +57,11 @@ void ConwaySeeder::tick(u16 keys) { } } else if(keys & KEY_UP && percentage < 90) { delta = 1; - } else if(keys & KEY_DOWN && percentage > 10) { + } else if(keys & KEY_DOWN && percentage > 5) { delta = -1; } else { if(delta != 0) { - percentage += 10 * delta; + percentage += 5 * delta; delta = 0; } } diff --git a/demos/demo4-conway/src/NaiveConwayScene.cpp b/demos/demo4-conway/src/NaiveConwayScene.cpp index 0314427..7060325 100644 --- a/demos/demo4-conway/src/NaiveConwayScene.cpp +++ b/demos/demo4-conway/src/NaiveConwayScene.cpp @@ -16,15 +16,21 @@ void NaiveConwayScene::tick(u16 keys) { int totalAmountAlive = 0; dma3_cpy(buffer, map, sizeof(buffer)); - // Naïve implementation: + // "Naïve" implementation: // 1. O(n^2) loop // 2. for each element, check x-1, x, x+y, y-1, y,y+1 // totals to min. 4096 * 8 = +32.768 instructions, each update(), at only 16.8 MHz! - for(int w = 0; w < MAP_WIDTH; w++) { - for(int h = 0; h < MAP_HEIGHT; h++) { - u16 state = getNextState(w, h); + + // speed optimization: reverse x/y in loop, calculate index in outer for + // speed optimization: (needed for gba, as we copypaste this literally into VRAM) single-depth array with * lookup + for(int h = 0; h < MAP_HEIGHT; h++) { + int hw = h * MAP_WIDTH; + + for(int w = 0; w < MAP_WIDTH; w++) { + int pos = hw + w; + u16 state = getNextState(w, h, pos); if(state == ALIVE) totalAmountAlive++; - buffer[h * MAP_WIDTH + w] = state; + buffer[pos] = state; } }