update impl active tracking

This commit is contained in:
wgroeneveld 2018-12-03 16:20:58 +01:00
parent 6af9279392
commit c327aa3c8e
6 changed files with 84 additions and 31 deletions

21
Release.key Normal file
View File

@ -0,0 +1,21 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: GnuPG v2.0.15 (GNU/Linux)
mQENBFdEgZoBCAD06orQfic7gtx/akz9QG5I9p0hjLDurZUdqL5QcV11IkvXar99
tWforuy4tr74oUwQVn9E6GtVejwjGZfJ60qQSOs8Jye/qdtDiVg9ORFa+hDji9+X
NW22ZjVYlwfwGejXwsk8KiLYmd5nT2UftXHadInBaWj8SGtX/fDiGJmt6rBTPVs/
JSvNF3NNi95Pnwsji9LVLGAJzHO/2h1qvK8tmGaMEAgXYuYEhZ+K2ld4c609aV1Q
WZkZakz+l9AGd1K6wy4TFgQSBoRgGZJNdDGAmorL5y/mrH/rG/9YG5BNHbhklHp+
Obn5iWBPfoHVke2XbrjSOtYetIrPE+UUFDQLABEBAAG0OmhvbWU6UGl2YWw4MSBP
QlMgUHJvamVjdCA8aG9tZTpQaXZhbDgxQGJ1aWxkLm9wZW5zdXNlLm9yZz6JAT4E
EwECACgFAldEgZoCGwMFCQQesAAGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJ
EF0O8jNk4jAxoUgH/3YwxTPIAXCwcetRQbf3lp5N/MaV5sMf8fiV5Uy1HBKn2HMi
Gm75axQlMJTC39FTbq0CT/3nfeUhvsk1ipk41KpkukcrDpGvppZYEkzj8zxEXfiq
Ow78lWD66jpY184rTpsLLt4IcqqH1QMC7fQV9b8mzR5WmBRUhnO+l6oIT3fccZ9D
faNfXl/+CuAX+S6i7kJRyHIN07T0tKaZMyIGSiw7bhQ8L/0NZ9vn57J+KC6f5SEG
kmEs/j1AltsRI8R6Z9alT0XSjnwW6o281mbpir/12A2O/9mxVhFrcgHdB2yl3JCN
OgSX8LnHTDnjmMCmCWVz73QQDVrZJmJ0ltwVBO+IRgQTEQIABgUCV0SBmgAKCRA7
MBG3a51lI1fSAKCA+sfj77Uv1D4g+sK7lbE0AyJKagCfdFQQ9lCfmEQ1wfZyOCvs
GSV0JlA=
=1rpz
-----END PGP PUBLIC KEY BLOCK-----

View File

@ -10,49 +10,61 @@ ActiveTrackingConwayScene::ActiveTrackingConwayScene(const std::shared_ptr<GBAEn
: ConwayScene(engine, percentageSeed) {}
void ActiveTrackingConwayScene::tick(u16 keys) {
generation++;
int totalAmountAlive = 0;
generation++;;
dma3_cpy(buffer, map, sizeof(buffer));
// "Active Tracking" implementation:
// 1. O(active*8*9) loop
// when starting slow (100 active), this totals to 100 * 9 * 8 = +7200 instructions VS +30k in the naive impl
// when starting high, this will too slow down, worst case even slower than naive (SIZE*9*8 instead of only *8)
// 1. O(changing*(8*2)) loop
// when starting slow (100 active), this totals to 100 * (8*2) = +1600 instructions VS +4800 in the naive impl
// when starting high, this will too slow down, worst case even slower than naive (SIZE(*8+2) instead of only *8)
// 1. loop through all "active" cells, initially gathered outside tick()
int j = 0;
u8 futureState, currState;
for(int i = 0; i < MAP_SIZE; i++) {
auto cell = activeCellIndex[i];
if(!cell.taken) {
break;
}
int x = cell.x, y = cell.y;
int x = cell.x, y = cell.y, pos = cell.pos;
// 2. process those _AND_ the neighbours (9 checks) (could be dead ones becoming alive)
for(int y_j = y - 1; y_j <= y + 1; y_j++) {
int y_jw = y_j * MAP_WIDTH;
// 2. process only those which will change state: update neighbour count. x8
int delta = cell.futureState == ALIVE ? +1 : -1;
totalAmountAlive += delta;
buffer[cell.pos] = cell.futureState;
for(int x_i = x - 1; x_i <= x + 1; x_i++) {
for(int y_j = y - 1; y_j <= y + 1; y_j++) {
int toCheckPos = y_j * MAP_WIDTH + x_i;
if(toCheckPos >= 0 && toCheckPos < MAP_SIZE - 1 && pos != toCheckPos) {
neighbourCount[toCheckPos] += delta;
}
}
}
for(int x_i = x - 1; x_i <= x + 1; x_i++) {
int toCheckPos = y_jw + x_i;
// 3. if neighbours (and own cell) will possibly change, add them to the next run. x9
for(int x_i = x - 1; x_i <= x + 1; x_i++) {
for(int y_j = y - 1; y_j <= y + 1; y_j++) {
int toCheckPos = y_j * MAP_WIDTH + x_i;
if(toCheckPos >= 0 && toCheckPos < MAP_SIZE - 1) {
// 3. Remember the following statement expands into 8 neighbour-checks
u16 state = getNextState(x_i, y_j, toCheckPos);
if(state == ALIVE) {
// 3. Save the cell metadata if active for the next generation
activeCellIndex[totalAmountAlive].taken = true;
activeCellIndex[totalAmountAlive].x = x_i;
activeCellIndex[totalAmountAlive].y = y_j;
totalAmountAlive++;
currState = buffer[toCheckPos];
futureState = getNextStateWithCount(currState, neighbourCount[toCheckPos]);
if(futureState != currState) {
activeCellIndexBuffer[j].taken = true;
activeCellIndexBuffer[j].x = x_i;
activeCellIndexBuffer[j].y = y_j;
activeCellIndexBuffer[j].pos = toCheckPos;
activeCellIndexBuffer[j].futureState = futureState;
j++;
}
buffer[toCheckPos] = state;
}
}
}
}
activeCellIndexBuffer[totalAmountAlive].taken = false;
activeCellIndexBuffer[j].taken = false;
TextStream::instance().setText(std::string("amount alive: ") + std::to_string(totalAmountAlive) + std::string(" of ") + std::to_string(MAP_SIZE), 1, 1);
TextStream::instance().setText(std::string("generation: ") + std::to_string(generation), 2, 1);
//dma3_cpy(activeCellIndex, activeCellIndexBuffer, sizeof(activeCellIndex));
dma3_cpy(activeCellIndex, activeCellIndexBuffer, sizeof(activeCellIndexBuffer));
dma3_cpy(map, buffer, sizeof(map));
bg.get()->updateMap(map);
@ -62,16 +74,28 @@ void ActiveTrackingConwayScene::tick(u16 keys) {
void ActiveTrackingConwayScene::postload() {
// speed optimization: save active cells in a separate array, to be reused (that's why the taken bool is there)
int i = 0;
for(int w = 0; w < MAP_WIDTH; w++) {
for(int h = 0; h < MAP_HEIGHT; h++) {
u8 index = h * MAP_WIDTH + w;
for(int h = 0; h < MAP_HEIGHT; h++) {
int h_w = h * MAP_WIDTH;
for(int w = 0; w < MAP_WIDTH; w++) {
int pos = h_w + w;
int count = countAmountOfNeighbouringCellsAlive(pos, w, h);
u8 currentState = map[pos];
u8 futureState = getNextStateWithCount(currentState, count);
if(map[index] == ALIVE) {
if(currentState == ALIVE) {
totalAmountAlive++;
}
neighbourCount[pos] = count;
if(currentState != futureState) {
activeCellIndex[i].taken = true;
activeCellIndex[i].x = w;
activeCellIndex[i].y = h;
activeCellIndex[i].pos = pos;
activeCellIndex[i].futureState = futureState;
i++;
}
}
}
activeCellIndex[i].taken = false;
}

View File

@ -10,12 +10,15 @@
/**
 * One entry of the active-cell work list used by ActiveTrackingConwayScene.
 */
struct cell {
    // True while this slot holds a valid entry; the first slot with taken == false ends the list.
    bool taken;
    // Column, row and flat map index of the cell (writers set pos = y * MAP_WIDTH + x).
    int x, y, pos;
    // State the cell is written to when the entry is processed in the next tick.
    u8 futureState;
};
class ActiveTrackingConwayScene : public ConwayScene {
private:
cell activeCellIndex[MAP_SIZE] = {0}, activeCellIndexBuffer[MAP_SIZE] = {0};
int totalAmountAlive = 0;
int neighbourCount[MAP_SIZE] = {0};
cell activeCellIndex[MAP_SIZE + 1] = {0}, activeCellIndexBuffer[MAP_SIZE + 1] = {0};
public:
ActiveTrackingConwayScene(const std::shared_ptr<GBAEngine> &engine, u8 percentageSeed);

View File

@ -22,12 +22,16 @@ std::vector<Background *> ConwayScene::backgrounds() {
return { bg.get() };
}
/**
 * Decides a cell's next state from its current state and a known neighbour count.
 *
 * @param currentState state of the cell in the current generation
 * @param amountAlive  number of live neighbours surrounding the cell
 * @return ALIVE when there are exactly three live neighbours, or exactly two
 *         while the cell is currently ALIVE; DEAD in every other case
 */
u16 ConwayScene::getNextStateWithCount(int currentState, int amountAlive) {
    const bool hasThreeNeighbours = (amountAlive == 3);
    const bool survivesOnTwo = (amountAlive == 2) && (currentState == ALIVE);
    // speed optimization: no if-chain. The "? ALIVE : DEAD" could be dropped as well
    // if ALIVE and DEAD are guaranteed to be 1 and 0.
    return (hasThreeNeighbours || survivesOnTwo) ? ALIVE : DEAD;
}
/**
 * Computes the next state of the cell at (x, y) by counting its live neighbours
 * and applying the shared rule in getNextStateWithCount().
 *
 * @param x   column of the cell
 * @param y   row of the cell
 * @param pos flat index of the same cell into map
 * @return the value produced by getNextStateWithCount() for this cell
 */
u16 ConwayScene::getNextState(int x, int y, int pos) {
    const int amountAlive = countAmountOfNeighbouringCellsAlive(pos, x, y);
    const int currentState = map[pos];
    // The rule itself lives in getNextStateWithCount(); delegating keeps a single copy of it.
    return getNextStateWithCount(currentState, amountAlive);
}
int ConwayScene::countAmountOfNeighbouringCellsAlive(int pos, int x, int y) {

View File

@ -25,6 +25,7 @@ protected:
void seedRandomMap(int seedcount);
int countAmountOfNeighbouringCellsAlive(int pos, int x, int y);
u16 getNextState(int x, int y, int pos);
u16 getNextStateWithCount(int currentState, int count);
public:
ConwayScene(const std::shared_ptr<GBAEngine> &engine, u8 percentageSeed) : Scene(engine), percentageSeed(percentageSeed) {}

View File

@ -19,7 +19,7 @@ void NaiveConwayScene::tick(u16 keys) {
// "Naïve" implementation:
// 1. O(n^2) loop
// 2. for each element, check x-1, x, x+1 and y-1, y, y+1
// totals to min. 4096 * 8 = +32.768 instructions, each update(), at only 16.8 MHz!
// totals to min. 600 * 8 = +4800 instructions, each update(), at only 16.8 MHz!
// speed optimization: reverse x/y in loop, calculate index in outer for
// speed optimization: (needed for gba, as we copypaste this literally into VRAM) single-depth array with * lookup