move to tonc's faster line rendering algorithms
This commit is contained in:
parent
6855ce19f2
commit
0e6f2db4ed
|
@ -1,16 +1,23 @@
|
||||||
project(gba-bitmap-engine)
|
project(gba-bitmap-engine)
|
||||||
set_property(SOURCE src/gba/sin_lut.s PROPERTY LANGUAGE C)
|
set_property(SOURCE
|
||||||
set_property(SOURCE src/gba/tonc_bios.s PROPERTY LANGUAGE C)
|
src/gba/sin_lut.s
|
||||||
set_property(SOURCE src/background/tonc_font.s PROPERTY LANGUAGE C)
|
src/gba/tonc_bios.s
|
||||||
set_source_files_properties(src/gba/tonc_bios.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
|
src/gba/tonc_memset.s
|
||||||
set_source_files_properties(src/background/tonc_font.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
|
src/background/tonc_font.s PROPERTY LANGUAGE C)
|
||||||
|
set_source_files_properties(
|
||||||
|
src/gba/tonc_bios.s
|
||||||
|
src/gba/tonc_memset.s
|
||||||
|
src/background/tonc_font.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
|
||||||
|
set_source_files_properties(src/gba/tonc_bmp8.cpp PROPERTIES COMPILE_FLAGS "-Wno-pointer-arith")
|
||||||
|
|
||||||
add_library(${PROJECT_NAME}
|
add_library(${PROJECT_NAME}
|
||||||
src/palette/palette_manager.cpp
|
src/palette/palette_manager.cpp
|
||||||
src/palette/combined_palette.cpp
|
src/palette/combined_palette.cpp
|
||||||
src/background/text_stream.cpp
|
src/background/text_stream.cpp
|
||||||
src/gba/sin_lut.s
|
src/gba/sin_lut.s
|
||||||
|
src/gba/tonc_memset.s
|
||||||
src/gba/tonc_bios.s
|
src/gba/tonc_bios.s
|
||||||
|
src/gba/tonc_bmp8.cpp
|
||||||
src/background/tonc_font.s
|
src/background/tonc_font.s
|
||||||
src/gba_engine.cpp
|
src/gba_engine.cpp
|
||||||
src/math.cpp
|
src/math.cpp
|
||||||
|
|
|
@ -148,58 +148,11 @@ INLINE void *toncset32(void *dst, u32 src, uint count);
|
||||||
|
|
||||||
|
|
||||||
// Fast memcpy/set
|
// Fast memcpy/set
|
||||||
void memset16(void *dst, u16 hw, uint hwcount);
|
extern "C" void memset16(void *dst, u16 hw, uint hwcount);
|
||||||
void memcpy16(void *dst, const void* src, uint hwcount);
|
extern "C" void memcpy16(void *dst, const void* src, uint hwcount);
|
||||||
|
|
||||||
IWRAM_CODE void memset32(void *dst, u32 wd, uint wcount);
|
extern "C" IWRAM_CODE void memset32(void *dst, u32 wd, uint wcount);
|
||||||
IWRAM_CODE void memcpy32(void *dst, const void* src, uint wcount);
|
extern "C" IWRAM_CODE void memcpy32(void *dst, const void* src, uint wcount);
|
||||||
|
|
||||||
|
|
||||||
//! Fastfill for halfwords, analogous to memset()
|
|
||||||
/*! Uses <code>memset32()</code> if \a hwcount>5
|
|
||||||
* \param dst Destination address.
|
|
||||||
* \param hw Source halfword (not address).
|
|
||||||
* \param hwcount Number of halfwords to fill.
|
|
||||||
* \note \a dst <b>must</b> be halfword aligned.
|
|
||||||
* \note \a r0 returns as \a dst + \a hwcount*2.
|
|
||||||
*/
|
|
||||||
void memset16(void *dst, u16 hw, uint hwcount);
|
|
||||||
|
|
||||||
//! \brief Copy for halfwords.
|
|
||||||
/*! Uses <code>memcpy32()</code> if \a hwn>6 and
|
|
||||||
\a src and \a dst are aligned equally.
|
|
||||||
\param dst Destination address.
|
|
||||||
\param src Source address.
|
|
||||||
\param hwcount Number of halfwords to fill.
|
|
||||||
\note \a dst and \a src <b>must</b> be halfword aligned.
|
|
||||||
\note \a r0 and \a r1 return as
|
|
||||||
\a dst + \a hwcount*2 and \a src + \a hwcount*2.
|
|
||||||
*/
|
|
||||||
void memcpy16(void *dst, const void* src, uint hwcount);
|
|
||||||
|
|
||||||
|
|
||||||
//! Fast-fill by words, analogous to memset()
|
|
||||||
/*! Like CpuFastSet(), only without the requirement of
|
|
||||||
32byte chunks and no awkward store-value-in-memory-first issue.
|
|
||||||
\param dst Destination address.
|
|
||||||
\param wd Fill word (not address).
|
|
||||||
\param wdcount Number of words to fill.
|
|
||||||
\note \a dst <b>must</b> be word aligned.
|
|
||||||
\note \a r0 returns as \a dst + \a wdcount*4.
|
|
||||||
*/
|
|
||||||
IWRAM_CODE void memset32(void *dst, u32 wd, uint wdcount);
|
|
||||||
|
|
||||||
|
|
||||||
//! \brief Fast-copy by words.
|
|
||||||
/*! Like CpuFastFill(), only without the requirement of 32byte chunks
|
|
||||||
\param dst Destination address.
|
|
||||||
\param src Source address.
|
|
||||||
\param wdcount Number of words.
|
|
||||||
\note \a src and \a dst <b>must</b> be word aligned.
|
|
||||||
\note \a r0 and \a r1 return as
|
|
||||||
\a dst + \a wdcount*4 and \a src + \a wdcount*4.
|
|
||||||
*/
|
|
||||||
IWRAM_CODE void memcpy32(void *dst, const void* src, uint wdcount);
|
|
||||||
|
|
||||||
//\}
|
//\}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
//
|
||||||
|
// Created by Wouter Groeneveld on 10/07/20.
|
||||||
|
// Excerpts from tonc_video - only the parts that were needed (M4) were taken
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef GBA_BITMAP_ENGINE_PROJECT_TONC_VIDEO_H
|
||||||
|
#define GBA_BITMAP_ENGINE_PROJECT_TONC_VIDEO_H
|
||||||
|
|
||||||
|
#include <libgba-sprite-engine/gba/tonc_types.h>
|
||||||
|
#include <libgba-sprite-engine/gba/toolbox.h>
|
||||||
|
|
||||||
|
void bmp8_plot(int x, int y, u32 clr, void *dstBase, uint dstP);
|
||||||
|
|
||||||
|
void bmp8_hline(int x1, int y, int x2, u32 clr, void *dstBase, uint dstP);
|
||||||
|
void bmp8_vline(int x, int y1, int y2, u32 clr, void *dstBase, uint dstP);
|
||||||
|
void bmp8_line(int x1, int y1, int x2, int y2, u32 clr,
|
||||||
|
void *dstBase, uint dstP);
|
||||||
|
|
||||||
|
INLINE void m4_hline(int x1, int y, int x2, u8 clrid);
|
||||||
|
INLINE void m4_vline(int x, int y1, int y2, u8 clrid);
|
||||||
|
INLINE void m4_line(int x1, int y1, int x2, int y2, u8 clrid);
|
||||||
|
INLINE void m4_plot(int x, int y, u8 clrid);
|
||||||
|
|
||||||
|
//! Plot a \a clrid pixel on the current mode 4 backbuffer
|
||||||
|
INLINE void m4_plot(int x, int y, u8 clrid)
{
    // Two 8-bit pixels share one halfword of vid_page: even x lives in
    // the low byte, odd x in the high byte. Read the halfword once, swap
    // in the new byte and write the whole halfword back.
    u16 *const cell= &vid_page[(y*M4_WIDTH+x)>>1];
    const u16 old= *cell;

    *cell= (x&1) ? ((old& 0xFF) | (clrid<<8))
                 : ((old&~0xFF) | clrid);
}
|
||||||
|
|
||||||
|
|
||||||
|
//! Draw a \a clrid colored horizontal line in mode 4.
|
||||||
|
INLINE void m4_hline(int x1, int y, int x2, u8 clrid)
{
    // Forward to the generic 8bpp routine, drawing on vid_page with a
    // pitch of M4_WIDTH bytes.
    bmp8_hline(x1, y, x2, clrid, vid_page, M4_WIDTH);
}
|
||||||
|
|
||||||
|
|
||||||
|
//! Draw a \a clrid colored vertical line in mode 4.
|
||||||
|
INLINE void m4_vline(int x, int y1, int y2, u8 clrid)
{
    // Forward to the generic 8bpp routine, drawing on vid_page with a
    // pitch of M4_WIDTH bytes.
    bmp8_vline(x, y1, y2, clrid, vid_page, M4_WIDTH);
}
|
||||||
|
|
||||||
|
|
||||||
|
//! Draw a \a clrid colored line in mode 4.
|
||||||
|
INLINE void m4_line(int x1, int y1, int x2, int y2, u8 clrid)
{
    // Forward to the generic 8bpp routine, drawing on vid_page with a
    // pitch of M4_WIDTH bytes.
    bmp8_line(x1, y1, x2, y2, clrid, vid_page, M4_WIDTH);
}
|
||||||
|
|
||||||
|
#endif //GBA_BITMAP_ENGINE_PROJECT_TONC_VIDEO_H
|
|
@ -0,0 +1,18 @@
|
||||||
|
//
|
||||||
|
// Created by Wouter Groeneveld on 10/07/20.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef GBA_BITMAP_ENGINE_PROJECT_TOOLBOX_H
|
||||||
|
#define GBA_BITMAP_ENGINE_PROJECT_TOOLBOX_H
|
||||||
|
|
||||||
|
// Screen dimensions in pixels, plus the same values shifted left by 8
// (presumably .8 fixed point -- confirm against the fixed-point helpers).
// The shifted forms are parenthesised so that the shift is evaluated before
// any operator next to the macro at the use site; without the parentheses
// `GBA_SCREEN_WIDTH_FX + 1` would expand to `240 << (8 + 1)`.
#define GBA_SCREEN_WIDTH 240
#define GBA_SCREEN_WIDTH_FX (GBA_SCREEN_WIDTH << 8)
#define GBA_SCREEN_HEIGHT 160
#define GBA_SCREEN_HEIGHT_FX (GBA_SCREEN_HEIGHT << 8)
|
||||||
|
|
||||||
|
#define M4_WIDTH GBA_SCREEN_WIDTH
|
||||||
|
#define M4_HEIGHT GBA_SCREEN_HEIGHT
|
||||||
|
|
||||||
|
extern COLOR *vid_page;
|
||||||
|
|
||||||
|
#endif //GBA_BITMAP_ENGINE_PROJECT_TOOLBOX_H
|
|
@ -15,16 +15,8 @@
|
||||||
#include "sound_control.h"
|
#include "sound_control.h"
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
|
|
||||||
#define GBA_SCREEN_WIDTH 240
|
|
||||||
#define GBA_SCREEN_WIDTH_FX GBA_SCREEN_WIDTH << 8
|
|
||||||
#define GBA_SCREEN_HEIGHT 160
|
|
||||||
#define GBA_SCREEN_HEIGHT_FX GBA_SCREEN_HEIGHT << 8
|
|
||||||
|
|
||||||
|
|
||||||
#define M4_WIDTH 240
|
|
||||||
|
|
||||||
const unsigned int black[VRAM_PAGE_SIZE] = {};
|
const unsigned int black[VRAM_PAGE_SIZE] = {};
|
||||||
extern u16 *vid_page;
|
|
||||||
|
|
||||||
class GBAEngine {
|
class GBAEngine {
|
||||||
private:
|
private:
|
||||||
|
@ -74,6 +66,7 @@ public:
|
||||||
for(int i = 0; i < times; i++){}
|
for(int i = 0; i < times; i++){}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void plotPixel(int x, int y, u8 clrId);
|
||||||
inline void plotPixel(const VectorPx &pixel, u8 clrId);
|
inline void plotPixel(const VectorPx &pixel, u8 clrId);
|
||||||
inline void plotLine(const VectorPx &point0, const VectorPx &point1, u8 clrId);
|
inline void plotLine(const VectorPx &point0, const VectorPx &point1, u8 clrId);
|
||||||
};
|
};
|
||||||
|
|
|
@ -0,0 +1,206 @@
|
||||||
|
//
|
||||||
|
// Created by Wouter Groeneveld on 10/07/20.
|
||||||
|
//
|
||||||
|
|
||||||
|
//! Plot a single pixel on an 8-bit buffer
|
||||||
|
#include <libgba-sprite-engine/gba/tonc_types.h>
|
||||||
|
#include <libgba-sprite-engine/gba/tonc_core.h>
|
||||||
|
#include <libgba-sprite-engine/gba/toolbox.h>
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\param x X-coord.
|
||||||
|
\param y Y-coord.
|
||||||
|
\param clr Color.
|
||||||
|
\param dstBase Canvas pointer (halfword-aligned plz).
|
||||||
|
\param dstP Canvas pitch in bytes.
|
||||||
|
\note Very slow. Inline the plotting functionality if possible.
|
||||||
|
*/
|
||||||
|
void bmp8_plot(int x, int y, u32 clr, void *dstBase, uint dstP)
|
||||||
|
{
|
||||||
|
u16 *dstD= (u16*)(dstBase+y*dstP+(x&~1));
|
||||||
|
|
||||||
|
if(x&1)
|
||||||
|
*dstD= (*dstD& 0xFF) | (clr<<8);
|
||||||
|
else
|
||||||
|
*dstD= (*dstD&~0xFF) | (clr&0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//! Draw a horizontal line on an 8bit buffer
|
||||||
|
/*!
|
||||||
|
\param x1 First X-coord.
|
||||||
|
\param y Y-coord.
|
||||||
|
\param x2 Second X-coord.
|
||||||
|
\param clr Color index.
|
||||||
|
\param dstBase Canvas pointer (halfword-aligned plz).
|
||||||
|
\param dstP canvas pitch in bytes.
|
||||||
|
\note Does normalization, but not bounds checks.
|
||||||
|
*/
|
||||||
|
void bmp8_hline(int x1, int y, int x2, u32 clr, void *dstBase, uint dstP)
|
||||||
|
{
|
||||||
|
// --- Normalize ---
|
||||||
|
clr &= 0xFF;
|
||||||
|
if(x2<x1)
|
||||||
|
{ int tmp= x1; x1= x2; x2= tmp; }
|
||||||
|
|
||||||
|
uint width= x2-x1+1;
|
||||||
|
u16 *dstL= (u16*)(dstBase+y*dstP + (x1&~1));
|
||||||
|
|
||||||
|
// --- Left unaligned pixel ---
|
||||||
|
if(x1&1)
|
||||||
|
{
|
||||||
|
*dstL= (*dstL & 0xFF) + (clr<<8);
|
||||||
|
width--;
|
||||||
|
dstL++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Right unaligned pixel ---
|
||||||
|
if(width&1)
|
||||||
|
dstL[width/2]= (dstL[width/2]&~0xFF) + clr;
|
||||||
|
width /= 2;
|
||||||
|
|
||||||
|
// --- Aligned line ---
|
||||||
|
if(width)
|
||||||
|
memset16(dstL, dup8(clr), width);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//! Draw a vertical line on an 8bit buffer
|
||||||
|
/*!
|
||||||
|
\param x X-coord.
|
||||||
|
\param y1 First Y-coord.
|
||||||
|
\param y2 Second Y-coord.
|
||||||
|
\param clr Color index.
|
||||||
|
\param dstBase Canvas pointer (halfword-aligned plz).
|
||||||
|
\param dstP canvas pitch in bytes.
|
||||||
|
\note Does normalization, but not bounds checks.
|
||||||
|
*/
|
||||||
|
void bmp8_vline(int x, int y1, int y2, u32 clr, void *dstBase, uint dstP)
|
||||||
|
{
|
||||||
|
// --- Normalize ---
|
||||||
|
if(y2<y1)
|
||||||
|
{ int tmp= y1; y1= y2; y2= tmp; }
|
||||||
|
|
||||||
|
uint height= y2-y1+1;
|
||||||
|
u16 *dstL= (u16*)(dstBase+y1*dstP + (x&~1));
|
||||||
|
dstP /= 2;
|
||||||
|
|
||||||
|
if(x&1)
|
||||||
|
{
|
||||||
|
clr <<= 8;
|
||||||
|
while(height--)
|
||||||
|
{ *dstL= (*dstL& 0xFF) + clr; dstL += dstP; }
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
clr &= 0xFF;
|
||||||
|
while(height--)
|
||||||
|
{ *dstL= (*dstL&~0xFF) + clr; dstL += dstP; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//! Draw a line on an 8bit buffer
|
||||||
|
/*!
|
||||||
|
\param x1 First X-coord.
|
||||||
|
\param y1 First Y-coord.
|
||||||
|
\param x2 Second X-coord.
|
||||||
|
\param y2 Second Y-coord.
|
||||||
|
\param clr Color index.
|
||||||
|
\param dstBase Canvas pointer (halfword-aligned plz).
|
||||||
|
\param dstP Canvas pitch in bytes.
|
||||||
|
\note Does normalization, but not bounds checks.
|
||||||
|
*/
|
||||||
|
void bmp8_line(int x1, int y1, int x2, int y2, u32 clr,
|
||||||
|
void *dstBase, uint dstP)
|
||||||
|
{
|
||||||
|
|
||||||
|
// Trivial lines: horz and vertical
|
||||||
|
if(y1 == y2) // Horizontal
|
||||||
|
{
|
||||||
|
// Adjust for inclusive ends
|
||||||
|
if(x2 == x1)
|
||||||
|
{ bmp8_plot(x1, y1, clr, dstBase, dstP); return; }
|
||||||
|
|
||||||
|
bmp8_hline(x1, y1, x2, clr, dstBase, dstP);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if(x1 == x2) // Vertical
|
||||||
|
{
|
||||||
|
// Adjust for inclusive ends
|
||||||
|
if(y2 == y1)
|
||||||
|
{ bmp8_plot(x1, y1, clr, dstBase, dstP); return; }
|
||||||
|
|
||||||
|
bmp8_vline(x1, y1, y2, clr, dstBase, dstP);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Slogging through the diagonal ---
|
||||||
|
|
||||||
|
int ii, dx, dy, xstep, ystep, dd;
|
||||||
|
u32 addr= (u32)(dstBase + y1*dstP + x1), mask= 255;
|
||||||
|
u16 *dstL;
|
||||||
|
|
||||||
|
clr &= mask;
|
||||||
|
clr |= clr<<8;
|
||||||
|
if(x1 & 1)
|
||||||
|
mask= ~mask;
|
||||||
|
|
||||||
|
// --- Normalization ---
|
||||||
|
if(x1>x2)
|
||||||
|
{ xstep= -1; dx= x1-x2; }
|
||||||
|
else
|
||||||
|
{ xstep= +1; dx= x2-x1; }
|
||||||
|
|
||||||
|
if(y1>y2)
|
||||||
|
{ ystep= -dstP; dy= y1-y2; }
|
||||||
|
else
|
||||||
|
{ ystep= +dstP; dy= y2-y1; }
|
||||||
|
|
||||||
|
|
||||||
|
// --- Drawing ---
|
||||||
|
// NOTE: because xstep is alternating, you can do marvels
|
||||||
|
// with mask-flips
|
||||||
|
// NOTE: (mask>>31) is equivalent to (x&1) ? 0 : 1
|
||||||
|
|
||||||
|
if(dx>=dy) // Diagonal, slope <= 1
|
||||||
|
{
|
||||||
|
dd= 2*dy - dx;
|
||||||
|
|
||||||
|
for(ii=dx; ii>=0; ii--)
|
||||||
|
{
|
||||||
|
dstL= (u16*)(addr - (mask>>31));
|
||||||
|
*dstL= (*dstL &~ mask) | (clr & mask);
|
||||||
|
|
||||||
|
if(dd >= 0)
|
||||||
|
{ dd -= 2*dx; addr += ystep; }
|
||||||
|
|
||||||
|
dd += 2*dy;
|
||||||
|
addr += xstep;
|
||||||
|
mask = ~mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else // # Diagonal, slope > 1
|
||||||
|
{
|
||||||
|
dd= 2*dx - dy;
|
||||||
|
|
||||||
|
for(ii=dy; ii>=0; ii--)
|
||||||
|
{
|
||||||
|
dstL= (u16*)(addr - (mask>>31));
|
||||||
|
*dstL= (*dstL &~ mask) | (clr & mask);
|
||||||
|
|
||||||
|
if(dd >= 0)
|
||||||
|
{
|
||||||
|
dd -= 2*dy;
|
||||||
|
addr += xstep;
|
||||||
|
mask = ~mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
dd += 2*dx;
|
||||||
|
addr += ystep;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
INLINE void m4_line(int x1, int y1, int x2, int y2, u8 clrid)
{
    // Forward to bmp8_line, drawing on vid_page with a pitch of
    // M4_WIDTH bytes.
    bmp8_line(x1, y1, x2, y2, clrid, vid_page, M4_WIDTH);
}
|
|
@ -0,0 +1,216 @@
|
||||||
|
//
|
||||||
|
// Alignment-safe and fast memset routines
|
||||||
|
//
|
||||||
|
//! \file tonc_memset.s
|
||||||
|
//! \author J Vijn
|
||||||
|
//! \date 20060508 - 20090801
|
||||||
|
//
|
||||||
|
// === NOTES ===
|
||||||
|
@ * 20050924: Lower overhead for all; reduced i-count for u16 loops.
|
||||||
|
@ * These are 16/32bit memset and memcpy. The 32bit versions are in
|
||||||
|
@ iwram for maximum effect and pretty much do what CpuFastSet does,
|
||||||
|
@ except that it'll work for non multiples of 8 words too. Speed
|
||||||
|
@ is as good as CpuFastSet, but with a little less overhead.
|
||||||
|
@ * The 16bit versions call the 32bit ones if possible and/or desirable.
|
||||||
|
@ They are thumb/ROM functions but did them in asm anyway because
|
||||||
|
@ GCC goes haywire with the use of registers resulting in a much
|
||||||
|
@ higher overhead (i.e., detrimental for low counts)
|
||||||
|
@ * Crossover with inline while(nn--) loops (not for(ii++), which are
|
||||||
|
@ much slower):
|
||||||
|
@ memset32: ~5
|
||||||
|
@ memset16: ~8
|
||||||
|
|
||||||
|
.file "tonc_memset.s"
|
||||||
|
|
||||||
|
|
||||||
|
#define DEF_SIZE(_name) .size _name, .-_name
|
||||||
|
|
||||||
|
//! \name Section definitions for assembly.
|
||||||
|
//\{
|
||||||
|
|
||||||
|
#define CSEC_TEXT .text //!< Standard code section directive.
|
||||||
|
#define CSEC_EWRAM .section .ewram , "ax", %progbits //!< EWRAM code section directive.
|
||||||
|
#define CSEC_IWRAM .section .iwram, "ax", %progbits //!< IWRAM code section directive.
|
||||||
|
|
||||||
|
#define DSEC_DATA .data //!< Standard data section directive.
|
||||||
|
#define DSEC_ROM .section .rodata //!< ROM data section directive.
|
||||||
|
#define DSEC_BSS .section .bss //!< Uninited data (RAM) section directive.
|
||||||
|
#define DSEC_SBSS .section .sbss //!< Uninited data (DTCM?) section directive.
|
||||||
|
|
||||||
|
#define ARM_FUNC .arm //!< Indicates an ARM function.
|
||||||
|
#define THUMB_FUNC .thumb_func //!< Indicates a Thumb function.
|
||||||
|
|
||||||
|
//# NOTE: because these use commas, I can't pass them through CPP macros.
|
||||||
|
//# Yes, this is stupid, but do you have a better idea?
|
||||||
|
|
||||||
|
#undef CSEC_EWRAM
|
||||||
|
.macro CSEC_EWRAM
|
||||||
|
.section .ewram , "ax", %progbits
|
||||||
|
.endm
|
||||||
|
|
||||||
|
#undef CSEC_IWRAM
|
||||||
|
.macro CSEC_IWRAM
|
||||||
|
.section .iwram , "ax", %progbits
|
||||||
|
.endm
|
||||||
|
|
||||||
|
//\}
|
||||||
|
|
||||||
|
|
||||||
|
//! \name Function definition macros.
|
||||||
|
//\{
|
||||||
|
|
||||||
|
//! Start an assembly function.
|
||||||
|
/*!
|
||||||
|
\param _name Name of function.
|
||||||
|
\param _section Section to place function in (like .text)
|
||||||
|
*/
|
||||||
|
#define BEGIN_FUNC(_name, _section, _iset) \
|
||||||
|
_section; \
|
||||||
|
_iset; \
|
||||||
|
.align 2; \
|
||||||
|
.global _name; \
|
||||||
|
.type _name STT_FUNC; \
|
||||||
|
_name:
|
||||||
|
|
||||||
|
//! End of a function.
|
||||||
|
#define END_FUNC(_name) DEF_SIZE(_name)
|
||||||
|
|
||||||
|
//! Begin an ARM function
|
||||||
|
/*!
|
||||||
|
\param _name Name of function.
|
||||||
|
\param _section Section to place function in (like .text)
|
||||||
|
*/
|
||||||
|
#define BEGIN_FUNC_ARM(_name, _section) BEGIN_FUNC(_name, _section, ARM_FUNC)
|
||||||
|
|
||||||
|
//! Begin a THUMB function.
|
||||||
|
/*!
|
||||||
|
\param _name Name of function.
|
||||||
|
\param _section Section to place function in (like .text)
|
||||||
|
*/
|
||||||
|
#define BEGIN_FUNC_THUMB(_name, _section) BEGIN_FUNC(_name, _section, THUMB_FUNC)
|
||||||
|
//\}
|
||||||
|
|
||||||
|
//! \name Data definition macros.
|
||||||
|
//\{
|
||||||
|
#define BEGIN_SYMBOL(_name, _section) \
|
||||||
|
_section; \
|
||||||
|
.align; \
|
||||||
|
.global _name; \
|
||||||
|
_name:
|
||||||
|
|
||||||
|
#define END_SYMBOL(_name) DEF_SIZE(_name)
|
||||||
|
//\}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------
|
||||||
|
// CONSTANTS
|
||||||
|
// --------------------------------------------------------------------
|
||||||
|
|
||||||
|
//! \name TSurface member offsets.
|
||||||
|
//\{
|
||||||
|
#define TSRF_data 0
|
||||||
|
#define TSRF_pitch 4
|
||||||
|
#define TSRF_width 8
|
||||||
|
#define TSRF_height 10
|
||||||
|
#define TSRF_bpp 12
|
||||||
|
#define TSRF_type 13
|
||||||
|
#define TSRF_palSize 14
|
||||||
|
#define TSRF_pal 16
|
||||||
|
//\}
|
||||||
|
|
||||||
|
@ === void memset32(void *dst, u32 src, u32 wdn); =====================
|
||||||
|
/*! \fn void memset32(void *dst, u32 src, u32 wdn) IWRAM_CODE;
|
||||||
|
\brief Fast-fill by words.
|
||||||
|
\param dst Destination address.
|
||||||
|
\param src Fill word (not address).
|
||||||
|
\param wdn Number of words to fill.
|
||||||
|
\note \a dst <b>must</b> be word aligned.
|
||||||
|
\note \a r0 returns as \a dst + \a wdn*4.
|
||||||
|
*/
|
||||||
|
/* Reglist:
|
||||||
|
r0, r1: dst, src
|
||||||
|
r2: wdn, then wdn>>3
|
||||||
|
r3-r9: data buffer (stored together with r1)
|
||||||
|
r12: wdn&7
|
||||||
|
*/
|
||||||
|
@ memset32(r0=dst, r1=fill word, r2=word count): ARM routine placed in the .iwram section.
BEGIN_FUNC_ARM(memset32, CSEC_IWRAM)
|
||||||
|
and r12, r2, #7 @ r12 = count & 7: words left over after the 8-word chunks
|
||||||
|
movs r2, r2, lsr #3 @ r2 = count >> 3: number of 8-word chunks; Z is set when there are none
|
||||||
|
beq .Lres_set32 @ no full chunk: go straight to the residual loop
|
||||||
|
push {r4-r9} @ r4-r9 are overwritten with the fill word below and restored after the loop
|
||||||
|
@ set 32byte chunks with 8fold xxmia
|
||||||
|
mov r3, r1
|
||||||
|
mov r4, r1
|
||||||
|
mov r5, r1
|
||||||
|
mov r6, r1
|
||||||
|
mov r7, r1
|
||||||
|
mov r8, r1
|
||||||
|
mov r9, r1
|
||||||
|
.Lmain_set32:
|
||||||
|
stmia r0!, {r1, r3-r9} @ store 8 copies of the fill word and advance r0 by 32 bytes
|
||||||
|
subs r2, r2, #1
|
||||||
|
bhi .Lmain_set32 @ repeat while chunks remain (result still above zero)
|
||||||
|
pop {r4-r9}
|
||||||
|
@ residual 0-7 words
|
||||||
|
.Lres_set32:
|
||||||
|
subs r12, r12, #1 @ carry stays set only if r12 was at least 1 before the subtract
|
||||||
|
stmhsia r0!, {r1} @ conditional store: one word, only when a residual word was left
|
||||||
|
bhi .Lres_set32 @ repeat while residual words remain
|
||||||
|
bx lr
|
||||||
|
END_FUNC(memset32)
|
||||||
|
|
||||||
|
@ === void memset16(void *dst, u16 src, u32 hwn); =====================
|
||||||
|
/*! \fn void memset16(void *dst, u16 src, u32 hwn);
|
||||||
|
\brief Fill for halfwords.
|
||||||
|
Uses <code>memset32()</code> if \a hwn>5
|
||||||
|
\param dst Destination address.
|
||||||
|
\param src Source halfword (not address).
|
||||||
|
\param hwn Number of halfwords to fill.
|
||||||
|
\note \a dst <b>must</b> be halfword aligned.
|
||||||
|
\note \a r0 returns as \a dst + \a hwn.
|
||||||
|
*/
|
||||||
|
/* Reglist:
|
||||||
|
r0, r1: dst, src
|
||||||
|
r2, r4: wdn
|
||||||
|
r3: tmp; and data buffer
|
||||||
|
*/
|
||||||
|
@ memset16(r0=dst, r1=fill halfword, r2=halfword count): Thumb routine in .text; long fills are handed to memset32.
BEGIN_FUNC_THUMB(memset16, CSEC_TEXT)
|
||||||
|
push {r4, lr} @ r4 is scratch below; the saved lr is popped into r3 for the return
|
||||||
|
@ under 6 hwords -> std set
|
||||||
|
cmp r2, #5
|
||||||
|
bls .Ltail_set16 @ 5 or fewer halfwords: halfword loop only
|
||||||
|
@ dst not word aligned: copy 1 hword and align
|
||||||
|
lsl r3, r0, #31 @ shifts bit 1 of dst out into the carry flag
|
||||||
|
bcc .Lmain_set16 @ carry clear: dst is already word aligned
|
||||||
|
strh r1, [r0]
|
||||||
|
add r0, #2
|
||||||
|
sub r2, r2, #1
|
||||||
|
@ Again, memset32 does the real work
|
||||||
|
.Lmain_set16:
|
||||||
|
lsl r4, r1, #16
|
||||||
|
orr r1, r4 @ r1 = halfword in both halves (assumes bits 16-31 of r1 were zero on entry -- TODO confirm)
|
||||||
|
lsl r4, r2, #31 @ keep the low bit of the halfword count in bit 31 of r4
|
||||||
|
lsr r2, r2, #1 @ r2 = number of whole words
|
||||||
|
ldr r3, =memset32
|
||||||
|
bl .Llong_bl @ call memset32 through the bx r3 at .Llong_bl; bl leaves the return address in lr
|
||||||
|
@ NOTE: r0 is altered by memset32, but in exactly the right
|
||||||
|
@ way, so we can use is as is. r1 is now doubled though.
|
||||||
|
lsr r2, r4, #31 @ r2 = leftover halfword count (0 or 1); Z set when 0
|
||||||
|
beq .Lend_set16
|
||||||
|
lsr r1, #16 @ back to a single halfword value
|
||||||
|
.Ltail_set16:
|
||||||
|
sub r2, #1
|
||||||
|
bcc .Lend_set16 @ r2 was 0, bug out
|
||||||
|
lsl r2, r2, #1 @ r2 = byte offset of the last halfword
|
||||||
|
.Lres_set16:
|
||||||
|
strh r1, [r0, r2] @ fill from the last halfword down to offset 0
|
||||||
|
sub r2, r2, #2
|
||||||
|
bcs .Lres_set16 @ loop until the subtraction borrows (offset went below 0)
|
||||||
|
.Lend_set16:
|
||||||
|
pop {r4}
|
||||||
|
pop {r3} @ r3 = saved lr (return address)
|
||||||
|
.Llong_bl:
|
||||||
|
bx r3 @ shared: returns to the caller when reached from above, and is the jump used to call memset32
|
||||||
|
END_FUNC(memset16)
|
||||||
|
|
||||||
|
|
||||||
|
@ EOF
|
|
@ -3,6 +3,7 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
#include <libgba-sprite-engine/gba/tonc_memdef.h>
|
#include <libgba-sprite-engine/gba/tonc_memdef.h>
|
||||||
|
#include <libgba-sprite-engine/gba/tonc_video.h>
|
||||||
#include <libgba-sprite-engine/gba_engine.h>
|
#include <libgba-sprite-engine/gba_engine.h>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <libgba-sprite-engine/gba/tonc_core.h>
|
#include <libgba-sprite-engine/gba/tonc_core.h>
|
||||||
|
@ -145,42 +146,15 @@ void GBAEngine::flipPage() {
|
||||||
REG_DISPCNT ^= DCNT_PAGE;
|
REG_DISPCNT ^= DCNT_PAGE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// http://www.coranac.com/tonc/text/bitmaps.htm
|
|
||||||
// this thing is supposed to be very slow. see link above.
|
|
||||||
inline void GBAEngine::plotPixel(const VectorPx &pixel, u8 clrId) {
|
inline void GBAEngine::plotPixel(const VectorPx &pixel, u8 clrId) {
|
||||||
u16 *dst = &vid_page[(pixel.y() * M4_WIDTH + pixel.x()) / 2];
|
m4_plot(pixel.x(), pixel.y(), clrId);
|
||||||
if(pixel.x() & 1) {
|
|
||||||
*dst = (*dst & 0xFF) | (clrId << 8);
|
|
||||||
} else {
|
|
||||||
*dst = (*dst & ~0xFF) | clrId;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// more or less 1-to-1:
|
|
||||||
// https://www.davrous.com/2013/06/14/tutorial-part-2-learning-how-to-write-a-3d-soft-engine-from-scratch-in-c-ts-or-js-drawing-lines-triangles/
|
|
||||||
inline void GBAEngine::plotLine(const VectorPx &point0, const VectorPx &point1, u8 clrId) {
|
inline void GBAEngine::plotLine(const VectorPx &point0, const VectorPx &point1, u8 clrId) {
|
||||||
int x0 = point0.x();
|
// uses tonc's optimization tricks to get 10 FPS extra compared to standard bline algorithms
|
||||||
int y0 = point0.y();
|
m4_line(point0.x(), point0.y(), point1.x(), point1.y(), clrId);
|
||||||
int x1 = point1.x();
|
|
||||||
int y1 = point1.y();
|
|
||||||
|
|
||||||
int dx = ABS(x1 - x0);
|
|
||||||
int dy = ABS(y1 - y0);
|
|
||||||
int sx = (x0 < x1) ? 1 : -1;
|
|
||||||
int sy = (y0 < y1) ? 1 : -1;
|
|
||||||
int err = dx - dy;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
plotPixel(VectorPx(x0, y0), clrId);
|
|
||||||
|
|
||||||
if ((x0 == x1) && (y0 == y1)) break;
|
|
||||||
auto e2 = 2 * err;
|
|
||||||
if (e2 > -dy) { err -= dy; x0 += sx; }
|
|
||||||
if (e2 < dx) { err += dx; y0 += sy; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline VectorPx GBAEngine::project(const VectorFx &coord, const MatrixFx &transMat) {
|
inline VectorPx GBAEngine::project(const VectorFx &coord, const MatrixFx &transMat) {
|
||||||
auto point = MatrixFx::transformCoordinates(coord, transMat);
|
auto point = MatrixFx::transformCoordinates(coord, transMat);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue