Skip to content

Instantly share code, notes, and snippets.

@kdrnic
Last active May 23, 2018 14:59
Show Gist options
  • Select an option

  • Save kdrnic/6463c4aeda75238da52a97cf7d40ef45 to your computer and use it in GitHub Desktop.

Select an option

Save kdrnic/6463c4aeda75238da52a97cf7d40ef45 to your computer and use it in GitHub Desktop.
#include <allegro.h>
#include <math.h>
#include <stdio.h>
#include "raster.h"
/*
Illustration of the two subtriangles
-verts[0] -vs[0]=ve[0]
/ \ / \
/ \- / \-
/ \ / \
/ \ / \
/ \- / \-
/ \ / \ ve[0]
/ /-\verts[1] -----------------ve[1] -----------------
/ /--- / /---
/ /--- / /---
/ /--- / /---
/ /--- / /---
--- ---
verts[2] vs[1]=ve[1]
*/
void kdr_triangle3d_f(BITMAP *bmp, int shader, BITMAP *tex, V3D_f *_v1, V3D_f *_v2, V3D_f *_v3)
{
//Holds ends of triangle scanlines
//'p' stands for perspective correction
//and the z is actually inverse
struct {
float x, u, v, up, vp, zp;
} scan_s, scan_e, *scan_l, *scan_r;
//Local copy, since may be order swapped
V3D_f *verts[] = {_v1, _v2, _v3};
//Used for swaps
V3D_f *tmp_vp;
//Triangle is cut into two triangles, by the horizontal line that goes through verts[1]
//vs is the segment that intersects this line
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
//Final values for each pixel
int x_i, y_i, u_i, v_i, up_i, vp_i;
float z_i;
//Calculated colour
int c;
//One pass for each of the subtriangles
int pass;
#define SWAP_V(a, b) if(1){ tmp_vp = (a); (a) = (b); (b) = tmp_vp; }(void)0
//Linearly interpolate between a and b by c amount
#define LINTERP(a, b, c) ((a) + ((b) - (a)) * c)
//A ratio of how far along is c, in the segment a to b
#define ALONG(a, b, c) (((b) - (a)) ? ((c) - (a)) / ((b) - (a)) : 0.0)
//Order verts
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
vs0 = verts[0];
vs1 = verts[2];
//Verts for upper subtriangle
ve0 = verts[0];
ve1 = verts[1];
//Two passes as discussed above
for(pass = 0; pass < 2; pass++){
//One iteration per horizontal scanline
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
//Calculate interpolated values for both ends of the scanline
//Of note, both those and the interpolations down below can have calculations
//simplified since they obviously move by constant steps
const float along_s = ALONG(vs0->y, vs1->y, (float) y_i);
const float along_e = ALONG(ve0->y, ve1->y, (float) y_i);
scan_s.x = LINTERP(vs0->x, vs1->x, along_s);
scan_s.u = LINTERP(vs0->u, vs1->u, along_s);
scan_s.v = LINTERP(vs0->v, vs1->v, along_s);
scan_s.up = LINTERP(vs0->u / vs0->z, vs1->u / vs1->z, along_s);
scan_s.vp = LINTERP(vs0->v / vs0->z, vs1->v / vs1->z, along_s);
scan_s.zp = LINTERP(1.0 / vs0->z, 1.0 / vs1->z, along_s);
scan_e.x = LINTERP(ve0->x, ve1->x, along_e);
scan_e.u = LINTERP(ve0->u, ve1->u, along_e);
scan_e.v = LINTERP(ve0->v, ve1->v, along_e);
scan_e.up = LINTERP(ve0->u / ve0->z, ve1->u / ve1->z, along_e);
scan_e.vp = LINTERP(ve0->v / ve0->z, ve1->v / ve1->z, along_e);
scan_e.zp = LINTERP(1.0 / ve0->z, 1.0 / ve1->z, along_e);
//This test could actually be done only once
//scan_l to scan_r is strictly left to right
if(scan_e.x > scan_s.x){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
//Iterate through scanline
for(x_i = ceil(scan_l->x); x_i < ceil(scan_r->x); x_i++){
//Final interpolation for each pixel
const float along_x = ALONG(scan_l->x, scan_r->x, (float) x_i);
u_i = floor( LINTERP(scan_l->u, scan_r->u, along_x));
v_i = floor( LINTERP(scan_l->v, scan_r->v, along_x));
//Some games only calculate the inverse of z_i every few pixels
//and add a further linear interpolation inbetween
z_i = LINTERP(scan_l->zp, scan_r->zp, along_x);
up_i = floor( LINTERP(scan_l->up, scan_r->up, along_x) / z_i);
vp_i = floor( LINTERP(scan_l->vp, scan_r->vp, along_x) / z_i);
//Pick colour according to shader
switch(shader){
case POLYTYPE_FLAT:
c = verts[0]->c;
break;
case POLYTYPE_PTEX:
case POLYTYPE_PTEX_MASK:
//Handle negative UVs and wrapping
up_i += tex->w << 8;
vp_i += tex->h << 8;
up_i %= tex->w;
vp_i %= tex->h;
c = getpixel(tex, up_i, vp_i);
break;
case POLYTYPE_ATEX:
case POLYTYPE_ATEX_MASK:
//Handle negative UVs and wrapping
u_i += tex->w << 8;
v_i += tex->h << 8;
u_i %= tex->w;
v_i %= tex->h;
c = getpixel(tex, u_i, v_i);
break;
}
putpixel(bmp, x_i, y_i, c);
}
}
//Verts for lower subtriangle
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef LINTERP
#undef ALONG
}
//TODO: mind fill convention
//TODO: mind pixel centres
/*
Persp-correct only, slightly faster version of the above
Instead of using the linterp formula directly, keep current pos and increase with fixed steps
*/
void kdr_triangle3d_f_fast2(BITMAP *bmp, int s, BITMAP *tex, V3D_f *_v1, V3D_f *_v2, V3D_f *_v3)
{
V3D_f scan_s, scan_e, *scan_l, *scan_r, scan_x;
V3D_f scan_x_step, scan_s_step, scan_e_step;
//Hard-copied so that u and v may be divided by z, and z inverted
V3D_f __verts[3] = {*_v1, *_v2, *_v3};
V3D_f *verts[] = {&__verts[0], &__verts[1], &__verts[2]};
V3D_f *tmp_v;
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
int x_i, y_i, u_i, v_i, x_l, x_r;
float tmp;
int pass, c;
#define SWAP_V(a, b) if(1){ tmp_v = (a); (a) = (b); (b) = tmp_v; }(void)0
#define STEPSIZE(a, b, c) (((b) - (a)) * (c))
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
//Applies perspective projection
verts[0]->z = 1.0f / verts[0]->z;
verts[1]->z = 1.0f / verts[1]->z;
verts[2]->z = 1.0f / verts[2]->z;
verts[0]->u *= verts[0]->z;
verts[1]->u *= verts[1]->z;
verts[2]->u *= verts[2]->z;
verts[0]->v *= verts[0]->z;
verts[1]->v *= verts[1]->z;
verts[2]->v *= verts[2]->z;
vs0 = verts[0];
vs1 = verts[2];
ve0 = verts[0];
ve1 = verts[1];
//Step along Y
tmp = 1.0f / (ceil(vs1->y) - ceil(vs0->y));
scan_s_step.z = STEPSIZE(vs0->z, vs1->z, tmp);
scan_s_step.u = STEPSIZE(vs0->u, vs1->u, tmp);
scan_s_step.v = STEPSIZE(vs0->v, vs1->v, tmp);
scan_s_step.x = STEPSIZE(vs0->x, vs1->x, tmp);
scan_s = *vs0;
for(pass = 0; pass < 2; pass++){
//Step along Y
tmp = 1.0f / (floor(ve1->y) - ceil(ve0->y));
scan_e_step.z = STEPSIZE(ve0->z, ve1->z, tmp);
scan_e_step.u = STEPSIZE(ve0->u, ve1->u, tmp);
scan_e_step.v = STEPSIZE(ve0->v, ve1->v, tmp);
scan_e_step.x = STEPSIZE(ve0->x, ve1->x, tmp);
scan_e = *ve0;
//This check stays relevant throughout a pass
if((scan_e_step.x > scan_s_step.x) ^ (pass)){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
//Step along X
tmp = 1.0f / (ceil(scan_r->x) - ceil(scan_l->x));
scan_x_step.z = STEPSIZE(scan_l->z, scan_r->z, tmp);
scan_x_step.u = STEPSIZE(scan_l->u, scan_r->u, tmp);
scan_x_step.v = STEPSIZE(scan_l->v, scan_r->v, tmp);
scan_x = *scan_l;
x_l = ceil(scan_l->x);
x_r = ceil(scan_r->x);
for(x_i = x_l; x_i < x_r; x_i++){
tmp = 1.0f / scan_x.z;
u_i = floor(scan_x.u * tmp);
v_i = floor(scan_x.v * tmp);
//Negative and wrapping UVs
u_i = (u_i + 0x10000) & (tex->w - 1);
v_i = (v_i + 0x10000) & (tex->h - 1);
c = getpixel(tex, u_i, v_i);
putpixel(bmp, x_i, y_i, c);
//Step along the scanline
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
}
//Step along Y for both the start and end of the scanline
scan_s.x += scan_s_step.x;
scan_s.z += scan_s_step.z;
scan_s.u += scan_s_step.u;
scan_s.v += scan_s_step.v;
scan_e.x += scan_e_step.x;
scan_e.z += scan_e_step.z;
scan_e.u += scan_e_step.u;
scan_e.v += scan_e_step.v;
}
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef STEPSIZE
}
/*
Faster version of the above
Only perspective correct support in 32bpp
Tries to calculate perspective correct UVs each 16 pixels, and linterp inbetween
An enormous, ugly hack
*/
void kdr_triangle3d_f_fast(BITMAP *bmp, int s, BITMAP *tex, V3D_f *_v1, V3D_f *_v2, V3D_f *_v3)
{
V3D_f scan_s, scan_e, *scan_l, *scan_r, scan_x;
V3D_f scan_x_step, scan_s_step, scan_e_step;
V3D_f __verts[3] = {*_v1, *_v2, *_v3};
V3D_f *verts[] = {&__verts[0], &__verts[1], &__verts[2]};
V3D_f *tmp_v;
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
int x_i, y_i, u_i, v_i, u_i_n, v_i_n, x_i_n, x_l, x_r;
float z_i, z_i_n;
int pass;
float tmp;
int *pixel;
#define SWAP_V(a, b) if(1){ tmp_v = (a); (a) = (b); (b) = tmp_v; }(void)0
#define STEPSIZE(a, b, c) (((b) - (a)) * (c))
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
verts[0]->z = 1.0f / verts[0]->z;
verts[1]->z = 1.0f / verts[1]->z;
verts[2]->z = 1.0f / verts[2]->z;
verts[0]->u *= verts[0]->z;
verts[1]->u *= verts[1]->z;
verts[2]->u *= verts[2]->z;
verts[0]->v *= verts[0]->z;
verts[1]->v *= verts[1]->z;
verts[2]->v *= verts[2]->z;
vs0 = verts[0];
vs1 = verts[2];
ve0 = verts[0];
ve1 = verts[1];
tmp = 1.0f / (ceil(vs1->y) - ceil(vs0->y));
scan_s_step.z = STEPSIZE(vs0->z, vs1->z, tmp);
scan_s_step.u = STEPSIZE(vs0->u, vs1->u, tmp);
scan_s_step.v = STEPSIZE(vs0->v, vs1->v, tmp);
scan_s_step.x = STEPSIZE(vs0->x, vs1->x, tmp);
scan_s = *vs0;
for(pass = 0; pass < 2; pass++){
tmp = 1.0f / (ceil(ve1->y) - ceil(ve0->y));
scan_e_step.z = STEPSIZE(ve0->z, ve1->z, tmp);
scan_e_step.u = STEPSIZE(ve0->u, ve1->u, tmp);
scan_e_step.v = STEPSIZE(ve0->v, ve1->v, tmp);
scan_e_step.x = STEPSIZE(ve0->x, ve1->x, tmp);
scan_e = *ve0;
if((scan_e_step.x > scan_s_step.x) ^ (pass)){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
#define STEPLEN 16
#define STEPLENF 16.0f
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
tmp = 1.0f / ((scan_r->x) - (scan_l->x));
scan_x_step.z = STEPSIZE(scan_l->z, scan_r->z, tmp) * STEPLENF;
scan_x_step.u = STEPSIZE(scan_l->u, scan_r->u, tmp) * STEPLENF;
scan_x_step.v = STEPSIZE(scan_l->v, scan_r->v, tmp) * STEPLENF;
scan_x = *scan_l;
x_l = (scan_l->x);
x_r = (scan_r->x);
z_i = 1.0 / scan_x.z;
u_i = (scan_x.u * z_i);
v_i = (scan_x.v * z_i);
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
z_i_n = 1.0f / scan_x.z;
pixel = (int *) bmp->line[y_i];
pixel += x_l;
for(x_i = x_l; x_i < x_r; x_i = x_i_n){
x_i_n = x_i + STEPLEN;
u_i_n = (scan_x.u * z_i_n);
v_i_n = (scan_x.v * z_i_n);
u_i_n -= u_i;
v_i_n -= v_i;
if(x_i_n < x_r){
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
z_i_n = 1.0f / scan_x.z;
}
#define UP_I(n) (((u_i + ((u_i_n * (n)) >> 4)) + 65536) & (tex->w - 1))
#define VP_I(n) (((v_i + ((v_i_n * (n)) >> 4)) + 65536) & (tex->h - 1))
#define X_I(n) (x_i + (n))
#define C(n) _getpixel32(tex, UP_I(n), VP_I(n))
#define PUTPIXEL(i, c) *(pixel + i) = (c); (void)0
//This unrolling provides a quite radical performance boost
#if 1
switch(x_r - x_i){
default:
#if STEPLEN >= 16
case 16:
PUTPIXEL((15), C(15));
case 15:
PUTPIXEL((14), C(14));
case 14:
PUTPIXEL((13), C(13));
case 13:
PUTPIXEL((12), C(12));
case 12:
PUTPIXEL((11), C(11));
case 11:
PUTPIXEL((10), C(10));
case 10:
PUTPIXEL((9), C(9));
case 9:
PUTPIXEL((8), C(8));
#endif
#if STEPLEN >= 8
case 8:
PUTPIXEL((7), C(7));
case 7:
PUTPIXEL((6), C(6));
case 6:
PUTPIXEL((5), C(5));
case 5:
PUTPIXEL((4), C(4));
#endif
#if STEPLEN >= 4
case 4:
PUTPIXEL((3), C(3));
case 3:
PUTPIXEL((2), C(2));
#endif
#if STEPLEN >= 2
case 2:
PUTPIXEL((1), C(1));
#endif
case 1:
PUTPIXEL((0), C(0));
}
#else
int i;
for(i = 0; i < x_r - x_i; i++){
_putpixel32(bmp, X_I(i), y_i, C(i));
}
#endif
pixel += STEPLEN;
u_i += u_i_n;
v_i += v_i_n;
}
scan_s.x += scan_s_step.x;
scan_s.z += scan_s_step.z;
scan_s.u += scan_s_step.u;
scan_s.v += scan_s_step.v;
scan_e.x += scan_e_step.x;
scan_e.z += scan_e_step.z;
scan_e.u += scan_e_step.u;
scan_e.v += scan_e_step.v;
}
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef STEPSIZE
}
int raster_triangle_func = 0;
typedef void (*t3d_f_func_ptr)(BITMAP *, int, BITMAP *, V3D_f *, V3D_f *, V3D_f *);
void kdr_polygon3d_f(BITMAP *bmp, int shader, BITMAP *tex, int num_verts, V3D_f **verts)
{
const t3d_f_func_ptr funcs[] = {
triangle3d_f,
kdr_triangle3d_f,
kdr_triangle3d_f_fast,
kdr_triangle3d_f_fast2
};
if(num_verts <= 2) return;
if(num_verts > 3){
int i;
for(i = 1; i < num_verts - 1; i++){
funcs[raster_triangle_func](bmp, shader, tex, verts[0], verts[i], verts[i + 1]);
}
return;
}
funcs[raster_triangle_func](bmp, shader, tex, verts[0], verts[1], verts[2]);
}
//and the z is actually inverse
struct {
float x, u, v, up, vp, zp;
} scan_s, scan_e, *scan_l, *scan_r;
//Local copy, since may be order swapped
V3D_f *verts[] = {_v1, _v2, _v3};
//Used for swaps
V3D_f *tmp_vp;
//Triangle is cut into two triangles, by the horizontal line that goes through verts[1]
//vs is the segment that intersects this line
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
//Final values for each pixel
int x_i, y_i, u_i, v_i, up_i, vp_i;
float z_i;
//Calculated colour
int c;
//One pass for each of the subtriangles
int pass;
#define SWAP_V(a, b) if(1){ tmp_vp = (a); (a) = (b); (b) = tmp_vp; }(void)0
//Linearly interpolate between a and b by c amount
#define LINTERP(a, b, c) ((a) + ((b) - (a)) * c)
//A ratio of how far along is c, in the segment a to b
#define ALONG(a, b, c) (((b) - (a)) ? ((c) - (a)) / ((b) - (a)) : 0.0)
//Order verts
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
vs0 = verts[0];
vs1 = verts[2];
//Verts for upper subtriangle
ve0 = verts[0];
ve1 = verts[1];
//Two passes as discussed above
for(pass = 0; pass < 2; pass++){
//One iteration per horizontal scanline
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
//Calculate interpolated values for both ends of the scanline
//Of note, both those and the interpolations down below can have calculations
//simplified since they obviously move by constant steps
const float along_s = ALONG(vs0->y, vs1->y, (float) y_i);
const float along_e = ALONG(ve0->y, ve1->y, (float) y_i);
scan_s.x = LINTERP(vs0->x, vs1->x, along_s);
scan_s.u = LINTERP(vs0->u, vs1->u, along_s);
scan_s.v = LINTERP(vs0->v, vs1->v, along_s);
scan_s.up = LINTERP(vs0->u / vs0->z, vs1->u / vs1->z, along_s);
scan_s.vp = LINTERP(vs0->v / vs0->z, vs1->v / vs1->z, along_s);
scan_s.zp = LINTERP(1.0 / vs0->z, 1.0 / vs1->z, along_s);
scan_e.x = LINTERP(ve0->x, ve1->x, along_e);
scan_e.u = LINTERP(ve0->u, ve1->u, along_e);
scan_e.v = LINTERP(ve0->v, ve1->v, along_e);
scan_e.up = LINTERP(ve0->u / ve0->z, ve1->u / ve1->z, along_e);
scan_e.vp = LINTERP(ve0->v / ve0->z, ve1->v / ve1->z, along_e);
scan_e.zp = LINTERP(1.0 / ve0->z, 1.0 / ve1->z, along_e);
//This test could actually be done only once
//scan_l to scan_r is strictly left to right
if(scan_e.x > scan_s.x){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
//Iterate through scanline
for(x_i = ceil(scan_l->x); x_i < ceil(scan_r->x); x_i++){
//Final interpolation for each pixel
const float along_x = ALONG(scan_l->x, scan_r->x, (float) x_i);
u_i = floor( LINTERP(scan_l->u, scan_r->u, along_x));
v_i = floor( LINTERP(scan_l->v, scan_r->v, along_x));
//Some games only calculate the inverse of z_i every few pixels
//and add a further linear interpolation inbetween
z_i = LINTERP(scan_l->zp, scan_r->zp, along_x);
up_i = floor( LINTERP(scan_l->up, scan_r->up, along_x) / z_i);
vp_i = floor( LINTERP(scan_l->vp, scan_r->vp, along_x) / z_i);
//Pick colour according to shader
switch(shader){
case POLYTYPE_FLAT:
c = verts[0]->c;
break;
case POLYTYPE_PTEX:
case POLYTYPE_PTEX_MASK:
//Handle negative UVs and wrapping
up_i += tex->w << 8;
vp_i += tex->h << 8;
up_i %= tex->w;
vp_i %= tex->h;
c = getpixel(tex, up_i, vp_i);
break;
case POLYTYPE_ATEX:
case POLYTYPE_ATEX_MASK:
//Handle negative UVs and wrapping
u_i += tex->w << 8;
v_i += tex->h << 8;
u_i %= tex->w;
v_i %= tex->h;
c = getpixel(tex, u_i, v_i);
break;
}
putpixel(bmp, x_i, y_i, c);
}
}
//Verts for lower subtriangle
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef LINTERP
#undef ALONG
}
//TODO: mind fill convention
//TODO: mind pixel centres
/*
Persp-correct only, slightly faster version of the above
Instead of using the linterp formula directly, keep current pos and increase with fixed steps
*/
void kdr_triangle3d_f_fast2(BITMAP *bmp, int s, BITMAP *tex, V3D_f *_v1, V3D_f *_v2, V3D_f *_v3)
{
V3D_f scan_s, scan_e, *scan_l, *scan_r, scan_x;
V3D_f scan_x_step, scan_s_step, scan_e_step;
//Hard-copied so that u and v may be divided by z, and z inverted
V3D_f __verts[3] = {*_v1, *_v2, *_v3};
V3D_f *verts[] = {&__verts[0], &__verts[1], &__verts[2]};
V3D_f *tmp_v;
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
int x_i, y_i, u_i, v_i, x_l, x_r;
float tmp;
int pass, c;
#define SWAP_V(a, b) if(1){ tmp_v = (a); (a) = (b); (b) = tmp_v; }(void)0
#define STEPSIZE(a, b, c) (((b) - (a)) * (c))
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
//Applies perspective projection
verts[0]->z = 1.0f / verts[0]->z;
verts[1]->z = 1.0f / verts[1]->z;
verts[2]->z = 1.0f / verts[2]->z;
verts[0]->u *= verts[0]->z;
verts[1]->u *= verts[1]->z;
verts[2]->u *= verts[2]->z;
verts[0]->v *= verts[0]->z;
verts[1]->v *= verts[1]->z;
verts[2]->v *= verts[2]->z;
vs0 = verts[0];
vs1 = verts[2];
ve0 = verts[0];
ve1 = verts[1];
//Step along Y
tmp = 1.0f / (ceil(vs1->y) - ceil(vs0->y));
scan_s_step.z = STEPSIZE(vs0->z, vs1->z, tmp);
scan_s_step.u = STEPSIZE(vs0->u, vs1->u, tmp);
scan_s_step.v = STEPSIZE(vs0->v, vs1->v, tmp);
scan_s_step.x = STEPSIZE(vs0->x, vs1->x, tmp);
scan_s = *vs0;
for(pass = 0; pass < 2; pass++){
//Step along Y
tmp = 1.0f / (floor(ve1->y) - ceil(ve0->y));
scan_e_step.z = STEPSIZE(ve0->z, ve1->z, tmp);
scan_e_step.u = STEPSIZE(ve0->u, ve1->u, tmp);
scan_e_step.v = STEPSIZE(ve0->v, ve1->v, tmp);
scan_e_step.x = STEPSIZE(ve0->x, ve1->x, tmp);
scan_e = *ve0;
//This check stays relevant throughout a pass
if((scan_e_step.x > scan_s_step.x) ^ (pass)){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
//Step along X
tmp = 1.0f / (ceil(scan_r->x) - ceil(scan_l->x));
scan_x_step.z = STEPSIZE(scan_l->z, scan_r->z, tmp);
scan_x_step.u = STEPSIZE(scan_l->u, scan_r->u, tmp);
scan_x_step.v = STEPSIZE(scan_l->v, scan_r->v, tmp);
scan_x = *scan_l;
x_l = ceil(scan_l->x);
x_r = ceil(scan_r->x);
for(x_i = x_l; x_i < x_r; x_i++){
tmp = 1.0f / scan_x.z;
u_i = floor(scan_x.u * tmp);
v_i = floor(scan_x.v * tmp);
//Negative and wrapping UVs
u_i = (u_i + 0x10000) & (tex->w - 1);
v_i = (v_i + 0x10000) & (tex->h - 1);
c = getpixel(tex, u_i, v_i);
putpixel(bmp, x_i, y_i, c);
//Step along the scanline
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
}
//Step along Y for both the start and end of the scanline
scan_s.x += scan_s_step.x;
scan_s.z += scan_s_step.z;
scan_s.u += scan_s_step.u;
scan_s.v += scan_s_step.v;
scan_e.x += scan_e_step.x;
scan_e.z += scan_e_step.z;
scan_e.u += scan_e_step.u;
scan_e.v += scan_e_step.v;
}
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef STEPSIZE
}
/*
Faster version of the above
Only perspective correct support in 32bpp
Tries to calculate perspective correct UVs each 16 pixels, and linterp inbetween
An enormous, ugly hack
*/
void kdr_triangle3d_f_fast(BITMAP *bmp, int s, BITMAP *tex, V3D_f *_v1, V3D_f *_v2, V3D_f *_v3)
{
V3D_f scan_s, scan_e, *scan_l, *scan_r, scan_x;
V3D_f scan_x_step, scan_s_step, scan_e_step;
V3D_f __verts[3] = {*_v1, *_v2, *_v3};
V3D_f *verts[] = {&__verts[0], &__verts[1], &__verts[2]};
V3D_f *tmp_v;
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
int x_i, y_i, u_i, v_i, u_i_n, v_i_n, x_i_n, x_l, x_r;
float z_i, z_i_n;
int pass;
float tmp;
#define SWAP_V(a, b) if(1){ tmp_v = (a); (a) = (b); (b) = tmp_v; }(void)0
#define STEPSIZE(a, b, c) (((b) - (a)) * (c))
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
verts[0]->z = 1.0f / verts[0]->z;
verts[1]->z = 1.0f / verts[1]->z;
verts[2]->z = 1.0f / verts[2]->z;
verts[0]->u *= verts[0]->z;
verts[1]->u *= verts[1]->z;
verts[2]->u *= verts[2]->z;
verts[0]->v *= verts[0]->z;
verts[1]->v *= verts[1]->z;
verts[2]->v *= verts[2]->z;
vs0 = verts[0];
vs1 = verts[2];
ve0 = verts[0];
ve1 = verts[1];
tmp = 1.0f / (ceil(vs1->y) - ceil(vs0->y));
scan_s_step.z = STEPSIZE(vs0->z, vs1->z, tmp);
scan_s_step.u = STEPSIZE(vs0->u, vs1->u, tmp);
scan_s_step.v = STEPSIZE(vs0->v, vs1->v, tmp);
scan_s_step.x = STEPSIZE(vs0->x, vs1->x, tmp);
scan_s = *vs0;
for(pass = 0; pass < 2; pass++){
tmp = 1.0f / (ceil(ve1->y) - ceil(ve0->y));
scan_e_step.z = STEPSIZE(ve0->z, ve1->z, tmp);
scan_e_step.u = STEPSIZE(ve0->u, ve1->u, tmp);
scan_e_step.v = STEPSIZE(ve0->v, ve1->v, tmp);
scan_e_step.x = STEPSIZE(ve0->x, ve1->x, tmp);
scan_e = *ve0;
if((scan_e_step.x > scan_s_step.x) ^ (pass)){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
#define STEPLEN 16
#define STEPLENF 16.0f
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
tmp = 1.0f / (ceil(scan_r->x) - ceil(scan_l->x));
scan_x_step.z = STEPSIZE(scan_l->z, scan_r->z, tmp) * STEPLENF;
scan_x_step.u = STEPSIZE(scan_l->u, scan_r->u, tmp) * STEPLENF;
scan_x_step.v = STEPSIZE(scan_l->v, scan_r->v, tmp) * STEPLENF;
scan_x = *scan_l;
x_l = ceil(scan_l->x);
x_r = ceil(scan_r->x);
z_i = 1.0 / scan_x.z;
u_i = (scan_x.u * z_i);
v_i = (scan_x.v * z_i);
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
z_i_n = 1.0f / scan_x.z;
for(x_i = x_l; x_i < x_r; x_i = x_i_n){
x_i_n = x_i + STEPLEN;
u_i_n = (scan_x.u * z_i_n);
v_i_n = (scan_x.v * z_i_n);
u_i_n -= u_i;
v_i_n -= v_i;
if(x_i_n < x_r){
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
z_i_n = 1.0f / scan_x.z;
}
#define UP_I(n) (((u_i + ((u_i_n * (n)) >> 4)) + 65536) & (tex->w - 1))
#define VP_I(n) (((v_i + ((v_i_n * (n)) >> 4)) + 65536) & (tex->h - 1))
#define X_I(n) x_i + (n)
#define C(n) _getpixel32(tex, UP_I(n), VP_I(n))
switch(x_r - x_i){
default:
#if STEPLEN >= 16
case 16:
_putpixel32(bmp, X_I(15), y_i, C(15));
case 15:
_putpixel32(bmp, X_I(14), y_i, C(14));
case 14:
_putpixel32(bmp, X_I(13), y_i, C(13));
case 13:
_putpixel32(bmp, X_I(12), y_i, C(12));
case 12:
_putpixel32(bmp, X_I(11), y_i, C(11));
case 11:
_putpixel32(bmp, X_I(10), y_i, C(10));
case 10:
_putpixel32(bmp, X_I(9), y_i, C(9));
case 9:
_putpixel32(bmp, X_I(8), y_i, C(8));
#endif
#if STEPLEN >= 8
case 8:
_putpixel32(bmp, X_I(7), y_i, C(7));
case 7:
_putpixel32(bmp, X_I(6), y_i, C(6));
case 6:
_putpixel32(bmp, X_I(5), y_i, C(5));
case 5:
_putpixel32(bmp, X_I(4), y_i, C(4));
#endif
#if STEPLEN >= 4
case 4:
_putpixel32(bmp, X_I(3), y_i, C(3));
case 3:
_putpixel32(bmp, X_I(2), y_i, C(2));
#endif
#if STEPLEN >= 2
case 2:
_putpixel32(bmp, X_I(1), y_i, C(1));
#endif
case 1:
_putpixel32(bmp, X_I(0), y_i, C(0));
}
u_i += u_i_n;
v_i += v_i_n;
}
scan_s.x += scan_s_step.x;
scan_s.z += scan_s_step.z;
scan_s.u += scan_s_step.u;
scan_s.v += scan_s_step.v;
scan_e.x += scan_e_step.x;
scan_e.z += scan_e_step.z;
scan_e.u += scan_e_step.u;
scan_e.v += scan_e_step.v;
}
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef STEPSIZE
}
int raster_triangle_func = 0;
typedef void (*t3d_f_func_ptr)(BITMAP *, int, BITMAP *, V3D_f *, V3D_f *, V3D_f *);
void kdr_polygon3d_f(BITMAP *bmp, int shader, BITMAP *tex, int num_verts, V3D_f **verts)
{
const t3d_f_func_ptr funcs[] = {
triangle3d_f,
kdr_triangle3d_f,
kdr_triangle3d_f_fast,
kdr_triangle3d_f_fast2
};
if(num_verts <= 2) return;
if(num_verts > 3){
int i;
for(i = 1; i < num_verts - 1; i++){
funcs[raster_triangle_func](bmp, shader, tex, verts[0], verts[i], verts[i + 1]);
}
return;
}
funcs[raster_triangle_func](bmp, shader, tex, verts[0], verts[1], verts[2]);
}
//Holds ends of triangle scanlines
//'p' stands for perspective correction
//and the z is actually inverse
struct {
float x, u, v, up, vp, zp;
} scan_s, scan_e, *scan_l, *scan_r;
//Local copy, since may be order swapped
V3D_f *verts[] = {_v1, _v2, _v3};
//Used for swaps
V3D_f *tmp_vp;
//Triangle is cut into two triangles, by the horizontal line that goes through verts[1]
//vs is the segment that intersects this line
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
//Final values for each pixel
int x_i, y_i, u_i, v_i, up_i, vp_i;
float z_i;
//Calculated colour
int c;
//One pass for each of the subtriangles
int pass;
#define SWAP_V(a, b) if(1){ tmp_vp = (a); (a) = (b); (b) = tmp_vp; }(void)0
//Linearly interpolate between a and b by c amount
#define LINTERP(a, b, c) ((a) + ((b) - (a)) * c)
//A ratio of how far along is c, in the segment a to b
#define ALONG(a, b, c) (((b) - (a)) ? ((c) - (a)) / ((b) - (a)) : 0.0)
//Order verts
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
vs0 = verts[0];
vs1 = verts[2];
//Verts for upper subtriangle
ve0 = verts[0];
ve1 = verts[1];
//Two passes as discussed above
for(pass = 0; pass < 2; pass++){
//One iteration per horizontal scanline
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
//Calculate interpolated values for both ends of the scanline
//Of note, both those and the interpolations down below can have calculations
//simplified since they obviously move by constant steps
const float along_s = ALONG(vs0->y, vs1->y, (float) y_i);
const float along_e = ALONG(ve0->y, ve1->y, (float) y_i);
scan_s.x = LINTERP(vs0->x, vs1->x, along_s);
scan_s.u = LINTERP(vs0->u, vs1->u, along_s);
scan_s.v = LINTERP(vs0->v, vs1->v, along_s);
scan_s.up = LINTERP(vs0->u / vs0->z, vs1->u / vs1->z, along_s);
scan_s.vp = LINTERP(vs0->v / vs0->z, vs1->v / vs1->z, along_s);
scan_s.zp = LINTERP(1.0 / vs0->z, 1.0 / vs1->z, along_s);
scan_e.x = LINTERP(ve0->x, ve1->x, along_e);
scan_e.u = LINTERP(ve0->u, ve1->u, along_e);
scan_e.v = LINTERP(ve0->v, ve1->v, along_e);
scan_e.up = LINTERP(ve0->u / ve0->z, ve1->u / ve1->z, along_e);
scan_e.vp = LINTERP(ve0->v / ve0->z, ve1->v / ve1->z, along_e);
scan_e.zp = LINTERP(1.0 / ve0->z, 1.0 / ve1->z, along_e);
//This test could actually be done only once
//scan_l to scan_r is strictly left to right
if(scan_e.x > scan_s.x){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
//Iterate through scanline
for(x_i = ceil(scan_l->x); x_i < ceil(scan_r->x); x_i++){
//Final interpolation for each pixel
const float along_x = ALONG(scan_l->x, scan_r->x, (float) x_i);
u_i = floor( LINTERP(scan_l->u, scan_r->u, along_x));
v_i = floor( LINTERP(scan_l->v, scan_r->v, along_x));
//Some games only calculate the inverse of z_i every few pixels
//and add a further linear interpolation inbetween
z_i = LINTERP(scan_l->zp, scan_r->zp, along_x);
up_i = floor( LINTERP(scan_l->up, scan_r->up, along_x) / z_i);
vp_i = floor( LINTERP(scan_l->vp, scan_r->vp, along_x) / z_i);
//Pick colour according to shader
switch(shader){
case POLYTYPE_FLAT:
c = verts[0]->c;
break;
case POLYTYPE_PTEX:
case POLYTYPE_PTEX_MASK:
//Handle negative UVs and wrapping
up_i += tex->w << 8;
vp_i += tex->h << 8;
up_i %= tex->w;
vp_i %= tex->h;
c = getpixel(tex, up_i, vp_i);
break;
case POLYTYPE_ATEX:
case POLYTYPE_ATEX_MASK:
//Handle negative UVs and wrapping
u_i += tex->w << 8;
v_i += tex->h << 8;
u_i %= tex->w;
v_i %= tex->h;
c = getpixel(tex, u_i, v_i);
break;
}
putpixel(bmp, x_i, y_i, c);
}
}
//Verts for lower subtriangle
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef LINTERP
#undef ALONG
}
/*
Fast version of the above
Only perspective correct support
An enormous, ugly hack
*/
void kdr_triangle3d_f_fast(BITMAP *bmp, int s, BITMAP *tex, V3D_f *_v1, V3D_f *_v2, V3D_f *_v3)
{
V3D_f scan_s, scan_e, *scan_l, *scan_r, scan_x;
V3D_f scan_x_step, scan_s_step, scan_e_step;
V3D_f __verts[3] = {*_v1, *_v2, *_v3};
V3D_f *verts[] = {&__verts[0], &__verts[1], &__verts[2]};
V3D_f *tmp_v;
V3D_f *ve0, *ve1;
const V3D_f *vs0, *vs1;
int x_i, y_i, u_i, v_i, u_i_n, v_i_n, x_i_n, x_l, x_r;
float z_i, z_i_n;
int pass;
float tmp;
#define SWAP_V(a, b) if(1){ tmp_v = (a); (a) = (b); (b) = tmp_v; }(void)0
#define STEPSIZE(a, b, c) (((b) - (a)) * (c))
if(verts[1]->y < verts[0]->y) SWAP_V(verts[1], verts[0]);
if(verts[2]->y < verts[0]->y) SWAP_V(verts[2], verts[0]);
if(verts[1]->y > verts[2]->y) SWAP_V(verts[1], verts[2]);
verts[0]->z = 1.0f / verts[0]->z;
verts[1]->z = 1.0f / verts[1]->z;
verts[2]->z = 1.0f / verts[2]->z;
verts[0]->u *= verts[0]->z;
verts[1]->u *= verts[1]->z;
verts[2]->u *= verts[2]->z;
verts[0]->v *= verts[0]->z;
verts[1]->v *= verts[1]->z;
verts[2]->v *= verts[2]->z;
vs0 = verts[0];
vs1 = verts[2];
ve0 = verts[0];
ve1 = verts[1];
tmp = 1.0f / (ceil(vs1->y) - ceil(vs0->y));
scan_s_step.z = STEPSIZE(vs0->z, vs1->z, tmp);
scan_s_step.u = STEPSIZE(vs0->u, vs1->u, tmp);
scan_s_step.v = STEPSIZE(vs0->v, vs1->v, tmp);
scan_s_step.x = STEPSIZE(vs0->x, vs1->x, tmp);
scan_s = *vs0;
for(pass = 0; pass < 2; pass++){
tmp = 1.0f / (ceil(ve1->y) - ceil(ve0->y));
scan_e_step.z = STEPSIZE(ve0->z, ve1->z, tmp);
scan_e_step.u = STEPSIZE(ve0->u, ve1->u, tmp);
scan_e_step.v = STEPSIZE(ve0->v, ve1->v, tmp);
scan_e_step.x = STEPSIZE(ve0->x, ve1->x, tmp);
scan_e = *ve0;
if((scan_e_step.x > scan_s_step.x) ^ (pass)){
scan_l = &scan_s;
scan_r = &scan_e;
}
else{
scan_l = &scan_e;
scan_r = &scan_s;
}
#define STEPLEN 16
#define STEPLENF 16.0f
for(y_i = ceil(ve0->y); y_i < ceil(ve1->y); y_i++){
tmp = 1.0f / (ceil(scan_r->x) - ceil(scan_l->x));
scan_x_step.z = STEPSIZE(scan_l->z, scan_r->z, tmp) * STEPLENF;
scan_x_step.u = STEPSIZE(scan_l->u, scan_r->u, tmp) * STEPLENF;
scan_x_step.v = STEPSIZE(scan_l->v, scan_r->v, tmp) * STEPLENF;
scan_x = *scan_l;
x_l = ceil(scan_l->x);
x_r = ceil(scan_r->x);
z_i = 1.0 / scan_x.z;
u_i = (scan_x.u * z_i);
v_i = (scan_x.v * z_i);
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
z_i_n = 1.0f / scan_x.z;
for(x_i = x_l; x_i < x_r; x_i = x_i_n){
x_i_n = x_i + STEPLEN;
u_i_n = (scan_x.u * z_i_n);
v_i_n = (scan_x.v * z_i_n);
u_i_n -= u_i;
v_i_n -= v_i;
if(x_i_n < x_r){
scan_x.z += scan_x_step.z;
scan_x.u += scan_x_step.u;
scan_x.v += scan_x_step.v;
z_i_n = 1.0f / scan_x.z;
}
#define UP_I(n) (((u_i + ((u_i_n * (n)) >> 4)) + 65536) & (tex->w - 1))
#define VP_I(n) (((v_i + ((v_i_n * (n)) >> 4)) + 65536) & (tex->h - 1))
#define X_I(n) x_i + (n)
#define C(n) _getpixel32(tex, UP_I(n), VP_I(n))
switch(x_r - x_i){
default:
#if STEPLEN >= 16
case 16:
_putpixel32(bmp, X_I(15), y_i, C(15));
case 15:
_putpixel32(bmp, X_I(14), y_i, C(14));
case 14:
_putpixel32(bmp, X_I(13), y_i, C(13));
case 13:
_putpixel32(bmp, X_I(12), y_i, C(12));
case 12:
_putpixel32(bmp, X_I(11), y_i, C(11));
case 11:
_putpixel32(bmp, X_I(10), y_i, C(10));
case 10:
_putpixel32(bmp, X_I(9), y_i, C(9));
case 9:
_putpixel32(bmp, X_I(8), y_i, C(8));
#endif
#if STEPLEN >= 8
case 8:
_putpixel32(bmp, X_I(7), y_i, C(7));
case 7:
_putpixel32(bmp, X_I(6), y_i, C(6));
case 6:
_putpixel32(bmp, X_I(5), y_i, C(5));
case 5:
_putpixel32(bmp, X_I(4), y_i, C(4));
#endif
#if STEPLEN >= 4
case 4:
_putpixel32(bmp, X_I(3), y_i, C(3));
case 3:
_putpixel32(bmp, X_I(2), y_i, C(2));
#endif
#if STEPLEN >= 2
case 2:
_putpixel32(bmp, X_I(1), y_i, C(1));
#endif
case 1:
_putpixel32(bmp, X_I(0), y_i, C(0));
}
u_i += u_i_n;
v_i += v_i_n;
}
scan_s.x += scan_s_step.x;
scan_s.z += scan_s_step.z;
scan_s.u += scan_s_step.u;
scan_s.v += scan_s_step.v;
scan_e.x += scan_e_step.x;
scan_e.z += scan_e_step.z;
scan_e.u += scan_e_step.u;
scan_e.v += scan_e_step.v;
}
//Verts for lower subtriangle
ve0 = verts[1];
ve1 = verts[2];
}
#undef SWAP_V
#undef STEPSIZE
}
int raster_triangle_func = 0;
typedef void (*t3d_f_func_ptr)(BITMAP *, int, BITMAP *, V3D_f *, V3D_f *, V3D_f *);
void kdr_polygon3d_f(BITMAP *bmp, int shader, BITMAP *tex, int num_verts, V3D_f **verts)
{
const t3d_f_func_ptr funcs[] = {triangle3d_f, kdr_triangle3d_f, kdr_triangle3d_f_fast};
if(num_verts <= 2) return;
if(num_verts > 3){
int i;
for(i = 1; i < num_verts - 1; i++){
funcs[raster_triangle_func](bmp, shader, tex, verts[0], verts[i], verts[i + 1]);
}
return;
}
funcs[raster_triangle_func](bmp, shader, tex, verts[0], verts[1], verts[2]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment