;////////////////////////////////////////////////////////////////////////////////////////
;// Spherical Harmonic Surface
;// Copyright (c) 2004 adresd <adresd_ps2dev@yahoo.com>
;//
;// Licensed under the AFL v2.0. See the file LICENSE included with this
;// distribution for licensing terms.
;//
;// This is with smooth normals and envmapping.
;// very much overkill. will be stripped and opted to build #6
;//
;// calculated with each point, as done.
;// as drawn re-uses previous strip for making the tri strips, so
;// first strip may not be correct first run.
;//
;// Note that this code is highly unoptimized, but might be useful to learn from or so.
;//
;// Layout of this file: one instruction pair per line, upper instruction
;// first, lower instruction second.
;//

;----------------------------------------------------------------------------
; VU data memory map (quadword addresses)
;----------------------------------------------------------------------------
iScreenMatrix   = 0             ; 4 quads, loaded into vf28..vf31
iNormalMatrix   = 4             ; 4 quads, loaded into vf14..vf17 by rendercall_normals
iDU             = 12            ; du  TWOPI/resolutioni (float) *theta* i
iDV             = 13            ; dv  PI/resolutionj    (float) *pi* j
iDPI            = 12            ; same quad as iDU; added to phi once per outer (i) pass
iDTHETA         = 13            ; same quad as iDV; added to theta once per inner (j) pass
iPHIMOD         = 14            ; TWOPI/resolutioni/10 (float)
iTHETAMOD       = 15            ; PI/resolutionj/10 (float)
iRESOLI         = 16            ; resolution-i (int) all fields
iRESOLJ         = 17            ; resolution-j (int) all fields
iSCALE          = 18            ; scale in all
MF0             = 19            ; mf[0] all
MF2             = 20            ; mf[2] all
MF4             = 21            ; mf[4] all
MF6             = 22            ; mf[6] all
MF13            = 23            ; mf[1],mf[3],mf[1],mf[3]
MF57            = 24            ; mf[5],mf[7],mf[5],mf[7]
iGIFTAGJ        = 25            ; gif tag (for j verts)

numverts        = 98
vfData1         = 26                        ; current strip: numverts * 2 quads, stored as xyz,normal per point
vfData2         = (numverts*2)+vfData1      ; previous strip: numverts * 2 quads, tex,xyz per point
vfKick          = (numverts*2)+vfData2      ; kick buffer: numverts * 6 quads (tex,col,xyz for both strip sides)
end             = (numverts*6)+vfKick       ; end of used vu mem
; end = 26 + (numverts * 10); for numverts = 98 that is 980 + 26 = 1006,
; which leaves 18 quads free below 1024.
; NOTE(review): rendercall_normals also writes one giftag quad ahead of the
; vertex data in vfKick, so a strip of numverts points occupies
; 1 + numverts*6 quads - confirm the resolution never needs more.

.global g_Vu1_SparmGenEnv
.global g_Vu1_SparmGenEnv_End

.p2align 4      ; This aligns the following data to quadword alignment
.vu

;----------------------------------------------------------------------------
; MYFAST_POW result,a,b,tmp
;
; vas-asm adaptation of vcl powf function by
; (c) Ian Stephenson
;
; ian@dctsystems.freeserve.co.uk
; adapted from vcl to vas-macro by adresd
;
; Per-field approximation of a^b that works on the raw float bit pattern:
; the bits of a are converted as if they were an integer, scaled by 2^-23
; and offset by -127 (an approximate log2), multiplied by b, then the
; steps are reversed (+127, * 2^23, float-to-int) to rebuild a float.
;
;   result : receives the approximate a^b
;   a      : base, DESTROYED (overwritten by the intermediate log value)
;   b      : exponent, read only
;   tmp    : scratch register, destroyed
; Also overwrites the I register (left holding 8388608).
;----------------------------------------------------------------------------
.macro MYFAST_POW result,a,b,tmp
        ITOF0 \a,\a                         nop
        NOP                                 loi 0.00000011920928955078125
        MULI \a,\a,I                        nop
        NOP                                 loi -127
        ADDi \tmp,\a,I                      nop
        mul \tmp,\tmp,\b                    nop
        NOP                                 loi 127
        ADDi \tmp,\tmp,I                    nop
        NOP                                 loi 8388608
        MULi \tmp,\tmp,I                    nop
        FTOI0 \result,\tmp                  nop
.endm

;----------------------------------------------------------------------------
; calc_normal temp2,point1,point2,point3,temp1
;
; calculates a normal, non destructively
; (c) adresd
;
; Builds the two edge vectors point1-point2 and point1-point3, takes their
; cross product and scales it by 1/sqrt(x^2+y^2+z^2).  The three points are
; only read.
;
;   temp2  : receives the normalised normal (w = 0)
;   temp1  : scratch register, destroyed; its w field carries the squared length
; Also overwrites ACC and Q.
;
; The squared length is read back from the w field of temp1.  The original
; hard-coded vf24w at that point (marked as a fudge), which was only correct
; while every caller passed vf24 as temp1.  The operand is now spelled
; \temp1\()w, where \() only terminates the argument name so the field
; suffix can follow it; with temp1 = vf24 it expands to the same vf24w.
;----------------------------------------------------------------------------
.macro calc_normal temp2,point1,point2,point3,temp1
        ; edge vectors
        sub \temp1,\point1,\point2          nop
        sub \temp2,\point1,\point3          nop
        ; cross product
        opmula.xyz ACC,\temp1,\temp2        nop
        opmsub.xyz \temp2,\temp2,\temp1     nop
        ; w = 0
        sub.w \temp2,vf00,vf00              nop
        ; normalise: temp1.w = x*x + y*y + z*z
        mul.xyz \temp1,\temp2,\temp2        nop
        mulax.w ACC,vf00,\temp1             nop
        madday.w ACC,vf00,\temp1            nop
        maddz.w \temp1,vf00,\temp1          nop
        nop                                 rsqrt Q,vf00w,\temp1\()w
        sub.w \temp2,vf00,vf00              nop
        nop                                 waitq
        mulq.xyz \temp2,\temp2,Q            nop
.endm

;----------------------------------------------------------------------------
; g_Vu1_SparmGenEnv - microprogram entry point
;
; for each point/vtx
;   spharm_Eval(u,v,&q[0]);
;   spharm_Eval(u+du/10,v, &temp[0]);
;   spharm_Eval(u,v+dv/10, &temp[1]);
;   spharm_calcnormal(&n[0],&q[0],&temp[0],&temp[1]);
;
;   phi,       theta
;   phi+du/10, theta
;   phi,       theta+dv/10
;   phi2   = phi+(du/10)
;   theta2 = theta+(dv/10)
;
; Register use in this routine:
;   vf28..vf31  screen matrix
;   vf01, vf03  phi accumulator / phi,phi2 pair fed to doSinCos (x and z)
;   vf02, vf04  theta accumulator / theta,theta2 pair fed to doSinCos (x and z)
;   vf05, vf06  MYFAST_POW results for the phi terms / theta terms
;   vf07, vf08  sin/cos of phi and of phi2
;   vf09, vf10  cos (x) and sin (z) of theta and of theta2
;   vi01, vi02  i and j down-counters
;   vi07, vi08  j and i limits read from iRESOLJ / iRESOLI
;   vi03, vi04  master pointers: current strip / previous strip
;   vi05        working store pointer
;   vi10        point count handed to rendercall_normals
;   vi13        address scratch during setup, then link register for every bal
;----------------------------------------------------------------------------
g_Vu1_SparmGenEnv:
start:
        nop                                 iaddiu vi14,vi00,200            ; NOTE(review): vi14 is not read anywhere in this file
        nop                                 lq vf28, iScreenMatrix+0(vi00)
        nop                                 lq vf29, iScreenMatrix+1(vi00)
        nop                                 lq vf30, iScreenMatrix+2(vi00)
        nop                                 lq vf31, iScreenMatrix+3(vi00)

        nop                                 iaddiu vi13, vi00, iRESOLI
        nop                                 ilwr.w vi08, (vi13)w            ; this is u int limit
        nop                                 iaddiu vi13, vi00, iRESOLJ
        nop                                 ilwr.w vi07, (vi13)w            ; this is v int limit

        ; now setup the output buffer pointer (vi03,vi04) - for storing
        ; vi03 is current, vi04 is previous - these are master do not modify
        ; vi05,vi06 are working copies (to be modified)
        nop                                 iaddiu vi03, vi00, vfData1      ; coord address (store)
        nop                                 iaddiu vi04, vi00, vfData2      ; coord address (previous)

        ; vf01 = x,y=phi z,w=phi
        ; vf03 = x,y=phi z,w=phi2 (i count) , vi01 = i
        nop                                 lq vf01,iDPI(vi00)              ; dphi
        nop                                 lq vf03,iPHIMOD(vi00)           ; phi modifier (dphi/10)
        add.zw vf01,vf01,vf03               iadd vi01,vi00,vi08             ; set u to limit

outer_loop_start:
        ; x sinphi  = PbSin(phi);
        ; y cosphi  = PbCos(phi);
        ; z sinphi2 = PbSin(phi+(du/10));
        ; w cosphi2 = PbCos(phi+(du/10));
        ; x = phi, z = phi2
        nop                                 move vf19,vf03                  ; x = phi, z = phi2
        nop                                 bal vi13,doSinCos               ; vf19 is dest, sin1,cos1,sin2,cos2
        nop                                 nop                             ; branch delay slot
        sub vf24,vf00,vf00                  mr32 vf18,vf19                  ; vf24 = 0; vf18 ends up as cos1,sin2,cos2,sin1
        nop                                 mr32 vf08,vf18                  ; vf08 ends up as sin2,cos2,sin1,cos1
        add.xy vf07,vf24,vf19               nop                             ; vf07 xy = sin1,cos1
        add.zw vf07,vf24,vf08               nop                             ; vf07 zw = sin1,cos1
        add.zw vf08,vf24,vf19               nop                             ; vf08 zw = sin2,cos2 (xy already sin2,cos2)
        ; vf07 contains sinphi,cosphi,sinphi,cosphi
        ; vf08 contains sinphi2,cosphi2,sinphi2,cosphi2

        ; r1 = powf(PbSin(mf[0]*phi),mf[1]);
        ; r2 = powf(PbSin(mf[0]*phi2),mf[1]);
        ; x = mf[0]*phi , z = mf[0]*phi2
        nop                                 lq vf19,MF0(vi00);
        mul vf19,vf19,vf03                  nop                             ; x = mf[0]*phi, z = mf[0]*phi2
        nop                                 bal vi13,doSinCos               ; vf19 is dest, sin,cos,sin,cos
        nop                                 nop                             ; branch delay slot
        nop                                 move vf11,vf19                  ; copy sin values into dest

        ; r1 += powf(PbCos(mf[2]*phi),mf[3]);
        ; r2 += powf(PbCos(mf[2]*phi2),mf[3]);
        ; r3 = r1;
        ; x = mf[2]*phi , z = mf[2]*phi2
        nop                                 lq vf19,MF2(vi00)
        mul vf19,vf19,vf03                  nop                             ; x = mf[2]*phi, z = mf[2]*phi2
        nop                                 bal vi13,doSinCos               ; vf19 is dest, sin,cos,sin,cos
        sub.yw vf11,vf00,vf00               nop                             ; sits in the delay slot: clears vf11 y,w before the call runs
        add.yw vf11,vf11,vf19               nop                             ; move cos values ready for next step

        ; vf11 now contains values for pow call
        ; powf, in:
        ;   x = PbSin(mf[0]*phi)
        ;   y = PbCos(mf[2]*phi)
        ;   z = PbSin(mf[0]*phi2)
        ;   w = PbCos(mf[2]*phi2)
        ; sec val , x=mf[1], y=mf[3], z=mf[1], w=mf[3]
        ; out: x = r1a, y = r1b, z = r2a, w = r2b (r1 is same as r3)
        nop                                 lq vf19,MF13(vi00)
        MYFAST_POW vf05,vf11,vf19,vf24

        ; setup store pointers from master copies
        nop                                 iadd vi05,vi00,vi03

        ; vf02 = x,y=theta z,w=theta
        ; vf04 = x,y=theta z,w=theta2 (j count) , vi02 = j
        nop                                 lq vf02,iDTHETA(vi00)           ; dtheta
        nop                                 lq vf04,iTHETAMOD(vi00)         ; theta modifier (dtheta/10)
        add.zw vf02,vf02,vf04               iadd vi02,vi00,vi07             ; set v to limit

inner_loop_start:
        ; in loop (inner bit)
        ; x sintheta  = PbSin(theta);
        ; y costheta  = PbCos(theta);
        ; z sintheta2 = PbSin(theta+(dv/10));
        ; w costheta2 = PbCos(theta+(dv/10));
        ; x = theta, z = theta2
        nop                                 move vf19,vf04                  ; x = theta, z = theta2
        nop                                 bal vi13,doSinCos               ; vf19 is dest, sin1,cos1,sin2,cos2
        nop                                 nop                             ; branch delay slot
        sub vf18,vf00,vf00                  nop                             ; vf18 = 0
        addy.x vf09,vf18,vf19               nop                             ; vf09 x = cos1
        addx.z vf09,vf18,vf19               nop                             ; vf09 z = sin1
        addw.x vf10,vf18,vf19               nop                             ; vf10 x = cos2
        addz.z vf10,vf18,vf19               nop                             ; vf10 z = sin2
        ; vf09 contains costheta,?,sintheta,?
        ; vf10 contains costheta2,?,sintheta2,?

        ; r1ta = powf(PbSin(mf[4]*theta),mf[5]);
        ; r3a  = powf(PbSin(mf[4]*theta2),mf[5]);
        ; x = mf[4]*theta , z = mf[4]*theta2
        nop                                 lq vf19,MF4(vi00);
        mul vf19,vf19,vf04                  nop                             ; x = mf[4]*theta, z = mf[4]*theta2
        nop                                 bal vi13,doSinCos               ; vf19 is dest, sin,cos,sin,cos
        nop                                 nop                             ; branch delay slot
        nop                                 move vf11,vf19                  ; copy sin values into dest

        ; r1tb = powf(PbCos(mf[6]*theta),mf[7]);
        ; r3b += powf(PbCos(mf[6]*theta2),mf[7]);
        ; x = mf[6]*theta , z = mf[6]*theta2
        nop                                 lq vf19,MF6(vi00);
        mul vf19,vf19,vf04                  nop                             ; x = mf[6]*theta, z = mf[6]*theta2
        nop                                 bal vi13,doSinCos               ; vf19 is dest, sin,cos,sin,cos
        nop                                 nop                             ; branch delay slot
        sub.yw vf11,vf00,vf00               nop
        add.yw vf11,vf11,vf19               nop                             ; move cos values ready for next step

        ; vf11 now contains values for pow call
        ; powf, in:
        ;   x = PbSin(mf[4]*theta)
        ;   y = PbCos(mf[6]*theta)
        ;   z = PbSin(mf[4]*theta2)
        ;   w = PbCos(mf[6]*theta2)
        ; sec val , x=mf[5], y=mf[7], z=mf[5], w=mf[7]
        ; out: x = r1ta, y = r1tb, z = r3a, w = r3b
        nop                                 lq vf19,MF57(vi00)
        MYFAST_POW vf06,vf11,vf19,vf24

        ; vf05 - x = r1a,  y = r1b,  z = r2a, w = r2b (r1 is same as r3)
        ; vf06 - x = r1ta, y = r1tb, z = r3a, w = r3b
        ; now we need to add
        ; vf21 r1 = r1a+r1b+r1ta+r1tb
        ; vf22 r2 = r2a+r2b+r1ta+r1tb
        ; vf23 r3 = r1a+r1b+r3a+r3b
        ; r1 = vf05x+vf05y + vf06x+vf06y
        ; r2 = vf05z+vf05w + vf06x+vf06y
        ; r3 = vf05x+vf05y + vf06z+vf06w
        ; vf05x + vf05y , vf06x + vf06y - doubled adds
        ; vf05z + vf05w , vf06z + vf06w - singles
        addy.x vf18x,vf05x,vf05y            nop                             ; vf18x = vf05x + vf05y
        addy.x vf19x,vf06x,vf06y            nop                             ; vf19x = vf06x + vf06y
        addx.x vf24x,vf18x,vf19x            nop                             ; vf24x = vf18x + vf19x (r1)
        addx.xyz vf21xyz,vf00xyz,vf24x      nop                             ; set xyz to same value vf21=r1
        addw.z vf16z,vf05z,vf05w            nop                             ; vf16z = vf05z + vf05w
        addz.x vf24x,vf19x,vf16z            nop                             ; vf24x = vf19x + vf16z (r2)
        addx.xyz vf22xyz,vf00xyz,vf24x      nop                             ; set xyz to same value vf22=r2
        addw.z vf17z,vf06z,vf06w            nop                             ; vf17z = vf06z + vf06w
        addz.x vf24x,vf18x,vf17z            nop                             ; vf24x = vf18x + vf17z (r3)
        addx.xyz vf23xyz,vf00xyz,vf24x      nop                             ; set xyz to same value vf23=r3

        ; from earlier:
        ; vf07 contains sinphi,cosphi,sinphi,cosphi
        ; vf08 contains sinphi2,cosphi2,sinphi2,cosphi2
        ; vf09 contains costheta,?,sintheta,?
        ; vf10 contains costheta2,?,sintheta2,?
        ; vf18 = output1
        ; vf19 = output2
        ; vf20 = output3

        ; output values
        ; output1->x = r1 * sinphi * costheta;
        ; output1->y = r1 * cosphi;
        ; output1->z = r1 * sinphi * sintheta;
        ; output1->w = 1.0f;
        mul.xyz vf18,vf21,vf07              nop
        mul.xz vf18,vf18,vf09               loi 0.0                         ; I = 0.0 for the three w writes below
        addi.w vf18,vf00,I                  nop                             ; w = vf00.w + 0.0 = 1.0

        ; output2->x = r2 * sinphi2 * costheta;
        ; output2->y = r2 * cosphi2;
        ; output2->z = r2 * sinphi2 * sintheta;
        ; output2->w = 1.0f;
        mul.xyz vf19,vf22,vf08              nop
        mul.xz vf19,vf19,vf09               nop
        addi.w vf19,vf00,I                  nop

        ; output3->x = r3 * sinphi * costheta2;
        ; output3->y = r3 * cosphi;
        ; output3->z = r3 * sinphi * sintheta2;
        ; output3->w = 1.0f;
        mul.xyz vf20,vf23,vf07              nop
        mul.xz vf20,vf20,vf10               nop
        addi.w vf20,vf00,I                  nop

        ; normal of the three evaluated points -> vf22 (vf24 is scratch)
        calc_normal vf22,vf18,vf19,vf20,vf24

        ; now multiply xyz by scale (output1) - * scale
        nop                                 lq vf21,iSCALE(vi00)
        mul.xyz vf18,vf18,vf21              nop                             ; mult verts by scale
        nop                                 sqi vf18,(vi05++)               ; store vertex
        nop                                 sqi vf22,(vi05++)               ; store normal
        ; end of inner loop

inner_loop_end:
        ; vf02 = x,y=theta z,w=theta
        ; vf04 = x,y=theta z,w=theta2 (j count) , vi02 = j
        nop                                 lq vf18,iDTHETA(vi00)           ; dtheta
        add vf02,vf02,vf18                  lq vf19,iTHETAMOD(vi00)         ; theta modifier (dtheta/10)
        add vf04,vf02,vf19                  iaddi vi02,vi02,-1              ; j--
        nop                                 nop
inner_loop_endcheck:
        nop                                 ibne vi02,vi00,inner_loop_start
        nop                                 nop                             ; branch delay slot

        ; now we need to kick this list, and leave that going, so we can build another one
        nop                                 iadd vi10,vi00,vi07             ; set num of points to do
;       nop                                 iaddi vi10,vi10,-1              ; do one less
        nop                                 nop
        nop                                 bal vi13,rendercall_normals
        nop                                 nop                             ; branch delay slot

outer_loop_end:
        ; vf01 = x,y=phi z,w=phi
        ; vf03 = x,y=phi z,w=phi2 (i count) , vi01 = i
        nop                                 lq vf18,iDPI(vi00)              ; dphi
        add vf01,vf01,vf18                  lq vf19,iPHIMOD(vi00)           ; phi modifier (dphi/10)
        add vf03,vf01,vf19                  iaddi vi01,vi01,-1              ; i--
        nop                                 nop
outer_loop_endcheck:
        ; check limit, if under, continue
        nop                                 ibne vi01,vi00,outer_loop_start
        nop                                 nop                             ; branch delay slot

finish:
        nop                                 nop
        nop[e]                              nop                             ; E bit: end of microprogram
        nop                                 nop
;////////////////////////////////////////////////////////////////////////////
;// Re-run if called again
        nop                                 b g_Vu1_SparmGenEnv
        nop                                 nop                             ; branch delay slot

;//////////////////////////////////////////////////////////////////////////////////
;//
;// THIS IS THE TRIANGLE STRIP VERSION
; this must be called with the num of verts in vi10
; builds a strip of tris, using values from previous strip for one side
; transforms normals and vertices for the current strip, then outputs a giflist
; ready for kicking to the GS.
; This is done in a separate function as opposed to main loop to help readability
; and understanding of giflist building on vu
;
; in:
;   vi10        number of points to emit (must be non-zero: the loop tests
;               the count only after the first pass)
;   vi03, vi04  current strip / previous strip buffers (read, not modified)
;   vi13        return address (called with bal vi13,rendercall_normals)
;   vf28..vf31  screen matrix
;
; Each pass writes six quads to the kick buffer: tex,col,xyz of the
; previous-strip point followed by tex,col,xyz of the current point.  The
; freshly computed tex and xyz are also written back over the previous-strip
; entry so that the next call can reuse them.
;
; trashes:
;   vi05,vi06,vi10,vi11,vi12
;   vf14,vf15,vf16,vf17,vf18,vf19,vf20,vf21,vf24
;   ACC, I, Q
; output is xgkicked pkt to gs
rendercall_normals:
        nop                                 iaddiu vi11, vi00, vfKick       ; xgkick - store addr
        nop                                 iaddiu vi12, vi00, vfKick       ; xgkick - kick addr

        ; setup buffer pointers
        ; vi03 is current, vi04 is previous , 5 and 6 are working copies
        nop                                 iadd vi05,vi00,vi03
        nop                                 iadd vi06,vi00,vi04

        nop                                 lq.xyzw vf20, iGIFTAGJ(vi00)    ; giftag
        ; The store pointer here was spelled "vi011"; every other store in
        ; this routine goes through vi11, so the canonical name is used.
        nop                                 sqi.xyzw vf20, (vi11++)         ; store the giftag

        ; load the normal transform matrix
        nop                                 lq vf14, iNormalMatrix+0(vi00)
        nop                                 lq vf15, iNormalMatrix+1(vi00)
        nop                                 lq vf16, iNormalMatrix+2(vi00)
        nop                                 lq vf17, iNormalMatrix+3(vi00)

        ; setup the color
        sub vf24,vf24,vf24                  loi 128.0                       ; vf24 = 0, I = 128.0
        addi.xyz vf24,vf00,I                nop                             ; xyz = 128.0, w stays 0
        ftoi0 vf24, vf24                    nop                             ; color 128,128,128,0 as integers

rendercall_normals_loop:
        ;//////////////////////////////////////////////////////////////////////////////////
        ; load second two co-ords
        ; vf20 = tex (1) , vf19 = XYZ (1) , vf21 = XYZ (2) , vf18 = normal (2)
        nop                                 lqi vf21, (vi05++)              ; XYZ (2) one to transform
        nop                                 lqi vf18, (vi05++)              ; normal (2) one to transform
        nop                                 lqi vf20, (vi06++)              ; texture (1) from last seg

        ;//////////////////////////////////////////////////////////////////////////////////
        ;// Project vertex to screen space. XYZ(2)
        mulax acc, vf28, vf21x              lqi vf19, (vi06++)              ; XYZ (1) from last seg
        madday acc, vf29, vf21y             nop
        maddaz acc, vf30, vf21z             sqi vf20, (vi11++)              ; store tex (1)
        maddw vf21, vf31, vf21w             nop
        nop                                 div q, vf00w, vf21w             ; q = 1/w for the perspective divide

        ;// While waiting for div, do envmap calc on normals
        ; envmapping faked.. vf14,vf15,vf16,vf17 is normal trans mtx
        ; vf18 is the normal
        MULAx.xyzw ACC, VF14, VF18x         sqi vf24, (vi11++)              ; store col (1)
        MADDAy.xyzw ACC, VF15, VF18y        sqi vf19, (vi11++)              ; store xyz (1)
        MADDAz.xyzw ACC, VF16, VF18z        nop
        MADDw.xyzw VF18, VF17, VF18w        iaddi vi06,vi06,-2              ; wind back, ready for store

        ; vf18 now holds the transformed normal; turn it into texture co-ords
        addw.z vf18z,vf00z,vf00w            LOI 0.5                         ; set q (z) to 1.0
        ; new version
        ADDw.xy VF18xy, VF18xy, VF00w       nop                             ; add 1.0 to x and y (makes them 0-2.0)
        muli.xy vf18xy,vf18xy,I             nop                             ; div by 2 (0-1.0)
        ; vf18 is the texture co-ords

        ;continue
        sub.w vf21,vf21,vf21                waitq                           ; w = 0, wait for the divide
        mulq.xyz vf21, vf21, q              sqi vf18, (vi11++)              ; store tex (2)
        nop                                 sqi vf24, (vi11++)              ; store col (2)
        ftoi4 vf21, vf21                    nop                             ; fixedpoint for gif
        nop                                 sqi vf21, (vi11++)              ; store xyz(2)

        ;//////////////////////////////////////////////////////////////////////////////////
        nop                                 iaddi vi10, vi10, -1
        nop                                 sqi vf18, (vi06++)              ; store tex for next iter
        nop                                 sqi vf21, (vi06++)              ; store xyz for next iter
        nop                                 ibne vi10, vi00, rendercall_normals_loop   ; loop check
        nop                                 nop                             ; branch delay slot

        nop                                 nop
        nop                                 xgkick vi12                     ; kick the packet built at vfKick
        nop                                 nop

rendercall_normals_skip:
        NOP                                 jr vi13                         ; return to caller
        NOP                                 nop                             ; branch delay slot
;//

;//////////////////////////////////////////////////////////////////////////////////
; input  vf19 , x , z
; output vf19 x=sinx,y=cosx z=sinz,w=cosz
;
; touches
;   ACC,I
;   vf14, vf15, vf16, vf17, vf18, vf19, vf20, vf21
;--------------------------------------------------------------------
; doSinCos - Returns the sin and cos of up to 2 angles, which must
; be contained in the X and Z elements of "angle". The sin/cos pair
; will be contained in the X/Y elements of "sincos" for the first
; angle, and Z/W for the second one.
; Thanks to Colin Hughes (SCEE) for that one.
; Thanks to the playstation2 linux forums and site for it.
;
; Call with "bal vi13,doSinCos"; the routine returns through vi13.
; The two angles are taken from vf19 x and z.  Each is duplicated into the
; neighbouring field (x -> y, z -> w) and pi/2 is subtracted from the x and
; z copies, so that one cosine evaluation over all four fields yields
; sin,cos,sin,cos.  The angle is then reduced to a fraction of a turn and
; run through an odd polynomial k1*a + k2*a^3 + k3*a^5 + k4*a^7 + k5*a^9.
; Each loi value is consumed by the instruction on the following line.
doSinCos:
        mulz.w   vf19, vf00, vf19z          nop                             ; w = angle taken from z
        addx.y   vf19, vf00, vf19x          loi 1.570796                    ; y = angle taken from x
        subi.xz  vf19, vf19, I              nop                             ; phase difference for sin as cos ( pi/2 )
        abs      vf19, vf19                 nop                             ; mirror cos around zero
        maxw     vf20, vf00, vf00w          loi -0.159155                   ; vf20 = all 1s
        mulai    ACC, vf19, I               loi 12582912.0                  ; scale so single cycle is range 0 to -1 ( *-1/2pi )
        msubai   ACC, vf20, I               nop                             ; apply bias to remove fractional part
        maddai   ACC, vf20, I               loi -0.159155                   ; remove bias to leave original int part
        msubai   ACC, vf19, I               loi 0.5                         ; apply original number to leave fraction range only
        msubi    vf19, vf20, I              nop                             ; adjust range: -0.5 to +0.5
        abs      vf19, vf19                 loi 0.25                        ; clamp: 0 to +0.5
        subi     vf19, vf19, I              nop                             ; adjust range: -0.25 to +0.25

        ; polynomial in a = vf19
        mul      vf21, vf19, vf19           loi -76.574959                  ; a^2
        muli     vf14, vf19, I              loi -41.341675                  ; k4 a
        muli     vf16, vf19, I              loi 81.602226                   ; k2 a
        muli     vf15, vf19, I              nop                             ; k3 a
        mul      vf18, vf21, vf21           nop                             ; a^4
        mul      vf14, vf14, vf21           nop                             ; k4 a^3
        mula     ACC, vf16, vf21            loi 39.710659                   ; + k2 a^3
        muli     vf16, vf19, I              nop                             ; k5 a
        mul      vf17, vf18, vf18           nop                             ; a^8
        madda    ACC, vf14, vf18            nop                             ; + k4 a^7
        madda    ACC, vf15, vf18            loi 6.283185                    ; + k3 a^5
        maddai   ACC, vf19, I               nop                             ; + k1 a
        madd     vf19, vf16, vf17           nop                             ; + k5 a^9 -> result in vf19

        nop                                 jr vi13                         ; return to caller
        nop                                 nop                             ; branch delay slot
;//

g_Vu1_SparmGenEnv_End: