Subversion Repositories ps3ware

Rev

Rev 210 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
204 ironpeter 1
#include "common_spu.h"
2
#include "structs.h"
3
#include <math.h>
4
 
5
#include <float.h>
6
 
7
 
8
// File-scope 16-bit scratch values, all starting at zero.
// NOTE(review): nothing in the visible part of this file reads or writes them.
uint16 ia = 0;
uint16 ib = 0;
uint16 ic = 0;
9
 
10
// Fetches the 16-bit value stored at `address` (taken from the preferred
// slot) and returns it zero-extended in every 32-bit lane.
//
// The quadword containing `address` is loaded, then a shuffle pattern is
// built from the low four address bits so that each result word becomes
// { 0, 0, byte[offset], byte[offset + 1] }.
// NOTE(review): a value that straddles a quadword boundary (offset 15) would
// wrap inside the same quadword; callers appear to pass 2-byte-aligned
// addresses only - confirm.
vec_uint4 inline uint16_uload( qword address )
{
    const vec_uint4   lowNibble  = spu_splats( (unsigned int)0xf );
    const vec_uchar16 pickOffset = (vec_uchar16)spu_splats( (unsigned int)0x80800303 );
    const vec_uint4   nextByte   = spu_splats( (unsigned int)0x80800001 );

    vec_uint4 line   = (vec_uint4)si_lqd( address, 0 );
    vec_uint4 offset = spu_and( (vec_uint4)address, lowNibble );

    // Per word: 0x0000_oo_oo, then + 0x8080_00_01 -> { zero, zero, oo, oo + 1 }.
    vec_uint4 pattern = spu_shuffle( offset, offset, pickOffset );
    pattern = spu_add( pattern, nextByte );

    return spu_shuffle( line, line, (vec_uchar16)pattern );
}
18
 
19
// Packs four IEEE single-precision bit patterns into half-precision bit
// patterns (one per 32-bit lane, result in the low 16 bits).
//
// Sign moves from bit 31 to bit 15, the exponent is re-biased by 112
// (127 - 15) and the mantissa is truncated from 23 to 10 bits (no rounding).
// Lanes whose float exponent is <= 112 get a zero exponent field; their
// truncated mantissa bits are still kept.
// NOTE(review): there is no saturation - a float exponent above 143 yields a
// re-biased value that no longer fits in 5 bits and spills into bit 15 and up.
vec_uint4 inline f2h( vec_uint4 h )
{
    const vec_uint4 bias = spu_splats( (unsigned int)112 );

    vec_uint4 sign     = spu_and( spu_rlmask( h, -16 ), spu_splats( (unsigned int)0x8000 ) );
    vec_uint4 exponent = spu_and( spu_rlmask( h, -23 ), spu_splats( (unsigned int)0xff ) );
    vec_uint4 mantissa = spu_and( h, spu_splats( (unsigned int)0x7fffff ) );

    // All-ones in lanes whose exponent survives the re-bias.
    vec_uint4 keep = spu_cmpgt( exponent, bias );

    vec_uint4 rebased = spu_sub( exponent, bias );
    rebased = spu_sel( spu_splats( (unsigned int)0x0 ), rebased, keep );
    rebased = spu_rl( rebased, 10 );

    mantissa = spu_rlmask( mantissa, -13 );

    return spu_or( rebased, spu_or( mantissa, sign ) );
}
37
 
38
// Expands four half-precision bit patterns (low 16 bits of each lane) into
// single-precision floats.
//
// The 5-bit exponent is re-biased by +112 (127 - 15), the 10-bit mantissa is
// moved to the top of the 23-bit float mantissa and the sign goes to bit 31.
// NOTE(review): the exponent is re-biased unconditionally, so a half value of
// zero or a half denormal does not map to float zero / a float denormal.
vec_float4 inline h2f( vec_uint4 h )
{
    vec_uint4 sign     = spu_and( spu_rlmask( h, -15 ), spu_splats( (unsigned int)0x1 ) );
    vec_uint4 exponent = spu_and( spu_rlmask( h, -10 ), spu_splats( (unsigned int)0x1f ) );
    vec_uint4 mantissa = spu_and( h, spu_splats( (unsigned int)0x3ff ) );

    exponent = spu_add( exponent, spu_splats( (unsigned int)112 ) );

    // The fields are narrow enough that these rotates act as plain left shifts.
    vec_uint4 expBits  = spu_rl( exponent, 23 );
    vec_uint4 manBits  = spu_rl( mantissa, 13 );
    vec_uint4 signBits = spu_rl( sign, 31 );

    return (vec_float4)spu_or( expBits, spu_or( manBits, signBits ) );
}
53
 
54
 
55
struct sin_cos
56
{
57
    vec_float4 scsc[257];
58
 
59
    sin_cos()
60
    {
61
        vec_float4 start = (vec_float4){ +1.0f, 0.0f, -1.0f, 0.0f };
62
        vec_float4 co = (vec_float4){ +0.999698819f, +0.999698819f, +0.999698819f, +0.999698819f };
63
        vec_float4 si = (vec_float4){ +0.024541228f, -0.024541228f, +0.024541228f, -0.024541228f };
64
 
65
        for( size_t i = 0; i < 257; ++i )
66
        {
67
            scsc[i] = start;
68
            start = start * co + si * YXWZ( start );
69
 
70
        }
71
 
72
        //printf( "%f %f\n", ((float *)scsc)[1026], ((float *)scsc)[1027] );
73
 
74
    }  
75
 
76
    vec_float4 value( vec_float4 f )
77
    {
78
        vec_int4  i = spu_convts( f, 8 );
79
        vec_float4 fi = spu_convtf( i, 8 );
80
        vec_float4 d = spu_mul( spu_sub( f, fi ), spu_splats( 256.0f ) );
81
 
82
        unsigned int ind = si_to_uint( (qword)i ) & 255;
83
        vec_float4 a = scsc[ind + 0];
84
        vec_float4 b = scsc[ind + 1];
85
 
86
        return spu_madd( spu_sub( b, a ), d, a );
87
 
88
    }
89
};
90
 
91
 
92
sin_cos table;
93
 
94
 
95
 
96
// Linearly interpolates four half-float tracks at `time`, one track per lane.
//
// kbeg/kend hold the integer key times that bracket `time`; vbeg/vend hold
// the half-float values at those keys. The result per lane is
//   ( (time - kbeg) * vend + (kend - time) * vbeg ) * (1 / (kend - kbeg)),
// with the reciprocal taken via spu_re (an estimate).
// NOTE(review): a lane with kbeg == kend takes the reciprocal of zero.
vec_float4 inline conv(
    vec_uint4 kbeg,
    vec_uint4 kend,
    vec_uint4 vbeg,
    vec_uint4 vend,
    vec_float4 time )
{
    vec_float4 keyLo = spu_convtf( kbeg, 0 );
    vec_float4 keyHi = spu_convtf( kend, 0 );

    vec_float4 valLo = h2f( vbeg );
    vec_float4 valHi = h2f( vend );

    vec_float4 sinceLo = spu_sub( time, keyLo );
    vec_float4 untilHi = spu_sub( keyHi, time );
    vec_float4 invSpan = spu_re( spu_sub( keyHi, keyLo ) );

    vec_float4 weighted = spu_add( spu_mul( sinceLo, valHi ), spu_mul( untilHi, valLo ) );
    return spu_mul( invSpan, weighted );
}
117
 
118
void inline process_linear_spline(
119
    qword &address,
120
    vec_uint4 &kbeg,
121
    vec_uint4 &kend,
122
    vec_uint4 &vbeg,
123
    vec_uint4 &vend,
124
    vec_float4 time,
125
    vec_uint4 startFrame,
126
    vec_uint4 endFrame )
127
{
128
    vec_uint4 num = spu_rl( uint16_uload( address ), 1 );
129
    vec_uint4 dec = spu_sub( num, spu_splats( (unsigned int)0x2) );
130
    vec_uint4 ptr = spu_sel( num, dec, spu_cmpgt( num, (unsigned int)0x2 ) );
131
    vec_uint4 fptr = spu_add( (vec_uint4)address, ptr );
132
 
133
    vec_uint4 beg = spu_splats( (unsigned int)0x0 );
134
    vec_uint4 end = dec;
135
    vec_uint4 ibeg = startFrame;
136
    vec_uint4 iend = endFrame;
137
 
138
    //printf( "%d \n", si_to_uint( (qword)num ) );
139
    while( 1 )
140
    {
141
        vec_uint4 bega = spu_add( beg, spu_splats( (unsigned int)0x4 ) );
142
        vec_uint4 tst = spu_cmpgt( bega, end );
143
 
144
        unsigned int ret = si_to_uint( (qword)tst );
145
 
146
        if( ret == 0xffffffff )
147
        {
148
            break;
149
        }
150
 
151
        vec_uint4 med = spu_rl( spu_rlmask( spu_add( beg, end ), -2 ), 1 );
152
        vec_uint4 i = uint16_uload( (qword)spu_add( med, (vec_uint4)address ) );
153
 
154
        vec_float4 fi = spu_convtf( i, 0 );
155
        vec_uint4 b = spu_cmpgt( time, fi );
156
 
157
        beg = spu_sel( beg, med, b );
158
        end = spu_sel( med, end, b );
159
 
160
        ibeg = spu_sel( ibeg, i, b );
161
        iend = spu_sel( i, iend, b );
162
 
163
    }
164
 
165
    address = (qword)spu_add( fptr, num );
166
 
167
    vec_uint4 f1 = uint16_uload( (qword)spu_add( beg, fptr) );
168
    vec_uint4 f2 = uint16_uload( (qword)spu_add( end, fptr) );
169
 
170
    vbeg = spu_shuffle( vbeg, f1, SWZ{ F_Y, F_Z, F_W, S_X } );
171
    vend = spu_shuffle( vend, f2, SWZ{ F_Y, F_Z, F_W, S_X } );
172
 
173
    kbeg = spu_shuffle( kbeg, ibeg, SWZ{ F_Y, F_Z, F_W, S_X } );
174
    kend = spu_shuffle( kend, iend, SWZ{ F_Y, F_Z, F_W, S_X } );
175
 
176
 
177
}
178
 
179
 
180
instance_data_t insts[256] __attribute__((aligned(128)));;
181
BinaryParticle  particles[1024] __attribute__((aligned(128)));
182
char            chunks[4][2048] __attribute__((aligned(128)));
183
char            fly[4] = { 0 };
184
uint16          startFrames[4];
185
uint16          endFrames[4];
186
float           realTimes[4];
187
float           dst[11];
188
 
189
 
190
uint16 number = 0;
191
 
192
 
193
// One output vertex as written to the DMA output buffer.
//
// Field order is part of the binary layout: position is stored as x, z, y,
// followed by 16-bit texture coordinates and a 16-bit-per-channel colour
// (half-float bit patterns produced by f2h()).
struct Vertex
{
    float x;
    float z;
    float y;
    short u;
    short v;
    short r;
    short g;
    short b;
    short a;
};

// main() sizes and advances its output DMA with a literal 24 bytes per
// vertex; refuse to compile if the struct ever stops matching that.
typedef char vertex_size_must_be_24_bytes[ sizeof( struct Vertex ) == 24 ? 1 : -1 ];
199
 
200
 
201
Vertex   outBuffer[512];
202
size_t   outPtr = 0;
203
uint16   coords[256][8];
204
 
205
void inline DoParticle( size_t next, size_t inst )
206
{
207
 
208
    //printf( "a4 \n" );
209
 
210
    float time = realTimes[next];
211
    char *data = chunks[next];
212
    vec_uint4 startFrame = (vec_uint4)si_from_uint( startFrames[next] );
213
    vec_uint4 endFrame = (vec_uint4)si_from_uint( endFrames[next] );
214
    mfc_write_tag_mask( 1 << next  );
215
    mfc_read_tag_status_any();
216
 
217
    float  x, y, z, u, v;
218
    short  r, g, b, a;
219
 
212 ironpeter 220
    /*
221
    unsigned long long te;
222
    GetTime( te );
223
    */
224
 
225
 
226
    vec_float4 *iptr = (vec_float4 *)&insts[inst];
227
 
204 ironpeter 228
    qword dptr = si_from_ptr( data );
229
 
230
    vec_float4 vtime = spu_splats( time );
231
 
232
 
233
    vec_uint4  kbeg = spu_splats( (unsigned int)0 );
234
    vec_uint4  kend = spu_splats( (unsigned int)0 );
235
    vec_uint4  vbeg = spu_splats( (unsigned int)0 );
236
    vec_uint4  vend = spu_splats( (unsigned int)0 );
237
 
238
 
239
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
240
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
241
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
212 ironpeter 242
 
243
    vec_float4 exyz = conv( kbeg, kend, vbeg, vend, vtime );
244
 
245
    vec_float4 xxxx = YYYY( exyz );
246
    vec_float4 yyyy = ZZZZ( exyz );
247
    vec_float4 zzzz = WWWW( exyz );
248
 
249
    vec_float4 xyze = spu_madd( xxxx, iptr[1], spu_madd( yyyy, iptr[2], spu_madd( zzzz, iptr[3] , iptr[4] ) ) );
250
 
204 ironpeter 251
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
212 ironpeter 252
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
253
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
254
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
204 ironpeter 255
 
256
 
257
    vec_float4 rgba = conv( kbeg, kend, vbeg, vend, vtime );
212 ironpeter 258
    rgba = spu_mul( rgba, iptr[0] );
204 ironpeter 259
    vec_uint4  colh = f2h( ( vec_uint4 ) rgba );
260
 
212 ironpeter 261
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
262
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
263
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
264
    process_linear_spline( dptr, kbeg, kend, vbeg, vend, vtime, startFrame, endFrame );
204 ironpeter 265
 
212 ironpeter 266
    vec_float4 uvps = conv( kbeg, kend, vbeg, vend, vtime );
204 ironpeter 267
 
212 ironpeter 268
 
269
    x = ((float *)&xyze)[0];
270
    y = ((float *)&xyze)[1];
271
    z = ((float *)&xyze)[2];
204 ironpeter 272
 
273
 
274
 
275
    r = ((unsigned  int *)&colh)[0];
276
    g = ((unsigned  int *)&colh)[1];
277
    b = ((unsigned  int *)&colh)[2];
278
    a = ((unsigned  int *)&colh)[3];
279
 
280
 
212 ironpeter 281
    u = ((float *)&uvps)[0];
282
    v = ((float *)&uvps)[1];
204 ironpeter 283
 
210 ironpeter 284
    u *= insts[inst].scale;
285
    v *= insts[inst].scale;
286
 
287
 
212 ironpeter 288
    Vertex &a00 = outBuffer[outPtr + 0];
289
    Vertex &a01 = outBuffer[outPtr + 1];
290
    Vertex &a11 = outBuffer[outPtr + 2];
291
    Vertex &a10 = outBuffer[outPtr + 3];
204 ironpeter 292
 
212 ironpeter 293
    vec_uint4 ind = spu_convtu( WWWW( uvps ), 0 );
204 ironpeter 294
    uint32_t sprite = si_to_uint( (qword)ind ) & 255;    
295
 
296
 
212 ironpeter 297
    vec_float4 rot = table.value( ZZZZ( uvps ) );
204 ironpeter 298
 
299
    float si = ((float *)&rot)[0];
300
    float co = ((float *)&rot)[1];
301
 
302
    a00.x = x + co * u - si * v;
303
    a01.x = x + co * u + si * v;
304
    a11.x = x - co * u + si * v;
305
    a10.x = x - co * u - si * v;
306
 
307
    a00.y = y - si * u - co * v;
308
    a01.y = y - si * u + co * v;
309
    a11.y = y + si * u + co * v;
310
    a10.y = y + si * u - co * v;
311
 
312
    a00.z = z;
313
    a01.z = z;
314
    a11.z = z;
315
    a10.z = z;
316
 
317
    a00.u = coords[sprite][0];
318
    a00.v = coords[sprite][1];
319
    a01.u = coords[sprite][2];
320
    a01.v = coords[sprite][3];
321
    a11.u = coords[sprite][4];
322
    a11.v = coords[sprite][5];
323
    a10.u = coords[sprite][6];
324
    a10.v = coords[sprite][7];
325
 
326
    a00.r = r;
327
    a01.r = r;
328
    a11.r = r;
329
    a10.r = r;
330
 
331
    a00.g = g;
332
    a01.g = g;
333
    a11.g = g;
334
    a10.g = g;
335
 
336
    a00.b = b;
337
    a01.b = b;
338
    a11.b = b;
339
    a10.b = b;
340
 
341
    a00.a = a;
342
    a01.a = a;
343
    a11.a = a;
344
    a10.a = a;
345
 
346
 
212 ironpeter 347
    outPtr = ( outPtr + 4 ) & 511;
204 ironpeter 348
    ++number;
212 ironpeter 349
 
350
 
351
    /*
352
    float cl1 = GetTime( te );
353
 
354
 
355
    static int out = 0;
356
 
357
    if( ++out < 100 )
358
    {
359
        printf( "%f \n", 1.0f / cl1 );
360
    }*/
204 ironpeter 361
}
362
 
363
 
364
 
365
void SampleParticles( float _time, uint32 num, unsigned long long base, uint16 loopFrame, size_t inst )
366
{
367
        size_t j = 0;
368
        for( size_t i = 0; i < num; ++i )
369
        {
370
            uint16 startFrame = particles[i].startFrame;
371
            uint16 endFrame = particles[i].endFrame;
372
 
373
 
374
            float realTime = 0.0f;
375
            bool calc = false;
376
 
377
            float time = _time;
378
 
379
            if( time >= startFrame && time <= endFrame )
380
            {
381
                realTime = time;
382
                calc = true;
383
            }
384
            else
385
            {
386
 
387
                time += loopFrame;
388
 
389
                if( time >= startFrame && time <= endFrame )
390
                {
391
                    realTime = time;
392
                    calc = true;
393
                }
394
            }
395
 
396
            if( calc )
397
            {
398
 
399
                mfc_get( &chunks[j][0], base + particles[i].qwordOffset * 16, particles[i].qwordSize * 16, j, 0, 0 );
400
                fly[j] = 1;
401
 
402
                //printf( "%d \n", particles[i].qwordSize * 16 );
403
                startFrames[j] = startFrame;
404
                endFrames[j] = endFrame;
405
                realTimes[j] = realTime;
406
                size_t next = ( j + 1 ) & 3;
407
                if( fly[next] == 1 )
408
                {
409
                    fly[next] = 0;
410
                    DoParticle( next, inst );
411
                }
412
                j = next;
413
            }
414
        }
415
 
416
        for( size_t i = 0; i < 4; ++i )
417
        {
418
            if( fly[i] == 1 )
419
            {
420
                fly[i] = 0;
421
                DoParticle( i, inst );
422
            }
423
 
424
        }
425
}
426
 
427
 
428
 
212 ironpeter 429
// Transposes, in place, the 4x4 float matrix stored as four row vectors at
// mat[0..3].
//
// Done in two shuffle passes: first gather the left / right halves of row
// pairs, then interleave those halves into columns.
void transpose( vec_float4 *mat )
{
    vec_float4 row0 = mat[0];
    vec_float4 row1 = mat[1];
    vec_float4 row2 = mat[2];
    vec_float4 row3 = mat[3];

    // { a.x a.y b.x b.y } and { a.z a.w b.z b.w } for each row pair.
    vec_float4 top_xy = spu_shuffle( row0, row1, SWZ{ F_X, F_Y, S_X, S_Y } );
    vec_float4 top_zw = spu_shuffle( row0, row1, SWZ{ F_Z, F_W, S_Z, S_W } );

    vec_float4 bot_xy = spu_shuffle( row2, row3, SWZ{ F_X, F_Y, S_X, S_Y } );
    vec_float4 bot_zw = spu_shuffle( row2, row3, SWZ{ F_Z, F_W, S_Z, S_W } );

    mat[0] = spu_shuffle( top_xy, bot_xy, SWZ{ F_X, F_Z, S_X, S_Z } );
    mat[1] = spu_shuffle( top_xy, bot_xy, SWZ{ F_Y, F_W, S_Y, S_W } );
    mat[2] = spu_shuffle( top_zw, bot_zw, SWZ{ F_X, F_Z, S_X, S_Z } );
    mat[3] = spu_shuffle( top_zw, bot_zw, SWZ{ F_Y, F_W, S_Y, S_W } );
}
204 ironpeter 449
 
212 ironpeter 450
 
204 ironpeter 451
int main(unsigned long long spe_id, unsigned long long program_data_ea, unsigned long long env)
452
{
453
        //unsigned long long te;
454
        spu_write_decrementer( 0xffffffff );
455
 
456
 
457
        while( 1 )
458
        {
459
 
460
                spu_read_in_mbox();
461
                ParticleHeader header;
462
                program_data_t data;
463
 
464
                mfc_get( &data, program_data_ea, sizeof( data ), 0, 0, 0 );
465
                mfc_write_tag_mask( 1 );
466
                mfc_read_tag_status_any();
467
 
468
                //printf( "%x %x \n", data.atlas, data.asize );
469
 
470
                mfc_get( &coords[0], data.atlas, data.asize, 0, 0, 0 );
471
                mfc_get( &insts[0], data.insts, data.isize * sizeof( instance_data_t ), 0, 0, 0 );
472
                mfc_get( &header, data.fx, sizeof( header ), 0, 0, 0 );
473
                mfc_write_tag_mask( 1 );
474
                mfc_read_tag_status_any();
212 ironpeter 475
 
476
 
204 ironpeter 477
 
478
                int particleSize =  header.trackOffset - header.particleOffset;
479
 
480
 
481
                mfc_get( &particles[0], data.fx + header.particleOffset, particleSize, 0, 0, 0 );
482
                mfc_write_tag_mask( 1 );
483
                mfc_read_tag_status_any();
484
 
485
                //printf( "a1\n" );
486
 
487
 
488
                data.quads = 0;
489
 
490
                for( size_t i = 0; i < data.isize; ++i )
491
                {
492
                    outPtr = 0;
493
 
212 ironpeter 494
                    transpose( (vec_float4 *)insts[i].mat );
204 ironpeter 495
                    uint32_t t = insts[i].time;
496
 
497
                    float ftime = ( t >> 8 ) % ( header.loopFrame ) + ( t & 255 ) / 256.0f;
498
 
499
                    SampleParticles( ftime, header.particles, data.fx + header.trackOffset, header.loopFrame, i );                     
500
                    mfc_put( outBuffer, data.dynamic, 24 * outPtr, 0, 0, 0 );
501
                    mfc_write_tag_mask( 1 );
502
                    mfc_read_tag_status_any();
503
                    data.quads += outPtr / 4;  
504
                    data.dynamic += 24 * outPtr;
505
                }
506
 
507
                mfc_put( &data, program_data_ea, sizeof( data ), 0, 0, 0 );
508
                mfc_write_tag_mask( 1 );
509
                mfc_read_tag_status_any();
510
 
511
 
512
                spu_write_out_mbox( 3 );
513
 
514
        }
515
        return 0;
516
}