Communications system calls on QCDSP --- Bob Mawhinney Hi, These are the system calls which drive the Serial Communication Units (SCU) on QCDSP. They are C++, so are overloaded. It is implicitly assumed that a neighboring node also calls a corresponding function for the other end of the transaction. On QCDOC, the SCU's will support an interrupt feature which allows one node to signal a neighbor about a message it was not expecting. This is useful for general message routing, but not very important for QCD. The SCUDirArg class is detailed after the SCU system calls. An example code is given at the end. Best, Bob ********************************************************************* * * A part of sysfunc.h for QCDSP //-------------------------------------------------------------------- // This function returns the explicit wire number (0 - 7) of the // physics direction given. This function should not be needed // by users, except for assembly coded applications. //-------------------------------------------------------------------- int SCURemap( SCUDir dir ); //-------------------------------------------------------------------- // These are the primary functions for generic SCU transfers. //-------------------------------------------------------------------- void SCUTrans( SCUDirArg * arg ); void SCUTrans( SCUDirArg ** arg, int n ); //-------------------------------------------------------------------- // This function does multiple transfers (n of them) for a specified // direction. All transfers have the same block, stride and number // of blocks, but different addresses. The address field of the // arg object is the base address. Each transfer is started at // a specified offset relative to the base. 
//-------------------------------------------------------------------- void SCUTrans( SCUDirArg * arg, unsigned int * offset, int n ); //-------------------------------------------------------------------- // These are used to set up the DMA, as given by the arguments // passed. No transfers are done. //-------------------------------------------------------------------- void SCUSetDMA( SCUDirArg * arg ); void SCUSetDMA( SCUDirArg ** arg, int n ); //-------------------------------------------------------------------- // These functions also perform SCU transfers, but do not alter // the existing block, stride and number of blocks held in the // registers of the SCU. These can be used by general users, but // you MUST not call any function between setting the DMA and // calling these routines, or between successive calls to these routines. // A called function might alter the block, stride and/or block length // which would interfere with your transfer. //-------------------------------------------------------------------- void SCUTransAddr( SCUDirArg * arg ); void SCUTransAddr( SCUDirArg ** arg, int n ); //-------------------------------------------------------------------- // SCUPoll is used to find the status of a transfer in a particular // direction. It returns 0 if the direction is free, 1 if a // receive is in progress and 2 if a send is in progress. //-------------------------------------------------------------------- int SCUPoll( SCUDirArg * arg ); //-------------------------------------------------------------------- // The SCUTransComplete functions do not return until the transfers in // the specified directions are finished. In particular, SCUTransComplete() // only returns when all transfers are completed. 
//-------------------------------------------------------------------- void SCUTransComplete(); void SCUTransComplete( SCUDirArg * arg ); void SCUTransComplete( SCUDirArg ** arg, int n ); ********************************************************************* * * scu_dir_arg.h for QCDSP * //------------------------------------------------------------------ // 10/13/97 RDM // // scu_dir_arg.h defines the interface for the SCUDirArg class, // which is used in calling many system SCU routines. // // addr holds the DRAM address for the sent/received data // // packed holds the block, stride and number of blocks for the // transfer, assembled into the form needed by the SCU DMA // control register. // // stride_ext holds the 7 upper bits of the stride (bits 12-18 // of the full stride). // // xr_reg holds the address in the SCU control area where addr is // loaded to start the transfer. // // poll_reg is the address in the SCU control area to poll for the // status of the desired wire. // // dma_reg is the address in the SCU control area where packed // is loaded. // // stride_reg is the address in the SCU control area where stride_ext // is loaded. 
// // wire is the physical wire number (0 - 7) // // xr is the send/receive flag, defined in scu_enum.h //------------------------------------------------------------------ // #ifndef INCLUDED_SCU_DIR_ARG_H #define INCLUDED_SCU_DIR_ARG_H #include "../../include/scu_enum.h" class SCUDirArg { private: void * addr; unsigned int packed; unsigned int stride_ext; void ** xr_reg; volatile unsigned int * poll_reg; unsigned int * dma_reg; unsigned int * stride_reg; int wire; SCUXR xr; public: SCUDirArg(); SCUDirArg( void * addr, SCUDir dir, SCUXR xr, int blklen, int numblk = 1, int stride = 1 ); ~SCUDirArg(); void Init( void * addr, SCUDir dir, SCUXR xr, int blklen, int numblk = 1, int stride = 1 ); void * Addr(); void * Addr( void * addr ); int Blklen(); int Blklen( int blklen ); int Numblk(); int Numblk( int numblk ); int Stride(); int Stride( int stride ); void Reload( void * a, int blklen, int numblk = 1, int stride = 1 ); void printf(); friend void SCUTrans( SCUDirArg * arg ); friend void SCUTrans( SCUDirArg ** arg, int n ); friend void SCUTrans( SCUDirArg * arg, unsigned int * addr, int n ); friend void SCUSetDMA( SCUDirArg * arg ); friend void SCUSetDMA( SCUDirArg ** arg, int n ); friend void SCUTransAddr( SCUDirArg * arg ); friend void SCUTransAddr( SCUDirArg ** arg, int n ); friend int SCUPoll( SCUDirArg * arg ); friend void SCUTransComplete(); friend void SCUTransComplete( SCUDirArg * arg ); friend void SCUTransComplete( SCUDirArg ** arg, int n ); friend void SCUTransLog( SCUDirArg * arg ); friend void SCUTransLocal( SCUDirArg * arg ); friend void SCUCopy( int x, int r ); friend void SCUSetDMALog( SCUDirArg * arg ); friend void SCUSetDMALocal( SCUDirArg * arg ); friend void SCUTransAddrLog( SCUDirArg * arg ); friend void SCUTransAddrLocal( SCUDirArg * arg ); friend int SCUPollLog( int wire ); friend int SCUPollLocal( int wire ); friend void SCUTransStartLog( SCUDirArg * arg ); friend void SCUTransStartLocal( SCUDirArg * arg ); }; #endif 
********************************************************************** * * An example code which uses the SCU calls on QCDSP. //--------------------------------------------------------------------- // main.C 10/26/97 RDM // // Test and demonstration of SCU system calls. // Three main sections involve transfers. // // 1) sets up all wires, but only uses 2. // 2) runs all 8 wires and then reverses all directions // and runs all eight again. // 3) runs only 2 wires, but uses different system calls. // //--------------------------------------------------------------------- #include #include #include //--------------------------------------------------------------------- // Memory where data is transferred to/from. //--------------------------------------------------------------------- const int Dsize = 5000; unsigned int Data[ 16 * Dsize]; //--------------------------------------------------------------------- // block size, number blocks and stride for first transfers. //--------------------------------------------------------------------- const int bs[8] = { 5, 100, 3, 7, 14, 40, 100, 400 }; const int nb[8] = { 40, 2, 7, 3, 20, 7, 4, 1 }; const int st[8] = { 2, 6, 80, 200, 2, 9, 1, 1 }; //--------------------------------------------------------------------- // Data to use to set up multiple argument objects by looping. // Array elements with even index are sends, odd are receives. 
//--------------------------------------------------------------------- SCUDirArg arg[16]; const SCUDir dir[] = { SCU_XP, SCU_XM, SCU_YP, SCU_YM, SCU_ZP, SCU_ZM, SCU_TP, SCU_TM, SCU_XM, SCU_XP, SCU_YM, SCU_YP, SCU_ZM, SCU_ZP, SCU_TM, SCU_TP }; const SCUXR xr[] = { SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, SCU_SEND, SCU_REC, }; const int blksize[] = { 5, 100, 3, 7, 14, 40, 100, 400, 10, 30, 9, 6, 22, 14, 8, 88 }; const int numblk[] = { 40, 2, 7, 3, 20, 7, 4, 1, 3, 1, 12, 18, 14, 22, 11, 1 }; const int stride[] = { 2, 6, 80, 20, 2, 9, 1, 1, 1, 5, 12, 3, 11, 8, 4, 4 }; //--------------------------------------------------------------------- // Functions to set values for data in send buffers and clear // receive buffers. //--------------------------------------------------------------------- void set_data( int * buf, int n ); void clear_data( int * buf, int n ); //--------------------------------------------------------------------- // Used to check a transfer specified by ?x against received // values ?r. //--------------------------------------------------------------------- void check_data( void * ax, int bx, int nx, int sx, void * ar, int br, int nr, int sr ); //--------------------------------------------------------------------- // main program //--------------------------------------------------------------------- main() { int i; int buf[16]; // Used to encode the offset (in units of Dsize) // from Data where send/receive results are // located. Used by set_data and clear_data. 
void * addr[16]; SCUDirArg * arg1p[8]; SCUDirArg * arg2p[8]; //------------------------------------------------------------------- // Define send/receive buffers //------------------------------------------------------------------- for ( i = 0; i < 16; i++ ) addr[i] = Data + i * Dsize; //------------------------------------------------------------------- // Example of using constructor to initialize transfer. // Even array members send, odd receive. // Section 1 transfer //------------------------------------------------------------------- SCUDirArg xp( addr[0], SCU_XP, SCU_SEND, bs[0], nb[0], st[0] ); SCUDirArg xm( addr[1], SCU_XM, SCU_REC , bs[1], nb[1], st[1] ); SCUDirArg yp( addr[2], SCU_YP, SCU_SEND, bs[2], nb[2], st[2] ); SCUDirArg ym( addr[3], SCU_YM, SCU_REC , bs[3], nb[3], st[3] ); SCUDirArg zp( addr[4], SCU_ZP, SCU_SEND, bs[4], nb[4], st[4] ); SCUDirArg zm( addr[5], SCU_ZM, SCU_REC , bs[5], nb[5], st[5] ); SCUDirArg tp( addr[6], SCU_TP, SCU_SEND, bs[6], nb[6], st[6] ); SCUDirArg tm( addr[7], SCU_TM, SCU_REC , bs[7], nb[7], st[7] ); //------------------------------------------------------------------- // Specify the buffers holding send data and initialize data // Even indices send. //------------------------------------------------------------------- for ( i = 0; i < 8; i++ ) buf[i] = 2 * i; set_data( buf, 8 ); //------------------------------------------------------------------- // Specify and zero receive buffers. // Odd indices receive //------------------------------------------------------------------- for ( i = 0; i < 8; i++ ) buf[i] = 2 * i + 1; clear_data( buf, 8 ); //------------------------------------------------------------------- // Print contents of arguments set up so far. 
//------------------------------------------------------------------- printf("\nOutput for first transfer\n"); xp.printf(); xm.printf(); yp.printf(); ym.printf(); zp.printf(); zm.printf(); tp.printf(); tm.printf(); //------------------------------------------------------------------- // Do first transfer //------------------------------------------------------------------- printf("Starting send on XP\n"); SCUTrans( &xp ); printf("Starting receive on XM\n"); SCUTrans( &xm ); printf("Starting to check if transfer complete\n"); SCUTransComplete(); printf("Starting to check data sent\n"); check_data( addr[0], bs[0], nb[0], st[0], addr[1], bs[1], nb[1], st[1] ); //---------------------------------------------------------------- // Set up other arguments to do send +, receive - and then // send -, receive +. //---------------------------------------------------------------- printf("\nOutput for second transfer\n"); for ( i = 0; i < 16; i++ ) { arg[i].Init( addr[i], dir[i], xr[i], blksize[i], numblk[i], stride[i] ); arg[i].printf(); } //------------------------------------------------------------------- // zero receive buffers. 
//------------------------------------------------------------------- clear_data( buf, 8 ); //------------------------------------------------------------------- // Send along + directions, receive on - //------------------------------------------------------------------- for ( i = 0; i < 8; i++ ) arg1p[i] = &arg[i]; printf("Starting SCUTrans\n"); SCUTrans( arg1p, 8 ); printf("Starting SCUTransComplete\n"); SCUTransComplete(); for ( i = 0; i < 8; i += 2 ) check_data( addr[ i ], blksize[ i ], numblk[ i ], stride[ i ], addr[i+1], blksize[i+1], numblk[i+1], stride[i+1] ); //------------------------------------------------------------------- // Send along - directions, receive on + //------------------------------------------------------------------- for ( i = 0; i < 8; i++ ) arg2p[i] = &arg[i+8]; printf("Starting SCUTrans\n"); SCUTrans( arg2p, 8 ); printf("Starting SCUTransComplete\n"); SCUTransComplete(); for ( i = 8; i < 16; i += 2 ) check_data( addr[ i ], blksize[ i ], numblk[ i ], stride[ i ], addr[i+1], blksize[i+1], numblk[i+1], stride[i+1] ); //------------------------------------------------------------------- // Third section, test other functions //------------------------------------------------------------------- printf("\nOutput for third transfer\n"); ym.Blklen( 9 ); ym.Numblk( 8 ); ym.Stride( 2 ); yp.Blklen( 6 ); yp.Numblk( 12 ); yp.Stride( 5 ); SCUSetDMA( &ym ); SCUSetDMA( &yp ); for ( i = 2; i < 6; i++ ) { clear_data( buf, 8 ); SCUTransAddr( &ym ); SCUTransAddr( &yp ); SCUTransComplete(); check_data( yp.Addr(), yp.Blklen(), yp.Numblk(), yp.Stride(), ym.Addr(), ym.Blklen(), ym.Numblk(), ym.Stride() ); yp.Addr( addr[ 2*i ] ); ym.Addr( addr[ 2*i + 1 ] ); } } //-------------------------------------------------------------------- // set data to send. Set send data to include the ID of // this node to check SCULocal flag. 
//-------------------------------------------------------------------- void set_data( int * buf, int n ) { int i; int m; int id; unsigned int * uip; for ( m = 0; m < n; m++ ) { id = ( UniqueID() << 16 ) | ( buf[m] << 12 ) ; uip = Data + buf[m] * Dsize; for ( i = 0; i < Dsize; i++ ) *uip++ = id + i; } } //-------------------------------------------------------------------- // clear data receive area. //-------------------------------------------------------------------- void clear_data( int * buf, int n ) { int i; int m; unsigned int * uip; for ( m = 0; m < n; m++ ) { uip = Data + buf[m] * Dsize; for ( i = 0; i < Dsize; i++ ) *uip++ = 0; } } //-------------------------------------------------------------------- // int nx, nr: number of blocks (send, rec) // int sx, sr: stride (send, rec) // int bx, br: block length (send, rec) // unsigned int * ax: addresses (send) // unsigned int * ar: addresses (receive) //-------------------------------------------------------------------- void check_data( void * vax, int bx, int nx, int sx, void * var, int br, int nr, int sr ) { unsigned int diff; int i,j,k,kb; unsigned int * ax = ( unsigned int * ) vax; unsigned int * ar = ( unsigned int * ) var; //---------------------------------------------------------------- // The stride is 1 less than the hardware stride to make the // pointer arithmetic more efficient. 
//---------------------------------------------------------------- sx -= 1; sr -= 1; if ( bx * nx != br * nr ) { printf("Send blksize and numblk (%d, %d) not consistent with\n", bx, nx ); printf(" receive blksize and numblk (%d, %d)\n", br, nr ); exit(0); } k = 0; kb = 0; for ( i = 0; i < nx; i++ ) { for ( j = 0; j < bx; j++ ) { if ( i == 0 && j == 0 ) { diff = *ar - *ax; printf("check_data: difference of first word is %d\n", diff ); } if ( *ar++ != *ax++ + diff ) { printf("Send word %d in block %d differs from receive\n", j, i); printf(" word %d in block %d \n", k, kb ); } k++; if ( k == br ) { ar += sr; k = 0; kb++; } } ax += sx; } }