QCDSP CPS primitives ---- Bob Mawhinney Hi, As we discussed on the phone today, the QCDSP high performance invertors are not built from primitives, but are written as individual optimized routines. We do have some matrix and vector primitives, which are detailed in the file vector.h included below. These are part of the general "utilities" of CPS. The implementation of many of these is done in assembly, to achieve performance on the DSP. The biggest performance hit is DRAM bandwidth, which will be much, much greater for QCDOC. Best, Bob //------------------------------------------------------------------ // // vector.h // // Header file for the vector class. // // For now this is specific to three colors. The constructor // will exit if the number of colors is not equal to three. // // This file contains the declarations of the Vector and Matrix // classes as well as the declarations of some genaral c-style // functions that perform operations on vectors of general // length. // //------------------------------------------------------------------ #ifndef INCLUDED_VECTOR_H #define INCLUDED_VECTOR_H #include #include "data_types.h" class Vector; // forward declaration class Matrix; //------------------------------------------------------------------ // Declarations of some genaral c-style functions that perform // operations on vectors. For these functions there exists // optimized assembly code. //------------------------------------------------------------------ extern "C" { void moveMem(void *b, const void *a, int len); // b=a, REQUIRE:len >= 4 void mDotMEqual(float* c, const float* a, const float* b); // C = A*B // Assumption: c!=a and c!=b void mDotMPlus(float* c, const float* a, const float* b); // C += A*B // Assumption: c!=a and c!=b void uDotXEqual(float* y, const float* m, const float* x); // y = M*x float dotProduct(const float *a, const float *b, int); // len >= 3 void vecAddEquVec(float *a, const float *b, int); // A += B void vecMinusEquVec(float *a, const float *b, int); // A -= B void vecNegative(float *a, const float *b, int); // A = -B void vecTimesEquFloat(float *a, float b, int); // A *= b void fTimesV1PlusV2(float *a, float b, const float *c, const float *d, int size); // A = b*C+D void fTimesV1MinusV2(float *a, float b, const float *c, const float *d, int size); // A = b*C-D void compDotProduct(float *c_r, float *c_i, const float *a, const float *b, int); // len >= 3 void cTimesV1PlusV2(float *a, float b_re, float b_im, const float *c, const float *d, int size); // A = b*C+D void cTimesV1MinusV2(float *a, float b_re, float b_im, const float *c, const float *d, int size); // A = b*C-D void oneMinusfTimesMatrix(float *a, float b, const float *c, int n); // A = 1-b*C, A and C are Matrix } //------------------------------------------------------------------ // Declarations of some genaral c-style functions that perform // operations on vectors. For these functions there is // no optimized assembly. //------------------------------------------------------------------ void uDotXMinus(float* y, const float* u, const float* x); void uDagDotXEqual(float* y, const float* u, const float* x); void uDagDotXPlus(float* y, const float* u, const float* x); //------------------------------------------------------------------ // Declarations of some genaral c-style functions that compute // re/im parts of the su3 characters of various representations // of su3 matrices. //------------------------------------------------------------------ extern float reChar6(float *p) ; extern float imChar6(float *p) ; extern float reChar8(float *p) ; extern float reChar10(float *p) ; extern float imChar10(float *p) ; //------------------------------------------------------------------ // For now the number of colors for the Matrix and Vector classes // is fixed. If the number of colors is not 3 the appropriate // constructors will exit. //------------------------------------------------------------------ enum{COLORS=3}; //------------------------------------------------------------------ // The Matrix class. // For now Matrix is a class of general 3x3 complex matrices. // If the number of colors is not 3 the constructor will exit. //------------------------------------------------------------------ class Matrix { Float u[2*COLORS*COLORS]; // The matrix friend class Vector; public: // CREATORS Matrix(); Matrix(float c); Matrix(const Complex& c); Matrix(const Matrix& m); Matrix& operator=(float c); Matrix& operator=(const Complex& c); Matrix& operator=(const Matrix& m) { moveMem(u, m.u, COLORS*COLORS*2*sizeof(Float)); return *this; } // MANIPULATORS Matrix& operator+=(const Matrix& m) { vecAddEquVec((float *)u, (float *)m.u, COLORS*COLORS*2); return *this; } Matrix& operator+=(float c) { u[0] += c; u[8] += c; u[16] += c; return *this; } Matrix& operator-=(const Matrix& m) { vecMinusEquVec((float *)u, (float *)m.u, COLORS*COLORS*2); return *this; } Matrix& operator-=(float c) { u[0] -= c; u[8] -= c; u[16] -= c; return *this; } Matrix& operator*=(float c) { vecTimesEquFloat((float *)u, c, COLORS*COLORS*2); return *this; } void DotMEqual(const Matrix& a, const Matrix& b) { mDotMEqual((float *)u, (float *) a.u, (float *) b.u);} //u = a.u * b.u void DotMPlus(const Matrix& a, const Matrix& b) { mDotMPlus((float *)u, (float *)a.u, (float *)b.u);} //u += a.u * b.u void Trans(const float* m); // *this = transpose of m. Assume: this != m void Trans(const Matrix& m) { Trans((const float *)(m.u)); } void Dagger(const float* m); // put dag(m) and save it to this object void Dagger(const Matrix& m) { Dagger((const float *)&m); } void TrLessAntiHermMatrix(const Matrix& this_dag); // translate this to traceless anti-hermitian matrix // passing this_dag as an argument in oder to make it // more efficient. void Cross2(const Vector& v1, const Vector& v2); // *this = 2 * (V1 x V2^*) void AntiHermMatrix(const float *a); // a points to an array of 8 real numbers // *this = i \lamda^i * a^i // \lambda^i are 8 Gellmann matrices void RandLink(void); // make this matrix to be a random SU3 with phase factor eta void Unitarize(void); // force this matrix to be a unitary SU(3) matrix void UnitMatrix(void); void ZeroMatrix(void); void NegMatrix(const Matrix& m) { vecNegative((float *)u, (float *)&m, COLORS*COLORS*2); } void OneMinusfTimesM(float x, const Matrix& m) { oneMinusfTimesMatrix((float *)u, x, (float *)&m, COLORS*COLORS*2); } // ACCESSORS Complex& operator()(int i, int j); const Complex& operator()(int i, int j) const; Complex& operator[](int i) { return ((Complex*)u)[i]; } const Complex& operator[](int i) const { return ((Complex*)u)[i]; } void Det(float* c) const; // get determinant and store it to c float ReTr() const; // real part of the trace Complex Tr() const; float NegHalfTrSquare() const; // -0.5 tr(*this)^2, this matrix must be anti-hermitian float ErrorSU3() const; // return |U~dag U - I|^2 // SU(3) Characters Complex Char3() const { return Tr() ; } ; Complex Char6() const ; Complex Char8() const ; Complex Char10() const ; }; //------------------------------------------------------------------ // The Vector class. // For now Vector is a class of general 3 component column // vectors. If the number of colors is not 3 the constructor // will exit. // Vector is not automatically normalized when constructed. //------------------------------------------------------------------ class Vector { Float v[2*COLORS]; // Vector friend class Matrix; public: // CREATORS Vector(); Vector& operator=(const Vector& x) { moveMem(v, x.v, COLORS*2*sizeof(Float)); return *this; } // MANIPULATORS Vector& operator*=(float p) { vecTimesEquFloat((float *)v, p, COLORS*2); return *this; } Vector& operator+=(const Vector& x) { vecAddEquVec((float *)v, (float *)x.v, COLORS*2); return *this; } Vector& operator-=(const Vector& x) { vecMinusEquVec((float *)v, (float *)x.v, COLORS*2); return *this; } void DotXEqual(const Matrix& m, const Vector& x) { uDotXEqual((float *)v, (float *) m.u, (float *) x.v); } // v = m.u * x.v, m should be in CRAM, x MUST be in DRAM */ void Normalize(void); // Normalize v to 1. void Orthogonalize(const Vector& p1); //-------------------------------------------------------------- // Functions that act on arrays of vectors of general length. // The array of vectors is treated as an array of floating // numbers pointed to by &v and having length len. // This set of functions does not really fit the way // Vector is currently defined (as an array with re/im and // color indeces only. It extends the notion of Vector to // a general array of floating numbers. //-------------------------------------------------------------- void CopyVec(const Vector *b, int len) { moveMem(&v, b, len*sizeof(Float)); } // len is the number of real numbers in the array. // Copy b to v Float NormSqNode(int len) {return Float( dotProduct((float *)&v, (float *)&v, len) ); } // len is the number of real numbers in the array. // Returns the dot product (v*, v) on the node. Float NormSqGlbSum(int len); // len is the number of real numbers in the array. // Returns the dot product (v*, v) summed over all nodes. Float ReDotProductNode(const Vector *b, int len) {return Float( dotProduct((float *)&v, (float *)b, len) ); } // len is the number of real numbers in the array. // Returns the real part of the dot product (v, b) Float ReDotProductGlbSum(const Vector *b, int len); // len is the number of real numbers in the array. // Returns the real part of the dot product (v, b) // summed over all nodes. void Vector::NormSqArraySliceSum(Float *f_out, const int size, const int dir); // Slice sum norm square the vector v of element size "size" perp. // to direction dir void Vector::SliceArraySum(Float *sum, const Float *f_in, const int dir); // Slice sum a lattice of Floats f_in to form an array of length L[dir] // lattice size of virtual grid in direction dir void Vector::SliceArraySumFive(Float *sum, const Float *f_in, const int dir); // 5D Slice sum a lattice of Floats f_in to form an array of length L[dir] // lattice size of virtual grid in direction dir void VecNegative(const Vector *b, int len) {vecNegative((float *)&v, (float *)b, len);} // len is the number of real numbers in the array. // v = -v void VecTimesEquFloat(const Float &fb, int len) {vecTimesEquFloat((float *)&v, float(fb), len);} // len is the number of real numbers in the array. // v = v * fb void VecAddEquVec(const Vector *b, int len) { vecAddEquVec((float *)&v, (float *)b, len);} // len is the number of real numbers in the array. // v = v + b void VecMinusEquVec(const Vector *b, int len) { vecMinusEquVec((float *)&v, (float *)b, len);} // v = v - b void FTimesV1PlusV2(const Float &fb, const Vector *c, const Vector *d, int len) { fTimesV1PlusV2((float *)&v, float(fb), (float *)c, (float *)d, len); } // len is the number of real numbers in the array. // v = fb * c + d void FTimesV1MinusV2(const Float &fb, const Vector *c, const Vector *d, int len) { fTimesV1MinusV2((float *)&v, float(fb), (float *)c, (float *)d, len); } // len is the number of real numbers in the array. // v = fb * c - d Complex CompDotProductNode(const Vector *b, int len) { float c_r, c_i; compDotProduct(&c_r, &c_i, (float *)&v, (float *)b, len); return Complex(c_r,c_i); } // len is the number of complex numbers in the array. // Returns the complex part of the dot product (v*, b) Complex CompDotProductGlbSum(const Vector *b, int len); // len is the number of real numbers in the array. // Returns the complex part of the dot product (v*, b) // summed over all nodes. void CTimesV1PlusV2(const Complex &fb, const Vector *c, const Vector *d, int len) { cTimesV1PlusV2((float *)&v, real(fb), imag(fb), (float *)c, (float *)d, len); } // len is the number of real numbers in the array. // v = fb * c + d void CTimesV1MinusV2(const Complex &fb, const Vector *c, const Vector *d, int len) { cTimesV1MinusV2((float *)&v, real(fb), imag(fb), (float *)c, (float *)d, len); } // len is the number of real numbers in the array. // v = fb * c - d void RandGaussVector(Float sigma2, int len); // It sets len entries of the array // of floating numbers pointed to by &v // with random numbers weighted according to // exp(-x^2 / (2 * sigma2)) void RandVector(Float high, Float low, int len); // It sets len entries of the array // of floating numbers pointed to by &v // with random numbers in the interval // [low, high] }; #endif