diff options
Diffstat (limited to 'btl/libs')
28 files changed, 3201 insertions, 0 deletions
diff --git a/btl/libs/BLACS/blacs.h b/btl/libs/BLACS/blacs.h new file mode 100644 index 0000000..97c863b --- /dev/null +++ b/btl/libs/BLACS/blacs.h @@ -0,0 +1,50 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BLACS_H_ +#define BLACS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + + /* BLACS declarations */ + void blacs_pinfo_(int*, int*); + void blacs_get_(int*, int*, int*); + void blacs_gridinit_(int*, const char*, int*, int*); + void blacs_gridinfo_(const int*, int*, int*, int*, int*); + void blacs_pcoord_(const int*, const int*, int*, int*); + void blacs_gridexit_(int*); + void blacs_exit_(int*); + void blacs_barrier_(const int*, const char*); + void dgerv2d_(const int*, const int*, const int*, double*, const int*, const int*, const int*); + void dgesd2d_(const int*, const int*, const int*, const double*, const int*, const int*, const int*); + void sgerv2d_(const int*, const int*, const int*, float*, const int*, const int*, const int*); + void sgesd2d_(const int*, const int*, const int*, const float*, const int*, const int*, const int*); + void igebs2d_(const int*, const char*, const char*, const int*, const int*, const int*, const int*); + void igebr2d_(const int*, const char*, const char*, const int*, const int*, int*, const int*, const int*, const int*); + void dgebs2d_(const int*, const char*, const char*, const int*, const int*, const double*, const int*); + void dgebr2d_(const int*, const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*); + void dgsum2d_(const int*, const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*); + void igsum2d_(const int*, const char*, const char*, const int*, const int*, int*, const int*, const int*, const int*); + +#ifdef __cplusplus +} +#endif + + +#endif /* BLACS_H_ */ diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh new file mode 100644 index 0000000..09f57b3 --- /dev/null +++ b/btl/libs/BLACS/blacs_interface.hh @@ -0,0 +1,153 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BTL_BLACS_INTERFACE_H +#define BTL_BLACS_INTERFACE_H + +#include <vector> +#include <algorithm> +#include "blacs.h" +extern "C" { + void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); + int numroc_(const int*, const int*, const int*, const int*, const int*); +} + +#include "scatter.h" +#include "gather.h" + +template<typename real> +class blacs_interface +{ + +public: + typedef real real_type; + typedef std::vector<real_type> stl_vector; + typedef stl_vector stl_matrix; + + typedef real* gene_matrix; + typedef real* gene_vector; + + + static void free_matrix(gene_matrix & A, int N){ + delete A; + } + + static void free_vector(gene_vector & B){ + delete B; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + int N = A_stl.size(); + A = new real[N]; + for (int j=0;j<N;j++) + A[j] = A_stl[j]; + } + + static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + int N = B_stl.size(); + B = new real[N]; + for (int i=0;i<N;i++) + B[i] = B_stl[i]; + } + + static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){ + int N = B_stl.size(); + for (int i=0;i<N;i++) + B_stl[i] = B[i]; + } + + static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){ + int N = A_stl.size(); + for (int i=0;i<N;i++) + A_stl[i] = A[i]; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + for (int i=0;i<N;i++) + cible[i]=source[i]; + } + + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + for (int i=0;i<N;i++) + cible[i]=source[i]; + } + + +public: + static int context() { + int ctxt, ignored, what = 0; + blacs_get_(&ignored, &what, &ctxt); + return ctxt; + } + static int myid() { + int procnum, myid; + blacs_pinfo_(&myid, &procnum); + return myid; + } + + + static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix, + int& GlobalRows, int& GlobalCols, + int& BlockRows, int& BlockCols, + int& LocalRows, int& LocalCols + ) { + scatter(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + } + + static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix, + int *desc, + const int& GlobalRows=0, const int& GlobalCols=0, + const int& BlockRows=0, const int& BlockCols=0 + ) { + int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols, + BlockRows_ = BlockRows, BlockCols_ = BlockCols, + LocalRows_, LocalCols_; + const int ctxt = context(); + scatter(ctxt, GlobalMatrix, LocalMatrix, + GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_ + ); + + const int iZERO = 0; + int info; + const int LLD = std::max(1, LocalRows_); + descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_, + &iZERO, &iZERO, &ctxt, &LLD, &info + ); + } + + static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix, + int& GlobalRows, int& GlobalCols, + int& BlockRows, int& BlockCols, + int& LocalRows, int& LocalCols + ) { + gather(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + } + + static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix, + int* desc + ) { + int GlobalRows = desc[2], GlobalCols = desc[3], + BlockRows = desc[4], BlockCols = desc[5], + LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8]; + const int ctxt = context(); + gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + } + + +}; + +#endif /* BTL_BLACS_INTERFACE_H */ diff --git a/btl/libs/BLACS/blacsinit.hh b/btl/libs/BLACS/blacsinit.hh new file mode 100644 index 0000000..41f5ebf --- /dev/null +++ b/btl/libs/BLACS/blacsinit.hh @@ -0,0 +1,38 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BLACSINIT_HH +#define BLACSINIT_HH + +#include <cmath> + +bool blacsinit(int *argc, char ***argv) +{ + int context, myid, numproc, prows, pcols, iZERO = 0; + MPI_Init(argc, argv); + blacs_pinfo_(&myid, &numproc); + blacs_get_(&iZERO, &iZERO, &context); + + int p = static_cast<double>(std::sqrt(static_cast<double>(numproc)) + 1.); + while (numproc % p) --p; + prows = p; pcols = numproc/p; + + blacs_gridinit_(&context, "Row-major", &prows, &pcols); + return (myid == 0); +} + +#endif /* BLACSINIT_HH */ diff --git a/btl/libs/BLACS/gather.h b/btl/libs/BLACS/gather.h new file mode 100644 index 0000000..957f7c7 --- /dev/null +++ b/btl/libs/BLACS/gather.h @@ -0,0 +1,49 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef GATHER_H_ +#define GATHER_H_ + +#define TYPENAME float +#define TYPEPREFIX s +#include "gather_impl.h" +#undef TYPENAME +#undef TYPEPREFIX + +#define TYPENAME double +#define TYPEPREFIX d +#include "gather_impl.h" +#undef TYPENAME +#undef TYPEPREFIX + +template<typename T> +static void gather_matrix(std::vector<T>& GlobalMatrix, const std::vector<T>& LocalMatrix, + const int* desc +) { + int GlobalRows = desc[2], GlobalCols = desc[3], + BlockRows = desc[4], BlockCols = desc[5], + LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8]; + int ctxt; + { + int ignored, what = 0; + blacs_get_(&ignored, &what, &ctxt); + } + gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); +} + + +#endif /* GATHER_H_ */ diff --git a/btl/libs/BLACS/gather_impl.h b/btl/libs/BLACS/gather_impl.h new file mode 100644 index 0000000..3b57d57 --- /dev/null +++ b/btl/libs/BLACS/gather_impl.h @@ -0,0 +1,126 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#define PRFX d +#define CAT_(x,y) x##y +#define CAT(x,y) CAT_(x,y) + +#define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_) +#define vector_t std::vector<TYPENAME> + +#include <vector> + + +inline void gather( + const int& context, // [IN] + vector_t& GlobalMatrixVector, // [OUT] Only relevant for root + const vector_t& LocalMatrixVector, // [IN] + int& GlobalRows, // [OUT] + int& GlobalCols, // [OUT] + int& BlockRows, // [IN (root) / OUT (other)] + int& BlockCols, // [IN (root) / OUT (other)] + int& LocalRows, // [IN] + int& LocalCols, // [IN] + const int& rootrow = 0, // [IN] + const int& rootcol = 0 // [IN] +) { + /* Helper variables */ + int iONE = 1, iTWO = 2, imONE = -1; + + int myid, myrow, mycol, procrows, proccols, procnum; + blacs_pinfo_(&myid, &procnum); + blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol); + bool iamroot = (myrow == rootrow && mycol == rootcol); + TYPENAME *GlobalMatrix; + const TYPENAME *LocalMatrix = &LocalMatrixVector[0]; + + /* Broadcast matrix info */ + int binfo[2]; + if (iamroot) { + binfo[0] = BlockRows; + binfo[1] = BlockCols; + + igebs2d_(&context, "All", " ", &iTWO, &iONE, binfo, &iTWO); + } else { + igebr2d_(&context, "All", " ", &iTWO, &iONE, binfo, &iTWO, + &rootrow, &rootcol); + } + BlockRows = binfo[0]; + BlockCols = binfo[1]; + + /* Retrieve matrix global dimensions */ + int minfo[2]; + minfo[0] = LocalRows; minfo[1] = LocalCols; + igsum2d_(&context, "Col", " ", &iONE, &iONE, minfo, &iONE, &imONE, &imONE); + igsum2d_(&context, "Row", " ", &iONE, &iONE, minfo+1, &iONE, &imONE, &imONE); + GlobalRows = minfo[0]; GlobalCols = minfo[1]; + + + /* Reserve space on root */ + if (iamroot) { + GlobalMatrixVector.resize(GlobalRows*GlobalCols); + GlobalMatrix = &GlobalMatrixVector[0]; + } + + /* Gather matrix */ + int srcr = 0, srcc = 0; + int SendRows, SendCols; + int StartRow = 0, StartCol = 0; + for (int r = 0; r < GlobalRows; r += BlockRows, srcr=(srcr+1)%procrows) { + srcc = 0; + + // Is this the last row bloc? + SendRows = BlockRows; + if (GlobalRows-r < BlockRows) + SendRows = GlobalRows-r; + if (SendRows <= 0) + SendRows = 0; + + for (int c=0; c<GlobalCols; c+=BlockCols, srcc=(srcc+1)%proccols) { + + // Is this the last column block? + SendCols = BlockCols; + if (GlobalCols-c < BlockCols) + SendCols = GlobalCols-c; + + // Send data + if (myrow == srcr && mycol == srcc) { + FUNCNAME(gesd2d) (&context, &SendRows, &SendCols, + LocalMatrix+LocalRows*StartCol+StartRow, + &LocalRows, &rootrow, &rootcol + ); + + // Adjust the next starting column + StartCol = (StartCol + SendCols) % LocalCols; + } + + // Receive data + if (iamroot) { + FUNCNAME(gerv2d) (&context, &SendRows, &SendCols, + GlobalMatrix + GlobalRows*c + r, + &GlobalRows, &srcr, &srcc + ); + } + } + + // Adjust the next starting row + if (myrow == srcr) + StartRow = (StartRow + SendRows) % LocalRows; + + } + +} diff --git a/btl/libs/BLACS/scatter.h b/btl/libs/BLACS/scatter.h new file mode 100644 index 0000000..170e82a --- /dev/null +++ b/btl/libs/BLACS/scatter.h @@ -0,0 +1,61 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef SCATTER_H_ +#define SCATTER_H_ + +#define TYPENAME float +#define TYPEPREFIX s +#include "scatter_impl.h" +#undef TYPENAME +#undef TYPEPREFIX + +#define TYPENAME double +#define TYPEPREFIX d +#include "scatter_impl.h" +#undef TYPENAME +#undef TYPEPREFIX + +template<typename T> +void scatter_matrix(const std::vector<T>& GlobalMatrix, std::vector<T>& LocalMatrix, + int *desc, + const int& GlobalRows=0, const int& GlobalCols=0, + const int& BlockRows=0, const int& BlockCols=0 + ) +{ + int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols, + BlockRows_ = BlockRows, BlockCols_ = BlockCols, + LocalRows_, LocalCols_; + int ctxt; + { + int ignored, what = 0; + blacs_get_(&ignored, &what, &ctxt); + } + scatter(ctxt, GlobalMatrix, LocalMatrix, + GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_ + ); + + const int iZERO = 0; + int info; + const int LLD = std::max(1, LocalRows_); + descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_, + &iZERO, &iZERO, &ctxt, &LLD, &info + ); +} + + +#endif /* SCATTER_H_ */ diff --git a/btl/libs/BLACS/scatter_impl.h b/btl/libs/BLACS/scatter_impl.h new file mode 100644 index 0000000..7c38c22 --- /dev/null +++ b/btl/libs/BLACS/scatter_impl.h @@ -0,0 +1,121 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#define CAT_(x,y) x##y +#define CAT(x,y) CAT_(x,y) + +#define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_) +#define vector_t std::vector<TYPENAME> + +#include <vector> + +inline void scatter( + const int& context, // [IN] + const vector_t& GlobalMatrixVector, // [IN] Only relevant for root + vector_t& LocalMatrixVector, // [OUT] The space is reserved here + int& GlobalRows, // [IN (root) / OUT (other)] + int& GlobalCols, // [IN (root) / OUT (other)] + int& BlockRows, // [IN (root) / OUT (other)] + int& BlockCols, // [IN (root) / OUT (other)] + int& LocalRows, // [OUT] + int& LocalCols, // [OUT] + const int& rootrow = 0, // [IN] + const int& rootcol = 0 // [IN] +) { + /* Helper variables */ + int iZERO = 0, iONE = 1, iFOUR = 4; + + int myid, myrow, mycol, procrows, proccols, procnum; + blacs_pinfo_(&myid, &procnum); + blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol); + bool iamroot = (myrow == rootrow && mycol == rootcol); + const TYPENAME *GlobalMatrix = &GlobalMatrixVector[0]; + TYPENAME *LocalMatrix; + + /* Broadcast matrix info */ + int minfo[4]; + if (iamroot) { + minfo[0] = GlobalRows; + minfo[1] = GlobalCols; + minfo[2] = BlockRows; + minfo[3] = BlockCols; + igebs2d_(&context, "All", " ", &iFOUR, &iONE, minfo, &iFOUR); + } else { + igebr2d_(&context, "All", " ", &iFOUR, &iONE, minfo, &iFOUR, + &iZERO, &iZERO); + } + + GlobalRows = minfo[0]; + GlobalCols = minfo[1]; + BlockRows = minfo[2]; + BlockCols = minfo[3]; + + + /* Reserve space */ + LocalRows = numroc_(&GlobalRows, &BlockRows, &myrow, &iZERO, &procrows); + LocalCols = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &proccols); + LocalMatrixVector.resize(LocalRows*LocalCols); + LocalMatrix = &LocalMatrixVector[0]; + + /* Scatter matrix */ + int destr = 0, destc = 0; + int SendRows, SendCols; + int RecvRow = 0, RecvCol = 0; + for (int r = 0; r < GlobalRows; r += BlockRows, destr=(destr+1)%procrows) { + destc = 0; + + // Is this the last row bloc? + SendRows = BlockRows; + if (GlobalRows-r < BlockRows) + SendRows = GlobalRows-r; + if (SendRows <= 0) + SendRows = 0; + + for (int c=0; c<GlobalCols; c+=BlockCols, destc=(destc+1)%proccols) { + + // Is this the last column block? + SendCols = BlockCols; + if (GlobalCols-c < BlockCols) + SendCols = GlobalCols-c; + + // Send data + if (iamroot) { + FUNCNAME(gesd2d) (&context, &SendRows, &SendCols, + GlobalMatrix + GlobalRows*c + r, + &GlobalRows, &destr, &destc + ); + } + + // Rerceive data + if (myrow == destr && mycol == destc) { + FUNCNAME(gerv2d) (&context, &SendRows, &SendCols, + LocalMatrix+LocalRows*RecvCol+RecvRow, + &LocalRows, &rootrow, &rootcol + ); + + // Adjust the next starting column + RecvCol = (RecvCol + SendCols) % LocalCols; + } + } + + // Adjust the next starting row + if (myrow == destr) + RecvRow = (RecvRow + SendRows) % LocalRows; + + } + +} diff --git a/btl/libs/BLAS/CMakeLists.txt b/btl/libs/BLAS/CMakeLists.txt new file mode 100644 index 0000000..de42fe0 --- /dev/null +++ b/btl/libs/BLAS/CMakeLists.txt @@ -0,0 +1,60 @@ + +find_package(ATLAS) +if (ATLAS_FOUND) + btl_add_bench(btl_atlas main.cpp) + if(BUILD_btl_atlas) + target_link_libraries(btl_atlas ${ATLAS_LIBRARIES}) + set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1") + endif(BUILD_btl_atlas) +endif (ATLAS_FOUND) + +find_package(MKL) +if (MKL_FOUND) + btl_add_bench(btl_mkl main.cpp) + if(BUILD_btl_mkl) + target_link_libraries(btl_mkl ${MKL_LIBRARIES}) + set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1") + endif(BUILD_btl_mkl) +endif (MKL_FOUND) + + +find_package(GOTO2) +if (GOTO2_FOUND) + btl_add_bench(btl_goto2 main.cpp) + if(BUILD_btl_goto2) + target_link_libraries(btl_goto2 ${GOTO_LIBRARIES} ) + set_target_properties(btl_goto2 PROPERTIES COMPILE_FLAGS "-DCBLASNAME=GOTO2") + endif(BUILD_btl_goto2) +endif (GOTO2_FOUND) + +find_package(GOTO) +if (GOTO_FOUND) + if(GOTO2_FOUND) + btl_add_bench(btl_goto main.cpp OFF) + else() + btl_add_bench(btl_goto main.cpp) + endif() + if(BUILD_btl_goto) + target_link_libraries(btl_goto ${GOTO_LIBRARIES} ) + set_target_properties(btl_goto PROPERTIES COMPILE_FLAGS "-DCBLASNAME=GOTO") + endif(BUILD_btl_goto) +endif (GOTO_FOUND) + +find_package(ACML) +if (ACML_FOUND) + btl_add_bench(btl_acml main.cpp) + if(BUILD_btl_acml) + target_link_libraries(btl_acml ${ACML_LIBRARIES} ) + set_target_properties(btl_acml PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ACML -DHAS_LAPACK=1") + endif(BUILD_btl_acml) +endif (ACML_FOUND) + +if(Eigen_SOURCE_DIR AND CMAKE_Fortran_COMPILER_WORKS) + # we are inside Eigen and blas/lapack interface is compilable + include_directories(${Eigen_SOURCE_DIR}) + btl_add_bench(btl_eigenblas main.cpp) + if(BUILD_btl_eigenblas) + target_link_libraries(btl_eigenblas eigen_blas eigen_lapack ) + set_target_properties(btl_eigenblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=EigenBLAS") + endif() +endif() diff --git a/btl/libs/BLAS/blas.h b/btl/libs/BLAS/blas.h new file mode 100644 index 0000000..25607a5 --- /dev/null +++ b/btl/libs/BLAS/blas.h @@ -0,0 +1,681 @@ +#ifndef BLAS_H +#define BLAS_H + +#define BLASFUNC(FUNC) FUNC##_ + +#ifdef __WIN64__ +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#include <complex> + +extern "C" { + +int BLASFUNC(xerbla)(const char *, int *info, int); + +float BLASFUNC(sdot) (int *, float *, int *, float *, int *); +float BLASFUNC(sdsdot)(int *, float *, float *, int *, float *, int *); + +double BLASFUNC(dsdot) (int *, float *, int *, float *, int *); +double BLASFUNC(ddot) (int *, double *, int *, double *, int *); +double BLASFUNC(qdot) (int *, double *, int *, double *, int *); + +#if defined(F_INTERFACE_GFORT) && !defined(__64BIT__) +int BLASFUNC(cdotu) (int *, float * , int *, float *, int *); +int BLASFUNC(cdotc) (int *, float *, int *, float *, int *); +void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *); +void BLASFUNC(zdotc) (double *, int *, double *, int *, double *, int *); +void BLASFUNC(xdotu) (double *, int *, double *, int *, double *, int *); +void BLASFUNC(xdotc) (double *, int *, double *, int *, double *, int *); +#elif defined(F_INTERFACE_F2C) || \ + defined(F_INTERFACE_PGI) || \ + defined(F_INTERFACE_GFORT) || \ + (defined(F_INTERFACE_PATHSCALE) && defined(__64BIT__)) +void BLASFUNC(cdotu) (float *, int *, float * , int *, float *, int *); +void BLASFUNC(cdotc) (float *, int *, float *, int *, float *, int *); +void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *); +void BLASFUNC(zdotc) (double *, int *, double *, int *, double *, int *); +void BLASFUNC(xdotu) (double *, int *, double *, int *, double *, int *); +void BLASFUNC(xdotc) (double *, int *, double *, int *, double *, int *); +#else +std::complex<float> BLASFUNC(cdotu) (int *, float *, int *, float *, int *); +std::complex<float> BLASFUNC(cdotc) (int *, float *, int *, float *, int *); +std::complex<double> BLASFUNC(zdotu) (int *, double *, int *, double *, int *); +std::complex<double> BLASFUNC(zdotc) (int *, double *, int *, double *, int *); +double BLASFUNC(xdotu) (int *, double *, int *, double *, int *); +double BLASFUNC(xdotc) (int *, double *, int *, double *, int *); +#endif + +int BLASFUNC(cdotuw) (int *, float *, int *, float *, int *, float*); +int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*); +int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*); +int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*); + +int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *); +int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *); +int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *); +int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *); +int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *); + +int BLASFUNC(scopy) (int *, float *, int *, float *, int *); +int BLASFUNC(dcopy) (int *, double *, int *, double *, int *); +int BLASFUNC(qcopy) (int *, double *, int *, double *, int *); +int BLASFUNC(ccopy) (int *, float *, int *, float *, int *); +int BLASFUNC(zcopy) (int *, double *, int *, double *, int *); +int BLASFUNC(xcopy) (int *, double *, int *, double *, int *); + +int BLASFUNC(sswap) (int *, float *, int *, float *, int *); +int BLASFUNC(dswap) (int *, double *, int *, double *, int *); +int BLASFUNC(qswap) (int *, double *, int *, double *, int *); +int BLASFUNC(cswap) (int *, float *, int *, float *, int *); +int BLASFUNC(zswap) (int *, double *, int *, double *, int *); +int BLASFUNC(xswap) (int *, double *, int *, double *, int *); + +float BLASFUNC(sasum) (int *, float *, int *); +float BLASFUNC(scasum)(int *, float *, int *); +double BLASFUNC(dasum) (int *, double *, int *); +double BLASFUNC(qasum) (int *, double *, int *); +double BLASFUNC(dzasum)(int *, double *, int *); +double BLASFUNC(qxasum)(int *, double *, int *); + +int BLASFUNC(isamax)(int *, float *, int *); +int BLASFUNC(idamax)(int *, double *, int *); +int BLASFUNC(iqamax)(int *, double *, int *); +int BLASFUNC(icamax)(int *, float *, int *); +int BLASFUNC(izamax)(int *, double *, int *); +int BLASFUNC(ixamax)(int *, double *, int *); + +int BLASFUNC(ismax) (int *, float *, int *); +int BLASFUNC(idmax) (int *, double *, int *); +int BLASFUNC(iqmax) (int *, double *, int *); +int BLASFUNC(icmax) (int *, float *, int *); +int BLASFUNC(izmax) (int *, double *, int *); +int BLASFUNC(ixmax) (int *, double *, int *); + +int BLASFUNC(isamin)(int *, float *, int *); +int BLASFUNC(idamin)(int *, double *, int *); +int BLASFUNC(iqamin)(int *, double *, int *); +int BLASFUNC(icamin)(int *, float *, int *); +int BLASFUNC(izamin)(int *, double *, int *); +int BLASFUNC(ixamin)(int *, double *, int *); + +int BLASFUNC(ismin)(int *, float *, int *); +int BLASFUNC(idmin)(int *, double *, int *); +int BLASFUNC(iqmin)(int *, double *, int *); +int BLASFUNC(icmin)(int *, float *, int *); +int BLASFUNC(izmin)(int *, double *, int *); +int BLASFUNC(ixmin)(int *, double *, int *); + +float BLASFUNC(samax) (int *, float *, int *); +double BLASFUNC(damax) (int *, double *, int *); +double BLASFUNC(qamax) (int *, double *, int *); +float BLASFUNC(scamax)(int *, float *, int *); +double BLASFUNC(dzamax)(int *, double *, int *); +double BLASFUNC(qxamax)(int *, double *, int *); + +float BLASFUNC(samin) (int *, float *, int *); +double BLASFUNC(damin) (int *, double *, int *); +double BLASFUNC(qamin) (int *, double *, int *); +float BLASFUNC(scamin)(int *, float *, int *); +double BLASFUNC(dzamin)(int *, double *, int *); +double BLASFUNC(qxamin)(int *, double *, int *); + +float BLASFUNC(smax) (int *, float *, int *); +double BLASFUNC(dmax) (int *, double *, int *); +double BLASFUNC(qmax) (int *, double *, int *); +float BLASFUNC(scmax) (int *, float *, int *); +double BLASFUNC(dzmax) (int *, double *, int *); +double BLASFUNC(qxmax) (int *, double *, int *); + +float BLASFUNC(smin) (int *, float *, int *); +double BLASFUNC(dmin) (int *, double *, int *); +double BLASFUNC(qmin) (int *, double *, int *); +float BLASFUNC(scmin) (int *, float *, int *); +double BLASFUNC(dzmin) (int *, double *, int *); +double BLASFUNC(qxmin) (int *, double *, int *); + +int BLASFUNC(sscal) (int *, float *, float *, int *); +int BLASFUNC(dscal) (int *, double *, double *, int *); +int BLASFUNC(qscal) (int *, double *, double *, int *); +int BLASFUNC(cscal) (int *, float *, float *, int *); +int BLASFUNC(zscal) (int *, double *, double *, int *); +int BLASFUNC(xscal) (int *, double *, double *, int *); +int BLASFUNC(csscal)(int *, float *, float *, int *); +int BLASFUNC(zdscal)(int *, double *, double *, int *); +int BLASFUNC(xqscal)(int *, double *, double *, int *); + +float BLASFUNC(snrm2) (int *, float *, int *); +float BLASFUNC(scnrm2)(int *, float *, int *); + +double BLASFUNC(dnrm2) (int *, double *, int *); +double BLASFUNC(qnrm2) (int *, double *, int *); +double BLASFUNC(dznrm2)(int *, double *, int *); +double BLASFUNC(qxnrm2)(int *, double *, int *); + +int BLASFUNC(srot) (int *, float *, int *, float *, int *, float *, float *); +int BLASFUNC(drot) (int *, double *, int *, double *, int *, double *, double *); +int BLASFUNC(qrot) (int *, double *, int *, double *, int *, double *, double *); +int BLASFUNC(csrot) (int *, float *, int *, float *, int *, float *, float *); +int BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *); +int BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *); + +int BLASFUNC(srotg) (float *, float *, float *, float *); +int BLASFUNC(drotg) (double *, double *, double *, double *); +int BLASFUNC(qrotg) (double *, double *, double *, double *); +int BLASFUNC(crotg) (float *, float *, float *, float *); +int BLASFUNC(zrotg) (double *, double *, double *, double *); +int BLASFUNC(xrotg) (double *, double *, double *, double *); + +int BLASFUNC(srotmg)(float *, float *, float *, float *, float *); +int BLASFUNC(drotmg)(double *, double *, double *, double *, double *); + +int BLASFUNC(srotm) (int *, float *, int *, float *, int *, float *); +int BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *); +int BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *); + +/* Level 2 routines */ + +int BLASFUNC(sger)(int *, int *, float *, float *, int *, + float *, int *, float *, int *); +int BLASFUNC(dger)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(qger)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(cgeru)(int *, int *, float *, float *, int *, + float *, int *, float *, int *); +int BLASFUNC(cgerc)(int *, int *, float *, float *, int *, + float *, int *, float *, int *); +int BLASFUNC(zgeru)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(zgerc)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); + +int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); + +int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *); + +int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); + +int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(ctpmv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *); + +int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); + +int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); + +int BLASFUNC(ssymv) (char *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(csymv) (char *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(sspmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(dspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(qspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(cspmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(zspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(xspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); + +int BLASFUNC(ssyr) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(dsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(qsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(csyr) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(xsyr) (char *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(ssyr2) (char *, int *, float *, + float *, int *, float *, int *, float *, int *); +int BLASFUNC(dsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(qsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(csyr2) (char *, int *, float *, + float *, int *, float *, int *, float *, int *); +int BLASFUNC(zsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(xsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); + +int BLASFUNC(sspr) (char *, int *, float *, float *, int *, + float *); +int BLASFUNC(dspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(qspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(cspr) (char *, int *, float *, float *, int *, + float *); +int BLASFUNC(zspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(xspr) (char *, int *, double *, double *, int *, + double *); + +int BLASFUNC(sspr2) (char *, int *, float *, + float *, int *, float *, int *, float *); +int BLASFUNC(dspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(qspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(cspr2) (char *, int *, float *, + float *, int *, float *, int *, float *); +int BLASFUNC(zspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(xspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); + +int BLASFUNC(cher) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zher) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(xher) (char *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(chpr) (char *, int *, float *, float *, int *, float *); +int BLASFUNC(zhpr) (char *, int *, double *, double *, int *, double *); +int BLASFUNC(xhpr) (char *, int *, double *, double *, int *, double *); + +int BLASFUNC(cher2) (char *, int *, float *, + float *, int *, float *, int *, float *, int *); +int BLASFUNC(zher2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(xher2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); + +int BLASFUNC(chpr2) (char *, int *, float *, + float *, int *, float *, int *, float *); +int BLASFUNC(zhpr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(xhpr2) (char *, int *, double *, + double *, int *, double *, int *, double *); + +int BLASFUNC(chemv) (char *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhemv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhemv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(chpmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhpmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhpmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); + +int BLASFUNC(snorm)(char *, int *, int *, float *, int *); +int BLASFUNC(dnorm)(char *, int *, int *, double *, int *); +int BLASFUNC(cnorm)(char *, int *, int *, float *, int *); +int BLASFUNC(znorm)(char *, int *, int *, double *, int *); + +int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +/* Level 3 routines */ + +int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *, + float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *, + float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); + +int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, + float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); + +int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *, + float *, float *, int *); +int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *, + double *, double *, int *); +int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *, + float *, float *, int *); +int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *, + double *, double *, int *); + +int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); + +int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); + +int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *, + float *, float *, int *); +int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *, + float *, float *, int *); +int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); + +int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); + +int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *, + float *, float *, int *); +int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); + +int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); + +int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(sgema)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(dgema)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); +int BLASFUNC(cgema)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(zgema)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); + +int BLASFUNC(sgems)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(dgems)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); +int BLASFUNC(cgems)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(zgems)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); + +int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); + +//int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); +//int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); + +int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); +int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); +int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); + +int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); + +int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *); + +int BLASFUNC(spotf2)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); + +//int BLASFUNC(spotrf)(char *, int *, float *, int *, int *); +//int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); + +int BLASFUNC(slauu2)(char *, int *, float *, int *, int *); +int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(clauu2)(char *, int *, float *, int *, int *); +int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); + +int BLASFUNC(slauum)(char *, int *, float *, int *, int *); +int BLASFUNC(dlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(qlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(clauum)(char *, int *, float *, int *, int *); +int BLASFUNC(zlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(xlauum)(char *, int *, double *, int *, int *); + +int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); + +int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); + +int BLASFUNC(spotri)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); + +} + +#endif diff --git a/btl/libs/BLAS/blas_interface.hh b/btl/libs/BLAS/blas_interface.hh new file mode 100644 index 0000000..19fc3c1 --- /dev/null +++ b/btl/libs/BLAS/blas_interface.hh @@ -0,0 +1,77 @@ +//===================================================== +// File : blas_interface.hh +// Author : L. Plagne <laurent.plagne@edf.fr)> +// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef blas_PRODUIT_MATRICE_VECTEUR_HH +#define blas_PRODUIT_MATRICE_VECTEUR_HH + +#include <c_interface_base.h> +#include <complex> +extern "C" +{ +#include "blas.h" +#ifdef CBLAS_INTERFACE +# include "cblas.h" +#endif +} + +#define MAKE_STRING2(S) #S +#define MAKE_STRING(S) MAKE_STRING2(S) + +#define CAT2(A,B) A##B +#define CAT(A,B) CAT2(A,B) + + +template<class real> class blas_interface; + + +static char notrans = 'N'; +static char trans = 'T'; +static char nonunit = 'N'; +static char lower = 'L'; +static char right = 'R'; +static char left = 'L'; +static int intone = 1; + + +#define SCALAR float +#define SCALAR_PREFIX s +#ifdef CBLAS_INTERFACE +# include "cblas_interface_impl.hh" +#else +# include "blas_interface_impl.hh" +#endif +#undef SCALAR +#undef SCALAR_PREFIX + + +#define SCALAR double +#define SCALAR_PREFIX d +#ifdef CBLAS_INTERFACE +# include "cblas_interface_impl.hh" +#else +# include "blas_interface_impl.hh" +#endif +#undef SCALAR +#undef SCALAR_PREFIX + +#endif + + + diff --git a/btl/libs/BLAS/blas_interface_impl.hh b/btl/libs/BLAS/blas_interface_impl.hh new file mode 100644 index 0000000..63883a9 --- /dev/null +++ b/btl/libs/BLAS/blas_interface_impl.hh @@ -0,0 +1,96 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// + +#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) + +template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR> +{ + +public : + + static SCALAR fone; + static SCALAR fzero; + + static inline std::string name() + { + return MAKE_STRING(CBLASNAME); + } + + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(gemv)(¬rans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone); + } + + static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone); + } + + static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N); + } + + static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){ + BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N); + } + + static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){ + BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s); + } + + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone); + } + + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); + } + + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); + } + + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ + BLAS_FUNC(syrk)(&lower,¬rans,&N,&N,&fone,A,&N,&fzero,X,&N); + } + + static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){ + BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone); + } + + static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){ + BLAS_FUNC(scal)(&N,&b,Y,&intone); + BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone); + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + BLAS_FUNC(copy)(&N, B, &intone, X, &intone); + BLAS_FUNC(trsv)(&lower, ¬rans, &nonunit, &N, L, &N, X, &intone); + } + + static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){ + BLAS_FUNC(copy)(&N, B, &intone, X, &intone); + BLAS_FUNC(trsm)(&right, &lower, ¬rans, &nonunit, &N, &N, &fone, L, &N, X, &N); + } + + static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(trmm)(&left, &lower, ¬rans,&nonunit, &N,&N,&fone,A,&N,B,&N); + } + +}; + +SCALAR blas_interface<SCALAR>::fone = SCALAR(1); +SCALAR blas_interface<SCALAR>::fzero = SCALAR(0); diff --git a/btl/libs/BLAS/c_interface_base.h b/btl/libs/BLAS/c_interface_base.h new file mode 100644 index 0000000..3b6afbb --- /dev/null +++ b/btl/libs/BLAS/c_interface_base.h @@ -0,0 +1,89 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BTL_C_INTERFACE_BASE_H +#define BTL_C_INTERFACE_BASE_H + +#include "utilities.h" +#include <vector> + +template<class real> class c_interface_base +{ + +public: + + typedef real real_type; + typedef std::vector<real> stl_vector; + typedef std::vector<stl_vector > stl_matrix; + + typedef real* gene_matrix; + typedef real* gene_vector; + + static void free_matrix(gene_matrix & A, int N){ + delete A; + } + + static void free_vector(gene_vector & B){ + delete B; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + int N = A_stl.size(); + A = new real[N*N]; + for (int j=0;j<N;j++) + for (int i=0;i<N;i++) + A[i+N*j] = A_stl[j][i]; + } + + static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + int N = B_stl.size(); + B = new real[N]; + for (int i=0;i<N;i++) + B[i] = B_stl[i]; + } + + static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){ + int N = B_stl.size(); + for (int i=0;i<N;i++) + B_stl[i] = B[i]; + } + + static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){ + int N = A_stl.size(); + for (int j=0;j<N;j++){ + A_stl[j].resize(N); + for (int i=0;i<N;i++) + A_stl[j][i] = A[i+N*j]; + } + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + for (int i=0;i<N;i++) + cible[i]=source[i]; + } + + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + for (int j=0;j<N;j++){ + for (int i=0;i<N;i++){ + cible[i+N*j] = source[i+N*j]; + } + } + } + +}; + +#endif diff --git a/btl/libs/BLAS/cblas_interface_impl.hh b/btl/libs/BLAS/cblas_interface_impl.hh new file mode 100644 index 0000000..2da195e --- /dev/null +++ b/btl/libs/BLAS/cblas_interface_impl.hh @@ -0,0 +1,96 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// + +#define BLAS_FUNC(NAME) CAT(cblas_,CAT(SCALAR_PREFIX,NAME)) + +template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR> +{ + +public : + + static SCALAR fone; + static SCALAR fzero; + + static inline std::string name() + { + return MAKE_STRING(CBLASNAME); + } + + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(gemv)(CblasColMajor,CblasNoTrans,N,N,fone,A,N,B,intone,fzero,X,intone); + } + + static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(symv)(CblasColMajor,CblasLower,N,fone,A,N,B,intone,fzero,X,intone); + } + + static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(syr2)(CblasColMajor,CblasLower,N,fone,B,intone,X,intone,A,N); + } + + static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){ + BLAS_FUNC(ger)(CblasColMajor,N,N,fone,X,intone,Y,intone,A,N); + } + + static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){ + BLAS_FUNC(rot)(N,A,intone,B,intone,c,s); + } + + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(gemv)(CblasColMajor,CblasTrans,N,N,fone,A,N,B,intone,fzero,X,intone); + } + + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(gemm)(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,fone,A,N,B,N,fzero,X,N); + } + + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(gemm)(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,fone,A,N,B,N,fzero,X,N); + } + + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ + BLAS_FUNC(syrk)(CblasColMajor,CblasLower,CblasNoTrans,N,N,fone,A,N,fzero,X,N); + } + + static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){ + BLAS_FUNC(axpy)(N,coef,X,intone,Y,intone); + } + + static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){ + BLAS_FUNC(scal)(N,b,Y,intone); + BLAS_FUNC(axpy)(N,a,X,intone,Y,intone); + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + BLAS_FUNC(copy)(N, B, intone, X, intone); + BLAS_FUNC(trsv)(CblasColMajor,CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, intone); + } + + static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){ + BLAS_FUNC(copy)(N, B, intone, X, intone); + BLAS_FUNC(trsm)(CblasColMajor,CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, fone, L, N, X, N); + } + + static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(trmm)(CblasColMajor,CblasLeft, CblasLower, CblasNoTrans,CblasNonUnit, N,N,fone,A,N,B,N); + } + +}; + +SCALAR blas_interface<SCALAR>::fone = SCALAR(1); +SCALAR blas_interface<SCALAR>::fzero = SCALAR(0); diff --git a/btl/libs/BLAS/main.cpp b/btl/libs/BLAS/main.cpp new file mode 100644 index 0000000..401cb5b --- /dev/null +++ b/btl/libs/BLAS/main.cpp @@ -0,0 +1,116 @@ +//===================================================== +// File : main.cpp +// Author : L. Plagne <laurent.plagne@edf.fr)> +// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// + +#ifndef BLAS_INTERFACE +# ifndef CBLAS_INTERFACE +# define BLAS_INTERFACE +# endif +#endif + +#include "utilities.h" +#include "blas_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" +#include "action_trisolve_matrix.hh" + +#include <string> + +BTL_MAIN; + +int main(int argv, char **argc) +{ + bool + axpy=false, axpby=false, rot=false, + matrix_vector=false, atv=false, symv=false, syr2=false, ger=false, trisolve_vector=false, + matrix_matrix=false, aat=false, trisolve_matrix=false, trmm=false + ; + int N = 100; + + + for (int i = 1; i < argv; ++i) { + std::string arg = argc[i]; + if (arg == "axpy") axpy = true; + else if (arg == "axpby") axpby = true; + else if (arg == "rot") rot = true; + else if (arg == "matrix_vector") matrix_vector = true; + else if (arg == "atv") atv = true; + else if (arg == "symv") symv = true; + else if (arg == "syr2") syr2 = true; + else if (arg == "ger") ger = true; + else if (arg == "trisolve_vector") trisolve_vector = true; + else if (arg == "matrix_matrix") matrix_matrix = true; + else if (arg == "aat") aat = true; + else if (arg == "trisolve_matrix") trisolve_matrix = true; + else if (arg == "trmm") trmm = true; + + else if (arg[0] == '1' && arg[1] == '\0') { + axpy = true; axpby = true; rot = true; + } + else if (arg[0] == '2' && arg[1] == '\0') { + matrix_vector=true; atv=true; symv=true; syr2=true; ger=true; trisolve_vector=true; + } + else if (arg[0] == '3' && arg[1] == '\0') { + matrix_matrix=true; aat=true; trisolve_matrix=true; trmm=true; + } + + // Check switch -N + else if (arg[0] == '-' && arg[1] == 'N') { + if (arg[2] != '\0') + N = atoi(arg.c_str()+2); + else + N = atoi(argc[++i]); + } + } + + if (axpy) + bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N); + if (axpby) + bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N); + if (rot) + bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N); + + if (matrix_vector) + bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N); + if (atv) + bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N); + if (symv) + bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N); + if (syr2) + bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N); + if (ger) + bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N); + if (trisolve_vector) + bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N); + + if (matrix_matrix) + bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N); + if (aat) + bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N); + if (trisolve_matrix) + bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N); + if (trmm) + bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N); + + + return 0; +} + + diff --git a/btl/libs/FFTW/fftw_interface.hh b/btl/libs/FFTW/fftw_interface.hh new file mode 100644 index 0000000..81701e7 --- /dev/null +++ b/btl/libs/FFTW/fftw_interface.hh @@ -0,0 +1,87 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BTL_FFTW_INTERFACE_H +#define BTL_FFTW_INTERFACE_H + +#include <complex> +#include <fftw3.h> +#include <vector> + +class fftw_interface +{ +public: + + static inline std::string name() + { + return std::string("fftw"); + } + + typedef std::complex<double> real_type; + typedef std::vector<std::complex<double> > stl_vector; + + typedef fftw_complex* gene_vector; + + typedef fftw_plan plan; + + static inline void free_vector(gene_vector & B){ + fftw_free(B); + } + + static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + int N = B_stl.size(); + B = (gene_vector) fftw_malloc(sizeof(fftw_complex) * N); + std::complex<double>* B_cplx = reinterpret_cast<std::complex<double>* >(B); + + for (int i=0;i<N;i++) + B_cplx[i] = B_stl[i]; + } + + static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){ + int N = B_stl.size(); + std::complex<double>* B_cplx = reinterpret_cast<std::complex<double>* >(B); + for (int i=0;i<N;i++) + B_stl[i] = B_cplx[i]; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + for (int i=0;i<N;i++) { + cible[i][0]=source[i][0]; + cible[i][1]=source[i][1]; + } + } + + /* Begin FFTW operations */ + + static inline void fftw_init_plan(plan & p, const int & N, gene_vector & x, gene_vector & y, const int & sign, const int & flags){ + p = fftw_plan_dft_1d(N, x, y, sign, flags); + } + + static inline void fftw_init_plan_2d(plan & p, const int & N, gene_vector & x, gene_vector& y, const int & sign, const int & flags){ + p = fftw_plan_dft_2d(N, N, x, y, sign, flags); + } + + static inline void fftw_init_plan_3d(plan & p, const int & N, gene_vector & x, gene_vector& y, const int & sign, const int & flags){ + p = fftw_plan_dft_3d(N, N, N, x, y, sign, flags); + } + + static inline void fftw_run(plan & p){ + fftw_execute(p); + } +}; + +#endif // BTL_FFTW_INTERFACE_H diff --git a/btl/libs/FFTW/main.cpp b/btl/libs/FFTW/main.cpp new file mode 100644 index 0000000..8fa452b --- /dev/null +++ b/btl/libs/FFTW/main.cpp @@ -0,0 +1,107 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "bench.hh" +#include "fftw_interface.hh" +#include "fftw_actions.hh" + +#include <string> + +BTL_MAIN; + +int main(int argc, char **argv) +{ + bool + fftw_1d_forward_measure = false, + fftw_1d_forward_estimate = false, + fftw_1d_backward_measure = false, + fftw_1d_backward_estimate = false, + + fftw_2d_forward_measure = false, + fftw_2d_forward_estimate = false, + fftw_2d_backward_measure = false, + fftw_2d_backward_estimate = false, + + fftw_3d_forward_measure = false, + fftw_3d_forward_estimate = false, + fftw_3d_backward_measure = false, + fftw_3d_backward_estimate = false + ; + int N = 100; + + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "FFTW_1D_Forward_Measure" || arg == "all") fftw_1d_forward_measure = true; + else if (arg == "FFTW_1D_Forward_Estimate" || arg == "all") fftw_1d_forward_estimate = true; + else if (arg == "FFTW_1D_Backward_Measure" || arg == "all") fftw_1d_backward_measure = true; + else if (arg == "FFTW_1D_Backward_Estimate" || arg == "all") fftw_1d_backward_estimate = true; + + else if (arg == "FFTW_2D_Forward_Measure" || arg == "all") fftw_2d_forward_measure = true; + else if (arg == "FFTW_2D_Forward_Estimate" || arg == "all") fftw_2d_forward_estimate = true; + else if (arg == "FFTW_2D_Backward_Measure" || arg == "all") fftw_2d_backward_measure = true; + else if (arg == "FFTW_2D_Backward_Estimate" || arg == "all") fftw_2d_backward_estimate = true; + + else if (arg == "FFTW_3D_Forward_Measure" || arg == "all") fftw_3d_forward_measure = true; + else if (arg == "FFTW_3D_Forward_Estimate" || arg == "all") fftw_3d_forward_estimate = true; + else if (arg == "FFTW_3D_Backward_Measure" || arg == "all") fftw_3d_backward_measure = true; + else if (arg == "FFTW_3D_Backward_Estimate" || arg == "all") fftw_3d_backward_estimate = true; + + // Check switch -N + else if (arg[0] == '-' && arg[1] == 'N') { + if (arg[2] != '\0') + N = atoi(arg.c_str()+2); + else + N = atoi(argv[++i]); + } + } + + + if (fftw_1d_forward_measure) + bench<Action_FFTW_1D_Forward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N); + if (fftw_1d_forward_estimate) + bench<Action_FFTW_1D_Forward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N); + if (fftw_1d_backward_measure) + bench<Action_FFTW_1D_Backward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N); + if (fftw_1d_backward_estimate) + bench<Action_FFTW_1D_Backward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N); + + if (fftw_2d_forward_measure) + bench<Action_FFTW_2D_Forward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N); + if (fftw_2d_forward_estimate) + bench<Action_FFTW_2D_Forward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N); + if (fftw_2d_backward_measure) + bench<Action_FFTW_2D_Backward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N); + if (fftw_2d_backward_estimate) + bench<Action_FFTW_2D_Backward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N); + + if (fftw_3d_forward_measure) + bench<Action_FFTW_3D_Forward_Measure<fftw_interface> >(MIN_MV,250, N); + if (fftw_3d_forward_estimate) + bench<Action_FFTW_3D_Forward_Estimate<fftw_interface> >(MIN_MV,250, N); + if (fftw_3d_backward_measure) + bench<Action_FFTW_3D_Backward_Measure<fftw_interface> >(MIN_MV,250, N); + if (fftw_3d_backward_estimate) + bench<Action_FFTW_3D_Backward_Estimate<fftw_interface> >(MIN_MV,250, N); + + + return 0; +} + + diff --git a/btl/libs/LAPACK/lapack.hh b/btl/libs/LAPACK/lapack.hh new file mode 100644 index 0000000..a6d46f1 --- /dev/null +++ b/btl/libs/LAPACK/lapack.hh @@ -0,0 +1,33 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef LAPACK_HH +#define LAPACK_HH + +#define SCALAR float +#define SCALAR_PREFIX s +#include "lapack_.hh" +#undef SCALAR +#undef SCALAR_PREFIX + +#define SCALAR double +#define SCALAR_PREFIX d +#include "lapack_.hh" +#undef SCALAR +#undef SCALAR_PREFIX + +#endif /* LAPACK_HH */ diff --git a/btl/libs/LAPACK/lapack_.hh b/btl/libs/LAPACK/lapack_.hh new file mode 100644 index 0000000..e7c8c3f --- /dev/null +++ b/btl/libs/LAPACK/lapack_.hh @@ -0,0 +1,38 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef CAT +# define CAT2(A,B) A##B +# define CAT(A,B) CAT2(A,B) +#endif +#define LAPACKFUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) + +#ifdef __cplusplus +extern "C" { +#endif + +void LAPACKFUNC(getrf)(const int*, const int*, SCALAR*, const int*, int*, int*); +void LAPACKFUNC(potrf)(const char*, const int*, SCALAR*, const int*, int*); +void LAPACKFUNC(geqrf)(const int*, const int*, SCALAR*, const int*, SCALAR*, SCALAR*, const int*, int*); +void LAPACKFUNC(gesvd)(const char*, const char*, const int*, const int*, SCALAR*, const int*, SCALAR*, SCALAR*, const int*, SCALAR*, const int*, SCALAR*, const int*, int*); +void LAPACKFUNC(syev)(const char*, const char*, const int*, SCALAR*, const int*, SCALAR*, SCALAR*, const int*, int*); +void LAPACKFUNC(stev)(const char*, const int*, SCALAR*, SCALAR*, SCALAR*, const int*, SCALAR*, int*); + +#ifdef __cplusplus +} +#endif + diff --git a/btl/libs/LAPACK/lapack_interface.hh b/btl/libs/LAPACK/lapack_interface.hh new file mode 100644 index 0000000..a0ec52b --- /dev/null +++ b/btl/libs/LAPACK/lapack_interface.hh @@ -0,0 +1,78 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef LAPACK_INTERFACE_HH +#define LAPACK_INTERFACE_HH + +#include <../BLAS/c_interface_base.h> +#include <complex> +#include "lapack.hh" + +extern "C" { +#include "../BLAS/blas.h" + +//void sgesv_(int*, int*, float *, int*, int*, float *, int*, int*); +//void dgesv_(int*, int*, double*, int*, int*, double*, int*, int*); + +void sgels_(char*, int*, int*, int*, float *, int*, float *, int*, float *, int*, int*); +void dgels_(char*, int*, int*, int*, double*, int*, double*, int*, double*, int*, int*); + +//void sgetrf_(int*, int*, float *, int*, int*, int*); +//void dgetrf_(int*, int*, double*, int*, int*, int*); + +//void spotrf_(char*, int*, float *, int*, int*); +//void dpotrf_(char*, int*, double*, int*, int*); + +//void ssyev_(char*, char*, int*, float *, int*, float *, float *, int*, int*); +//void dsyev_(char*, char*, int*, double*, int*, double*, double*, int*, int*); +} + + +#define MAKE_STRING2(S) #S +#define MAKE_STRING(S) MAKE_STRING2(S) + +#ifndef CAT +# define CAT2(A,B) A##B +# define CAT(A,B) CAT2(A,B) +#endif + +template <typename real> class lapack_interface; + + +static char notrans = 'N'; +static char trans = 'T'; +static char nonunit = 'N'; +static char lower = 'L'; +static char right = 'R'; +static char left = 'L'; +static int intone = 1; +static int zeroint = 0; + + +#define SCALAR float +#define SCALAR_PREFIX s +#include "lapack_interface_impl.hh" +#undef SCALAR +#undef SCALAR_PREFIX + +#define SCALAR double +#define SCALAR_PREFIX d +#include "lapack_interface_impl.hh" +#undef SCALAR +#undef SCALAR_PREFIX + +#endif /* LAPACK_INTERFACE_HH */ diff --git a/btl/libs/LAPACK/lapack_interface_impl.hh b/btl/libs/LAPACK/lapack_interface_impl.hh new file mode 100644 index 0000000..a95bc5b --- /dev/null +++ b/btl/libs/LAPACK/lapack_interface_impl.hh @@ -0,0 +1,139 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#define LAPACKFUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) + +template<> class lapack_interface<SCALAR> : public c_interface_base<SCALAR> +{ +public: + + static inline std::string name() + { + return MAKE_STRING(LAPACKNAME); + } + + static inline void general_solve(gene_matrix& A, gene_vector& b, gene_vector& x, int N) + { + int *ipiv = new int[N]; + int info; + LAPACKFUNC(copy)(&N, b, &intone, x, &intone); + LAPACKFUNC(gesv)(&N, &intone, A, &N, ipiv, x, &N, &info); + delete[] ipiv; + } + + static inline void least_squares(gene_matrix& A, gene_vector& b, gene_vector& x, int N) + { + int *ipiv = new int[N]; + int info; + LAPACKFUNC(copy)(&N, b, &intone, x, &intone); + SCALAR work1; + int MONE = -1; + LAPACKFUNC(gels)(¬rans, &N, &N, &intone, A, &N, x, &N, &work1, &MONE, &info); + int lwork = (int)work1; + SCALAR *work2 = new SCALAR[lwork]; + LAPACKFUNC(gels)(¬rans, &N, &N, &intone, A, &N, x, &N, work2, &lwork, &info); + delete[] work2; + delete[] ipiv; + } + + static inline void lu_decomp(const gene_matrix& X, gene_matrix& C, int N) + { + int N2 = N*N; + int *ipiv = new int[N]; + int info; + LAPACKFUNC(copy)(&N2, X, &intone, C, &intone); + LAPACKFUNC(getrf)(&N, &N, C, &N, ipiv, &info); + delete[] ipiv; + } + + static inline void cholesky(const gene_matrix& X, gene_matrix& C, int N) + { + int N2 = N*N; + int info; + LAPACKFUNC(copy)(&N2, X, &intone, C, &intone); + LAPACKFUNC(potrf)(&lower, &N, C, &N, &info); + } + + static inline void qr_decomp(const gene_matrix& X, gene_matrix& QR, gene_vector& tau, const int& N) + { + int N2 = N*N; + LAPACKFUNC(copy)(&N2, X, &intone, QR, &intone); + + SCALAR *work = new SCALAR; + int lwork = -1, info; + LAPACKFUNC(geqrf)(&N, &N, QR, &N, tau, work, &lwork, &info); + lwork = *work; + delete work; + work = new SCALAR[lwork]; + LAPACKFUNC(geqrf)(&N, &N, QR, &N, tau, work, &lwork, &info); + delete[] work; + } + + static inline void svd_decomp(const gene_matrix& X, gene_matrix& U, gene_vector& S, gene_matrix& VT, const int& N) + { + int N2 = N*N; + stl_vector Xcopy(N2); + LAPACKFUNC(copy)(&N2, X, &intone, &Xcopy[0], &intone); + + stl_vector work(1); + int lwork = -1, info; + LAPACKFUNC(gesvd)("A", "A", &N, &N, &Xcopy[0], &N, S, U, &N, VT, &N, &work[0], &lwork, &info); + lwork = work[0]; + work.resize(lwork); + LAPACKFUNC(gesvd)("A", "A", &N, &N, &Xcopy[0], &N, S, U, &N, VT, &N, &work[0], &lwork, &info); + } + + static inline void syev(const gene_matrix& X, gene_matrix& V, gene_vector& W, const int& N) + { + int N2 = N*N; + LAPACKFUNC(copy)(&N2, X, &intone, V, &intone); + + stl_vector work(1); + int lwork = -1, info; + LAPACKFUNC(syev)("V", "U", &N, V, &N, W, &work[0], &lwork, &info); + lwork = work[0]; + work.resize(lwork); + LAPACKFUNC(syev)("V", "U", &N, V, &N, W, &work[0], &lwork, &info); + } + + /* Size of D, W: N; size of E: N-1, size of V: NxN */ + static inline void stev(const gene_vector& D, const gene_vector& E, gene_vector& W, gene_matrix& V, const int& N) + { + int N0 = N; + int N1 = N-1; + LAPACKFUNC(copy)(&N0, D, &intone, W, &intone); + stl_vector E_(E, E+N-1), work(max(1, 2*N-2)); + + int info; + LAPACKFUNC(stev)("V", &N, W, &E_[0], V, &N, &work[0], &info); + } + + static inline void symm_ev(const gene_matrix& X, gene_vector& W, int N) + { + char jobz = 'N'; + SCALAR *work = new SCALAR; + int lwork = -1, info; + LAPACKFUNC(syev)(&jobz, &lower, &N, X, &N, W, work, &lwork, &info); + lwork = *work; + delete work; + work = new SCALAR[lwork]; + LAPACKFUNC(syev)(&jobz, &lower, &N, X, &N, W, work, &lwork, &info); + delete[] work; + } + + +}; diff --git a/btl/libs/LAPACK/main.cpp b/btl/libs/LAPACK/main.cpp new file mode 100644 index 0000000..e70c161 --- /dev/null +++ b/btl/libs/LAPACK/main.cpp @@ -0,0 +1,95 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include <btl.hh> +#include <bench.hh> +#include <action_general_solve.hh> +#include <action_least_squares.hh> +#include <action_lu_decomp.hh> +#include <action_cholesky.hh> +#include <action_qr_decomp.hh> +#include <action_svd_decomp.hh> +#include <action_syev.hh> +#include <action_stev.hh> +#include <action_symm_ev.hh> +#include <lapack_interface.hh> + +#include <string> +#include <cstdlib> + +BTL_MAIN; + +int main(int argc, char **argv) +{ + bool + general_solve=false, least_squares=false, lu_decomp=false, cholesky=false, qr_decomp=false, svd_decomp=false, + syev=false, stev=false, symm_ev=false + ; + int N = 100; + + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "general_solve") general_solve = true; + else if (arg == "least_squares") least_squares = true; + else if (arg == "lu_decomp") lu_decomp = true; + else if (arg == "cholesky") cholesky = true; + else if (arg == "qr_decomp") qr_decomp = true; + else if (arg == "svd_decomp") svd_decomp = true; + else if (arg == "syev") syev = true; + else if (arg == "stev") stev = true; + else if (arg == "symm_ev") symm_ev = true; + + // Check switch -N + else if (arg[0] == '-' && arg[1] == 'N') { + if (arg[2] != '\0') + N = atoi(arg.c_str()+2); + else + N = atoi(argv[++i]); + } + } + + + if (general_solve) + bench<Action_general_solve<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N); + + if (least_squares) + bench<Action_least_squares<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N); + + if (lu_decomp) + bench<Action_lu_decomp<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N); + + if (cholesky) + bench<Action_cholesky<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N); + + if (qr_decomp) + bench<Action_qr_decomp<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N); + + if (svd_decomp) + bench<Action_svd_decomp<lapack_interface<REAL_TYPE> > >(MIN_MM,750,N); + + if (syev) + bench<Action_syev<lapack_interface<REAL_TYPE> > >(MIN_MM,750,N); + + if (stev) + bench<Action_stev<lapack_interface<REAL_TYPE> > >(MIN_MM,1000,N); + + if (symm_ev) + bench<Action_symm_ev<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N); + + return 0; +} diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp new file mode 100644 index 0000000..427cf7e --- /dev/null +++ b/btl/libs/PBLAS/main.cpp @@ -0,0 +1,103 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#define DISTRIBUTED + +#include "mpi.h" +#include "utilities.h" +#include "bench.hh" + +#include <iostream> +//using namespace std; + +#include "pblas_interface.hh" +#include "blacsinit.hh" + +#include "action_parallel_axpy.hh" +#include "action_parallel_matrix_vector_product.hh" +#include "action_parallel_lu_decomp.hh" +#include "action_parallel_cholesky.hh" +#include "action_parallel_qr_decomp.hh" +#include "action_parallel_svd_decomp.hh" +#include "action_parallel_symm_ev.hh" + +#include <string> + +BTL_MAIN; + +int main(int argc, char **argv) +{ + bool iamroot = blacsinit(&argc, &argv); + + bool + axpy=false, matrix_vector=false, + lu_decomp=false, cholesky=false, qr_decomp=false, svd_decomp=false, + symm_ev=false + ; + + int N = 100; + + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "axpy") axpy = true; + else if (arg == "matrix_vector") matrix_vector=true; + else if (arg == "lu_decomp") lu_decomp = true; + else if (arg == "cholesky") cholesky = true; + else if (arg == "qr_decomp") qr_decomp = true; + else if (arg == "svd_decomp") svd_decomp = true; + else if (arg == "symm_ev") symm_ev = true; + + // Check switch -N + else if (arg[0] == '-' && arg[1] == 'N') { + if (arg[2] != '\0') + N = atoi(arg.c_str()+2); + else + N = atoi(argv[++i]); + } + } + + + + if (axpy) + distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N, !iamroot); + + if (matrix_vector) + distr_bench<Action_parallel_matrix_vector_product<pblas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N, !iamroot); + + if (lu_decomp) + distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N, !iamroot); + + if (cholesky) + distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, N, !iamroot); + + if (qr_decomp) + distr_bench<Action_parallel_qr_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N, !iamroot); + + if (svd_decomp) + distr_bench<Action_parallel_svd_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,750, N, !iamroot); + + if (symm_ev) + distr_bench<Action_parallel_symm_ev<pblas_interface<REAL_TYPE> > >(MIN_MM,750, N, !iamroot); + + + int iZERO = 0; + blacs_exit_(&iZERO); + return 0; +} + + diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h new file mode 100644 index 0000000..be76e8f --- /dev/null +++ b/btl/libs/PBLAS/pblas.h @@ -0,0 +1,106 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef SCALAPACK_H_ +#define SCALAPACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif + + int numroc_(const int*, const int*, const int*, const int*, const int*); + void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); + + + /* Level 1 */ + + // Single + void psaxpy_(const int*, const float*, + const float*, const int*, const int*, const int*, const int*, + const float*, const int*, const int*, const int*, const int* + ); + + // Double + void pdaxpy_(const int*, const double*, + const double*, const int*, const int*, const int*, const int*, + const double*, const int*, const int*, const int*, const int* + ); + + + + /* Level 2 */ + + // Single + void psgemv_(const char*, const int*, const int*, + const float*, const float*, const int*, const int*, const int*, + const float*, const int*, const int*, const int*, const int*, + const float*, float*, const int*, const int*, const int*, const int* + ); + + // Double + void pdgemv_(const char*, const int*, const int*, + const double*, const double*, const int*, const int*, const int*, + const double*, const int*, const int*, const int*, const int*, + const double*, double*, const int*, const int*, const int*, const int* + ); + + + /* Level 3 */ + + // Single + void pstrmm_(const char*, const char*, const char*, const char*, const int*, const int*, const float*, + const float*, const int*, const int*, const int*, + float*, const int*, const int*, const int*); + + // Double + void pdtrmm_(const char*, const char*, const char*, const char*, const int*, const int*, const double*, + const double*, const int*, const int*, const int*, + double*, const int*, const int*, const int*); + + + + /************* + * Scalapack * + *************/ + // lu_decomp + void psgetrf_(const int*, const int*, float*, const int*, const int*, const int*, int*, int*); + void pdgetrf_(const int*, const int*, double*, const int*, const int*, const int*, int*, int*); + + // cholesky + void pspotrf_(const char*, const int*, float*, const int*, const int*, const int*, int*); + void pdpotrf_(const char*, const int*, double*, const int*, const int*, const int*, int*); + + // qr_decomp + void psgeqpf_(const int*, const int*, float*, const int*, const int*, const int*, int*, float*, float*, const int*, int*); + void pdgeqpf_(const int*, const int*, double*, const int*, const int*, const int*, int*, double*, double*, const int*, int*); + + // svd_decomp + void psgesvd_(const char*, const char*, const int*, const int*, float*, const int*, const int*, const int*, float*, float*, const int*, const int*, const int*, float*, const int*, const int*, const int*, float*, const int*, int*); + void pdgesvd_(const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, const int*, const int*, double*, const int*, int*); + + // symm_ev + void pssyevd_(const char*, const char*, const int*, float*, const int*, const int*, const int*, float*, float*, const int*, const int*, const int*, float*, const int*, int*, const int*, int*); + void pdsyevd_(const char*, const char*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, int*, const int*, int*); + + +#ifdef __cplusplus +} +#endif + + + +#endif /* SCALAPACK_H_ */ diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh new file mode 100644 index 0000000..6dfd201 --- /dev/null +++ b/btl/libs/PBLAS/pblas_interface.hh @@ -0,0 +1,57 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "blas.h" +#include "pblas.h" +#include "blacs_interface.hh" + +#define MAKE_STRING2(S) #S +#define MAKE_STRING(S) MAKE_STRING2(S) + +#ifndef CAT +#define CAT2(A,B) A##B +#define CAT(A,B) CAT2(A,B) +#endif + + +template<class real> class pblas_interface; + +/* +static char notrans = 'N'; +static char trans = 'T'; +static char nonunit = 'N'; +static char lower = 'L'; +static char right = 'R'; +static char left = 'L'; +static int intone = 1; +*/ + +#define SCALAR float +#define SCALAR_PREFIX s +#include "pblas_interface_impl.hh" +#undef SCALAR +#undef SCALAR_PREFIX + + +#define SCALAR double +#define SCALAR_PREFIX d +#include "pblas_interface_impl.hh" +#undef SCALAR +#undef SCALAR_PREFIX + + + diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh new file mode 100644 index 0000000..a96b0fd --- /dev/null +++ b/btl/libs/PBLAS/pblas_interface_impl.hh @@ -0,0 +1,246 @@ +//===================================================== +// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com> +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// + +#define PBLAS_PREFIX p +#define PBLAS_FUNC(NAME) CAT(CAT(CAT(PBLAS_PREFIX, SCALAR_PREFIX),NAME),_) +#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) + +template<> class pblas_interface<SCALAR> : public blacs_interface<SCALAR> +{ +public: + static inline std::string name() + { + return MAKE_STRING(PBLASNAME); + } + + static inline void parallel_axpy(const SCALAR& coef, + gene_vector& x, int *descX, + gene_vector& y, int *descY, + const int& size + ) + { + int iZERO = 0, iONE = 1; + PBLAS_FUNC(axpy)(&size, &coef, + x, &iONE, &iONE, descX, &iONE, + y, &iONE, &iONE, descY, &iONE + ); + } + + static inline void parallel_matrix_vector_product( + int GlobalRows, int GlobalCols, + gene_matrix& A, int *descA, + gene_vector& x, int *descX, + gene_vector& y, int *descY + ) + { + real_type alpha = 1., beta = 0.; + int iONE = 1; + int myid, procnum; + const char notrans = 'N'; + blacs_pinfo_(&myid, &procnum); + + PBLAS_FUNC(gemv)(¬rans, &GlobalRows, &GlobalCols, + &alpha, A, &iONE, &iONE, descA, + x, &iONE, &iONE, descX, &iONE, + &beta, y, &iONE, &iONE, descY, &iONE + ); + + } + + + static inline void parallel_lu_decomp(gene_matrix& X, std::vector<int>& ipiv, const int* desc) + { + const int GlobalRows = desc[2], GlobalCols = desc[3]; + const int iONE = 1; + int info; + ipiv.resize(desc[8] + desc[4]); + PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, + &ipiv[0], &info); + if (info != 0 && myid() == 0) + cout << " { LU error : " << info << " } "; + } + + static inline void parallel_cholesky(gene_matrix& X, const int* desc) + { + const int N = desc[2], iONE = 1; + const char UP = 'U'; + int info; + PBLAS_FUNC(potrf)(&UP, &N, X, &iONE, &iONE, desc, &info); + if (info != 0 && myid() == 0) + cout << " { cholesky error : " << info << " } "; + } + + static inline void parallel_qr_decomp(gene_matrix& X, const int* desc) + { + const int GlobalRows = desc[2], GlobalCols = desc[3], + BlockRows = desc[4], BlockCols = desc[5], + ctxt = desc[1]; + + int myrow, mycol, nprow, npcol, lwork; + SCALAR lworkd; + blacs_gridinfo_(&ctxt, &nprow, &npcol, &myrow, &mycol); + + const int iONE = 1, iZERO = 0, imONE = -1, + ipivdim = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &npcol); + int info; + std::vector<int> ipiv(ipivdim); + std::vector<SCALAR> tau(ipivdim); + + // Retrieve LWORK + PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info); + lwork = static_cast<int>(lworkd); + if (info != 0 && myid() == 0) + cout << " { qr_decomp lwork error } "; + + std::vector<SCALAR> work(lwork); + PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info); + if (info != 0 && myid() == 0) + cout << " { qr_decomp computation error } "; + } + + static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ) + { + const char jobz = 'V', uplo = 'u'; + const int N = descA[2], iONE = 1, iZERO = 0, imONE = -1; + std::vector<SCALAR> work; + std::vector<int> iwork; + int lwork, liwork, info; + SCALAR lworkd; + + // Retrieve l(i)work + PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w, + Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info); + lwork = static_cast<int>(lworkd); + work.resize(lwork); iwork.resize(liwork); + if (info != 0 && myid() == 0) + cout << " { symm_ev l(i)work error } "; + + PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w, + Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info); + if (info != 0 && myid() == 0) + cout << " { symm_ev computation error } "; + } + + static inline void parallel_svd_decomp(gene_matrix& A, int* descA, gene_matrix& U, int *descU, gene_matrix& V, int *descV, gene_vector& s) + { + const char job = 'V'; + const int size = descA[2], iONE = 1, iZERO = 0, imONE = -1; + std::vector<SCALAR> work; + int info, lwork; + SCALAR lworkd; + + // Retrieve lwork + PBLAS_FUNC(gesvd)(&job, &job, &size, &size, A, &iONE, &iONE, descA, s, + U, &iONE, &iONE, descU, V, &iONE, &iONE, descV, &lworkd, &imONE, &info); + if (info != 0 && myid() == 0) + cout << " { svd_decomp lwork error } "; + lwork = static_cast<int>(lworkd); + work.resize(lwork); + + PBLAS_FUNC(gesvd)(&job, &job, &size, &size, A, &iONE, &iONE, descA, s, + U, &iONE, &iONE, descU, V, &iONE, &iONE, descV, &work[0], &lwork, &info); + if (info != 0 && myid() == 0) + cout << " { svd_decomp computation error } "; + } + + static inline real_type + test_LU(stl_matrix& Global_A, gene_matrix& Local_LU, int *desc) + { + bool iamroot = myid() == 0; + int _size = desc[2]; + + // Create and scatter Identity + int Testdesc[9]; + stl_matrix Global_Test_stl, Local_Test_stl; + if (iamroot) + { + stl_matrix Identity(_size * _size); + for (int r = 0; r < _size; ++r) + Identity[r + _size * r] = 1; + scatter_matrix(Identity, Local_Test_stl, Testdesc, _size, _size, + desc[4], desc[5]); + } + else + scatter_matrix(stl_matrix(), Local_Test_stl, Testdesc); + + // Compute L * U + real_type alpha = 1., malpha = -1; + int iONE = 1; + PBLAS_FUNC(trmm)("L", "L", "N", "N", desc + 2, desc + 2, &alpha, Local_LU, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + PBLAS_FUNC(trmm)("R", "U", "N", "N", desc + 2, desc + 2, &alpha, Local_LU, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + + // Gather result + gather_matrix(Global_Test_stl, Local_Test_stl, desc); + if (iamroot) + { + int size2 = _size*_size; + BLAS_FUNC(axpy)(&size2, &malpha, &Global_A[0], &iONE, + &Global_Test_stl[0], &iONE); + double error = BLAS_FUNC(nrm2)(&size2, &Global_Test_stl[0], &iONE); + error /= BLAS_FUNC(nrm2)(&size2, &Global_A[0], &iONE); + return error; + } + else + return 0.; + } + + static inline real_type + test_cholesky(stl_matrix& Global_A, gene_matrix& Local_U, int *desc) + { + bool iamroot = myid() == 0; + int _size = desc[2]; + + // Create and scatter Identity + int Testdesc[9]; + stl_matrix Global_Test_stl, Local_Test_stl; + if (iamroot) + { + stl_matrix Identity(_size * _size); + for (int r = 0; r < _size; ++r) + Identity[r + _size * r] = 1; + scatter_matrix(Identity, Local_Test_stl, Testdesc, _size, _size, + desc[4], desc[5]); + } + else + scatter_matrix(stl_matrix(), Local_Test_stl, Testdesc); + + // Compute U' * U + real_type alpha = 1., malpha = -1; + int iONE = 1; + PBLAS_FUNC(trmm)("L", "U", "T", "N", desc + 2, desc + 2, &alpha, Local_U, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + PBLAS_FUNC(trmm)("R", "U", "N", "N", desc + 2, desc + 2, &alpha, Local_U, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + + // Gather result + gather_matrix(Global_Test_stl, Local_Test_stl, desc); + if (iamroot) + { + int size2 = _size*_size; + BLAS_FUNC(axpy)(&size2, &malpha, &Global_A[0], &iONE, + &Global_Test_stl[0], &iONE); + double error = BLAS_FUNC(nrm2)(&size2, &Global_Test_stl[0], &iONE); + error /= BLAS_FUNC(nrm2)(&size2, &Global_A[0], &iONE); + return error; + } + else + return 0.; + } +}; diff --git a/btl/libs/STL/CMakeLists.txt b/btl/libs/STL/CMakeLists.txt new file mode 100644 index 0000000..4cfc2dc --- /dev/null +++ b/btl/libs/STL/CMakeLists.txt @@ -0,0 +1,2 @@ + +btl_add_bench(btl_STL main.cpp OFF) diff --git a/btl/libs/STL/STL_interface.hh b/btl/libs/STL/STL_interface.hh new file mode 100644 index 0000000..060cb69 --- /dev/null +++ b/btl/libs/STL/STL_interface.hh @@ -0,0 +1,255 @@ +//===================================================== +// File : STL_interface.hh +// Author : L. Plagne <laurent.plagne@edf.fr)> +// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef STL_INTERFACE_HH +#define STL_INTERFACE_HH +#include <string> +#include <vector> +#include "utilities.h" + +using namespace std; + +template<class real> +class STL_interface{ + +public : + + typedef real real_type ; + + typedef std::vector<real> stl_vector; + typedef std::vector<stl_vector > stl_matrix; + + typedef stl_matrix gene_matrix; + + typedef stl_vector gene_vector; + + static inline std::string name( void ) + { + return "STL"; + } + + static void free_matrix(gene_matrix & A, int N){} + + static void free_vector(gene_vector & B){} + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A = A_stl; + } + + static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + B = B_stl; + } + + static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){ + B_stl = B ; + } + + + static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){ + A_stl = A ; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + for (int i=0;i<N;i++){ + cible[i]=source[i]; + } + } + + + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + for (int i=0;i<N;i++) + for (int j=0;j<N;j++) + cible[i][j]=source[i][j]; + } + +// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N) +// { +// real somme; +// for (int j=0;j<N;j++){ +// for (int i=0;i<N;i++){ +// somme=0.0; +// for (int k=0;k<N;k++) +// somme += A[i][k]*A[j][k]; +// X[j][i]=somme; +// } +// } +// } + + static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N) + { + real somme; + for (int j=0;j<N;j++){ + for (int i=0;i<N;i++){ + somme=0.0; + if(i>=j) + { + for (int k=0;k<N;k++){ + somme+=A[k][i]*A[k][j]; + } + X[j][i]=somme; + } + } + } + } + + + static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N) + { + real somme; + for (int j=0;j<N;j++){ + for (int i=0;i<N;i++){ + somme=0.0; + for (int k=0;k<N;k++) + somme+=A[k][i]*B[j][k]; + X[j][i]=somme; + } + } + } + + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) + { + real somme; + for (int i=0;i<N;i++){ + somme=0.0; + for (int j=0;j<N;j++) + somme+=A[j][i]*B[j]; + X[i]=somme; + } + } + + static inline void matrix_vector_product(gene_vector& A, gene_vector & B, gene_vector & X, int N) + { + real somme; + for (int i=0;i<N;i++){ + somme=0.0; + for (int j=0;j<N;j++) + somme+=A[j*N+i]*B[j]; + X[i]=somme; + } + } + + static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N) + { + for (int j=0; j<N; ++j) + X[j] = 0; + for (int j=0; j<N; ++j) + { + real t1 = B[j]; + real t2 = 0; + X[j] += t1 * A[j][j]; + for (int i=j+1; i<N; ++i) { + X[i] += t1 * A[j][i]; + t2 += A[j][i] * B[i]; + } + X[j] += t2; + } + } + + static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N) + { + for (int j=0; j<N; ++j) + { + for (int i=j; i<N; ++i) + A[j][i] += B[i]*X[j] + B[j]*X[i]; + } + } + + static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N) + { + for (int j=0; j<N; ++j) + { + for (int i=j; i<N; ++i) + A[j][i] += X[i]*Y[j]; + } + } + + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) + { + real somme; + for (int i=0;i<N;i++){ + somme = 0.0; + for (int j=0;j<N;j++) + somme += A[i][j]*B[j]; + X[i] = somme; + } + } + + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){ + for (int i=0;i<N;i++) + Y[i]+=coef*X[i]; + } + + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + for (int i=0;i<N;i++) + Y[i] = a*X[i] + b*Y[i]; + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){ + copy_vector(B,X,N); + for(int i=0; i<N; ++i) + { + X[i] /= L[i][i]; + real tmp = X[i]; + for (int j=i+1; j<N; ++j) + X[j] -= tmp * L[i][j]; + } + } + + static inline real norm_diff(const stl_vector & A, const stl_vector & B) + { + int N=A.size(); + real somme=0.0; + real somme2=0.0; + + for (int i=0;i<N;i++){ + real diff=A[i]-B[i]; + somme+=diff*diff; + somme2+=A[i]*A[i]; + } + return somme/somme2; + } + + static inline real norm_diff(const stl_matrix & A, const stl_matrix & B) + { + int N=A[0].size(); + real somme=0.0; + real somme2=0.0; + + for (int i=0;i<N;i++){ + for (int j=0;j<N;j++){ + real diff=A[i][j] - B[i][j]; + somme += diff*diff; + somme2 += A[i][j]*A[i][j]; + } + } + + return somme/somme2; + } + + static inline void display_vector(const stl_vector & A) + { + int N=A.size(); + for (int i=0;i<N;i++){ + INFOS("A["<<i<<"]="<<A[i]<<endl); + } + } + +}; + +#endif diff --git a/btl/libs/STL/main.cpp b/btl/libs/STL/main.cpp new file mode 100644 index 0000000..4e73328 --- /dev/null +++ b/btl/libs/STL/main.cpp @@ -0,0 +1,42 @@ +//===================================================== +// File : main.cpp +// Author : L. Plagne <laurent.plagne@edf.fr)> +// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "STL_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); + bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); + bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); + bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); + bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + |