summaryrefslogtreecommitdiff
path: root/btl/libs
diff options
context:
space:
mode:
Diffstat (limited to 'btl/libs')
-rw-r--r--btl/libs/BLACS/blacs.h50
-rw-r--r--btl/libs/BLACS/blacs_interface.hh153
-rw-r--r--btl/libs/BLACS/blacsinit.hh38
-rw-r--r--btl/libs/BLACS/gather.h49
-rw-r--r--btl/libs/BLACS/gather_impl.h126
-rw-r--r--btl/libs/BLACS/scatter.h61
-rw-r--r--btl/libs/BLACS/scatter_impl.h121
-rw-r--r--btl/libs/BLAS/CMakeLists.txt60
-rw-r--r--btl/libs/BLAS/blas.h681
-rw-r--r--btl/libs/BLAS/blas_interface.hh77
-rw-r--r--btl/libs/BLAS/blas_interface_impl.hh96
-rw-r--r--btl/libs/BLAS/c_interface_base.h89
-rw-r--r--btl/libs/BLAS/cblas_interface_impl.hh96
-rw-r--r--btl/libs/BLAS/main.cpp116
-rw-r--r--btl/libs/FFTW/fftw_interface.hh87
-rw-r--r--btl/libs/FFTW/main.cpp107
-rw-r--r--btl/libs/LAPACK/lapack.hh33
-rw-r--r--btl/libs/LAPACK/lapack_.hh38
-rw-r--r--btl/libs/LAPACK/lapack_interface.hh78
-rw-r--r--btl/libs/LAPACK/lapack_interface_impl.hh139
-rw-r--r--btl/libs/LAPACK/main.cpp95
-rw-r--r--btl/libs/PBLAS/main.cpp103
-rw-r--r--btl/libs/PBLAS/pblas.h106
-rw-r--r--btl/libs/PBLAS/pblas_interface.hh57
-rw-r--r--btl/libs/PBLAS/pblas_interface_impl.hh246
-rw-r--r--btl/libs/STL/CMakeLists.txt2
-rw-r--r--btl/libs/STL/STL_interface.hh255
-rw-r--r--btl/libs/STL/main.cpp42
28 files changed, 3201 insertions, 0 deletions
diff --git a/btl/libs/BLACS/blacs.h b/btl/libs/BLACS/blacs.h
new file mode 100644
index 0000000..97c863b
--- /dev/null
+++ b/btl/libs/BLACS/blacs.h
@@ -0,0 +1,50 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef BLACS_H_
+#define BLACS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /* BLACS declarations */
+ void blacs_pinfo_(int*, int*);
+ void blacs_get_(int*, int*, int*);
+ void blacs_gridinit_(int*, const char*, int*, int*);
+ void blacs_gridinfo_(const int*, int*, int*, int*, int*);
+ void blacs_pcoord_(const int*, const int*, int*, int*);
+ void blacs_gridexit_(int*);
+ void blacs_exit_(int*);
+ void blacs_barrier_(const int*, const char*);
+ void dgerv2d_(const int*, const int*, const int*, double*, const int*, const int*, const int*);
+ void dgesd2d_(const int*, const int*, const int*, const double*, const int*, const int*, const int*);
+ void sgerv2d_(const int*, const int*, const int*, float*, const int*, const int*, const int*);
+ void sgesd2d_(const int*, const int*, const int*, const float*, const int*, const int*, const int*);
+ void igebs2d_(const int*, const char*, const char*, const int*, const int*, const int*, const int*);
+ void igebr2d_(const int*, const char*, const char*, const int*, const int*, int*, const int*, const int*, const int*);
+ void dgebs2d_(const int*, const char*, const char*, const int*, const int*, const double*, const int*);
+ void dgebr2d_(const int*, const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*);
+ void dgsum2d_(const int*, const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*);
+ void igsum2d_(const int*, const char*, const char*, const int*, const int*, int*, const int*, const int*, const int*);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* BLACS_H_ */
diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh
new file mode 100644
index 0000000..09f57b3
--- /dev/null
+++ b/btl/libs/BLACS/blacs_interface.hh
@@ -0,0 +1,153 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef BTL_BLACS_INTERFACE_H
+#define BTL_BLACS_INTERFACE_H
+
+#include <vector>
+#include <algorithm>
+#include "blacs.h"
+extern "C" {
+ void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*);
+ int numroc_(const int*, const int*, const int*, const int*, const int*);
+}
+
+#include "scatter.h"
+#include "gather.h"
+
+template<typename real>
+class blacs_interface
+{
+
+public:
+ typedef real real_type;
+ typedef std::vector<real_type> stl_vector;
+ typedef stl_vector stl_matrix;
+
+ typedef real* gene_matrix;
+ typedef real* gene_vector;
+
+
+ static void free_matrix(gene_matrix & A, int N){
+ delete A;
+ }
+
+ static void free_vector(gene_vector & B){
+ delete B;
+ }
+
+ static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+ int N = A_stl.size();
+ A = new real[N];
+ for (int j=0;j<N;j++)
+ A[j] = A_stl[j];
+ }
+
+ static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ B = new real[N];
+ for (int i=0;i<N;i++)
+ B[i] = B_stl[i];
+ }
+
+ static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ for (int i=0;i<N;i++)
+ B_stl[i] = B[i];
+ }
+
+ static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
+ int N = A_stl.size();
+ for (int i=0;i<N;i++)
+ A_stl[i] = A[i];
+ }
+
+ static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
+ for (int i=0;i<N;i++)
+ cible[i]=source[i];
+ }
+
+ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
+ for (int i=0;i<N;i++)
+ cible[i]=source[i];
+ }
+
+
+public:
+ static int context() {
+ int ctxt, ignored, what = 0;
+ blacs_get_(&ignored, &what, &ctxt);
+ return ctxt;
+ }
+ static int myid() {
+ int procnum, myid;
+ blacs_pinfo_(&myid, &procnum);
+ return myid;
+ }
+
+
+ static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix,
+ int& GlobalRows, int& GlobalCols,
+ int& BlockRows, int& BlockCols,
+ int& LocalRows, int& LocalCols
+ ) {
+ scatter(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ }
+
+ static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix,
+ int *desc,
+ const int& GlobalRows=0, const int& GlobalCols=0,
+ const int& BlockRows=0, const int& BlockCols=0
+ ) {
+ int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols,
+ BlockRows_ = BlockRows, BlockCols_ = BlockCols,
+ LocalRows_, LocalCols_;
+ const int ctxt = context();
+ scatter(ctxt, GlobalMatrix, LocalMatrix,
+ GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_
+ );
+
+ const int iZERO = 0;
+ int info;
+ const int LLD = std::max(1, LocalRows_);
+ descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_,
+ &iZERO, &iZERO, &ctxt, &LLD, &info
+ );
+ }
+
+ static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix,
+ int& GlobalRows, int& GlobalCols,
+ int& BlockRows, int& BlockCols,
+ int& LocalRows, int& LocalCols
+ ) {
+ gather(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ }
+
+ static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix,
+ int* desc
+ ) {
+ int GlobalRows = desc[2], GlobalCols = desc[3],
+ BlockRows = desc[4], BlockCols = desc[5],
+ LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8];
+ const int ctxt = context();
+ gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+ }
+
+
+};
+
+#endif /* BTL_BLACS_INTERFACE_H */
diff --git a/btl/libs/BLACS/blacsinit.hh b/btl/libs/BLACS/blacsinit.hh
new file mode 100644
index 0000000..41f5ebf
--- /dev/null
+++ b/btl/libs/BLACS/blacsinit.hh
@@ -0,0 +1,38 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef BLACSINIT_HH
+#define BLACSINIT_HH
+
+#include <cmath>
+
+bool blacsinit(int *argc, char ***argv)
+{
+ int context, myid, numproc, prows, pcols, iZERO = 0;
+ MPI_Init(argc, argv);
+ blacs_pinfo_(&myid, &numproc);
+ blacs_get_(&iZERO, &iZERO, &context);
+
+ int p = static_cast<double>(std::sqrt(static_cast<double>(numproc)) + 1.);
+ while (numproc % p) --p;
+ prows = p; pcols = numproc/p;
+
+ blacs_gridinit_(&context, "Row-major", &prows, &pcols);
+ return (myid == 0);
+}
+
+#endif /* BLACSINIT_HH */
diff --git a/btl/libs/BLACS/gather.h b/btl/libs/BLACS/gather.h
new file mode 100644
index 0000000..957f7c7
--- /dev/null
+++ b/btl/libs/BLACS/gather.h
@@ -0,0 +1,49 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef GATHER_H_
+#define GATHER_H_
+
+#define TYPENAME float
+#define TYPEPREFIX s
+#include "gather_impl.h"
+#undef TYPENAME
+#undef TYPEPREFIX
+
+#define TYPENAME double
+#define TYPEPREFIX d
+#include "gather_impl.h"
+#undef TYPENAME
+#undef TYPEPREFIX
+
+template<typename T>
+static void gather_matrix(std::vector<T>& GlobalMatrix, const std::vector<T>& LocalMatrix,
+ const int* desc
+) {
+ int GlobalRows = desc[2], GlobalCols = desc[3],
+ BlockRows = desc[4], BlockCols = desc[5],
+ LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8];
+ int ctxt;
+ {
+ int ignored, what = 0;
+ blacs_get_(&ignored, &what, &ctxt);
+ }
+ gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols);
+}
+
+
+#endif /* GATHER_H_ */
diff --git a/btl/libs/BLACS/gather_impl.h b/btl/libs/BLACS/gather_impl.h
new file mode 100644
index 0000000..3b57d57
--- /dev/null
+++ b/btl/libs/BLACS/gather_impl.h
@@ -0,0 +1,126 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#define PRFX d
+#define CAT_(x,y) x##y
+#define CAT(x,y) CAT_(x,y)
+
+#define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_)
+#define vector_t std::vector<TYPENAME>
+
+#include <vector>
+
+
+inline void gather(
+ const int& context, // [IN]
+ vector_t& GlobalMatrixVector, // [OUT] Only relevant for root
+ const vector_t& LocalMatrixVector, // [IN]
+ int& GlobalRows, // [OUT]
+ int& GlobalCols, // [OUT]
+ int& BlockRows, // [IN (root) / OUT (other)]
+ int& BlockCols, // [IN (root) / OUT (other)]
+ int& LocalRows, // [IN]
+ int& LocalCols, // [IN]
+ const int& rootrow = 0, // [IN]
+ const int& rootcol = 0 // [IN]
+) {
+ /* Helper variables */
+ int iONE = 1, iTWO = 2, imONE = -1;
+
+ int myid, myrow, mycol, procrows, proccols, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol);
+ bool iamroot = (myrow == rootrow && mycol == rootcol);
+ TYPENAME *GlobalMatrix;
+ const TYPENAME *LocalMatrix = &LocalMatrixVector[0];
+
+ /* Broadcast matrix info */
+ int binfo[2];
+ if (iamroot) {
+ binfo[0] = BlockRows;
+ binfo[1] = BlockCols;
+
+ igebs2d_(&context, "All", " ", &iTWO, &iONE, binfo, &iTWO);
+ } else {
+ igebr2d_(&context, "All", " ", &iTWO, &iONE, binfo, &iTWO,
+ &rootrow, &rootcol);
+ }
+ BlockRows = binfo[0];
+ BlockCols = binfo[1];
+
+ /* Retrieve matrix global dimensions */
+ int minfo[2];
+ minfo[0] = LocalRows; minfo[1] = LocalCols;
+ igsum2d_(&context, "Col", " ", &iONE, &iONE, minfo, &iONE, &imONE, &imONE);
+ igsum2d_(&context, "Row", " ", &iONE, &iONE, minfo+1, &iONE, &imONE, &imONE);
+ GlobalRows = minfo[0]; GlobalCols = minfo[1];
+
+
+ /* Reserve space on root */
+ if (iamroot) {
+ GlobalMatrixVector.resize(GlobalRows*GlobalCols);
+ GlobalMatrix = &GlobalMatrixVector[0];
+ }
+
+ /* Gather matrix */
+ int srcr = 0, srcc = 0;
+ int SendRows, SendCols;
+ int StartRow = 0, StartCol = 0;
+ for (int r = 0; r < GlobalRows; r += BlockRows, srcr=(srcr+1)%procrows) {
+ srcc = 0;
+
+ // Is this the last row bloc?
+ SendRows = BlockRows;
+ if (GlobalRows-r < BlockRows)
+ SendRows = GlobalRows-r;
+ if (SendRows <= 0)
+ SendRows = 0;
+
+ for (int c=0; c<GlobalCols; c+=BlockCols, srcc=(srcc+1)%proccols) {
+
+ // Is this the last column block?
+ SendCols = BlockCols;
+ if (GlobalCols-c < BlockCols)
+ SendCols = GlobalCols-c;
+
+ // Send data
+ if (myrow == srcr && mycol == srcc) {
+ FUNCNAME(gesd2d) (&context, &SendRows, &SendCols,
+ LocalMatrix+LocalRows*StartCol+StartRow,
+ &LocalRows, &rootrow, &rootcol
+ );
+
+ // Adjust the next starting column
+ StartCol = (StartCol + SendCols) % LocalCols;
+ }
+
+ // Receive data
+ if (iamroot) {
+ FUNCNAME(gerv2d) (&context, &SendRows, &SendCols,
+ GlobalMatrix + GlobalRows*c + r,
+ &GlobalRows, &srcr, &srcc
+ );
+ }
+ }
+
+ // Adjust the next starting row
+ if (myrow == srcr)
+ StartRow = (StartRow + SendRows) % LocalRows;
+
+ }
+
+}
diff --git a/btl/libs/BLACS/scatter.h b/btl/libs/BLACS/scatter.h
new file mode 100644
index 0000000..170e82a
--- /dev/null
+++ b/btl/libs/BLACS/scatter.h
@@ -0,0 +1,61 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef SCATTER_H_
+#define SCATTER_H_
+
+#define TYPENAME float
+#define TYPEPREFIX s
+#include "scatter_impl.h"
+#undef TYPENAME
+#undef TYPEPREFIX
+
+#define TYPENAME double
+#define TYPEPREFIX d
+#include "scatter_impl.h"
+#undef TYPENAME
+#undef TYPEPREFIX
+
+template<typename T>
+void scatter_matrix(const std::vector<T>& GlobalMatrix, std::vector<T>& LocalMatrix,
+ int *desc,
+ const int& GlobalRows=0, const int& GlobalCols=0,
+ const int& BlockRows=0, const int& BlockCols=0
+ )
+{
+ int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols,
+ BlockRows_ = BlockRows, BlockCols_ = BlockCols,
+ LocalRows_, LocalCols_;
+ int ctxt;
+ {
+ int ignored, what = 0;
+ blacs_get_(&ignored, &what, &ctxt);
+ }
+ scatter(ctxt, GlobalMatrix, LocalMatrix,
+ GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_
+ );
+
+ const int iZERO = 0;
+ int info;
+ const int LLD = std::max(1, LocalRows_);
+ descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_,
+ &iZERO, &iZERO, &ctxt, &LLD, &info
+ );
+}
+
+
+#endif /* SCATTER_H_ */
diff --git a/btl/libs/BLACS/scatter_impl.h b/btl/libs/BLACS/scatter_impl.h
new file mode 100644
index 0000000..7c38c22
--- /dev/null
+++ b/btl/libs/BLACS/scatter_impl.h
@@ -0,0 +1,121 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#define CAT_(x,y) x##y
+#define CAT(x,y) CAT_(x,y)
+
+#define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_)
+#define vector_t std::vector<TYPENAME>
+
+#include <vector>
+
+inline void scatter(
+ const int& context, // [IN]
+ const vector_t& GlobalMatrixVector, // [IN] Only relevant for root
+ vector_t& LocalMatrixVector, // [OUT] The space is reserved here
+ int& GlobalRows, // [IN (root) / OUT (other)]
+ int& GlobalCols, // [IN (root) / OUT (other)]
+ int& BlockRows, // [IN (root) / OUT (other)]
+ int& BlockCols, // [IN (root) / OUT (other)]
+ int& LocalRows, // [OUT]
+ int& LocalCols, // [OUT]
+ const int& rootrow = 0, // [IN]
+ const int& rootcol = 0 // [IN]
+) {
+ /* Helper variables */
+ int iZERO = 0, iONE = 1, iFOUR = 4;
+
+ int myid, myrow, mycol, procrows, proccols, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol);
+ bool iamroot = (myrow == rootrow && mycol == rootcol);
+ const TYPENAME *GlobalMatrix = &GlobalMatrixVector[0];
+ TYPENAME *LocalMatrix;
+
+ /* Broadcast matrix info */
+ int minfo[4];
+ if (iamroot) {
+ minfo[0] = GlobalRows;
+ minfo[1] = GlobalCols;
+ minfo[2] = BlockRows;
+ minfo[3] = BlockCols;
+ igebs2d_(&context, "All", " ", &iFOUR, &iONE, minfo, &iFOUR);
+ } else {
+ igebr2d_(&context, "All", " ", &iFOUR, &iONE, minfo, &iFOUR,
+ &iZERO, &iZERO);
+ }
+
+ GlobalRows = minfo[0];
+ GlobalCols = minfo[1];
+ BlockRows = minfo[2];
+ BlockCols = minfo[3];
+
+
+ /* Reserve space */
+ LocalRows = numroc_(&GlobalRows, &BlockRows, &myrow, &iZERO, &procrows);
+ LocalCols = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &proccols);
+ LocalMatrixVector.resize(LocalRows*LocalCols);
+ LocalMatrix = &LocalMatrixVector[0];
+
+ /* Scatter matrix */
+ int destr = 0, destc = 0;
+ int SendRows, SendCols;
+ int RecvRow = 0, RecvCol = 0;
+ for (int r = 0; r < GlobalRows; r += BlockRows, destr=(destr+1)%procrows) {
+ destc = 0;
+
+ // Is this the last row bloc?
+ SendRows = BlockRows;
+ if (GlobalRows-r < BlockRows)
+ SendRows = GlobalRows-r;
+ if (SendRows <= 0)
+ SendRows = 0;
+
+ for (int c=0; c<GlobalCols; c+=BlockCols, destc=(destc+1)%proccols) {
+
+ // Is this the last column block?
+ SendCols = BlockCols;
+ if (GlobalCols-c < BlockCols)
+ SendCols = GlobalCols-c;
+
+ // Send data
+ if (iamroot) {
+ FUNCNAME(gesd2d) (&context, &SendRows, &SendCols,
+ GlobalMatrix + GlobalRows*c + r,
+ &GlobalRows, &destr, &destc
+ );
+ }
+
+ // Rerceive data
+ if (myrow == destr && mycol == destc) {
+ FUNCNAME(gerv2d) (&context, &SendRows, &SendCols,
+ LocalMatrix+LocalRows*RecvCol+RecvRow,
+ &LocalRows, &rootrow, &rootcol
+ );
+
+ // Adjust the next starting column
+ RecvCol = (RecvCol + SendCols) % LocalCols;
+ }
+ }
+
+ // Adjust the next starting row
+ if (myrow == destr)
+ RecvRow = (RecvRow + SendRows) % LocalRows;
+
+ }
+
+}
diff --git a/btl/libs/BLAS/CMakeLists.txt b/btl/libs/BLAS/CMakeLists.txt
new file mode 100644
index 0000000..de42fe0
--- /dev/null
+++ b/btl/libs/BLAS/CMakeLists.txt
@@ -0,0 +1,60 @@
+
+find_package(ATLAS)
+if (ATLAS_FOUND)
+ btl_add_bench(btl_atlas main.cpp)
+ if(BUILD_btl_atlas)
+ target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
+ set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
+ endif(BUILD_btl_atlas)
+endif (ATLAS_FOUND)
+
+find_package(MKL)
+if (MKL_FOUND)
+ btl_add_bench(btl_mkl main.cpp)
+ if(BUILD_btl_mkl)
+ target_link_libraries(btl_mkl ${MKL_LIBRARIES})
+ set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
+ endif(BUILD_btl_mkl)
+endif (MKL_FOUND)
+
+
+find_package(GOTO2)
+if (GOTO2_FOUND)
+ btl_add_bench(btl_goto2 main.cpp)
+ if(BUILD_btl_goto2)
+ target_link_libraries(btl_goto2 ${GOTO_LIBRARIES} )
+ set_target_properties(btl_goto2 PROPERTIES COMPILE_FLAGS "-DCBLASNAME=GOTO2")
+ endif(BUILD_btl_goto2)
+endif (GOTO2_FOUND)
+
+find_package(GOTO)
+if (GOTO_FOUND)
+ if(GOTO2_FOUND)
+ btl_add_bench(btl_goto main.cpp OFF)
+ else()
+ btl_add_bench(btl_goto main.cpp)
+ endif()
+ if(BUILD_btl_goto)
+ target_link_libraries(btl_goto ${GOTO_LIBRARIES} )
+ set_target_properties(btl_goto PROPERTIES COMPILE_FLAGS "-DCBLASNAME=GOTO")
+ endif(BUILD_btl_goto)
+endif (GOTO_FOUND)
+
+find_package(ACML)
+if (ACML_FOUND)
+ btl_add_bench(btl_acml main.cpp)
+ if(BUILD_btl_acml)
+ target_link_libraries(btl_acml ${ACML_LIBRARIES} )
+ set_target_properties(btl_acml PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ACML -DHAS_LAPACK=1")
+ endif(BUILD_btl_acml)
+endif (ACML_FOUND)
+
+if(Eigen_SOURCE_DIR AND CMAKE_Fortran_COMPILER_WORKS)
+ # we are inside Eigen and blas/lapack interface is compilable
+ include_directories(${Eigen_SOURCE_DIR})
+ btl_add_bench(btl_eigenblas main.cpp)
+ if(BUILD_btl_eigenblas)
+ target_link_libraries(btl_eigenblas eigen_blas eigen_lapack )
+ set_target_properties(btl_eigenblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=EigenBLAS")
+ endif()
+endif()
diff --git a/btl/libs/BLAS/blas.h b/btl/libs/BLAS/blas.h
new file mode 100644
index 0000000..25607a5
--- /dev/null
+++ b/btl/libs/BLAS/blas.h
@@ -0,0 +1,681 @@
+#ifndef BLAS_H
+#define BLAS_H
+
+#define BLASFUNC(FUNC) FUNC##_
+
+#ifdef __WIN64__
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#include <complex>
+
+extern "C" {
+
+int BLASFUNC(xerbla)(const char *, int *info, int);
+
+float BLASFUNC(sdot) (int *, float *, int *, float *, int *);
+float BLASFUNC(sdsdot)(int *, float *, float *, int *, float *, int *);
+
+double BLASFUNC(dsdot) (int *, float *, int *, float *, int *);
+double BLASFUNC(ddot) (int *, double *, int *, double *, int *);
+double BLASFUNC(qdot) (int *, double *, int *, double *, int *);
+
+#if defined(F_INTERFACE_GFORT) && !defined(__64BIT__)
+int BLASFUNC(cdotu) (int *, float * , int *, float *, int *);
+int BLASFUNC(cdotc) (int *, float *, int *, float *, int *);
+void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *);
+void BLASFUNC(zdotc) (double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xdotu) (double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xdotc) (double *, int *, double *, int *, double *, int *);
+#elif defined(F_INTERFACE_F2C) || \
+ defined(F_INTERFACE_PGI) || \
+ defined(F_INTERFACE_GFORT) || \
+ (defined(F_INTERFACE_PATHSCALE) && defined(__64BIT__))
+void BLASFUNC(cdotu) (float *, int *, float * , int *, float *, int *);
+void BLASFUNC(cdotc) (float *, int *, float *, int *, float *, int *);
+void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *);
+void BLASFUNC(zdotc) (double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xdotu) (double *, int *, double *, int *, double *, int *);
+void BLASFUNC(xdotc) (double *, int *, double *, int *, double *, int *);
+#else
+std::complex<float> BLASFUNC(cdotu) (int *, float *, int *, float *, int *);
+std::complex<float> BLASFUNC(cdotc) (int *, float *, int *, float *, int *);
+std::complex<double> BLASFUNC(zdotu) (int *, double *, int *, double *, int *);
+std::complex<double> BLASFUNC(zdotc) (int *, double *, int *, double *, int *);
+double BLASFUNC(xdotu) (int *, double *, int *, double *, int *);
+double BLASFUNC(xdotc) (int *, double *, int *, double *, int *);
+#endif
+
+int BLASFUNC(cdotuw) (int *, float *, int *, float *, int *, float*);
+int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*);
+int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*);
+int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*);
+
+int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *);
+int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *);
+int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *);
+int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *);
+int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *);
+int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *);
+int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *);
+
+int BLASFUNC(scopy) (int *, float *, int *, float *, int *);
+int BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
+int BLASFUNC(qcopy) (int *, double *, int *, double *, int *);
+int BLASFUNC(ccopy) (int *, float *, int *, float *, int *);
+int BLASFUNC(zcopy) (int *, double *, int *, double *, int *);
+int BLASFUNC(xcopy) (int *, double *, int *, double *, int *);
+
+int BLASFUNC(sswap) (int *, float *, int *, float *, int *);
+int BLASFUNC(dswap) (int *, double *, int *, double *, int *);
+int BLASFUNC(qswap) (int *, double *, int *, double *, int *);
+int BLASFUNC(cswap) (int *, float *, int *, float *, int *);
+int BLASFUNC(zswap) (int *, double *, int *, double *, int *);
+int BLASFUNC(xswap) (int *, double *, int *, double *, int *);
+
+float BLASFUNC(sasum) (int *, float *, int *);
+float BLASFUNC(scasum)(int *, float *, int *);
+double BLASFUNC(dasum) (int *, double *, int *);
+double BLASFUNC(qasum) (int *, double *, int *);
+double BLASFUNC(dzasum)(int *, double *, int *);
+double BLASFUNC(qxasum)(int *, double *, int *);
+
+int BLASFUNC(isamax)(int *, float *, int *);
+int BLASFUNC(idamax)(int *, double *, int *);
+int BLASFUNC(iqamax)(int *, double *, int *);
+int BLASFUNC(icamax)(int *, float *, int *);
+int BLASFUNC(izamax)(int *, double *, int *);
+int BLASFUNC(ixamax)(int *, double *, int *);
+
+int BLASFUNC(ismax) (int *, float *, int *);
+int BLASFUNC(idmax) (int *, double *, int *);
+int BLASFUNC(iqmax) (int *, double *, int *);
+int BLASFUNC(icmax) (int *, float *, int *);
+int BLASFUNC(izmax) (int *, double *, int *);
+int BLASFUNC(ixmax) (int *, double *, int *);
+
+int BLASFUNC(isamin)(int *, float *, int *);
+int BLASFUNC(idamin)(int *, double *, int *);
+int BLASFUNC(iqamin)(int *, double *, int *);
+int BLASFUNC(icamin)(int *, float *, int *);
+int BLASFUNC(izamin)(int *, double *, int *);
+int BLASFUNC(ixamin)(int *, double *, int *);
+
+int BLASFUNC(ismin)(int *, float *, int *);
+int BLASFUNC(idmin)(int *, double *, int *);
+int BLASFUNC(iqmin)(int *, double *, int *);
+int BLASFUNC(icmin)(int *, float *, int *);
+int BLASFUNC(izmin)(int *, double *, int *);
+int BLASFUNC(ixmin)(int *, double *, int *);
+
+float BLASFUNC(samax) (int *, float *, int *);
+double BLASFUNC(damax) (int *, double *, int *);
+double BLASFUNC(qamax) (int *, double *, int *);
+float BLASFUNC(scamax)(int *, float *, int *);
+double BLASFUNC(dzamax)(int *, double *, int *);
+double BLASFUNC(qxamax)(int *, double *, int *);
+
+float BLASFUNC(samin) (int *, float *, int *);
+double BLASFUNC(damin) (int *, double *, int *);
+double BLASFUNC(qamin) (int *, double *, int *);
+float BLASFUNC(scamin)(int *, float *, int *);
+double BLASFUNC(dzamin)(int *, double *, int *);
+double BLASFUNC(qxamin)(int *, double *, int *);
+
+float BLASFUNC(smax) (int *, float *, int *);
+double BLASFUNC(dmax) (int *, double *, int *);
+double BLASFUNC(qmax) (int *, double *, int *);
+float BLASFUNC(scmax) (int *, float *, int *);
+double BLASFUNC(dzmax) (int *, double *, int *);
+double BLASFUNC(qxmax) (int *, double *, int *);
+
+float BLASFUNC(smin) (int *, float *, int *);
+double BLASFUNC(dmin) (int *, double *, int *);
+double BLASFUNC(qmin) (int *, double *, int *);
+float BLASFUNC(scmin) (int *, float *, int *);
+double BLASFUNC(dzmin) (int *, double *, int *);
+double BLASFUNC(qxmin) (int *, double *, int *);
+
+int BLASFUNC(sscal) (int *, float *, float *, int *);
+int BLASFUNC(dscal) (int *, double *, double *, int *);
+int BLASFUNC(qscal) (int *, double *, double *, int *);
+int BLASFUNC(cscal) (int *, float *, float *, int *);
+int BLASFUNC(zscal) (int *, double *, double *, int *);
+int BLASFUNC(xscal) (int *, double *, double *, int *);
+int BLASFUNC(csscal)(int *, float *, float *, int *);
+int BLASFUNC(zdscal)(int *, double *, double *, int *);
+int BLASFUNC(xqscal)(int *, double *, double *, int *);
+
+float BLASFUNC(snrm2) (int *, float *, int *);
+float BLASFUNC(scnrm2)(int *, float *, int *);
+
+double BLASFUNC(dnrm2) (int *, double *, int *);
+double BLASFUNC(qnrm2) (int *, double *, int *);
+double BLASFUNC(dznrm2)(int *, double *, int *);
+double BLASFUNC(qxnrm2)(int *, double *, int *);
+
+int BLASFUNC(srot) (int *, float *, int *, float *, int *, float *, float *);
+int BLASFUNC(drot) (int *, double *, int *, double *, int *, double *, double *);
+int BLASFUNC(qrot) (int *, double *, int *, double *, int *, double *, double *);
+int BLASFUNC(csrot) (int *, float *, int *, float *, int *, float *, float *);
+int BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);
+int BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);
+
+int BLASFUNC(srotg) (float *, float *, float *, float *);
+int BLASFUNC(drotg) (double *, double *, double *, double *);
+int BLASFUNC(qrotg) (double *, double *, double *, double *);
+int BLASFUNC(crotg) (float *, float *, float *, float *);
+int BLASFUNC(zrotg) (double *, double *, double *, double *);
+int BLASFUNC(xrotg) (double *, double *, double *, double *);
+
+int BLASFUNC(srotmg)(float *, float *, float *, float *, float *);
+int BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
+
+int BLASFUNC(srotm) (int *, float *, int *, float *, int *, float *);
+int BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);
+int BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
+
+/* Level 2 routines */
+
+int BLASFUNC(sger)(int *, int *, float *, float *, int *,
+ float *, int *, float *, int *);
+int BLASFUNC(dger)(int *, int *, double *, double *, int *,
+ double *, int *, double *, int *);
+int BLASFUNC(qger)(int *, int *, double *, double *, int *,
+ double *, int *, double *, int *);
+int BLASFUNC(cgeru)(int *, int *, float *, float *, int *,
+ float *, int *, float *, int *);
+int BLASFUNC(cgerc)(int *, int *, float *, float *, int *,
+ float *, int *, float *, int *);
+int BLASFUNC(zgeru)(int *, int *, double *, double *, int *,
+ double *, int *, double *, int *);
+int BLASFUNC(zgerc)(int *, int *, double *, double *, int *,
+ double *, int *, double *, int *);
+int BLASFUNC(xgeru)(int *, int *, double *, double *, int *,
+ double *, int *, double *, int *);
+int BLASFUNC(xgerc)(int *, int *, double *, double *, int *,
+ double *, int *, double *, int *);
+
+int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *,
+ float *, int *);
+int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *,
+ float *, int *);
+int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *);
+int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *);
+int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
+
+int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *,
+ float *, int *);
+int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *,
+ float *, int *);
+int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *);
+int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ctpmv) (char *, char *, char *, int *, float *, float *, int *);
+int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);
+
+int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
+int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
+int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
+int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
+int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(ssymv) (char *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dsymv) (char *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(qsymv) (char *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(csymv) (char *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zsymv) (char *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xsymv) (char *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(sspmv) (char *, int *, float *, float *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(qspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(cspmv) (char *, int *, float *, float *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xspmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyr) (char *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(dsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(qsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(ssyr2) (char *, int *, float *,
+ float *, int *, float *, int *, float *, int *);
+int BLASFUNC(dsyr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *, int *);
+int BLASFUNC(qsyr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *, int *);
+int BLASFUNC(csyr2) (char *, int *, float *,
+ float *, int *, float *, int *, float *, int *);
+int BLASFUNC(zsyr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *, int *);
+int BLASFUNC(xsyr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(sspr) (char *, int *, float *, float *, int *,
+ float *);
+int BLASFUNC(dspr) (char *, int *, double *, double *, int *,
+ double *);
+int BLASFUNC(qspr) (char *, int *, double *, double *, int *,
+ double *);
+int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
+ float *);
+int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
+ double *);
+int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
+ double *);
+
+int BLASFUNC(sspr2) (char *, int *, float *,
+ float *, int *, float *, int *, float *);
+int BLASFUNC(dspr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *);
+int BLASFUNC(qspr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *);
+int BLASFUNC(cspr2) (char *, int *, float *,
+ float *, int *, float *, int *, float *);
+int BLASFUNC(zspr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *);
+int BLASFUNC(xspr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *);
+
+int BLASFUNC(cher) (char *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zher) (char *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(xher) (char *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(chpr) (char *, int *, float *, float *, int *, float *);
+int BLASFUNC(zhpr) (char *, int *, double *, double *, int *, double *);
+int BLASFUNC(xhpr) (char *, int *, double *, double *, int *, double *);
+
+int BLASFUNC(cher2) (char *, int *, float *,
+ float *, int *, float *, int *, float *, int *);
+int BLASFUNC(zher2) (char *, int *, double *,
+ double *, int *, double *, int *, double *, int *);
+int BLASFUNC(xher2) (char *, int *, double *,
+ double *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(chpr2) (char *, int *, float *,
+ float *, int *, float *, int *, float *);
+int BLASFUNC(zhpr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *);
+int BLASFUNC(xhpr2) (char *, int *, double *,
+ double *, int *, double *, int *, double *);
+
+int BLASFUNC(chemv) (char *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zhemv) (char *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xhemv) (char *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(chpmv) (char *, int *, float *, float *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zhpmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xhpmv) (char *, int *, double *, double *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(snorm)(char *, int *, int *, float *, int *);
+int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
+int BLASFUNC(cnorm)(char *, int *, int *, float *, int *);
+int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
+
+int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+/* Level 3 routines */
+
+int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *,
+ float *, int *, float *, int *, float *, float *, int *);
+int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *,
+ double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *,
+ double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *,
+ float *, int *, float *, int *, float *, float *, int *);
+int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *,
+ double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *,
+ double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
+ float *, int *, float *, int *, float *, float *, int *);
+int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
+ double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
+ double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
+ float *, float *, int *, float *, int *,
+ float *, float *, int *);
+int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *,
+ double *, double *, int *);
+int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
+ float *, float *, int *, float *, int *,
+ float *, float *, int *);
+int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *,
+ double *, double *, int *);
+
+int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *,
+ float *, float *, int *, float *, int *);
+int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *,
+ float *, float *, int *, float *, int *);
+int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+
+int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *,
+ float *, float *, int *, float *, int *);
+int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *,
+ float *, float *, int *, float *, int *);
+int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *,
+ double *, double *, int *, double *, int *);
+
+int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *,
+ float *, float *, int *);
+int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *,
+ double *, double *, int *);
+int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *,
+ double *, double *, int *);
+int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *,
+ float *, float *, int *);
+int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *,
+ double *, double *, int *);
+int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *,
+ double *, double *, int *);
+
+int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+
+int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
+ double *, int *, double *, double *, int *);
+
+int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *,
+ float *, float *, int *);
+int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *,
+ double *, double *, int *);
+int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *,
+ double *, double *, int *);
+
+int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *,
+ float *, int *, float *, float *, int *);
+int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
+ double*, int *, double *, double *, int *);
+
+int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
+ double *, int *);
+int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
+ float *, int *);
+int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
+ double *, int *);
+
+int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
+ float *, int *, float *, float *, int *, float *, int *);
+int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
+ double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
+
+//int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
+//int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
+int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
+int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
+int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
+
+int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+
+int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+
+int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
+
+//int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
+//int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
+int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
+int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
+int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
+int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
+int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
+int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
+int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
+
+}
+
+#endif
diff --git a/btl/libs/BLAS/blas_interface.hh b/btl/libs/BLAS/blas_interface.hh
new file mode 100644
index 0000000..19fc3c1
--- /dev/null
+++ b/btl/libs/BLAS/blas_interface.hh
@@ -0,0 +1,77 @@
+//=====================================================
+// File : blas_interface.hh
+// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef blas_PRODUIT_MATRICE_VECTEUR_HH
+#define blas_PRODUIT_MATRICE_VECTEUR_HH
+
+#include <c_interface_base.h>
+#include <complex>
+extern "C"
+{
+#include "blas.h"
+#ifdef CBLAS_INTERFACE
+# include "cblas.h"
+#endif
+}
+
+#define MAKE_STRING2(S) #S
+#define MAKE_STRING(S) MAKE_STRING2(S)
+
+#define CAT2(A,B) A##B
+#define CAT(A,B) CAT2(A,B)
+
+
+template<class real> class blas_interface;
+
+
+static char notrans = 'N';
+static char trans = 'T';
+static char nonunit = 'N';
+static char lower = 'L';
+static char right = 'R';
+static char left = 'L';
+static int intone = 1;
+
+
+#define SCALAR float
+#define SCALAR_PREFIX s
+#ifdef CBLAS_INTERFACE
+# include "cblas_interface_impl.hh"
+#else
+# include "blas_interface_impl.hh"
+#endif
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+
+#define SCALAR double
+#define SCALAR_PREFIX d
+#ifdef CBLAS_INTERFACE
+# include "cblas_interface_impl.hh"
+#else
+# include "blas_interface_impl.hh"
+#endif
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+#endif
+
+
+
diff --git a/btl/libs/BLAS/blas_interface_impl.hh b/btl/libs/BLAS/blas_interface_impl.hh
new file mode 100644
index 0000000..63883a9
--- /dev/null
+++ b/btl/libs/BLAS/blas_interface_impl.hh
@@ -0,0 +1,96 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+
+#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
+
+template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
+{
+
+public :
+
+ static SCALAR fone;
+ static SCALAR fzero;
+
+ static inline std::string name()
+ {
+ return MAKE_STRING(CBLASNAME);
+ }
+
+ static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(gemv)(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
+ }
+
+ static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
+ }
+
+ static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
+ }
+
+ static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
+ BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
+ }
+
+ static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){
+ BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s);
+ }
+
+ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
+ }
+
+ static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
+ BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
+ }
+
+ static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
+ BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
+ }
+
+ static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
+ BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
+ }
+
+ static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
+ BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone);
+ }
+
+ static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
+ BLAS_FUNC(scal)(&N,&b,Y,&intone);
+ BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone);
+ }
+
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+ BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
+ BLAS_FUNC(trsv)(&lower, &notrans, &nonunit, &N, L, &N, X, &intone);
+ }
+
+ static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
+ BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
+ BLAS_FUNC(trsm)(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N);
+ }
+
+ static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
+ BLAS_FUNC(trmm)(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N);
+ }
+
+};
+
+SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
+SCALAR blas_interface<SCALAR>::fzero = SCALAR(0);
diff --git a/btl/libs/BLAS/c_interface_base.h b/btl/libs/BLAS/c_interface_base.h
new file mode 100644
index 0000000..3b6afbb
--- /dev/null
+++ b/btl/libs/BLAS/c_interface_base.h
@@ -0,0 +1,89 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef BTL_C_INTERFACE_BASE_H
+#define BTL_C_INTERFACE_BASE_H
+
+#include "utilities.h"
+#include <vector>
+
+template<class real> class c_interface_base
+{
+
+public:
+
+ typedef real real_type;
+ typedef std::vector<real> stl_vector;
+ typedef std::vector<stl_vector > stl_matrix;
+
+ typedef real* gene_matrix;
+ typedef real* gene_vector;
+
+ static void free_matrix(gene_matrix & A, int N){
+ delete A;
+ }
+
+ static void free_vector(gene_vector & B){
+ delete B;
+ }
+
+ static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+ int N = A_stl.size();
+ A = new real[N*N];
+ for (int j=0;j<N;j++)
+ for (int i=0;i<N;i++)
+ A[i+N*j] = A_stl[j][i];
+ }
+
+ static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ B = new real[N];
+ for (int i=0;i<N;i++)
+ B[i] = B_stl[i];
+ }
+
+ static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ for (int i=0;i<N;i++)
+ B_stl[i] = B[i];
+ }
+
+ static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
+ int N = A_stl.size();
+ for (int j=0;j<N;j++){
+ A_stl[j].resize(N);
+ for (int i=0;i<N;i++)
+ A_stl[j][i] = A[i+N*j];
+ }
+ }
+
+ static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
+ for (int i=0;i<N;i++)
+ cible[i]=source[i];
+ }
+
+ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
+ for (int j=0;j<N;j++){
+ for (int i=0;i<N;i++){
+ cible[i+N*j] = source[i+N*j];
+ }
+ }
+ }
+
+};
+
+#endif
diff --git a/btl/libs/BLAS/cblas_interface_impl.hh b/btl/libs/BLAS/cblas_interface_impl.hh
new file mode 100644
index 0000000..2da195e
--- /dev/null
+++ b/btl/libs/BLAS/cblas_interface_impl.hh
@@ -0,0 +1,96 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+
+#define BLAS_FUNC(NAME) CAT(cblas_,CAT(SCALAR_PREFIX,NAME))
+
+template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
+{
+
+public :
+
+ static SCALAR fone;
+ static SCALAR fzero;
+
+ static inline std::string name()
+ {
+ return MAKE_STRING(CBLASNAME);
+ }
+
+ static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(gemv)(CblasColMajor,CblasNoTrans,N,N,fone,A,N,B,intone,fzero,X,intone);
+ }
+
+ static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(symv)(CblasColMajor,CblasLower,N,fone,A,N,B,intone,fzero,X,intone);
+ }
+
+ static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(syr2)(CblasColMajor,CblasLower,N,fone,B,intone,X,intone,A,N);
+ }
+
+ static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
+ BLAS_FUNC(ger)(CblasColMajor,N,N,fone,X,intone,Y,intone,A,N);
+ }
+
+ static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){
+ BLAS_FUNC(rot)(N,A,intone,B,intone,c,s);
+ }
+
+ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
+ BLAS_FUNC(gemv)(CblasColMajor,CblasTrans,N,N,fone,A,N,B,intone,fzero,X,intone);
+ }
+
+ static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
+ BLAS_FUNC(gemm)(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,fone,A,N,B,N,fzero,X,N);
+ }
+
+ static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
+ BLAS_FUNC(gemm)(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,fone,A,N,B,N,fzero,X,N);
+ }
+
+ static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
+ BLAS_FUNC(syrk)(CblasColMajor,CblasLower,CblasNoTrans,N,N,fone,A,N,fzero,X,N);
+ }
+
+ static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
+ BLAS_FUNC(axpy)(N,coef,X,intone,Y,intone);
+ }
+
+ static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
+ BLAS_FUNC(scal)(N,b,Y,intone);
+ BLAS_FUNC(axpy)(N,a,X,intone,Y,intone);
+ }
+
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+ BLAS_FUNC(copy)(N, B, intone, X, intone);
+ BLAS_FUNC(trsv)(CblasColMajor,CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, intone);
+ }
+
+ static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
+ BLAS_FUNC(copy)(N, B, intone, X, intone);
+ BLAS_FUNC(trsm)(CblasColMajor,CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, fone, L, N, X, N);
+ }
+
+ static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
+ BLAS_FUNC(trmm)(CblasColMajor,CblasLeft, CblasLower, CblasNoTrans,CblasNonUnit, N,N,fone,A,N,B,N);
+ }
+
+};
+
+SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
+SCALAR blas_interface<SCALAR>::fzero = SCALAR(0);
diff --git a/btl/libs/BLAS/main.cpp b/btl/libs/BLAS/main.cpp
new file mode 100644
index 0000000..401cb5b
--- /dev/null
+++ b/btl/libs/BLAS/main.cpp
@@ -0,0 +1,116 @@
+//=====================================================
+// File : main.cpp
+// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+
+#ifndef BLAS_INTERFACE
+# ifndef CBLAS_INTERFACE
+# define BLAS_INTERFACE
+# endif
+#endif
+
+#include "utilities.h"
+#include "blas_interface.hh"
+#include "bench.hh"
+#include "basic_actions.hh"
+#include "action_trisolve_matrix.hh"
+
+#include <string>
+
+BTL_MAIN;
+
+int main(int argv, char **argc)
+{
+ bool
+ axpy=false, axpby=false, rot=false,
+ matrix_vector=false, atv=false, symv=false, syr2=false, ger=false, trisolve_vector=false,
+ matrix_matrix=false, aat=false, trisolve_matrix=false, trmm=false
+ ;
+ int N = 100;
+
+
+ for (int i = 1; i < argv; ++i) {
+ std::string arg = argc[i];
+ if (arg == "axpy") axpy = true;
+ else if (arg == "axpby") axpby = true;
+ else if (arg == "rot") rot = true;
+ else if (arg == "matrix_vector") matrix_vector = true;
+ else if (arg == "atv") atv = true;
+ else if (arg == "symv") symv = true;
+ else if (arg == "syr2") syr2 = true;
+ else if (arg == "ger") ger = true;
+ else if (arg == "trisolve_vector") trisolve_vector = true;
+ else if (arg == "matrix_matrix") matrix_matrix = true;
+ else if (arg == "aat") aat = true;
+ else if (arg == "trisolve_matrix") trisolve_matrix = true;
+ else if (arg == "trmm") trmm = true;
+
+ else if (arg[0] == '1' && arg[1] == '\0') {
+ axpy = true; axpby = true; rot = true;
+ }
+ else if (arg[0] == '2' && arg[1] == '\0') {
+ matrix_vector=true; atv=true; symv=true; syr2=true; ger=true; trisolve_vector=true;
+ }
+ else if (arg[0] == '3' && arg[1] == '\0') {
+ matrix_matrix=true; aat=true; trisolve_matrix=true; trmm=true;
+ }
+
+ // Check switch -N
+ else if (arg[0] == '-' && arg[1] == 'N') {
+ if (arg[2] != '\0')
+ N = atoi(arg.c_str()+2);
+ else
+ N = atoi(argc[++i]);
+ }
+ }
+
+ if (axpy)
+ bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N);
+ if (axpby)
+ bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N);
+ if (rot)
+ bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N);
+
+ if (matrix_vector)
+ bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N);
+ if (atv)
+ bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N);
+ if (symv)
+ bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N);
+ if (syr2)
+ bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N);
+ if (ger)
+ bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N);
+ if (trisolve_vector)
+ bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N);
+
+ if (matrix_matrix)
+ bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N);
+ if (aat)
+ bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N);
+ if (trisolve_matrix)
+ bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N);
+ if (trmm)
+ bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N);
+
+
+ return 0;
+}
+
+
diff --git a/btl/libs/FFTW/fftw_interface.hh b/btl/libs/FFTW/fftw_interface.hh
new file mode 100644
index 0000000..81701e7
--- /dev/null
+++ b/btl/libs/FFTW/fftw_interface.hh
@@ -0,0 +1,87 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef BTL_FFTW_INTERFACE_H
+#define BTL_FFTW_INTERFACE_H
+
+#include <complex>
+#include <fftw3.h>
+#include <vector>
+
+class fftw_interface
+{
+public:
+
+ static inline std::string name()
+ {
+ return std::string("fftw");
+ }
+
+ typedef std::complex<double> real_type;
+ typedef std::vector<std::complex<double> > stl_vector;
+
+ typedef fftw_complex* gene_vector;
+
+ typedef fftw_plan plan;
+
+ static inline void free_vector(gene_vector & B){
+ fftw_free(B);
+ }
+
+ static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ B = (gene_vector) fftw_malloc(sizeof(fftw_complex) * N);
+ std::complex<double>* B_cplx = reinterpret_cast<std::complex<double>* >(B);
+
+ for (int i=0;i<N;i++)
+ B_cplx[i] = B_stl[i];
+ }
+
+ static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
+ int N = B_stl.size();
+ std::complex<double>* B_cplx = reinterpret_cast<std::complex<double>* >(B);
+ for (int i=0;i<N;i++)
+ B_stl[i] = B_cplx[i];
+ }
+
+ static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
+ for (int i=0;i<N;i++) {
+ cible[i][0]=source[i][0];
+ cible[i][1]=source[i][1];
+ }
+ }
+
+ /* Begin FFTW operations */
+
+ static inline void fftw_init_plan(plan & p, const int & N, gene_vector & x, gene_vector & y, const int & sign, const int & flags){
+ p = fftw_plan_dft_1d(N, x, y, sign, flags);
+ }
+
+ static inline void fftw_init_plan_2d(plan & p, const int & N, gene_vector & x, gene_vector& y, const int & sign, const int & flags){
+ p = fftw_plan_dft_2d(N, N, x, y, sign, flags);
+ }
+
+ static inline void fftw_init_plan_3d(plan & p, const int & N, gene_vector & x, gene_vector& y, const int & sign, const int & flags){
+ p = fftw_plan_dft_3d(N, N, N, x, y, sign, flags);
+ }
+
+ static inline void fftw_run(plan & p){
+ fftw_execute(p);
+ }
+};
+
+#endif // BTL_FFTW_INTERFACE_H
diff --git a/btl/libs/FFTW/main.cpp b/btl/libs/FFTW/main.cpp
new file mode 100644
index 0000000..8fa452b
--- /dev/null
+++ b/btl/libs/FFTW/main.cpp
@@ -0,0 +1,107 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include "utilities.h"
+#include "bench.hh"
+#include "fftw_interface.hh"
+#include "fftw_actions.hh"
+
+#include <string>
+
+BTL_MAIN;
+
+int main(int argc, char **argv)
+{
+ bool
+ fftw_1d_forward_measure = false,
+ fftw_1d_forward_estimate = false,
+ fftw_1d_backward_measure = false,
+ fftw_1d_backward_estimate = false,
+
+ fftw_2d_forward_measure = false,
+ fftw_2d_forward_estimate = false,
+ fftw_2d_backward_measure = false,
+ fftw_2d_backward_estimate = false,
+
+ fftw_3d_forward_measure = false,
+ fftw_3d_forward_estimate = false,
+ fftw_3d_backward_measure = false,
+ fftw_3d_backward_estimate = false
+ ;
+ int N = 100;
+
+
+ for (int i = 1; i < argc; ++i) {
+ std::string arg = argv[i];
+
+ if (arg == "FFTW_1D_Forward_Measure" || arg == "all") fftw_1d_forward_measure = true;
+ else if (arg == "FFTW_1D_Forward_Estimate" || arg == "all") fftw_1d_forward_estimate = true;
+ else if (arg == "FFTW_1D_Backward_Measure" || arg == "all") fftw_1d_backward_measure = true;
+ else if (arg == "FFTW_1D_Backward_Estimate" || arg == "all") fftw_1d_backward_estimate = true;
+
+ else if (arg == "FFTW_2D_Forward_Measure" || arg == "all") fftw_2d_forward_measure = true;
+ else if (arg == "FFTW_2D_Forward_Estimate" || arg == "all") fftw_2d_forward_estimate = true;
+ else if (arg == "FFTW_2D_Backward_Measure" || arg == "all") fftw_2d_backward_measure = true;
+ else if (arg == "FFTW_2D_Backward_Estimate" || arg == "all") fftw_2d_backward_estimate = true;
+
+ else if (arg == "FFTW_3D_Forward_Measure" || arg == "all") fftw_3d_forward_measure = true;
+ else if (arg == "FFTW_3D_Forward_Estimate" || arg == "all") fftw_3d_forward_estimate = true;
+ else if (arg == "FFTW_3D_Backward_Measure" || arg == "all") fftw_3d_backward_measure = true;
+ else if (arg == "FFTW_3D_Backward_Estimate" || arg == "all") fftw_3d_backward_estimate = true;
+
+ // Check switch -N
+ else if (arg[0] == '-' && arg[1] == 'N') {
+ if (arg[2] != '\0')
+ N = atoi(arg.c_str()+2);
+ else
+ N = atoi(argv[++i]);
+ }
+ }
+
+
+ if (fftw_1d_forward_measure)
+ bench<Action_FFTW_1D_Forward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N);
+ if (fftw_1d_forward_estimate)
+ bench<Action_FFTW_1D_Forward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N);
+ if (fftw_1d_backward_measure)
+ bench<Action_FFTW_1D_Backward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N);
+ if (fftw_1d_backward_estimate)
+ bench<Action_FFTW_1D_Backward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N);
+
+ if (fftw_2d_forward_measure)
+ bench<Action_FFTW_2D_Forward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N);
+ if (fftw_2d_forward_estimate)
+ bench<Action_FFTW_2D_Forward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N);
+ if (fftw_2d_backward_measure)
+ bench<Action_FFTW_2D_Backward_Measure<fftw_interface> >(MIN_MV,MAX_MV, N);
+ if (fftw_2d_backward_estimate)
+ bench<Action_FFTW_2D_Backward_Estimate<fftw_interface> >(MIN_MV,MAX_MV, N);
+
+ if (fftw_3d_forward_measure)
+ bench<Action_FFTW_3D_Forward_Measure<fftw_interface> >(MIN_MV,250, N);
+ if (fftw_3d_forward_estimate)
+ bench<Action_FFTW_3D_Forward_Estimate<fftw_interface> >(MIN_MV,250, N);
+ if (fftw_3d_backward_measure)
+ bench<Action_FFTW_3D_Backward_Measure<fftw_interface> >(MIN_MV,250, N);
+ if (fftw_3d_backward_estimate)
+ bench<Action_FFTW_3D_Backward_Estimate<fftw_interface> >(MIN_MV,250, N);
+
+
+ return 0;
+}
+
+
diff --git a/btl/libs/LAPACK/lapack.hh b/btl/libs/LAPACK/lapack.hh
new file mode 100644
index 0000000..a6d46f1
--- /dev/null
+++ b/btl/libs/LAPACK/lapack.hh
@@ -0,0 +1,33 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef LAPACK_HH
+#define LAPACK_HH
+
+#define SCALAR float
+#define SCALAR_PREFIX s
+#include "lapack_.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+#define SCALAR double
+#define SCALAR_PREFIX d
+#include "lapack_.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+#endif /* LAPACK_HH */
diff --git a/btl/libs/LAPACK/lapack_.hh b/btl/libs/LAPACK/lapack_.hh
new file mode 100644
index 0000000..e7c8c3f
--- /dev/null
+++ b/btl/libs/LAPACK/lapack_.hh
@@ -0,0 +1,38 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef CAT
+# define CAT2(A,B) A##B
+# define CAT(A,B) CAT2(A,B)
+#endif
+#define LAPACKFUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void LAPACKFUNC(getrf)(const int*, const int*, SCALAR*, const int*, int*, int*);
+void LAPACKFUNC(potrf)(const char*, const int*, SCALAR*, const int*, int*);
+void LAPACKFUNC(geqrf)(const int*, const int*, SCALAR*, const int*, SCALAR*, SCALAR*, const int*, int*);
+void LAPACKFUNC(gesvd)(const char*, const char*, const int*, const int*, SCALAR*, const int*, SCALAR*, SCALAR*, const int*, SCALAR*, const int*, SCALAR*, const int*, int*);
+void LAPACKFUNC(syev)(const char*, const char*, const int*, SCALAR*, const int*, SCALAR*, SCALAR*, const int*, int*);
+void LAPACKFUNC(stev)(const char*, const int*, SCALAR*, SCALAR*, SCALAR*, const int*, SCALAR*, int*);
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/btl/libs/LAPACK/lapack_interface.hh b/btl/libs/LAPACK/lapack_interface.hh
new file mode 100644
index 0000000..a0ec52b
--- /dev/null
+++ b/btl/libs/LAPACK/lapack_interface.hh
@@ -0,0 +1,78 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef LAPACK_INTERFACE_HH
+#define LAPACK_INTERFACE_HH
+
+#include <../BLAS/c_interface_base.h>
+#include <complex>
+#include "lapack.hh"
+
+extern "C" {
+#include "../BLAS/blas.h"
+
+//void sgesv_(int*, int*, float *, int*, int*, float *, int*, int*);
+//void dgesv_(int*, int*, double*, int*, int*, double*, int*, int*);
+
+void sgels_(char*, int*, int*, int*, float *, int*, float *, int*, float *, int*, int*);
+void dgels_(char*, int*, int*, int*, double*, int*, double*, int*, double*, int*, int*);
+
+//void sgetrf_(int*, int*, float *, int*, int*, int*);
+//void dgetrf_(int*, int*, double*, int*, int*, int*);
+
+//void spotrf_(char*, int*, float *, int*, int*);
+//void dpotrf_(char*, int*, double*, int*, int*);
+
+//void ssyev_(char*, char*, int*, float *, int*, float *, float *, int*, int*);
+//void dsyev_(char*, char*, int*, double*, int*, double*, double*, int*, int*);
+}
+
+
+#define MAKE_STRING2(S) #S
+#define MAKE_STRING(S) MAKE_STRING2(S)
+
+#ifndef CAT
+# define CAT2(A,B) A##B
+# define CAT(A,B) CAT2(A,B)
+#endif
+
+template <typename real> class lapack_interface;
+
+
+static char notrans = 'N';
+static char trans = 'T';
+static char nonunit = 'N';
+static char lower = 'L';
+static char right = 'R';
+static char left = 'L';
+static int intone = 1;
+static int zeroint = 0;
+
+
+#define SCALAR float
+#define SCALAR_PREFIX s
+#include "lapack_interface_impl.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+#define SCALAR double
+#define SCALAR_PREFIX d
+#include "lapack_interface_impl.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+#endif /* LAPACK_INTERFACE_HH */
diff --git a/btl/libs/LAPACK/lapack_interface_impl.hh b/btl/libs/LAPACK/lapack_interface_impl.hh
new file mode 100644
index 0000000..a95bc5b
--- /dev/null
+++ b/btl/libs/LAPACK/lapack_interface_impl.hh
@@ -0,0 +1,139 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#define LAPACKFUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
+
+template<> class lapack_interface<SCALAR> : public c_interface_base<SCALAR>
+{
+public:
+
+ static inline std::string name()
+ {
+ return MAKE_STRING(LAPACKNAME);
+ }
+
+ static inline void general_solve(gene_matrix& A, gene_vector& b, gene_vector& x, int N)
+ {
+ int *ipiv = new int[N];
+ int info;
+ LAPACKFUNC(copy)(&N, b, &intone, x, &intone);
+ LAPACKFUNC(gesv)(&N, &intone, A, &N, ipiv, x, &N, &info);
+ delete[] ipiv;
+ }
+
+ static inline void least_squares(gene_matrix& A, gene_vector& b, gene_vector& x, int N)
+ {
+ int *ipiv = new int[N];
+ int info;
+ LAPACKFUNC(copy)(&N, b, &intone, x, &intone);
+ SCALAR work1;
+ int MONE = -1;
+ LAPACKFUNC(gels)(&notrans, &N, &N, &intone, A, &N, x, &N, &work1, &MONE, &info);
+ int lwork = (int)work1;
+ SCALAR *work2 = new SCALAR[lwork];
+ LAPACKFUNC(gels)(&notrans, &N, &N, &intone, A, &N, x, &N, work2, &lwork, &info);
+ delete[] work2;
+ delete[] ipiv;
+ }
+
+ static inline void lu_decomp(const gene_matrix& X, gene_matrix& C, int N)
+ {
+ int N2 = N*N;
+ int *ipiv = new int[N];
+ int info;
+ LAPACKFUNC(copy)(&N2, X, &intone, C, &intone);
+ LAPACKFUNC(getrf)(&N, &N, C, &N, ipiv, &info);
+ delete[] ipiv;
+ }
+
+ static inline void cholesky(const gene_matrix& X, gene_matrix& C, int N)
+ {
+ int N2 = N*N;
+ int info;
+ LAPACKFUNC(copy)(&N2, X, &intone, C, &intone);
+ LAPACKFUNC(potrf)(&lower, &N, C, &N, &info);
+ }
+
+ static inline void qr_decomp(const gene_matrix& X, gene_matrix& QR, gene_vector& tau, const int& N)
+ {
+ int N2 = N*N;
+ LAPACKFUNC(copy)(&N2, X, &intone, QR, &intone);
+
+ SCALAR *work = new SCALAR;
+ int lwork = -1, info;
+ LAPACKFUNC(geqrf)(&N, &N, QR, &N, tau, work, &lwork, &info);
+ lwork = *work;
+ delete work;
+ work = new SCALAR[lwork];
+ LAPACKFUNC(geqrf)(&N, &N, QR, &N, tau, work, &lwork, &info);
+ delete[] work;
+ }
+
+ static inline void svd_decomp(const gene_matrix& X, gene_matrix& U, gene_vector& S, gene_matrix& VT, const int& N)
+ {
+ int N2 = N*N;
+ stl_vector Xcopy(N2);
+ LAPACKFUNC(copy)(&N2, X, &intone, &Xcopy[0], &intone);
+
+ stl_vector work(1);
+ int lwork = -1, info;
+ LAPACKFUNC(gesvd)("A", "A", &N, &N, &Xcopy[0], &N, S, U, &N, VT, &N, &work[0], &lwork, &info);
+ lwork = work[0];
+ work.resize(lwork);
+ LAPACKFUNC(gesvd)("A", "A", &N, &N, &Xcopy[0], &N, S, U, &N, VT, &N, &work[0], &lwork, &info);
+ }
+
+ static inline void syev(const gene_matrix& X, gene_matrix& V, gene_vector& W, const int& N)
+ {
+ int N2 = N*N;
+ LAPACKFUNC(copy)(&N2, X, &intone, V, &intone);
+
+ stl_vector work(1);
+ int lwork = -1, info;
+ LAPACKFUNC(syev)("V", "U", &N, V, &N, W, &work[0], &lwork, &info);
+ lwork = work[0];
+ work.resize(lwork);
+ LAPACKFUNC(syev)("V", "U", &N, V, &N, W, &work[0], &lwork, &info);
+ }
+
+ /* Size of D, W: N; size of E: N-1, size of V: NxN */
+ static inline void stev(const gene_vector& D, const gene_vector& E, gene_vector& W, gene_matrix& V, const int& N)
+ {
+ int N0 = N;
+ int N1 = N-1;
+ LAPACKFUNC(copy)(&N0, D, &intone, W, &intone);
+ stl_vector E_(E, E+N-1), work(max(1, 2*N-2));
+
+ int info;
+ LAPACKFUNC(stev)("V", &N, W, &E_[0], V, &N, &work[0], &info);
+ }
+
+ static inline void symm_ev(const gene_matrix& X, gene_vector& W, int N)
+ {
+ char jobz = 'N';
+ SCALAR *work = new SCALAR;
+ int lwork = -1, info;
+ LAPACKFUNC(syev)(&jobz, &lower, &N, X, &N, W, work, &lwork, &info);
+ lwork = *work;
+ delete work;
+ work = new SCALAR[lwork];
+ LAPACKFUNC(syev)(&jobz, &lower, &N, X, &N, W, work, &lwork, &info);
+ delete[] work;
+ }
+
+
+};
diff --git a/btl/libs/LAPACK/main.cpp b/btl/libs/LAPACK/main.cpp
new file mode 100644
index 0000000..e70c161
--- /dev/null
+++ b/btl/libs/LAPACK/main.cpp
@@ -0,0 +1,95 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include <btl.hh>
+#include <bench.hh>
+#include <action_general_solve.hh>
+#include <action_least_squares.hh>
+#include <action_lu_decomp.hh>
+#include <action_cholesky.hh>
+#include <action_qr_decomp.hh>
+#include <action_svd_decomp.hh>
+#include <action_syev.hh>
+#include <action_stev.hh>
+#include <action_symm_ev.hh>
+#include <lapack_interface.hh>
+
+#include <string>
+#include <cstdlib>
+
+BTL_MAIN;
+
+int main(int argc, char **argv)
+{
+ bool
+ general_solve=false, least_squares=false, lu_decomp=false, cholesky=false, qr_decomp=false, svd_decomp=false,
+ syev=false, stev=false, symm_ev=false
+ ;
+ int N = 100;
+
+
+ for (int i = 1; i < argc; ++i) {
+ std::string arg = argv[i];
+ if (arg == "general_solve") general_solve = true;
+ else if (arg == "least_squares") least_squares = true;
+ else if (arg == "lu_decomp") lu_decomp = true;
+ else if (arg == "cholesky") cholesky = true;
+ else if (arg == "qr_decomp") qr_decomp = true;
+ else if (arg == "svd_decomp") svd_decomp = true;
+ else if (arg == "syev") syev = true;
+ else if (arg == "stev") stev = true;
+ else if (arg == "symm_ev") symm_ev = true;
+
+ // Check switch -N
+ else if (arg[0] == '-' && arg[1] == 'N') {
+ if (arg[2] != '\0')
+ N = atoi(arg.c_str()+2);
+ else
+ N = atoi(argv[++i]);
+ }
+ }
+
+
+ if (general_solve)
+ bench<Action_general_solve<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N);
+
+ if (least_squares)
+ bench<Action_least_squares<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N);
+
+ if (lu_decomp)
+ bench<Action_lu_decomp<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N);
+
+ if (cholesky)
+ bench<Action_cholesky<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N);
+
+ if (qr_decomp)
+ bench<Action_qr_decomp<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N);
+
+ if (svd_decomp)
+ bench<Action_svd_decomp<lapack_interface<REAL_TYPE> > >(MIN_MM,750,N);
+
+ if (syev)
+ bench<Action_syev<lapack_interface<REAL_TYPE> > >(MIN_MM,750,N);
+
+ if (stev)
+ bench<Action_stev<lapack_interface<REAL_TYPE> > >(MIN_MM,1000,N);
+
+ if (symm_ev)
+ bench<Action_symm_ev<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,N);
+
+ return 0;
+}
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp
new file mode 100644
index 0000000..427cf7e
--- /dev/null
+++ b/btl/libs/PBLAS/main.cpp
@@ -0,0 +1,103 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#define DISTRIBUTED
+
+#include "mpi.h"
+#include "utilities.h"
+#include "bench.hh"
+
+#include <iostream>
+//using namespace std;
+
+#include "pblas_interface.hh"
+#include "blacsinit.hh"
+
+#include "action_parallel_axpy.hh"
+#include "action_parallel_matrix_vector_product.hh"
+#include "action_parallel_lu_decomp.hh"
+#include "action_parallel_cholesky.hh"
+#include "action_parallel_qr_decomp.hh"
+#include "action_parallel_svd_decomp.hh"
+#include "action_parallel_symm_ev.hh"
+
+#include <string>
+
+BTL_MAIN;
+
+int main(int argc, char **argv)
+{
+ bool iamroot = blacsinit(&argc, &argv);
+
+ bool
+ axpy=false, matrix_vector=false,
+ lu_decomp=false, cholesky=false, qr_decomp=false, svd_decomp=false,
+ symm_ev=false
+ ;
+
+ int N = 100;
+
+
+ for (int i = 1; i < argc; ++i) {
+ std::string arg = argv[i];
+ if (arg == "axpy") axpy = true;
+ else if (arg == "matrix_vector") matrix_vector=true;
+ else if (arg == "lu_decomp") lu_decomp = true;
+ else if (arg == "cholesky") cholesky = true;
+ else if (arg == "qr_decomp") qr_decomp = true;
+ else if (arg == "svd_decomp") svd_decomp = true;
+ else if (arg == "symm_ev") symm_ev = true;
+
+ // Check switch -N
+ else if (arg[0] == '-' && arg[1] == 'N') {
+ if (arg[2] != '\0')
+ N = atoi(arg.c_str()+2);
+ else
+ N = atoi(argv[++i]);
+ }
+ }
+
+
+
+ if (axpy)
+ distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY, N, !iamroot);
+
+ if (matrix_vector)
+ distr_bench<Action_parallel_matrix_vector_product<pblas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV, N, !iamroot);
+
+ if (lu_decomp)
+ distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N, !iamroot);
+
+ if (cholesky)
+ distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, N, !iamroot);
+
+ if (qr_decomp)
+ distr_bench<Action_parallel_qr_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM, N, !iamroot);
+
+ if (svd_decomp)
+ distr_bench<Action_parallel_svd_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,750, N, !iamroot);
+
+ if (symm_ev)
+ distr_bench<Action_parallel_symm_ev<pblas_interface<REAL_TYPE> > >(MIN_MM,750, N, !iamroot);
+
+
+ int iZERO = 0;
+ blacs_exit_(&iZERO);
+ return 0;
+}
+
+
diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h
new file mode 100644
index 0000000..be76e8f
--- /dev/null
+++ b/btl/libs/PBLAS/pblas.h
@@ -0,0 +1,106 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef SCALAPACK_H_
+#define SCALAPACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ int numroc_(const int*, const int*, const int*, const int*, const int*);
+ void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*);
+
+
+ /* Level 1 */
+
+ // Single
+ void psaxpy_(const int*, const float*,
+ const float*, const int*, const int*, const int*, const int*,
+ const float*, const int*, const int*, const int*, const int*
+ );
+
+ // Double
+ void pdaxpy_(const int*, const double*,
+ const double*, const int*, const int*, const int*, const int*,
+ const double*, const int*, const int*, const int*, const int*
+ );
+
+
+
+ /* Level 2 */
+
+ // Single
+ void psgemv_(const char*, const int*, const int*,
+ const float*, const float*, const int*, const int*, const int*,
+ const float*, const int*, const int*, const int*, const int*,
+ const float*, float*, const int*, const int*, const int*, const int*
+ );
+
+ // Double
+ void pdgemv_(const char*, const int*, const int*,
+ const double*, const double*, const int*, const int*, const int*,
+ const double*, const int*, const int*, const int*, const int*,
+ const double*, double*, const int*, const int*, const int*, const int*
+ );
+
+
+ /* Level 3 */
+
+ // Single
+ void pstrmm_(const char*, const char*, const char*, const char*, const int*, const int*, const float*,
+ const float*, const int*, const int*, const int*,
+ float*, const int*, const int*, const int*);
+
+ // Double
+ void pdtrmm_(const char*, const char*, const char*, const char*, const int*, const int*, const double*,
+ const double*, const int*, const int*, const int*,
+ double*, const int*, const int*, const int*);
+
+
+
+ /*************
+ * Scalapack *
+ *************/
+ // lu_decomp
+ void psgetrf_(const int*, const int*, float*, const int*, const int*, const int*, int*, int*);
+ void pdgetrf_(const int*, const int*, double*, const int*, const int*, const int*, int*, int*);
+
+ // cholesky
+ void pspotrf_(const char*, const int*, float*, const int*, const int*, const int*, int*);
+ void pdpotrf_(const char*, const int*, double*, const int*, const int*, const int*, int*);
+
+ // qr_decomp
+ void psgeqpf_(const int*, const int*, float*, const int*, const int*, const int*, int*, float*, float*, const int*, int*);
+ void pdgeqpf_(const int*, const int*, double*, const int*, const int*, const int*, int*, double*, double*, const int*, int*);
+
+ // svd_decomp
+ void psgesvd_(const char*, const char*, const int*, const int*, float*, const int*, const int*, const int*, float*, float*, const int*, const int*, const int*, float*, const int*, const int*, const int*, float*, const int*, int*);
+ void pdgesvd_(const char*, const char*, const int*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, const int*, const int*, double*, const int*, int*);
+
+ // symm_ev
+ void pssyevd_(const char*, const char*, const int*, float*, const int*, const int*, const int*, float*, float*, const int*, const int*, const int*, float*, const int*, int*, const int*, int*);
+ void pdsyevd_(const char*, const char*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, int*, const int*, int*);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* SCALAPACK_H_ */
diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh
new file mode 100644
index 0000000..6dfd201
--- /dev/null
+++ b/btl/libs/PBLAS/pblas_interface.hh
@@ -0,0 +1,57 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include "blas.h"
+#include "pblas.h"
+#include "blacs_interface.hh"
+
+#define MAKE_STRING2(S) #S
+#define MAKE_STRING(S) MAKE_STRING2(S)
+
+#ifndef CAT
+#define CAT2(A,B) A##B
+#define CAT(A,B) CAT2(A,B)
+#endif
+
+
+template<class real> class pblas_interface;
+
+/*
+static char notrans = 'N';
+static char trans = 'T';
+static char nonunit = 'N';
+static char lower = 'L';
+static char right = 'R';
+static char left = 'L';
+static int intone = 1;
+*/
+
+#define SCALAR float
+#define SCALAR_PREFIX s
+#include "pblas_interface_impl.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+
+#define SCALAR double
+#define SCALAR_PREFIX d
+#include "pblas_interface_impl.hh"
+#undef SCALAR
+#undef SCALAR_PREFIX
+
+
+
diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh
new file mode 100644
index 0000000..a96b0fd
--- /dev/null
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh
@@ -0,0 +1,246 @@
+//=====================================================
+// Copyright (C) 2011 Andrea Arteaga <andyspiros@gmail.com>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+
+#define PBLAS_PREFIX p
+#define PBLAS_FUNC(NAME) CAT(CAT(CAT(PBLAS_PREFIX, SCALAR_PREFIX),NAME),_)
+#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
+
+template<> class pblas_interface<SCALAR> : public blacs_interface<SCALAR>
+{
+public:
+ static inline std::string name()
+ {
+ return MAKE_STRING(PBLASNAME);
+ }
+
+ static inline void parallel_axpy(const SCALAR& coef,
+ gene_vector& x, int *descX,
+ gene_vector& y, int *descY,
+ const int& size
+ )
+ {
+ int iZERO = 0, iONE = 1;
+ PBLAS_FUNC(axpy)(&size, &coef,
+ x, &iONE, &iONE, descX, &iONE,
+ y, &iONE, &iONE, descY, &iONE
+ );
+ }
+
+ static inline void parallel_matrix_vector_product(
+ int GlobalRows, int GlobalCols,
+ gene_matrix& A, int *descA,
+ gene_vector& x, int *descX,
+ gene_vector& y, int *descY
+ )
+ {
+ real_type alpha = 1., beta = 0.;
+ int iONE = 1;
+ int myid, procnum;
+ const char notrans = 'N';
+ blacs_pinfo_(&myid, &procnum);
+
+ PBLAS_FUNC(gemv)(&notrans, &GlobalRows, &GlobalCols,
+ &alpha, A, &iONE, &iONE, descA,
+ x, &iONE, &iONE, descX, &iONE,
+ &beta, y, &iONE, &iONE, descY, &iONE
+ );
+
+ }
+
+
+ static inline void parallel_lu_decomp(gene_matrix& X, std::vector<int>& ipiv, const int* desc)
+ {
+ const int GlobalRows = desc[2], GlobalCols = desc[3];
+ const int iONE = 1;
+ int info;
+ ipiv.resize(desc[8] + desc[4]);
+ PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc,
+ &ipiv[0], &info);
+ if (info != 0 && myid() == 0)
+ cout << " { LU error : " << info << " } ";
+ }
+
+ static inline void parallel_cholesky(gene_matrix& X, const int* desc)
+ {
+ const int N = desc[2], iONE = 1;
+ const char UP = 'U';
+ int info;
+ PBLAS_FUNC(potrf)(&UP, &N, X, &iONE, &iONE, desc, &info);
+ if (info != 0 && myid() == 0)
+ cout << " { cholesky error : " << info << " } ";
+ }
+
+ static inline void parallel_qr_decomp(gene_matrix& X, const int* desc)
+ {
+ const int GlobalRows = desc[2], GlobalCols = desc[3],
+ BlockRows = desc[4], BlockCols = desc[5],
+ ctxt = desc[1];
+
+ int myrow, mycol, nprow, npcol, lwork;
+ SCALAR lworkd;
+ blacs_gridinfo_(&ctxt, &nprow, &npcol, &myrow, &mycol);
+
+ const int iONE = 1, iZERO = 0, imONE = -1,
+ ipivdim = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &npcol);
+ int info;
+ std::vector<int> ipiv(ipivdim);
+ std::vector<SCALAR> tau(ipivdim);
+
+ // Retrieve LWORK
+ PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info);
+ lwork = static_cast<int>(lworkd);
+ if (info != 0 && myid() == 0)
+ cout << " { qr_decomp lwork error } ";
+
+ std::vector<SCALAR> work(lwork);
+ PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info);
+ if (info != 0 && myid() == 0)
+ cout << " { qr_decomp computation error } ";
+ }
+
+ static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ)
+ {
+ const char jobz = 'V', uplo = 'u';
+ const int N = descA[2], iONE = 1, iZERO = 0, imONE = -1;
+ std::vector<SCALAR> work;
+ std::vector<int> iwork;
+ int lwork, liwork, info;
+ SCALAR lworkd;
+
+ // Retrieve l(i)work
+ PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
+ Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info);
+ lwork = static_cast<int>(lworkd);
+ work.resize(lwork); iwork.resize(liwork);
+ if (info != 0 && myid() == 0)
+ cout << " { symm_ev l(i)work error } ";
+
+ PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
+ Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info);
+ if (info != 0 && myid() == 0)
+ cout << " { symm_ev computation error } ";
+ }
+
+ static inline void parallel_svd_decomp(gene_matrix& A, int* descA, gene_matrix& U, int *descU, gene_matrix& V, int *descV, gene_vector& s)
+ {
+ const char job = 'V';
+ const int size = descA[2], iONE = 1, iZERO = 0, imONE = -1;
+ std::vector<SCALAR> work;
+ int info, lwork;
+ SCALAR lworkd;
+
+ // Retrieve lwork
+ PBLAS_FUNC(gesvd)(&job, &job, &size, &size, A, &iONE, &iONE, descA, s,
+ U, &iONE, &iONE, descU, V, &iONE, &iONE, descV, &lworkd, &imONE, &info);
+ if (info != 0 && myid() == 0)
+ cout << " { svd_decomp lwork error } ";
+ lwork = static_cast<int>(lworkd);
+ work.resize(lwork);
+
+ PBLAS_FUNC(gesvd)(&job, &job, &size, &size, A, &iONE, &iONE, descA, s,
+ U, &iONE, &iONE, descU, V, &iONE, &iONE, descV, &work[0], &lwork, &info);
+ if (info != 0 && myid() == 0)
+ cout << " { svd_decomp computation error } ";
+ }
+
+ static inline real_type
+ test_LU(stl_matrix& Global_A, gene_matrix& Local_LU, int *desc)
+ {
+ bool iamroot = myid() == 0;
+ int _size = desc[2];
+
+ // Create and scatter Identity
+ int Testdesc[9];
+ stl_matrix Global_Test_stl, Local_Test_stl;
+ if (iamroot)
+ {
+ stl_matrix Identity(_size * _size);
+ for (int r = 0; r < _size; ++r)
+ Identity[r + _size * r] = 1;
+ scatter_matrix(Identity, Local_Test_stl, Testdesc, _size, _size,
+ desc[4], desc[5]);
+ }
+ else
+ scatter_matrix(stl_matrix(), Local_Test_stl, Testdesc);
+
+ // Compute L * U
+ real_type alpha = 1., malpha = -1;
+ int iONE = 1;
+ PBLAS_FUNC(trmm)("L", "L", "N", "N", desc + 2, desc + 2, &alpha, Local_LU,
+ &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc);
+ PBLAS_FUNC(trmm)("R", "U", "N", "N", desc + 2, desc + 2, &alpha, Local_LU,
+ &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc);
+
+ // Gather result
+ gather_matrix(Global_Test_stl, Local_Test_stl, desc);
+ if (iamroot)
+ {
+ int size2 = _size*_size;
+ BLAS_FUNC(axpy)(&size2, &malpha, &Global_A[0], &iONE,
+ &Global_Test_stl[0], &iONE);
+ double error = BLAS_FUNC(nrm2)(&size2, &Global_Test_stl[0], &iONE);
+ error /= BLAS_FUNC(nrm2)(&size2, &Global_A[0], &iONE);
+ return error;
+ }
+ else
+ return 0.;
+ }
+
+ static inline real_type
+ test_cholesky(stl_matrix& Global_A, gene_matrix& Local_U, int *desc)
+ {
+ bool iamroot = myid() == 0;
+ int _size = desc[2];
+
+ // Create and scatter Identity
+ int Testdesc[9];
+ stl_matrix Global_Test_stl, Local_Test_stl;
+ if (iamroot)
+ {
+ stl_matrix Identity(_size * _size);
+ for (int r = 0; r < _size; ++r)
+ Identity[r + _size * r] = 1;
+ scatter_matrix(Identity, Local_Test_stl, Testdesc, _size, _size,
+ desc[4], desc[5]);
+ }
+ else
+ scatter_matrix(stl_matrix(), Local_Test_stl, Testdesc);
+
+ // Compute U' * U
+ real_type alpha = 1., malpha = -1;
+ int iONE = 1;
+ PBLAS_FUNC(trmm)("L", "U", "T", "N", desc + 2, desc + 2, &alpha, Local_U,
+ &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc);
+ PBLAS_FUNC(trmm)("R", "U", "N", "N", desc + 2, desc + 2, &alpha, Local_U,
+ &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc);
+
+ // Gather result
+ gather_matrix(Global_Test_stl, Local_Test_stl, desc);
+ if (iamroot)
+ {
+ int size2 = _size*_size;
+ BLAS_FUNC(axpy)(&size2, &malpha, &Global_A[0], &iONE,
+ &Global_Test_stl[0], &iONE);
+ double error = BLAS_FUNC(nrm2)(&size2, &Global_Test_stl[0], &iONE);
+ error /= BLAS_FUNC(nrm2)(&size2, &Global_A[0], &iONE);
+ return error;
+ }
+ else
+ return 0.;
+ }
+};
diff --git a/btl/libs/STL/CMakeLists.txt b/btl/libs/STL/CMakeLists.txt
new file mode 100644
index 0000000..4cfc2dc
--- /dev/null
+++ b/btl/libs/STL/CMakeLists.txt
@@ -0,0 +1,2 @@
+
+btl_add_bench(btl_STL main.cpp OFF)
diff --git a/btl/libs/STL/STL_interface.hh b/btl/libs/STL/STL_interface.hh
new file mode 100644
index 0000000..060cb69
--- /dev/null
+++ b/btl/libs/STL/STL_interface.hh
@@ -0,0 +1,255 @@
+//=====================================================
+// File : STL_interface.hh
+// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#ifndef STL_INTERFACE_HH
+#define STL_INTERFACE_HH
+#include <string>
+#include <vector>
+#include "utilities.h"
+
+using namespace std;
+
+template<class real>
+class STL_interface{
+
+public :
+
+ typedef real real_type ;
+
+ typedef std::vector<real> stl_vector;
+ typedef std::vector<stl_vector > stl_matrix;
+
+ typedef stl_matrix gene_matrix;
+
+ typedef stl_vector gene_vector;
+
+ static inline std::string name( void )
+ {
+ return "STL";
+ }
+
+ static void free_matrix(gene_matrix & A, int N){}
+
+ static void free_vector(gene_vector & B){}
+
+ static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+ A = A_stl;
+ }
+
+ static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+ B = B_stl;
+ }
+
+ static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
+ B_stl = B ;
+ }
+
+
+ static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
+ A_stl = A ;
+ }
+
+ static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
+ for (int i=0;i<N;i++){
+ cible[i]=source[i];
+ }
+ }
+
+
+ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
+ for (int i=0;i<N;i++)
+ for (int j=0;j<N;j++)
+ cible[i][j]=source[i][j];
+ }
+
+// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
+// {
+// real somme;
+// for (int j=0;j<N;j++){
+// for (int i=0;i<N;i++){
+// somme=0.0;
+// for (int k=0;k<N;k++)
+// somme += A[i][k]*A[j][k];
+// X[j][i]=somme;
+// }
+// }
+// }
+
+ static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
+ {
+ real somme;
+ for (int j=0;j<N;j++){
+ for (int i=0;i<N;i++){
+ somme=0.0;
+ if(i>=j)
+ {
+ for (int k=0;k<N;k++){
+ somme+=A[k][i]*A[k][j];
+ }
+ X[j][i]=somme;
+ }
+ }
+ }
+ }
+
+
+ static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
+ {
+ real somme;
+ for (int j=0;j<N;j++){
+ for (int i=0;i<N;i++){
+ somme=0.0;
+ for (int k=0;k<N;k++)
+ somme+=A[k][i]*B[j][k];
+ X[j][i]=somme;
+ }
+ }
+ }
+
+ static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
+ {
+ real somme;
+ for (int i=0;i<N;i++){
+ somme=0.0;
+ for (int j=0;j<N;j++)
+ somme+=A[j][i]*B[j];
+ X[i]=somme;
+ }
+ }
+
+ static inline void matrix_vector_product(gene_vector& A, gene_vector & B, gene_vector & X, int N)
+ {
+ real somme;
+ for (int i=0;i<N;i++){
+ somme=0.0;
+ for (int j=0;j<N;j++)
+ somme+=A[j*N+i]*B[j];
+ X[i]=somme;
+ }
+ }
+
+ static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
+ {
+ for (int j=0; j<N; ++j)
+ X[j] = 0;
+ for (int j=0; j<N; ++j)
+ {
+ real t1 = B[j];
+ real t2 = 0;
+ X[j] += t1 * A[j][j];
+ for (int i=j+1; i<N; ++i) {
+ X[i] += t1 * A[j][i];
+ t2 += A[j][i] * B[i];
+ }
+ X[j] += t2;
+ }
+ }
+
+ static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
+ {
+ for (int j=0; j<N; ++j)
+ {
+ for (int i=j; i<N; ++i)
+ A[j][i] += B[i]*X[j] + B[j]*X[i];
+ }
+ }
+
+ static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N)
+ {
+ for (int j=0; j<N; ++j)
+ {
+ for (int i=j; i<N; ++i)
+ A[j][i] += X[i]*Y[j];
+ }
+ }
+
+ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
+ {
+ real somme;
+ for (int i=0;i<N;i++){
+ somme = 0.0;
+ for (int j=0;j<N;j++)
+ somme += A[i][j]*B[j];
+ X[i] = somme;
+ }
+ }
+
+ static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
+ for (int i=0;i<N;i++)
+ Y[i]+=coef*X[i];
+ }
+
+ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ for (int i=0;i<N;i++)
+ Y[i] = a*X[i] + b*Y[i];
+ }
+
+ static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
+ copy_vector(B,X,N);
+ for(int i=0; i<N; ++i)
+ {
+ X[i] /= L[i][i];
+ real tmp = X[i];
+ for (int j=i+1; j<N; ++j)
+ X[j] -= tmp * L[i][j];
+ }
+ }
+
+ static inline real norm_diff(const stl_vector & A, const stl_vector & B)
+ {
+ int N=A.size();
+ real somme=0.0;
+ real somme2=0.0;
+
+ for (int i=0;i<N;i++){
+ real diff=A[i]-B[i];
+ somme+=diff*diff;
+ somme2+=A[i]*A[i];
+ }
+ return somme/somme2;
+ }
+
+ static inline real norm_diff(const stl_matrix & A, const stl_matrix & B)
+ {
+ int N=A[0].size();
+ real somme=0.0;
+ real somme2=0.0;
+
+ for (int i=0;i<N;i++){
+ for (int j=0;j<N;j++){
+ real diff=A[i][j] - B[i][j];
+ somme += diff*diff;
+ somme2 += A[i][j]*A[i][j];
+ }
+ }
+
+ return somme/somme2;
+ }
+
+ static inline void display_vector(const stl_vector & A)
+ {
+ int N=A.size();
+ for (int i=0;i<N;i++){
+ INFOS("A["<<i<<"]="<<A[i]<<endl);
+ }
+ }
+
+};
+
+#endif
diff --git a/btl/libs/STL/main.cpp b/btl/libs/STL/main.cpp
new file mode 100644
index 0000000..4e73328
--- /dev/null
+++ b/btl/libs/STL/main.cpp
@@ -0,0 +1,42 @@
+//=====================================================
+// File : main.cpp
+// Author : L. Plagne <laurent.plagne@edf.fr)>
+// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//
+#include "utilities.h"
+#include "STL_interface.hh"
+#include "bench.hh"
+#include "basic_actions.hh"
+
+BTL_MAIN;
+
+int main()
+{
+ bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+ bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+ bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+ bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+ bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+ bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+ bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+ return 0;
+}
+
+