# HG changeset patch
# User Clement Pernet <clement.pernet@gmail.com>
# Date 1224064381 -7200
# Node ID 27b3032e924cbab41eb79da139e41d02f2533c6e
# Parent  7d525bf8b16f4c51f6e45e0b09dda5cdf3eae34c
* fixed bug in permutation applytrans
* added test_zero
* added license in parity
* added linear system solving using pluq
* added appropriate test suite for solve

diff -r 7d525bf8b16f -r 27b3032e924c src/packedmatrix.c
--- a/src/packedmatrix.c	Sat Oct 11 10:16:18 2008 +0200
+++ b/src/packedmatrix.c	Wed Oct 15 11:53:01 2008 +0200
@@ -984,6 +984,45 @@ permutation *mzd_col_block_rotate(packed
   return P;
 }
 
+
+
+
+int mzd_test_zero(packedmatrix *A)
+{
+  /* Could be improved: stopping as the first non zero value is found (status!=0)*/
+  size_t mb = A->nrows;
+  size_t nb = A->ncols;
+  size_t Aoffset = A->offset;
+  size_t nbrest = (nb + Aoffset) % RADIX;
+  int status=0;
+  if (nb + Aoffset >= RADIX) {
+          // Large A
+    word mask_begin = RIGHT_BITMASK(RADIX-Aoffset);
+    if (Aoffset == 0)
+      mask_begin = ~mask_begin;
+    word mask_end = LEFT_BITMASK(nbrest);
+    size_t i;
+    for (i=0; i<mb; ++i) {
+        status |= A->values [A->rowswap [i]] & mask_begin;
+        size_t j;
+        for ( j = 1; j < A->width-1; ++j)
+            status |= A->values [A->rowswap [i] + j];
+        status |= A->values [A->rowswap [i] + A->width - 1] & mask_end;
+    }
+  } else {
+          // Small A
+    word mask = ((ONE << nb) - 1) ;
+    mask <<= (RADIX-nb-Aoffset);
+
+    size_t i;
+    for (i=0; i < mb; ++i) {
+        status |= A->values [A->rowswap [i]] & mask;
+    }
+  }
+  
+  return !status;
+}
+
 void mzd_apply_p_left(packedmatrix *A, permutation *P) {
   assert(A->offset == 0);
   size_t i;
@@ -995,8 +1034,8 @@ void mzd_apply_p_left(packedmatrix *A, p
 
 void mzd_apply_p_left_trans(packedmatrix *A, permutation *P) {
   assert(A->offset == 0);
-  size_t i;
-  for (i=0; i<P->length; i++) {
+  int i;
+  for (i=P->length-1; i>=0; i--) {
     if(P->values[i] != i) 
       mzd_row_swap(A, i, P->values[i]);
   }
@@ -1004,8 +1043,8 @@ void mzd_apply_p_left_trans(packedmatrix
 
 void mzd_apply_p_right_trans(packedmatrix *A, permutation *P) {
   assert(A->offset == 0);
-  size_t i;
-  for (i=0; i<P->length; i++) {
+  int i;
+  for (i = P->length-1;i>=0; i--) {
     if(P->values[i] != i) 
       mzd_col_swap(A, i, P->values[i]);
   }
@@ -1014,7 +1053,7 @@ void mzd_apply_p_right(packedmatrix *A, 
 void mzd_apply_p_right(packedmatrix *A, permutation *P) {
   assert(A->offset == 0);
   size_t i;
-  for (i=0; 0<P->length; i++) {
+  for (i=0; i<P->length; i++) {
     if(P->values[i] != i) 
       mzd_col_swap(A, i, P->values[i]);
   }
diff -r 7d525bf8b16f -r 27b3032e924c src/packedmatrix.h
--- a/src/packedmatrix.h	Sat Oct 11 10:16:18 2008 +0200
+++ b/src/packedmatrix.h	Wed Oct 15 11:53:01 2008 +0200
@@ -762,6 +762,17 @@ static inline void mzd_clear_bits(const 
   }
 }
 
+
+
+/**
+ * \brief Zero test for matrix.
+ *
+ * \param A Input matrix.
+ *
+ */
+int mzd_test_zero(packedmatrix *A) ;
+
+
 /**
  * Rotate zero columns to the end.
  *
diff -r 7d525bf8b16f -r 27b3032e924c src/parity.h
--- a/src/parity.h	Sat Oct 11 10:16:18 2008 +0200
+++ b/src/parity.h	Wed Oct 15 11:53:01 2008 +0200
@@ -5,6 +5,27 @@
  *
  * \author David Harvey
  */
+
+#ifndef PARITY_H
+#define PARITY_H
+ /*******************************************************************
+ *
+ *            M4RI: Method of the Four Russians Inversion
+ *
+ *       Copyright (C) 2008 David Harvey
+ *
+ *  Distributed under the terms of the GNU General Public License (GPL)
+ *
+ *    This code is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *    General Public License for more details.
+ *
+ *  The full text of the GPL is available at:
+ *
+ *                  http://www.gnu.org/licenses/
+ *
+ ********************************************************************/
 
 /**
  * \brief Step for mixing two 64-bit words to compute their parity.
@@ -98,3 +119,5 @@ static inline word parity64(word* buf)
 
    return MIX1(e1, e0);
 }
+
+#endif
diff -r 7d525bf8b16f -r 27b3032e924c src/solve.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/solve.c	Wed Oct 15 11:53:01 2008 +0200
@@ -0,0 +1,151 @@
+ /*******************************************************************
+ *
+ *            M4RI: Method of the Four Russians Inversion
+ *
+ *       Copyright (C) 2008 Jean-Guillaume.Dumas@imag.fr
+ *
+ *  Distributed under the terms of the GNU General Public License (GPL)
+ *
+ *    This code is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *    General Public License for more details.
+ *
+ *  The full text of the GPL is available at:
+ *
+ *                  http://www.gnu.org/licenses/
+ *
+ ********************************************************************/
+
+#include "solve.h"
+#include "strassen.h"
+#include "pluq.h"
+#include "trsm.h"
+#include "permutation.h"
+
+/**
+ * \brief System solving with matrix.
+ *
+ * Solves A X = B where A, and B are input matrices.
+ * The solution X is stored inplace on B
+ * * 
+ *
+ * \param A Input matrix.
+ * \param B Input matrix, being overwritten by the solution matrix X
+ * \param cutoff Minimal dimension for Strassen recursion.
+ *
+ */
+void mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck) 
+{    
+  if(A->ncols > B->nrows)
+    m4ri_die("mzd_solve_left: A ncols (%d) need to be lower than B nrows (%d).\n", A->ncols, B->nrows);
+  if (cutoff <= 0)
+    m4ri_die("mzd_solve_left: cutoff must be > 0.\n");
+
+  _mzd_solve_left (A, B, cutoff, InconsistencyCheck);
+}
+ 
+void mzd_pluq_solve_left (packedmatrix *A, size_t rank, 
+                          permutation *P, permutation *Q, 
+                          packedmatrix *B, const int cutoff, const int InconsistencyCheck) 
+{
+  if(A->ncols > B->nrows)
+    m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to be lower than B nrows (%d).\n", A->ncols, B->nrows);
+  if(P->length != A->nrows)
+      m4ri_die("mzd_pluq_solve_left: A nrows (%d) need to match P size (%d).\n", A->nrows, P->length);
+  if(Q->length != A->ncols)
+      m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to match Q size (%d).\n", A->ncols, P->length);
+
+  if (cutoff <= 0)
+    m4ri_die("mzd_pluq_solve_left: cutoff must be > 0.\n");
+
+  _mzd_pluq_solve_left (A, rank, P, Q, B, cutoff, InconsistencyCheck);
+}
+
+void _mzd_pluq_solve_left (packedmatrix *A, size_t rank, 
+                           permutation *P, permutation *Q, 
+                           packedmatrix *B, const int cutoff, const int InconsistencyCheck) 
+{
+        /** A is supposed to store L lower triangular and U upper triangular
+         *  B is modified in place 
+         *  (Bi's in the comments are just modified versions of B)
+         *  PLUQ = A
+         *  1°) P B2 = B1
+         *  2°) L B3 = B2
+         *  3°) U B4 = B3
+         *  4°) Q B5 = B4
+         */
+    
+        /* P B2 = B1 or B2 = P^T B1*/
+    mzd_apply_p_left_trans(B, P);
+    
+        /* L B3 = B2 */
+   
+        /* view on the upper part of L */
+    packedmatrix *LU = mzd_init_window(A,0,0,rank,rank);
+    packedmatrix *Y1 = mzd_init_window(B,0,0,rank,B->ncols);
+    _mzd_trsm_lower_left(LU, Y1, cutoff);
+    
+    if (InconsistencyCheck) {
+          /* Check for inconsistency */
+          /** FASTER without this check
+           * 
+           * update with the lower part of L 
+           */
+      packedmatrix *H = mzd_init_window(A,rank,0,A->nrows,rank);
+      packedmatrix *Y2 = mzd_init_window(B,rank,0,B->nrows,B->ncols);
+      mzd_addmul(Y2, H, Y1, cutoff);
+          /*
+           * test whether Y2 is the zero matrix
+           */
+      if(! mzd_test_zero(Y2) ) {
+          printf("inconsistent system of size %d x %d\n", Y2->nrows, Y2->ncols);
+          printf("Y2=");
+          mzd_print_matrix(Y2);
+      }
+      mzd_free_window(H);
+      mzd_free_window(Y2);
+    }
+        /* U B4 = B3 */
+    _mzd_trsm_upper_left(LU, Y1, cutoff);
+    mzd_free_window(LU);
+    mzd_free_window(Y1);
+
+    if (! InconsistencyCheck) {
+       /** Default is to set the indefined bits to zero 
+         * if inconsistency has been checked then 
+         *    Y2 bits are already all zeroes
+         * thus this clearing is not needed
+         */
+      if (rank < B->nrows) mzd_clear_bits(B,rank,0, (B->nrows-rank)*B->ncols);
+    }
+    
+        /* Q B5 = B4 or B5 = Q^T B4*/
+    mzd_apply_p_left_trans(B, Q);
+        /* P L U Q B5 = B1 */
+}
+
+
+void _mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck) 
+{
+        /**
+         *  B is modified in place 
+         *  (Bi's in the comments are just modified versions of B)
+         *  1°) PLUQ = A
+         *  2°) P B2 = B1
+         *  3°) L B3 = B2
+         *  4°) U B4 = B3
+         *  5°) Q B5 = B4
+         */
+    permutation * P = mzp_init(A->nrows);
+    permutation * Q = mzp_init(A->ncols);
+    
+        /* PLUQ = A */
+    size_t rank = _mzd_pluq(A, P, Q, cutoff);  
+        /* 2°, 3°, 4°, 5° */
+    mzd_pluq_solve_left(A, rank, P, Q, B, cutoff, InconsistencyCheck);
+
+    mzp_free(P);
+    mzp_free(Q);
+}
+
diff -r 7d525bf8b16f -r 27b3032e924c src/solve.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/solve.h	Wed Oct 15 11:53:01 2008 +0200
@@ -0,0 +1,123 @@
+/**
+ * \file solve.h
+ *
+ * \brief system solving with matrix routines.
+ *
+ * \author Jean-Guillaume Dumas <Jean-Guillaume.Dumas@imag.fr>
+ *
+ */
+#ifndef SOLVE_H
+#define SOLVE_H
+ /*******************************************************************
+ *
+ *            M4RI: Method of the Four Russians Inversion
+ *
+ *       Copyright (C) 2008 Jean-Guillaume.Dumas@imag.fr
+ *
+ *  Distributed under the terms of the GNU General Public License (GPL)
+ *
+ *    This code is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *    General Public License for more details.
+ *
+ *  The full text of the GPL is available at:
+ *
+ *                  http://www.gnu.org/licenses/
+ *
+ ********************************************************************/
+
+
+#include "misc.h"
+#include "packedmatrix.h"
+#include "stdio.h"
+
+/**
+ * \brief System solving with matrix.
+ *
+ * Solves A X = B where A, and B are input matrices.
+ * The solution X is stored inplace on B
+ *
+ * \param A Input matrix.
+ * \param B Input matrix, being overwritten by the solution matrix X
+ * \param cutoff Minimal dimension for Strassen recursion.
+ * \param InconsistencyCheck decide wether or not to check for incosistency (faster without but output not defined if system is not consistent).
+ *
+ */
+void mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck);
+
+
+/**
+ * \brief System solving with matrix.
+ *
+ * Solves (P L U Q) X = B where 
+ * A is an input matrix supposed to store both:
+ *      an upper right triangular matrix U
+ *      a lower left unitary triangular matrix L
+ * P and Q are permutation matrices
+ * B is the input matrix to be solved.
+ * The solution X is stored inplace on B
+ * This version assumes that the matrices are at an even position on
+ * the RADIX grid and that their dimension is a multiple of RADIX.
+ *
+ * \param A Input upper/lower triangular matrices.
+ * \param rank is rank of A.
+ * \param P Input row permutation matrix.
+ * \param Q Input column permutation matrix.
+ * \param B Input matrix, being overwritten by the solution matrix X.
+ * \param cutoff Minimal dimension for Strassen recursion.
+ * \param InconsistencyCheck decide whether or not to check for incosistency (faster without but output not defined if system is not consistent).
+ * 
+ */
+void mzd_pluq_solve_left (packedmatrix *A, size_t rank, 
+                          permutation *P, permutation *Q, 
+                          packedmatrix *B, const int cutoff, const int InconsistencyCheck);
+
+
+
+/**
+ * \brief System solving with matrix.
+ *
+ * Solves (P L U Q) X = B where 
+ * A is an input matrix supposed to store both:
+ *      an upper right triangular matrix U
+ *      a lower left unitary triangular matrix L
+ * P and Q are permutation matrices
+ * B is the input matrix to be solved.
+ * The solution X is stored inplace on B
+ * This version assumes that the matrices are at an even position on
+ * the RADIX grid and that their dimension is a multiple of RADIX.
+ *
+ * \param A Input upper/lower triangular matrices.
+ * \param rank is rank of A.
+ * \param P Input row permutation matrix.
+ * \param Q Input column permutation matrix.
+ * \param B Input matrix, being overwritten by the solution matrix X.
+ * \param cutoff Minimal dimension for Strassen recursion.
+ * \param InconsistencyCheck decide whether or not to check for incosistency (faster without but output not defined if system is not consistent).
+ *
+ * \internal
+ */
+void _mzd_pluq_solve_left (packedmatrix *A, size_t rank, 
+                           permutation *P, permutation *Q, 
+                           packedmatrix *B, const int cutoff, const int InconsistencyCheck);
+
+/**
+ * \brief System solving with matrix.
+ *
+ * Solves A X = B where A, and B are input matrices.
+ * The solution X is stored inplace on B
+ * This version assumes that the matrices are at an even position on
+ * the RADIX grid and that their dimension is a multiple of RADIX.
+ *
+ * \param A Input matrix.
+ * \param B Input matrix, being overwritten by the solution matrix X.
+ * \param cutoff Minimal dimension for Strassen recursion.
+ * \param InconsistencyCheck decide whether or not to check for incosistency (faster without but output not defined if system is not consistent).
+ *
+ * \internal
+ */
+void _mzd_solve_left (packedmatrix *A, packedmatrix *B, const int cutoff, const int InconsistencyCheck);
+
+
+#endif // SOLVE_H
diff -r 7d525bf8b16f -r 27b3032e924c testsuite/test_solve.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/testsuite/test_solve.c	Wed Oct 15 11:53:01 2008 +0200
@@ -0,0 +1,156 @@
+#include <stdlib.h>
+#include "m4ri.h"
+#include "strassen.h"
+#include "trsm.h"
+#include "solve.h"
+
+int test_pluq_solve_left (int m, int n, int offsetA, int offsetB){
+  packedmatrix* Abase = mzd_init (2048,2048);
+  packedmatrix* Bbase = mzd_init (2048,2048);
+  mzd_randomize (Abase);
+  mzd_randomize (Bbase);
+  packedmatrix* Bbasecopy = mzd_copy (NULL, Bbase);
+
+  packedmatrix* A = mzd_init_window (Abase, 0, offsetA, m, offsetA + m);
+  packedmatrix* B = mzd_init_window (Bbase, 0, offsetB, m, offsetB + n);
+  
+  size_t i,j;
+
+  packedmatrix* W = mzd_init (B->nrows, B->ncols);
+  for ( i=0; i<B->nrows; ++i)
+      for ( j=0; j<B->ncols; ++j)
+          mzd_write_bit(W,i,j, mzd_read_bit (B,i,j));
+
+  for (i=0; i<m; ++i){
+    mzd_write_bit(A,i,i, 1);
+  }  
+
+  mzd_solve_left(A, B, 2048, 1);
+
+/*  mzd_addmul(W, A, B, 2048); */
+  packedmatrix *L = mzd_init(A->nrows,A->nrows);
+  for ( i=0; i<m; ++i)
+      for ( j=0; j<=i; ++j)
+          if (mzd_read_bit (A,i,j)) 
+              mzd_write_bit(L,i,j,1);
+  
+
+  packedmatrix *U = mzd_init(A->nrows,A->ncols);
+  for ( i=0; i<A->nrows; ++i)
+      for ( j=i; j<A->ncols; ++j)
+          if (mzd_read_bit (A,i,j)) 
+              mzd_write_bit(U,i,j,1);
+
+  packedmatrix *X = mzd_init(B->nrows,B->ncols);
+  for ( i=0; i<B->nrows; ++i)
+      for ( j=0; j<B->ncols; ++j)
+          mzd_write_bit(X,i,j, mzd_read_bit (B,i,j));
+  packedmatrix *T = mzd_init(A->nrows,B->ncols);
+  mzd_mul(T, U, X, 2048);
+
+  packedmatrix *H = mzd_init(B->nrows,B->ncols);
+  mzd_mul(H, L, T, 2048);
+
+  packedmatrix *Z = mzd_init(B->nrows,B->ncols);
+
+  mzd_add(Z, W, H);
+
+  int status = 0;
+  for ( i=0; i<m; ++i)
+    for ( j=0; j<n; ++j){
+      if (mzd_read_bit (Z,i,j)){
+	status = 1;
+      }
+    }
+/*
+  // Verifiying that nothing has been changed around the submatrices
+  mzd_addmul(W, A, B, 2048);
+
+  mzd_copy (B, W);
+
+  for ( i=0; i<2048; ++i)
+    for ( j=0; j<2048/RADIX; ++j){
+      if (Bbase->values [Bbase->rowswap[i] + j] != Bbasecopy->values [Bbasecopy->rowswap[i] + j]){
+	status = 1;
+      }
+    }
+*/
+
+  mzd_free(L);
+  mzd_free(U);
+  mzd_free(T);
+  mzd_free(H);
+  mzd_free(Z);
+  mzd_free_window (A);
+  mzd_free_window (B);
+  mzd_free_window (W);
+  mzd_free(Abase);
+  mzd_free(Bbase);
+  mzd_free(Bbasecopy);
+
+  if (!status)
+    printf("passed\n");
+  else
+    printf("failed\n");
+  return status;
+}
+
+int main(int argc, char **argv) {
+  int status = 0;
+
+  printf("SolveLeft: small A even, small B odd  ... ");
+  status += test_pluq_solve_left (2, 4, 0, 1);
+
+  printf("SolveLeft: small A even, small B even ... ");
+  status += test_pluq_solve_left (10, 20, 0, 0);
+  printf("SolveLeft: small A even, large B even ... ");
+  status += test_pluq_solve_left (10, 80, 0, 0);
+
+  printf("SolveLeft: small A even, small B odd  ... ");
+  status += test_pluq_solve_left (10, 20, 0, 15);
+  printf("SolveLeft: small A even, large B odd  ... ");
+  status += test_pluq_solve_left (10, 80, 0, 15);
+
+  printf("SolveLeft: small A odd, small B even  ... ");
+  status += test_pluq_solve_left (10, 20, 15, 0);
+  printf("SolveLeft: small A odd, large B even  ... ");
+  status += test_pluq_solve_left (10, 80, 15, 0);
+
+  printf("SolveLeft: small A odd, small B odd   ... ");
+  status += test_pluq_solve_left (10, 20, 15, 20);
+  printf("SolveLeft: small A odd, large B odd   ... ");
+  status += test_pluq_solve_left (10, 80, 15, 20);
+
+  printf("SolveLeft: large A even, small B even ... ");
+  status += test_pluq_solve_left (70, 20, 0, 0);
+  printf("SolveLeft: large A even, large B even ... ");
+  status += test_pluq_solve_left (70, 80, 0, 0);
+
+  printf("SolveLeft: large A even, small B odd  ... ");
+  status += test_pluq_solve_left (70, 10, 0, 15);
+  printf("SolveLeft: large A even, large B odd  ... ");
+  status += test_pluq_solve_left (70, 80, 0, 15);
+
+  printf("SolveLeft: large A odd, small B even  ... ");
+  status += test_pluq_solve_left (70, 20, 15, 0);
+  printf("SolveLeft: large A odd, large B even  ... ");
+  status += test_pluq_solve_left (70, 80, 15, 0);
+
+  printf("SolveLeft: large A odd, small B odd   ... ");
+  status += test_pluq_solve_left (70, 20, 15, 20);
+  printf("SolveLeft: large A odd, large B odd   ... ");
+  status += test_pluq_solve_left (70, 80, 15, 20);
+
+  printf("SolveLeft: larger A odd, larger B odd ... ");
+  status += test_pluq_solve_left (770, 1600, 75, 89);
+  printf("SolveLeft: larger A odd, larger B odd ... ");
+  status += test_pluq_solve_left (1764, 1345, 198, 123);
+
+  
+  if (!status) {
+    printf("All tests passed.\n");
+  }
+
+  m4ri_destroy_all_codes();
+  return 0;
+}
