Gauss Jordan Elimination and SVD Least Squares Algorithm

Answer for assignment 9

The General Least Squares Fit

The least squares error is given by chi^2 = sum((yn - af(xn) -bg(un) -c)^2)

Taking the partial derivatives of this equation with respect to the coefficients a, b, and c we arrive at

dchi^2/da = sum(-2(yn-af(xn)-bg(un)-c)f(xn))

dchi^2/db = sum(-2(yn-af(xn)-bg(un)-c)g(xn))

dchi^2/dc = sum(-2(yn-af(xn)-bg(un)-c))

Setting these partial derivatives to zero will minimize the least squares error. It will also allow us to write the resultant equations in matrix form.

sum((yn-af(xn)-bg(un)-c)f(xn))) = 0

sum(ynf(xn)) - a*sum(f(xn)f(xn)) - b*sum(g(un)f(xn) -c*sum(f(xn)) = 0

Solving for sum(ynf(xn)) and transforming into a matrix equation gives us

(sum(f(xn)f(xn)) + sum(g(un)f(xn)) + sum(f(xn)))(a/b/c) = sum(ynf(xn));

This is for the first row in the matrix using the partial derivative with respect to a.

The next rows can also be found in the same manner yielding a 3x3 matrix whose values are equal to what they would be if the transpose of the design matrix were multipiled by itself, and the design matrix was multiplied by the solution matrix on the left hand side.

In order to find the coefficients a, b and c given the noisy data and the dynamical equation xn+1=a+bxn-1+cxn^2, the Gauss Jordan elimination method and SVD decomposition with back-substitution was employed. The coefficients were found to be

a = [1.40108 ]
b = [0.299191 ]
c = [-0.999894 ]

The difference between the two methods was minimal:

aGJ - aSVD = 1.77636e-015
bGJ - bSVD = -5.55112e-017
cGJ - cSVD = -5.55112e-016

This is indicative of our data set not being ill-conditioned.

The iterative equation was extrapolated and compared with the actual values to give:

Predicted	Actual
1.52691	1.5264
-0.802081	-0.793814
1.21465	1.22997
-0.314112	-0.344299
1.66583	1.64695

Further information can be found in the output file, or by running the source code.

Further work needs be done to understand SVD decomposition.

The goal of this effort is to produce a matrix library such as the The Bioinformatics Template Library written using the Standard Template Library. Sourceforge's linalg library (see source code for link) does a good job of this. Neither of these libraries would compile on my Windows 98 machine.

Source Code

//
// Noisy.cpp is designed to examine two methods of performing a least squares approximation on 
//	a given set of experimental data.   This program matches the data to the the iterative function
//
// x(n+1) = a + bx(n-1) + c*xn^2
//
// The two methods used are Gauss-Jordan Elimination and SVD with backsubstitution.  
// 
// References
//
//	http://sourceforge.net/projects/linalg/  - warning : this hard crashed Win98 when linking the DLL
//

#include <vector>
#include <map>
#include "Function.h"

typedef vector<vector<double> > dvector ;

class Matrix : public dvector
{

public:
	Matrix(int a, int b) : vector<vector<double> >(a), nRows(a), nCols(b)
	{
		for (int i =0;i<a;i++)
			(*this)[i].resize(b);
	}

	Matrix(vector<double>& a) : vector<vector<double> > ((size_type)a.size()), nCols(1), nRows(a.size())
	{
		for (int i =0;i<a.size();i++)
			(*this)[i][0] = a[i];
	}

	Matrix(dvector::const_iterator b, dvector::const_iterator e, int firstColumn, int numberOfColumns)
		: dvector(b,e), nCols(numberOfColumns)
	{//tbd
	}

	void subMatrix(Matrix m, int first_row, int numOfRows, int firstCol, int numOfCols)
	{//tbd
	}

	//has bug
	Matrix minor(int row, int col)
	{
		Matrix m(*this);
		dvector::iterator row_iter = m.begin();
		for (int i = 0; i< m.numOfRows();i++, row_iter++)
		{
			if (i== row)
				m.erase(row_iter);
			else
			{
				vector<double>::iterator col_iter = row_iter->begin();
				for (int c=0;c<m.numOfColumns();c++, col_iter++);
				row_iter->erase(col_iter);
			}
		};
		return m;
		//attempt to use STL to perform matrix operations efficiently
	    //copy(map.begin(), map2.end(), insert_iterator<Matrix> (map1, map1.begin())
	}

	//recursive alogrithm grinds to a halt for matrices with rank greater than 7
	double det(int row=0, int col=0)
	{
		if (nRows != nCols) cerr << "Determinant can not be found for non square matrix [" << nRows << "][" << nCols << "]" << endl;
		double sum = (nRows==1) ? (*this)[0][0] : 0;
		for (int r = 0;r < nRows;r++)
			if ((r)%2)
					sum += (*this)[row][col]*minor(row,col).det();
				else
					sum -= (*this)[row][col]*minor(row,col).det();
		return sum;
	}

	Matrix operator+(const Matrix& m)
	{
		Matrix ret(nRows, nCols);
		for (int i=0; i < m.size(); i++)
			for (int j=0;j < m[i].size(); j++)
				ret[i][j] = m[i][j]+(*this)[i][j];
		return ret;
	};

	Matrix operator~()  //transpose
	{
		Matrix ret(nCols,nRows);
		for (int i=0; i < nRows; i++)
			for (int j=0;j < nCols; j++)
				ret[j][i] = (*this)[i][j];
		return ret;
	};

	Matrix operator*(const Matrix& m)
	{
		Matrix ret(nRows, m.numOfColumns());
		for (int i=0; i < ret.numOfRows(); i++)
		{
			for (int j=0;j < ret.numOfColumns(); j++)
			{
				double sum = 0;
				for (int k = 0; k < nCols;k++)
					sum += m[k][j]*(*this)[i][k];
				ret[i][j] = sum;
			}
		}
		return ret;
	}

	Matrix operator-(const Matrix& m)
	{
		Matrix ret(nRows, nCols);
		for (int i=0; i < ret.numOfRows(); i++)
		{
			for (int j=0;j < ret.numOfColumns(); j++)
			{
				ret[i][j] = (*this)[i][j] - m[i][j];
			}
		}
		return ret;
	}

	virtual void setsize(int a, int b)
	{
		resize(a);
		for (int i=0;i<a;i++)
			(*this)[i].resize(b);
		nRows = a;
		nCols = b;
	}

	vector<double> getColumn(int colNum)
	{
		vector<double> ret(nRows);
		for (int i=0;i<nRows;i++)
			ret[i] = (*this)[i][colNum];
		return ret;
	};

	void gaussJordanElimination(Matrix& multipliers, vector<int>& interchanges)
	{
		for (int i = 0 ; i < nCols;i++)
		{
			for (int j = i+1 ; j < nRows; j++)
			{
				//if ((*this)[i][i] == 0) //what to do.. what to do...
				pivot(multipliers, j , nRows);  //stolen from posted solution 
				multipliers[j][i] = (*this)[j][i]/(*this)[i][i]; //multipliers of ith row that is subtracted from the kth row during the ith step of the reduction process
				for (int k=i;k< nCols;k++)
					(*this)[j][k] -= multipliers[j][i]*(*this)[i][k];
			}
		};
		//cout << "After gauss-jordan elimination this is \n" << *this << endl;
		//cout << "The multipliers were \n" << multipliers << endl;
	}

	void gaussJordan(Matrix& solution)
	{

		Matrix multipliers(nRows, nCols);
		vector<int> interchanges;
		int i, j;

		gaussJordanElimination(multipliers, interchanges);

		//forward substitution
		for (i = 0;i<solution.numOfRows();i++)
			for (j=0;j<i;j++)
				solution[i][0] -= multipliers[i][j]*solution[j][0];

		//cout << "solution after forward substitution\n" << solution << endl;
		//back substitution
		for (i = solution.numOfRows()-1; i>=0;i--)
		{
			for (j = i+1;j< solution.numOfRows();j++)
				solution[i][0] -= (*this)[i][j]*solution[j][0];
			solution[i][0] /=(*this)[i][i];
		}
	};

	//from posted solution 
	void pivot(Matrix& solution,int level,int n)
	//this will find the largest pivot and rotate row
	//	to find the largest, it will only look downward
	{
		int i,maxi;
		double maxval;

		//find max pivot
		maxval=fabs((*this)[level][level]);
		maxi=level;
		for(i=level+1;i<n;i++)
			{
			if(fabs((*this)[i][level])>maxval)
				{
				maxval=(*this)[i][level];
				maxi=i;
				}
			}

		//rotate rows
		if(maxi!=level)
		{
			for(i=level;i<n;i++)
				{
				maxval=(*this)[level][i];
				(*this)[level][i]=(*this)[maxi][i];
				(*this)[maxi][i]=maxval;
				}
			maxval=solution[level][0];
			solution[level][0]=solution[maxi][0];
			solution[maxi][0]=maxval;
		}
	}

	friend ostream& operator<<(ostream& out, const Matrix& m)
	{
		for (int i =0;i<m.size();i++)
		{
			out << i << "\t";
			for (int j = 0;j<m[i].size();j++)
				out << ((double)m[i][j]) << "\t";
			out  << endl;
		};
		return out;
	};
	int numOfRows() const {return nRows;}
	int numOfColumns() const {return nCols;};
protected:
	int nRows;
	int nCols;
};

//taken from Press (;-)shhh
void svdbksb(Matrix& u, vector<double>& w, Matrix& v, Matrix& b, vector<double>& x)
{
	int jj, j, i;
	double s;

	int m = u.numOfRows();
	int n = u.numOfColumns();

	vector<double> tmp(n);
	for (j=0;j<n;j++)
	{
		s=0.0;
		if (w[j])
		{
			for (i=0;i<m;i++) s +=- u[i][j]*b[i][0];
			s /=w[j];
		}
		tmp[j] = s;
	}
	for (j=0;j<n;j++)
	{
		s=0;
		for (jj=0;jj<n;jj++) s += v[j][jj]*tmp[jj];
		x[j] = s;
	}
};

//SVD algorithms taken from sourceforge's linalg, a Simple Linear Algebra C++ Library

/*
PYTHAG computes sqrt(a^{2} + b^{2}) without destructive overflow or underflow.
*/
static double at, bt, ct;
#define PYTHAG(a, b) ((at = fabs(a)) > (bt = fabs(b)) ? \
(ct = bt/at, at*sqrt(1.0+ct*ct)): (bt ? (ct = at/bt, bt*sqrt(1.0+ct*ct)): 0.0))

static double maxarg1, maxarg2;
#define MAX(a, b) (maxarg1 = (a), maxarg2 = (b), (maxarg1) > (maxarg2) ? \
(maxarg1) : (maxarg2))

#define SIGN(a, b) ((b) < 0.0 ? -fabs(a): fabs(a))

void error(const char error_text[])
  /* Numerical Recipes standard error handler.			*/
{
  cerr << "Numerical Recipes run-time error..." << endl;
  cerr << error_text << endl;
  throw runtime_error(error_text);
}

/*
Given a matrix a[m][n], this routine computes its singular value
decomposition, A = U*W*V^{T}.  The matrix U replaces a on output.
The diagonal matrix of singular values W is output as a vector w[n].
The matrix V (not the transpose V^{T}) is output as v[n][n].
m must be greater or equal to n;  if it is smaller, then a should be
filled up to square with zero rows.
*/
void svdcmp(Matrix& a, int m, int n, vector<double>& w, Matrix&  v,
            int maxIterations)
  {
    int flag, i, its, j, jj, k;
    int l  = 0;
    int nm = 0;
    double c, f, h, s, x, y, z;
    double anorm = 0.0, g = 0.0, scale = 0.0;

    if (m < n)
      cerr << "SVDCMP: Matrix A must be augmented with extra rows of zeros.";
    double* rv1 = new double [n];

    /* Householder reduction to bidiagonal form.			*/
    for (i = 0; i < n; i++)
      {
	l = i + 1;
        rv1[i] = scale*g;
        g = s = scale = 0.0;
        if (i < m)
	  {
	    for (k = i; k < m; k++)
	      scale += fabs(a[k][i]);
	    if (scale)
	      {
	        for (k = i; k < m; k++)
	          {
		    a[k][i] /= scale;
		    s += a[k][i]*a[k][i];
	          };
	        f = a[i][i];
	        g = -SIGN(sqrt(s), f);
	        h = f*g - s;
	        a[i][i] = f - g;
	        if (i != n - 1)
	          {
		    for (j = l; j < n; j++)
		      {
		        for (s  = 0.0, k = i; k < m; k++)
		          s += a[k][i]*a[k][j];
		        f = s/h;
		        for ( k = i; k < m; k++)
			  a[k][j] += f*a[k][i];
		      };
	          };
	        for (k = i; k < m; k++)
	          a[k][i] *= scale;
	      };
          };
        w[i] = scale*g;
        g = s= scale = 0.0;
        if (i < m && i != n - 1)
          {
	    for (k = l; k < n; k++)
	      scale += fabs(a[i][k]);
	    if (scale)
	      {
	        for (k = l; k < n; k++)
	          {
		    a[i][k] /= scale;
		    s += a[i][k]*a[i][k];
	          };
	        f = a[i][l];
	        g = -SIGN(sqrt(s), f);
	        h = f*g - s;
	        a[i][l] = f - g;
	        for (k = l; k < n; k++)
	          rv1[k] = a[i][k]/h;
	        if (i != m - 1)
	          {
		    for (j = l; j < m; j++)
		      {
		        for (s = 0.0, k = l; k < n; k++)
		          s += a[j][k]*a[i][k];
		        for (k = l; k < n; k++)
		          a[j][k] += s*rv1[k];
		      };
	          };
	        for (k = l; k < n; k++)
	          a[i][k] *= scale;
	      };
	  };
        anorm = MAX(anorm, (fabs(w[i]) + fabs(rv1[i])));
      };
    /* Accumulation of right-hand transformations.			*/
    for (i = n - 1; 0 <= i; i--)
      {
	if (i < n - 1)
	  {
	    if (g)
	      {
		for (j = l; j < n; j++)
		  v[j][i] = (a[i][j]/a[i][l])/g;
		  /* Double division to avoid possible underflow:	*/
		for (j = l; j < n; j++)
		  {
		    for (s = 0.0, k = l; k < n; k++)
		      s += a[i][k]*v[k][j];
		    for (k = l; k < n; k++)
		      v[k][j] += s*v[k][i];
		  };
	      };
	    for (j = l; j < n; j++)
	      v[i][j] = v[j][i] = 0.0;
	  };
	v[i][i] = 1.0;
	g = rv1[i];
	l = i;
      };
    /* Accumulation of left-hand transformations.			*/
    for (i = n - 1; 0 <= i; i--)
      {
	l = i + 1;
	g = w[i];
	if (i < n - 1)
	  for (j = l; j < n; j++)
	    a[i][j] = 0.0;
	if (g)
	  {
	    g = 1.0/g;
	    if (i != n - 1)
	      {
		for (j = l; j < n; j++)
		  {
		    for (s = 0.0, k = l; k < m; k++)
		      s += a[k][i]*a[k][j];
		    f = (s/a[i][i])*g;
		    for (k = i; k < m; k++)
		      a[k][j] += f*a[k][i];
		  };
	       };
	    for (j = i; j < m; j++)
	      a[j][i] *= g;
	  }
	else
	  for (j = i; j < m; j++)
	    a[j][i] = 0.0;
	++a[i][i];
      };
    /* Diagonalization of the bidiagonal form.				*/
    for (k = n - 1; 0 <= k; k--)	/* Loop over singular values.	*/
      {
	for (its = 0; its < maxIterations; its++)	/* Loop over allowed iterations.*/
	  {
	    flag = 1;
	    for (l = k; 0 <= l; l--)	/* Test for splitting:		*/
	      {
		nm = l - 1;		/* Note that rv1[0] is always zero.*/
		if (fabs(rv1[l]) + anorm == anorm)
		  {
		    flag = 0;
		    break;
		  };
		if (fabs(w[nm]) + anorm == anorm)
		  break;
	      };
	    if (flag)
	      {
		c = 0.0;		/* Cancellation of rv1[l], if l>0:*/
		s = 1.0;
		for (i = l; i <= k; i++) {
		    f = s*rv1[i];
		    if (fabs(f) + anorm != anorm)
		      {
			g = w[i];
			h = PYTHAG(f, g);
			w[i] = h;
			h = 1.0/h;
			c = g*h;
			s = (-f*h);
			for (j = 0; j < m; j++)
			  {
			    y = a[j][nm];
			    z = a[j][i];
			    a[j][nm] = y*c + z*s;
			    a[j][i]  = z*c - y*s;
			  };
		      };
		  };
	      };
	    z = w[k];
	    if (l == k)		/* Convergence.				*/
	      {
		if (z < 0.0)	/* Singular value is made non-negative.	*/
		  {
		    w[k] = -z;
		    for (j = 0; j < n; j++)
		      v[j][k] = (-v[j][k]);
		  };
		break;
	      };
	    if (its == (maxIterations - 1))
	      error("No convergence in SVDCMP iterations.");
	    x = w[l];		/* Shift from bottom 2-by-2 minor.	*/
	    nm = k - 1;
	    y = w[nm];
	    g = rv1[nm];
	    h = rv1[k];
	    f = ((y - z)*(y + z) + (g - h)*(g + h))/(2.0*h*y);
	    g = PYTHAG(f, 1.0);
	    f = ((x - z)*(x + z) + h*((y/(f + SIGN(g, f))) - h))/x;
	    /* Next QR transformation:					*/
	    c = s = 1.0;
	    for (j = l; j <= nm; j++)
	      {
		i = j + 1;
		g = rv1[i];
		y = w[i];
		h = s*g;
		g = c*g;
		z = PYTHAG(f, h);
		rv1[j] = z;
		c = f/z;
		s = h/z;
		f = x*c + g*s;
		g = g*c - x*s;
		h = y*s;
		y = y*c;
		for (jj = 0; jj < n;  jj++)
		  {
		    x = v[jj][j];
		    z = v[jj][i];
		    v[jj][j] = x*c + z*s;
		    v[jj][i] = z*c - x*s;
		  };
		z = PYTHAG(f, h);
		w[j] = z;	/* Rotation can be arbitrary if z = 0.	*/
		if (z)
		  {
		    z = 1.0/z;
		    c = f*z;
		    s = h*z;
		  };
		f = (c*g) + (s*y);
		x = (c*y) - (s*g);
		for (jj = 0; jj < m; jj++)
		  {
		    y = a[jj][j];
		    z = a[jj][i];
		    a[jj][j] = y*c + z*s;
		    a[jj][i] = z*c - y*s;
		  };
	      };
	    rv1[l] = 0.0;
	    rv1[k] = f;
	    w[k] = x;
	  };
      };
    delete [] rv1;
  }  //to understand this.....

int main(int argc, char* argv[])
{
	//we could have used functors to define f(x) and g(x) (or an arbitrary number of iterative dependencies
	//vector<Function*> fn(N-1);
	//Function f, f1;
	//fn[0] = &(f*f);
	//fn[1] = &f1;

	//test functions with 3x3 matrix
	//Matrix test1(3,3), test2(3,3);
	//for (i=0;i<3;i++)
	//	for (j=0;j<3;j++)
	//	{
	//		test1[i][j] = (i+j)*j;
	//		test2[i][j] = fabs(i-j)*i;
	//	};

	//test functionality 
	//cout << test1 << "\n*\n" << test2 << "\n=\n" << (test1*test2) << endl;
	//cout << "The transpose of \n" << test1 << "\nis\n" << t << endl;
	//cout << ((~test1)*test2) << endl;

	int i=0, j=0, k=0, r=0;  //need counters

	int N = 3;         //number of columns 
	int M = 2000; //maximimum number of rows (reset after data is read in)

	
	Matrix A(M,N);  //design matrix
	Matrix d(M, 1);  //data points
	Matrix b(M,1);	//dependent variables

	//read data in ( use command "c:\>noisy < noisydata.txt"  to read in data )
	for (i=0;i<N;i++)
		cin >> d[i][0];

	do 
	{
		b[i-N][0]=d[i-1][0]; //set dependent values
		cin >> d[i][0];
		i++;
	} while (d[i-2][0] != d[i-1][0]);

	M = i-N-1;	//number of rows

	//construct design matrix using experimental data points
	for (i=0;i<M;i++)
	{
		A[i][0] = 1;
		A[i][1] = d[i][0];
		A[i][2] = d[i+1][0]*d[i+1][0];
	}

	//saftey check
	A.setsize(M,N);
	b.setsize(M,1);

	//backup's 
	Matrix designMatrix = A;
	Matrix dependentVariables = b;

	//debug output
/*	cout << "Read " << M << " rows into the design matrix" << endl;
	cout << "The Design Matrix has " << A.size() << " rows" << endl;
	cout << "The Design Matrix has " << A[0].size() << " columns" << endl;
	cout << A <<endl;
	cout << "The transpose of the design matrix is \n" << (~A) << endl;
	cout << "The solution matrix is \n" << b << endl;
*/

	Matrix rhs = (~A)*A;
	Matrix lhs = (~A)*b;

	//  more debug output
	//	cout << "At*A=\n" << rhs << endl;
	//	cout << "At*b=\n" << lhs << endl;

	//perform gauss jordan approximation
	//lhs will be converted to the least squares approximation fo coefficients a, b and c
	rhs.gaussJordan(lhs);

//	cout <<"The Gauss Jordan calucation resulted in" << endl;  //is this the inverse?
//	cout << rhs << endl;

	cout << "Least Squares approximation for a, b and c is given by" << endl;
	cout << lhs << endl;

	cout << "The error in each equation is given by" << endl;
	cout << ((designMatrix*lhs)-dependentVariables) << endl;

	// calculation of least squares approximation to iterative equation using 
	// Singular Value Decomposition with backsubstitution
	vector<double> w = dependentVariables.getColumn(0);
	Matrix V(N,N);
	Matrix U = designMatrix;
	Matrix b_svd = dependentVariables;
	vector<double> solution(N);

	svdcmp(U, M, N, w, V, 30);
	svdbksb(U, w, V, b_svd, solution);

	cout << "The solution using SVD = \n" << endl;
	for (i=0;i<solution.size(); i++)
		cout << (solution[i]=-solution[i]) << endl;  //no,  i do not know why i have to do this

	Matrix msolution(solution);  //need to create Matrix fuctions that take vectors for arithmetic operations

	cout << "The error in each equation is given by" << endl;		
	cout << ((designMatrix*msolution)-dependentVariables) << endl;

	cout << "\nThe difference between the two methods is given by " << endl;
	cout << (msolution - lhs);

	
	cout << "The sequence can be extrapolated using the coefficients to retrieve the predicted values\n" << endl;

	Matrix extra_vals(1,N);
	int numOfExtraVals = 5;
	vector<double> actual(numOfExtraVals);
	actual[0] =	1.5264;
	actual[1] = -0.793814;
	actual[2] = 1.22997;
	actual[3] = 	-0.344299;
	actual[4] = 	1.64695;

	extra_vals[0][0] = 1;
	extra_vals[0][1] = dependentVariables[M-2][0];
	extra_vals[0][2] = dependentVariables[M-1][0]*dependentVariables[M-1][0];
	cout << "Predicted\tActual\n";
	double xn, x0 = dependentVariables[M-1][0];

	for (i=0;i<numOfExtraVals;i++)
	{
		Matrix val = extra_vals*msolution;
		xn = val[0][0];
		cout << xn	<< "\t\t" << actual[i]<< endl;
		extra_vals[0][1] = x0;
		extra_vals[0][2] = xn*xn;
		x0 = xn;
	};

	cout << "have a nice day :)" << endl;

	return 0;
};