// // LeastSquaresFit.h // // This file contains the LeastSquaresFit template class. // This code is part of the Bioinformatics Template Library (BTL). // // Copyright (C) 1997 Birkbeck College, Malet Street, London WC1E 7HX, U.K. // (classlib@mail.cryst.bbk.ac.uk) // // This library is free software; you can // redistribute it and/or modify it under the terms of // the GNU Library General Public License as published by the Free // Software Foundation; either version 2 of the License, or (at your // option) any later version. This library is distributed in the hope // that it will be useful, but WITHOUT ANY WARRANTY; without even the // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. See the GNU Library General Public License for more details. // You should have received a copy of the GNU Library General Public // License along with this library; if not, write to the Free Software // Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. /////////////////////////////////////////////////////////////////////////// #if !defined (LEASTSQUARESFIT_H) #define MATRIX3D_H #include "Typedefn.h" #include "Matrix.h" #include "NumericLimits.h" /**#: [Description="This class contains methods for calculating the applying the least squares fit of one set of x,y,z coordinates onto another. The method used is that of Simon K. Kearsley (Acta Cryst.'89, A45, 208-10). "] [Usage="All members in this class are static and therefore an object need not be create before the members can be used. The methods in this class are generic algorithms and can be used with a wide range of container types. This flexibility is achieved through the use of iterators. Any container used must have an appropriate iterator type that can be used to traverse through its elements. Most methods require two iterators per container, marking the first and last element to be processed. This style of function is designed to be the same as used in the Standard Template Library.

e.g.

// Set up aliases of the type to be used

typedef vector::iterator iterator;
typedef vector::const_iterator const_iterator;
typedef LeastSquaresFit<iterator,const_iterator> Fitter;

// Construct two empty vectors

vector v1,v2;

// Put some coordinates into these vector

....

// Do least squares fit on these coodinates

REAL rmsd = Fitter::Fit(v1.begin(),v1.end(),v2.begin(),v2.end());

"] [Files="LeastSquaresFit.h"] [Dependencies="Matrix, NumericLimits"] [Authors="W.R.Pitt, D.S.Moss"]*/ template class LeastSquaresFit { private: /**#: [Hidden] */ static Vector CalcCentroid(ConstForwardIterator begin, ConstForwardIterator end, const unsigned int size); /**#: [Hidden] */ static REAL CalcFit(ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2, Vector& centroid1, Vector& centroid2, Matrix& evec); /**#: [Hidden] */ static REAL CalcRMSD(const Vector& eigenValues, const unsigned int size); /**#: [Hidden] */ static unsigned int CheckData(ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2); /**#: [Hidden] */ static Matrix CreateRotationMatrix(const Matrix& eigenVectors); /**#: [Hidden] */ static Matrix FitMatrix(ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, const Vector& centroid1, const Vector& centroid2); /**#: [Hidden] */ static void RotateTranslate(ForwardIterator begin, ForwardIterator end, const Matrix& rot, const Vector& centroid1, const Vector& centroid2); public: /**#: [Description="Calculate the root mean squared deviation of two sets of coordinates."] [Restrictions="Each set must have the same number of coordinates. There is no restriction on the number of dimensions which the coordinates represent (except that it must be the same in every case). WARNING: if coordinates with varying dimensions are input then this may well not be detected."]*/ static REAL RMSD(ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2, const unsigned int nDimensions); /**#: [Description="Calculate the root mean squared deviation after a least squares fit of the two sets of coordinates using the method of Kearsley. Both sets of coordinates remain unchanged by this function"] [Restrictions="Each set must have the same number of coordinates. Each coordinate must be orthogonal and in three dimensions (i.e. normal x,y,z coordinates). WARNING: if coordinates with varying dimensions are input then this may well not be detected."]*/ static REAL Compare(ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2); /**#: [Description="Least squares fit one set of coordinates to another using the method of Kearsley. Return the root mean squared deviation after the fit. The second set of coordinates remains unaltered by this function."] [Restrictions="Each set must have the same number of coordinates. Each coordinate must be orthogonal and in three dimensions (i.e. normal x,y,z coordinates). WARNING: if coordinates with varying dimensions are input then this may well not be detected."]*/ static REAL Fit(ForwardIterator begin1, ForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2); }; //............................................................................. // Calculate the centroid of a set of x,y,z coordinates. template Vector LeastSquaresFit::CalcCentroid( ConstForwardIterator begin, ConstForwardIterator end, const unsigned int size) { Vector centroid; for (unsigned i=0; begin!= end; i++) { if (i>2) i=0; centroid[i] += *begin++; } centroid /= size; return centroid; } //............................................................................. // Calculate the Root Mean Square Deviation between two n x 3 Matrices // The method used is that of Simon K. Kearsley (Acta Cryst. '89, A45, 208-10) template REAL LeastSquaresFit::CalcFit( ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2, Vector& centroid1, Vector& centroid2, Matrix& evec) { unsigned int size = CheckData(begin1, end1, begin2, end2); // Calculate centroids of two sets of coordinates centroid1 = CalcCentroid(begin1,end1,size); centroid2 = CalcCentroid(begin2,end2,size); // Calculate Kearsley matrix Matrix matfit = FitMatrix(begin1,end1,begin2,centroid1,centroid2); // Calculate eigenvectors and eigenvalues of matfit Vector eval(4); matfit.Eigens(eval, evec); // Calculate RMSD REAL rmsd = CalcRMSD(eval,size); return rmsd; }; //............................................................................. // Calculate the RMSD from the smallest eigen value template REAL LeastSquaresFit::CalcRMSD( const Vector& eval, const unsigned int size) { // Calculate rmsd from smallest eigen value. If this value is very small // then round rmsd down to zero. REAL minEigen = eval(1) * NumericLimits::epsilon(); REAL rmsd; if (eval(4) < minEigen) rmsd = 0.0; else rmsd = sqrt(eval(4)/size); return rmsd; } //............................................................................. // Check that both input containers have the same number of elements and that // number is multiple of 3. Output the number of x,y,z positions template unsigned int LeastSquaresFit::CheckData( ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2) { unsigned int ndata1=0; while (begin1 != end1) { ndata1++; begin1++; } if (ndata1%3 != 0) FATAL_ERROR("The number of elements in the 1st container is not a " "multiple of 3"); unsigned int ndata2=0; while (begin2 != end2) { ndata2++; begin2++; } if (ndata2%3 != 0) FATAL_ERROR("The number of elements in the 2nd container is not a " "multiple of 3"); if (ndata1 != ndata2) FATAL_ERROR("There are different numbers of elements in the two input " "containers"); return ndata1/3; } //............................................................................. // Create fitting rotation matrix from the eigen vector that corresponds to the // smallest eigen vector of Kearsley's matrix. template Matrix LeastSquaresFit:: CreateRotationMatrix(const Matrix& eigenVectors) { REAL q1=eigenVectors(1,4); REAL q2=eigenVectors(2,4); REAL q3=eigenVectors(3,4); REAL q4=eigenVectors(4,4); // initialize rotation matrix Matrix rot(3,3); rot(1,1) = q1*q1 + q2*q2 - q3*q3 - q4*q4; rot(1,2) = 2.0 * (q2*q3 + q1*q4); rot(1,3) = 2.0 * (q2*q4 - q1*q3); rot(2,1) = 2.0 * (q2*q3 - q1*q4); rot(2,2) = q1*q1 + q3*q3 - q2*q2 - q4*q4; rot(2,3) = 2.0 * (q3*q4 + q1*q2); rot(3,1) = 2.0 * (q2*q4 + q1*q3); rot(3,2) = 2.0 * (q3*q4 - q1*q2); rot(3,3) = q1*q1 + q4*q4 - q2*q2 - q3*q3; return rot; } //............................................................................. template Matrix LeastSquaresFit::FitMatrix( ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, const Vector& centroid1, const Vector& centroid2) { // Assumptions: centroid1 and centroid2 are of size 3 and that both inputs // have 3n coordinates (x,y,z) * n // Set up matrix to be diagonalized Matrix matfit(4,4); Vector summ,diff,coord1,coord2; while (begin1 != end1) { // Calculate sums and differences of corresponding co-ordinates for (unsigned int i=0; i<3; i++) { coord1[i] = *begin1++ - centroid1[i]; coord2[i] = *begin2++ - centroid2[i]; diff[i] = coord2[i] - coord1[i]; summ[i] = coord2[i] + coord1[i]; } matfit(1,1) += diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]; matfit(1,2) += summ[1]*diff[2] - diff[1]*summ[2]; matfit(1,3) += diff[0]*summ[2] - summ[0]*diff[2]; matfit(1,4) += summ[0]*diff[1] - diff[0]*summ[1]; matfit(2,2) += summ[1]*summ[1] + summ[2]*summ[2] + diff[0]*diff[0]; matfit(2,3) += diff[0]*diff[1] - summ[0]*summ[1]; matfit(2,4) += diff[0]*diff[2] - summ[0]*summ[2]; matfit(3,3) += summ[0]*summ[0] + summ[2]*summ[2] + diff[1]*diff[1]; matfit(3,4) += diff[1]*diff[2] - summ[1]*summ[2]; matfit(4,4) += summ[0]*summ[0] + summ[1]*summ[1] + diff[2]*diff[2]; } // Fill in the lower triangle of mat for (short i=2; i<5; i++) for (short j=1; j void LeastSquaresFit::RotateTranslate( ForwardIterator begin, ForwardIterator end, const Matrix& rot, const Vector& centroid1, const Vector& centroid2) { Vector row, temp; ForwardIterator rowbegin; while (begin != end) { // Copy current x,y,z coordinate to a Vector // rowbegin = begin; for (unsigned int i=0; i<3; i++) row[i] = *begin++; // Apply the transformation to this Vector // // Transformation = translate so centroid of 1st set of coordinates // lies at the origin, rotate to fit second set of coordinates then // translate so that the original of the 1st set of coordinates lies // over that of the 2nd. // temp = rot * (row - centroid1) + centroid2; // Write transformed Vector to the input container // // Extra set of curly brackets for benefit of SGI CC v7.1 {for (unsigned int i=0; i<3; i++) *rowbegin++ = temp[i];} } } //............................................................................. // Calculate the root mean squared deviation of two sets of coordinates. Each // set must have the same number of coordinates but there is no restriction on // the number of dimensions which the coordinates represent (except that it // must be the same in every case). WARNING: if coordinates with varying // dimensions are input then this may well not be detected. template REAL LeastSquaresFit::RMSD( ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2, const unsigned int ndim) { REAL dev,sumsqrdev=0.0; unsigned int size = 0; while (begin1 != end1) { size++; dev = *begin1++ - *begin2++; sumsqrdev += dev * dev; } if (size == 0) { WARNING("begin1 iterator equals the end1 iterator. " "No coordinates found"); return 0.0; } if (begin2 != end2) FATAL_ERROR("The number of input coordinates did not match"); if (size%ndim != 0) FATAL_ERROR("The number input values is not a multiple of the number" "of dimensions"); return sqrt(sumsqrdev/(size/ndim)); } //............................................................................. // Calculate the root mean squared deviation after a least squares fit of // the two sets of coordinates using the method of Kearsley. template REAL LeastSquaresFit::Compare( ConstForwardIterator begin1, ConstForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2) { // Create data structures to hold intermediate results // Matrix eigenVectors(4,4); Vector centroid1, centroid2; // Calculate the root mean squared deviation after a least squares fit of // the two sets of coordinates. // REAL rmsd = CalcFit(begin1,end1,begin2,end2, centroid1,centroid2,eigenVectors); return rmsd; } //............................................................................. // Least squares fit one set of coordinates to another using the method of // Kearsley. Return the root mean squared deviation after the fit. The second // set of coordinates remains unaltered by this function. template REAL LeastSquaresFit::Fit( ForwardIterator begin1, ForwardIterator end1, ConstForwardIterator begin2, ConstForwardIterator end2) { // Create data structures to hold intermediate results // Matrix eigenVectors(4,4); Vector centroid1, centroid2; // Calculate centroids of the two sets of input coordinates, and the eigen // vectors need to generate the fitting rotation matrix. // REAL rmsd = CalcFit(begin1,end1,begin2,end2, centroid1,centroid2,eigenVectors); // Create the rotation matrix // Matrix rot = CreateRotationMatrix(eigenVectors); // Fit atoms of inputMatrix using rotation and translations. // RotateTranslate(begin1,end1,rot,centroid1,centroid2); return rmsd; } #endif // LeastSquaresFit.h