/*
Copyright (c) 2009, Michael Kazhdan
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution. 

Neither the name of the Johns Hopkins University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/

#include <vector>

#include "MemoryUsage.h"
#include "CmdLineParser.h"
#include "LaplacianMatrix1D.h"
#include "TriangleIntegrals.h"
#include "Util.h"

// Assigns function indices to the primal (corner-based) tree at the given depth.
//
// Steps, as performed below:
//   1. Resize/zero _dimension, set up fData and neighborKey3 for `depth`.
//   2. For every depth-`depth` node that owns primitives (tStart<tEnd), give an index to each
//      not-yet-indexed node in the [1,2]^3 part of its neighborhood, at every depth 0..depth.
//   3. For the same nodes, accumulate into iValues, per indexed neighbor, the sum over the node's
//      samples of (x-polynomial * y-polynomial * z-polynomial * sample weight).
//   4. Re-index the depth-`depth` nodes, dropping those whose accumulated value is below
//      cutOff / 4^depth, then call _setPrimalMultiRes() to re-index the coarser levels.
//
// depth  : the depth at which primitives are attached to nodes.
// cutOff : threshold (scaled by 1/4^depth) below which a finest-level function is discarded.
// Returns 1 unconditionally.
template< class Real , bool Primal >
int MeshOctree< Real , Primal >::_setPrimalTree( int depth , double cutOff )
{
	const int samplesPerTriangle = TriangleIntegrator10< double >::SampleNum;

	_dimension.resize( depth+1 );
	fData.set( depth , PPolynomial< ( Primal ? 1 : 2 ) >::GaussianApproximation() , 0 , 1 , false , fData.D0_FLAG | fData.D1_FLAG , true , 0 , 2 );

	for( int i=0 ; i<=depth ; i++ )	_dimension[i] = 0;
	neighborKey3.set( depth );

	// Add the necessary nodes to the tree and set the indices
	for( TreeOctNode* node = tree.nextNode() ; node ; node = tree.nextNode( node ) )
		if( node->depth()==depth && node->nodeData.tStart<node->nodeData.tEnd ) // If the node has triangles associated to it
		{
			neighborKey3.setNeighbors( node , 0 , 1 );  // This is actually over-kill as we only need 2^3 neighbors, not all 3^3

			// Set the indices for all the un-indexed neighbors in the [1,2]^3 block, at every depth
			for( int d=0 ; d<=depth ; d++ )
			{
				typename TreeOctNode::Neighbors3& neighbors = neighborKey3.neighbors[d];
				for( int i=1 ; i<3 ; i++ )
					for( int j=1 ; j<3 ; j++ )
						for( int k=1 ; k<3 ; k++ )
							if( neighbors.neighbors[i][j][k] && neighbors.neighbors[i][j][k]->nodeData.index==-1 )
								neighbors.neighbors[i][j][k]->nodeData.index = _dimension[d]++;
			}
		}

	Vector< Real > iValues;
	iValues.Resize( _dimension[depth] );

	// Scratch buffers, shared by all nodes so that they are allocated once rather than per node.
	std::vector< Point3D< double > > samples;
	std::vector< double > weights;
	std::vector< double > values[2][3];

	for( TreeOctNode* node = tree.nextNode() ; node ; node = tree.nextNode( node ) )
		if( node->depth()==depth && node->nodeData.tStart<node->nodeData.tEnd )
		{
			neighborKey3.getNeighbors( node );
			const int primitiveCount = node->nodeData.tEnd-node->nodeData.tStart;
			const int sampleCount = _fromSamples ? primitiveCount : primitiveCount * samplesPerTriangle;
			samples.resize( sampleCount );
			weights.resize( sampleCount );
			// Zero-fill so that entries skipped by the boundary test below hold a defined value.
			for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) values[i][j].assign( sampleCount , 0. );

			// Gather the sample positions and weights of the primitives attached to this node
			for( int t=0 ; t<primitiveCount ; t++ )
				if( _fromSamples )
				{
					samples[t] = Point3D< double >( _samples[node->nodeData.tStart+t].position );
					weights[t] = Length( Point3D< double >( _samples[node->nodeData.tStart+t].normal ) );
				}
				else
				{
					TriangleIndex& tIndex = _triangles[t+node->nodeData.tStart];
					TriangleIntegrator10< double > triIntegrator( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
					for( int i=0 ; i<samplesPerTriangle ; i++ )
					{
						samples[ t*samplesPerTriangle + i ] = triIntegrator[i];
						weights[ t*samplesPerTriangle + i ] = triIntegrator.weights[i];
					}
				}

			typename TreeOctNode::Neighbors3& neighbors = neighborKey3.neighbors[ node->depth() ];
			int dd , off[3];
			neighbors.neighbors[1][1][1]->depthAndOffset( dd , off );

			// Evaluate the per-axis polynomials at every sample; the weight is folded into the x-axis values
			for( int o=0 ; o<=1 ; o++ )
				for( int c=0 ; c<3 ; c++ )
				{
					if( off[c]+o<0 || off[c]+o>=(1<<dd) ) continue;
					int idx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
					Polynomial< 1 > poly;
					for( int j=0 ; j<=1-o ; j++ ) poly += fData.baseFunctions[idx].polys[j].p;
					for( int s=0 ; s<sampleCount ; s++ )
					{
						values[o][c][s] = poly( samples[s][c] );
						if( !c ) values[o][c][s] *= weights[s];
					}
				}

			// Accumulate the weighted sum of the product of the per-axis values for each neighbor
			for( int i=1 ; i<3 ; i++ )
				for( int j=1 ; j<3 ; j++ )
					for( int k=1 ; k<3 ; k++ )
						if( neighbors.neighbors[i][j][k] )
						{
							double integral = 0;
							for( int s=0 ; s<sampleCount ; s++ ) integral += values[i-1][0][s] * values[j-1][1][s] * values[k-1][2][s];
							iValues[ neighbors.neighbors[i][j][k]->nodeData.index ] += Real( integral );
						}
		}

	// Re-index the finest level, discarding functions whose accumulated value is below the threshold.
	// The shift is done in 64 bits so that 4^depth does not overflow an int when depth>=16.
	_dimension[ depth ] = 0;
	double co = cutOff / double( 1LL<<( 2*depth ) );
	for( TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode( node ) )
		if( node->depth()==depth )
		{
			if( node->nodeData.index==-1 || iValues[node->nodeData.index]<co ) node->nodeData.index = -1;
			else                                                               node->nodeData.index = _dimension[ depth ]++;
		}
	_setPrimalMultiRes( );

	MemoryUsage();
	return 1;
}
// Re-indexes every level coarser than the tree's maximum depth, from fine to coarse.
// A node at level d keeps an index (the next free slot in _dimension[d]) if at least one of the
// children of its [0,1]^3 neighbors, excluding children on the low side of a low-side neighbor
// along any axis, has an index other than -1. Otherwise its index is set to -1.
template< class Real , bool Primal >
void MeshOctree< Real , Primal >::_setPrimalMultiRes( void )
{
	const int finestDepth = tree.maxDepth();
	for( int level=finestDepth-1 ; level>=0 ; level-- )
	{
		_dimension[level] = 0;
		for( TreeOctNode* cur=tree.nextNode( ) ; cur ; cur=tree.nextNode( cur ) )
		{
			if( cur->depth()!=level ) continue;

			int liveChildren = 0;
			typename TreeOctNode::Neighbors3& nbrs = neighborKey3.getNeighbors( cur );

			// Walk the 2x2x2 block of neighbors in (i,j,k) order, i being the slowest index
			for( int n=0 ; n<8 ; n++ )
			{
				const int i = (n>>2)&1 , j = (n>>1)&1 , k = n&1;
				TreeOctNode* nbr = nbrs.neighbors[i][j][k];
				if( !nbr || !nbr->children ) continue;

				// Walk the children in (x,y,z) order, x being the slowest index
				for( int c=0 ; c<8 ; c++ )
				{
					const int x = (c>>2)&1 , y = (c>>1)&1 , z = c&1;
					// ((i<<1)|x) lies in [1,3] exactly when i and x are not both zero (same for j/y and k/z)
					if( !( i | x ) || !( j | y ) || !( k | z ) ) continue;
					if( nbr->children[ Cube::CornerIndex( x , y , z) ].nodeData.index!=-1 ) liveChildren++;
				}
			}

			if( liveChildren ) cur->nodeData.index = _dimension[ level ]++;
			else               cur->nodeData.index = -1;
		}
	}
}

// Scatters the pairwise sums computed for one node into the sparse matrix.
//
// matrix : matrix whose row `idx` stores, in slot (i+1)*9+(j+1)*3+(k+1), the row index (.N) of the
//          neighbor at offset (i,j,k), or -1 if there is none.
// sums   : sums[i][j][k][ii][jj][kk] is the value for the neighbor pair (i,j,k),(ii,jj,kk); only
//          pairs with (ii,jj,kk) lexicographically >= (i,j,k) are read.
// idx    : the row of the node whose neighborhood is being processed.
//
// Each pair's value is added to the entry of the first neighbor's row that addresses the second and,
// when the two rows differ, to the mirrored entry as well.
template< class Real , bool Primal >
void MeshOctree< Real , Primal >::_setFromPrimalSums( SparseMatrix< Real >& matrix, double sums[2][2][2][2][2][2] , int idx ) const
{
	// Flattening (i,j,k) as 4*i+2*j+k turns the lexicographic ordering into the ordering of the integers
	for( int a=0 ; a<8 ; a++ )
	{
		const int i = (a>>2)&1 , j = (a>>1)&1 , k = a&1;
		const int rowA = matrix[idx][ (i+1)*3*3 + (j+1)*3 + (k+1) ].N;
		if( rowA==-1 ) continue;

		for( int b=a ; b<8 ; b++ )
		{
			const int ii = (b>>2)&1 , jj = (b>>1)&1 , kk = b&1;
			const int rowB = matrix[idx][ (ii+1)*3*3 + (jj+1)*3 + (kk+1) ].N;
			if( rowB==-1 ) continue;

			const double pairSum = sums[i][j][k][ii][jj][kk];
			// Slot of B within A's row, and of A within B's row
			const int slotBinA = (ii-i+1)*3*3 + (jj-j+1)*3 + (kk-k+1);
			const int slotAinB = (i-ii+1)*3*3 + (j-jj+1)*3 + (k-kk+1);

			matrix[ rowA ][ slotBinA ].Value += Real( pairSum );
			if( rowA!=rowB ) matrix[ rowB ][ slotAinB ].Value += Real( pairSum );
		}
	}
}

// Builds the dot-product matrix of the primal functions at level depth+1.
//
// For every indexed node at that level, the node's samples (either the input samples or the
// quadrature samples of its triangles) are gathered, the per-axis polynomials of the node's
// [0,1]^3 neighbors are evaluated at them, and the weighted sums of products of pairs of neighbor
// functions are scattered into the matrix via _setFromPrimalSums. The matrix is then compressed.
//
// depth      : the level is depth+1 (the argument is incremented on entry).
// dotProduct : output matrix; its adjacency is initialised by _setAdjacency.
// progress   : if true, a progress bar is updated once per primitive.
// Returns 1 unconditionally.
template< class Real , bool Primal >
int MeshOctree< Real , Primal >::_getPrimalDotProductMatrix( int depth , SparseMatrix<Real>& dotProduct , bool progress ) const
{
	const int samplesPerTriangle = TriangleIntegrator10< double >::SampleNum;

	depth++;

	// NOTE(review): _setAdjacency's return value is used as the per-node primitive bound for the
	// scratch buffers -- presumably the largest primitive count of any node; confirm.
	int maxSamples = _setAdjacency( depth , dotProduct );
	if( !_fromSamples ) maxSamples *= samplesPerTriangle;

	// Scratch buffers, released automatically when the function returns
	std::vector< Point3D< double > > samples( maxSamples );
	std::vector< double > weights( maxSamples );
	std::vector< double > subValues[2][3];
	std::vector< double > fullValues[2][2][2];
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) subValues[i][j].resize( maxSamples );
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) fullValues[i][j][k].resize( maxSamples );

	ProgressBar* bar=NULL;
	if( progress )
	{
		if( _fromSamples ) bar = new ProgressBar( 20 , _samples.size()   , "Setting Matrix" );
		else               bar = new ProgressBar( 20 , _triangles.size() , "Setting Matrix" );
	}
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;
		const int idx = node->nodeData.index;
		Polynomial< 1 > polys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );

		// Accumulate the polynomials corresponding to the node
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				const int fIdx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[fIdx].polys[j].p;
			}
		double sums[2][2][2][2][2][2];
		memset( sums , 0 , sizeof( sums ) );

		const int primitiveCount = node->nodeData.tEnd-node->nodeData.tStart;
		const int sampleCount = _fromSamples ? primitiveCount : primitiveCount * samplesPerTriangle;

		// Gather the sample positions and weights of the primitives attached to this node
		for( int t=0 ; t<primitiveCount ; t++ )
		{
			if( progress ) bar->update();
			if( _fromSamples )
			{
				samples[t] = Point3D< double >( _samples[node->nodeData.tStart+t].position );
				weights[t] = Length( Point3D< double >( _samples[node->nodeData.tStart+t].normal ) );
			}
			else
			{
				const TriangleIndex& tIndex = _triangles[t+node->nodeData.tStart];
				TriangleIntegrator10< double > triIntegrator( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				for( int i=0 ; i<samplesPerTriangle ; i++ )
				{
					samples[ t*samplesPerTriangle + i ] = triIntegrator[i];
					weights[ t*samplesPerTriangle + i ] = triIntegrator.weights[i];
				}
			}
		}

		// Each function value is scaled by the square root of the weight so that a product of two
		// function values carries the full weight.
		for( int s=0 ; s<sampleCount ; s++ ) weights[s] = sqrt( weights[s] );

		// Evaluate each of the constituent functions at each of the samples associated to the node.
		// Note the inclusive upper bound, unlike the accumulation loop above: polynomials skipped there
		// are still evaluated here in their default-constructed state.
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
				if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
					for( int s=0 ; s<sampleCount ; s++ )
						subValues[o][c][s] = polys[o][c]( samples[s][c] );

		// Evaluate the cumulative polynomial at each sample point
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( dotProduct[idx][(i+1)*3*3+(j+1)*3+(k+1)].N != -1 )
						for( int s=0 ; s<sampleCount ; s++ )
							fullValues[i][j][k][s] = subValues[i][0][s] * subValues[j][1][s] * subValues[k][2][s] * weights[ s ];

		// Iterate over all pairs of nodes whose support overlaps the primitives, visiting each
		// unordered pair once: (ii,jj,kk) runs over the offsets lexicographically >= (i,j,k).
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( dotProduct[idx][(i+1)*3*3+(j+1)*3+(k+1)].N != -1 )
						for( int ii=i ; ii<2 ; ii++ )
							for( int jj=0 ; jj<2 ; jj++ )
								if(ii==i && jj<j ) continue;
								else
									for( int kk=0 ; kk<2 ; kk++ )
										if( ii==i && jj==j && kk<k ) continue;
										else if( dotProduct[idx][(ii+1)*3*3+(jj+1)*3+(kk+1)].N != -1 )
										{
											double pairSum = 0;
											for( int s=0 ; s<sampleCount ; s++ ) pairSum += fullValues[i][j][k][s] * fullValues[ii][jj][kk][s];
											sums[i][j][k][ii][jj][kk] += pairSum;
										}
		_setFromPrimalSums( dotProduct , sums , idx );
	}
	if( progress ) delete bar;
	MemoryUsage();
	_compressMatrix( dotProduct );
	MemoryUsage();
	return 1;
}
// Builds the Laplacian (gradient dot gradient) matrix of the primal functions at level depth+1.
//
// For every indexed node at that level, the gradients of the node's [0,1]^3 neighbor functions are
// evaluated at the node's samples, their component along the sample normal is removed, and the
// weighted sums of dot-products of pairs of gradients are scattered into the matrix via
// _setFromPrimalSums. The matrix is then compressed.
//
// depth     : the level is depth+1 (the argument is incremented on entry).
// laplacian : output matrix; its adjacency is initialised by _setAdjacency.
// progress  : if true, a progress bar is updated once per primitive.
// Returns 1 unconditionally.
template< class Real , bool Primal >
int MeshOctree< Real , Primal >::_getPrimalLaplacianMatrix( int depth , SparseMatrix<Real>& laplacian , bool progress ) const
{
	const int samplesPerTriangle = TriangleIntegrator10< double >::SampleNum;

	depth++;

	// NOTE(review): _setAdjacency's return value is used as the per-node primitive bound for the
	// scratch buffers -- presumably the largest primitive count of any node; confirm.
	int maxSamples = _setAdjacency( depth , laplacian );
	if( !_fromSamples ) maxSamples *= samplesPerTriangle;

	// Scratch buffers, released automatically when the function returns
	std::vector< Point3D< double > > samples( maxSamples ) , normals( maxSamples );
	std::vector< double > weights( maxSamples );
	std::vector< double > subValues[2][3] , subDValues[2][3];
	std::vector< Point3D< double > > fullValues[2][2][2];
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) subValues[i][j].resize( maxSamples ) , subDValues[i][j].resize( maxSamples );
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) fullValues[i][j][k].resize( maxSamples );

	ProgressBar* bar=NULL;
	if( progress )
	{
		if( _fromSamples ) bar = new ProgressBar( 20 , _samples.size()   , "Setting Matrix" );
		else               bar = new ProgressBar( 20 , _triangles.size() , "Setting Matrix" );
	}
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;

		const int idx = node->nodeData.index;
		Polynomial< 1 >  polys[2][3];
		Polynomial< 0 > dPolys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );

		// Accumulate the polynomials corresponding to the node, and their derivatives
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				const int fIdx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[fIdx].polys[j].p;
				dPolys[o][c] = polys[o][c].derivative();
			}

		double sums[2][2][2][2][2][2];
		memset( sums , 0 , sizeof( sums ) );

		const int primitiveCount = node->nodeData.tEnd-node->nodeData.tStart;
		const int sampleCount = _fromSamples ? primitiveCount : primitiveCount * samplesPerTriangle;

		// Gather the sample positions, weights and unit normals of the primitives attached to this node
		for( int t=0 ; t<primitiveCount ; t++ )
		{
			if( progress ) bar->update();
			if( _fromSamples )
			{
				Point3D< double > normal = Point3D< double >( _samples[node->nodeData.tStart+t].normal );
				samples[t] = Point3D< double >( _samples[node->nodeData.tStart+t].position );
				weights[t] = Length( normal );
				// A zero-length normal is left as is (rather than divided by zero) so that the sample
				// contributes nothing instead of injecting NaNs into the matrix.
				if( weights[t]>0 ) normal = normal / weights[t];
				normals[t] = normal;
			}
			else
			{
				const TriangleIndex& tIndex = _triangles[t+node->nodeData.tStart];
				TriangleIntegrator10< double > triIntegrator( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				for( int i=0 ; i<samplesPerTriangle ; i++ )
				{
					int ii = t*samplesPerTriangle + i;
					samples[ ii ] = triIntegrator[i];
					weights[ ii ] = triIntegrator.weights[i];
					normals[ ii ] = Point3D< double >( _baseNormals[ _parents[ t+node->nodeData.tStart ] ] );
				}
			}
		}
		// Each gradient is scaled by the square root of the weight so that a product of two
		// gradients carries the full weight.
		for( int s=0 ; s<sampleCount ; s++ ) weights[s] = sqrt( weights[s] );

		// Evaluate the per-axis polynomials and their derivatives at the samples.
		// Note the inclusive upper bound, unlike the accumulation loop above: polynomials skipped there
		// are still evaluated here in their default-constructed state.
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
				if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
					for( int s=0 ; s<sampleCount ; s++ )
					{
						subValues [o][c][s] =  polys[o][c]( samples[s][c] );
						subDValues[o][c][s] = dPolys[o][c]( samples[s][c] );
					}
		// Compute the weighted gradient of each neighbor function, with the normal component removed
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( laplacian[idx][(i+1)*3*3+(j+1)*3+(k+1)].N != -1 )
						for( int s=0 ; s<sampleCount ; s++ )
						{
							fullValues[i][j][k][s][0] = subDValues[i][0][s] *  subValues[j][1][s] *  subValues[k][2][s];
							fullValues[i][j][k][s][1] =  subValues[i][0][s] * subDValues[j][1][s] *  subValues[k][2][s];
							fullValues[i][j][k][s][2] =  subValues[i][0][s] *  subValues[j][1][s] * subDValues[k][2][s];
							fullValues[i][j][k][s] -= normals[s] * Point3D< double >::Dot( normals[s] , fullValues[i][j][k][s] );
							fullValues[i][j][k][s] *= weights[s];
						}
		// Iterate over all pairs of neighbors, visiting each unordered pair once:
		// (ii,jj,kk) runs over the offsets lexicographically >= (i,j,k).
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( laplacian[idx][(i+1)*3*3+(j+1)*3+(k+1)].N != -1 )
						for( int ii=i ; ii<2 ; ii++ )
							for( int jj=0 ; jj<2 ; jj++ )
								if( ii==i && jj<j ) continue;
								else
									for( int kk=0 ; kk<2 ; kk++ )
										if( ii==i && jj==j && kk<k ) continue;
										else if( laplacian[idx][(ii+1)*3*3+(jj+1)*3+(kk+1)].N != -1 )
										{
											double pairSum = 0;
											for( int s=0 ; s<sampleCount ; s++ ) pairSum += Point3D< double >::Dot( fullValues[i][j][k][s] , fullValues[ii][jj][kk][s] );
											sums[i][j][k][ii][jj][kk] += pairSum;
										}
		_setFromPrimalSums( laplacian , sums , idx );
	}
	if( progress ) delete bar;

	MemoryUsage();
	_compressMatrix( laplacian );
	MemoryUsage();
	return 1;
}
// Builds the matrix dotWeight * (dot-product matrix) + lapWeight * (Laplacian matrix) of the primal
// functions at level depth+1.
//
// For every indexed node at that level, both the values and the (normal-component-removed) gradients
// of the node's [0,1]^3 neighbor functions are evaluated at the node's samples; the weighted pairwise
// sums of value products and gradient dot-products are combined with dotWeight and lapWeight and
// scattered into the matrix via _setFromPrimalSums. The matrix is then compressed.
//
// depth     : the level is depth+1 (the argument is incremented on entry).
// laplacian : output matrix; its adjacency is initialised by _setAdjacency.
// dotWeight : multiplier of the value-product term.
// lapWeight : multiplier of the gradient-dot-product term.
// progress  : if true, a progress bar is updated once per primitive.
// Returns 1 unconditionally.
template< class Real , bool Primal >
int MeshOctree< Real , Primal >::_getPrimalConstrainedLaplacianMatrix( int depth , SparseMatrix<Real>& laplacian , Real dotWeight , Real lapWeight , bool progress ) const
{
	const int samplesPerTriangle = TriangleIntegrator10< double >::SampleNum;

	depth++;

	// NOTE(review): _setAdjacency's return value is used as the per-node primitive bound for the
	// scratch buffers -- presumably the largest primitive count of any node; confirm.
	int maxSamples = _setAdjacency( depth , laplacian );
	if( !_fromSamples ) maxSamples *= samplesPerTriangle;

	// Scratch buffers, released automatically when the function returns
	std::vector< Point3D< double > > samples( maxSamples ) , normals( maxSamples );
	std::vector< double > weights( maxSamples );
	std::vector< double > subValues[2][3] , subDValues[2][3];
	std::vector< double > fullDotValues[2][2][2];
	std::vector< Point3D< double > > fullLapValues[2][2][2];
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) subValues[i][j].resize( maxSamples ) , subDValues[i][j].resize( maxSamples );
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) fullDotValues[i][j][k].resize( maxSamples ) , fullLapValues[i][j][k].resize( maxSamples );

	ProgressBar* bar=NULL;
	if( progress )
	{
		if( _fromSamples ) bar = new ProgressBar( 20 , _samples.size()   , "Setting Matrix" );
		else               bar = new ProgressBar( 20 , _triangles.size() , "Setting Matrix" );
	}
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;

		const int idx = node->nodeData.index;
		Polynomial< 1 >  polys[2][3];
		Polynomial< 0 > dPolys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );

		// Accumulate the polynomials corresponding to the node, and their derivatives
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				const int fIdx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[fIdx].polys[j].p;
				dPolys[o][c] = polys[o][c].derivative();
			}

		double sums[2][2][2][2][2][2];
		memset( sums , 0 , sizeof( sums ) );

		const int primitiveCount = node->nodeData.tEnd-node->nodeData.tStart;
		const int sampleCount = _fromSamples ? primitiveCount : primitiveCount * samplesPerTriangle;

		// Gather the sample positions, weights and unit normals of the primitives attached to this node
		for( int t=0 ; t<primitiveCount ; t++ )
		{
			if( progress ) bar->update();
			if( _fromSamples )
			{
				Point3D< double > normal = Point3D< double >( _samples[node->nodeData.tStart+t].normal );
				samples[t] = Point3D< double >( _samples[node->nodeData.tStart+t].position );
				weights[t] = Length( normal );
				// A zero-length normal is left as is (rather than divided by zero) so that the sample
				// contributes nothing instead of injecting NaNs into the matrix.
				if( weights[t]>0 ) normal = normal / weights[t];
				normals[t] = normal;
			}
			else
			{
				const TriangleIndex& tIndex = _triangles[t+node->nodeData.tStart];
				TriangleIntegrator10< double > triIntegrator( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				for( int i=0 ; i<samplesPerTriangle ; i++ )
				{
					int ii = t*samplesPerTriangle + i;
					samples[ ii ] = triIntegrator[i];
					weights[ ii ] = triIntegrator.weights[i];
					normals[ ii ] = Point3D< double >( _baseNormals[ _parents[ t+node->nodeData.tStart ] ] );
				}
			}
		}
		// Each value/gradient is scaled by the square root of the weight so that a product of two
		// of them carries the full weight.
		for( int s=0 ; s<sampleCount ; s++ ) weights[s] = sqrt( weights[s] );

		// Evaluate the per-axis polynomials and their derivatives at the samples.
		// Note the inclusive upper bound, unlike the accumulation loop above: polynomials skipped there
		// are still evaluated here in their default-constructed state.
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
				if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
					for( int s=0 ; s<sampleCount ; s++ )
					{
						subValues [o][c][s] =  polys[o][c]( samples[s][c] );
						subDValues[o][c][s] = dPolys[o][c]( samples[s][c] );
					}
		// Compute the weighted value and the weighted, normal-component-removed gradient of each neighbor function
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( laplacian[idx][(i+1)*3*3+(j+1)*3+(k+1)].N != -1 )
					{
						for( int s=0 ; s<sampleCount ; s++ )
						{
							fullDotValues[i][j][k][s]    =  subValues[i][0][s] *  subValues[j][1][s] *  subValues[k][2][s] * weights[ s ];
							fullLapValues[i][j][k][s][0] = subDValues[i][0][s] *  subValues[j][1][s] *  subValues[k][2][s];
							fullLapValues[i][j][k][s][1] =  subValues[i][0][s] * subDValues[j][1][s] *  subValues[k][2][s];
							fullLapValues[i][j][k][s][2] =  subValues[i][0][s] *  subValues[j][1][s] * subDValues[k][2][s];
							fullLapValues[i][j][k][s] -= normals[s] * Point3D< double >::Dot( normals[s] , fullLapValues[i][j][k][s] );
							fullLapValues[i][j][k][s] *= weights[s];
						}
					}
		// Iterate over all pairs of neighbors, visiting each unordered pair once:
		// (ii,jj,kk) runs over the offsets lexicographically >= (i,j,k).
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( laplacian[idx][(i+1)*3*3+(j+1)*3+(k+1)].N != -1 )
						for( int ii=i ; ii<2 ; ii++ )
							for( int jj=0 ; jj<2 ; jj++ )
								if( ii==i && jj<j ) continue;
								else
									for( int kk=0 ; kk<2 ; kk++ )
										if( ii==i && jj==j && kk<k ) continue;
										else if( laplacian[idx][(ii+1)*3*3+(jj+1)*3+(kk+1)].N != -1 )
										{
											double tempLap = 0 , tempDot = 0;
											for( int s=0 ; s<sampleCount ; s++ )
											{
												tempDot += fullDotValues[i][j][k][s] * fullDotValues[ii][jj][kk][s];
												tempLap += Point3D< double >::Dot( fullLapValues[i][j][k][s] , fullLapValues[ii][jj][kk][s] );
											}
											sums[i][j][k][ii][jj][kk] += tempDot*dotWeight + tempLap*lapWeight;
										}
		_setFromPrimalSums( laplacian , sums , idx );
	}
	if( progress ) delete bar;

	MemoryUsage();
	_compressMatrix( laplacian );
	MemoryUsage();

	return 1;
}
// Computes the vector of weighted dot-products between the primal functions at level depth+1 and
// the given per-primitive functions.
//
// For every indexed node at that level, functions[ _parents[primitive] ] is evaluated at the node's
// samples and multiplied by the sample weight; the result is integrated against each indexed
// [0,1]^3 neighbor function and added to the neighbor's entry of b. Accumulation is done in
// HighPrecisionV and converted to V when added to b.
//
// functions : one function per parent primitive, indexed through _parents.
// depth     : the level is depth+1 (the argument is incremented on entry).
// b         : output; resized to _dimension[depth+1] and zeroed before accumulation.
// progress  : if true, a progress bar is updated once per processed node.
template< class Real , bool Primal >
template< class V , class HighPrecisionV , class Function >
void MeshOctree< Real , Primal >::_getPrimalPreciseDotVector( const std::vector< Function >& functions , int depth , std::vector< V >& b , bool progress ) const
{
	const int samplesPerTriangle = TriangleIntegrator10< double >::SampleNum;

	depth++;
	b.resize( _dimension[depth] );
	for( int i=0 ; i<_dimension[depth] ; i++ ) b[i] *= 0;

	ProgressBar* bar=NULL;
	if( progress ) bar = new ProgressBar( 20 , _dimension[depth] , "Setting Constraints" );

	// Find the largest number of primitives attached to any indexed node, to size the scratch buffers
	int maxSamples = 0;
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
		if( node->depth()==depth && node->nodeData.index!=-1 && node->nodeData.tEnd-node->nodeData.tStart>maxSamples )
			maxSamples = node->nodeData.tEnd-node->nodeData.tStart;
	if( !_fromSamples ) maxSamples *= samplesPerTriangle;

	// Scratch buffers, released automatically when the function returns
	std::vector< Point3D< double > > samples( maxSamples );
	std::vector< double > weights( maxSamples );
	std::vector< double > subValues[2][3];
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) subValues[i][j].resize( maxSamples );
	std::vector< HighPrecisionV > tValues( maxSamples );

	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;
		if( progress ) bar->update();

		typename TreeOctNode::Neighbors3& neighbors = neighborKey3.getNeighbors( node );

		Polynomial< 1 > polys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );
		// Accumulate the polynomials corresponding to the node
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				int idx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[idx].polys[j].p;
			}

		HighPrecisionV sums[2][2][2];
		for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) sums[i][j][k] *= 0;

		const int primitiveCount = node->nodeData.tEnd-node->nodeData.tStart;
		const int sampleCount = _fromSamples ? primitiveCount : primitiveCount * samplesPerTriangle;

		// Gather the sample positions and weights, and the weighted function value at each sample
		for( int t=0 ; t<primitiveCount ; t++ )
		{
			int idx = _parents[ t+node->nodeData.tStart ];
			if( _fromSamples )
			{
				samples[t] = Point3D< double >( _samples[node->nodeData.tStart+t].position );
				weights[t] = Length( Point3D< double >( _samples[node->nodeData.tStart+t].normal ) );
				tValues[t] = HighPrecisionV( functions[idx]( Point3D< typename V::R >( samples[t]) ) ) * typename HighPrecisionV::R( weights[t] );
			}
			else
			{
				const TriangleIndex& tIndex = _triangles[t+node->nodeData.tStart];
				TriangleIntegrator10< double > triIntegrator( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				for( int i=0 ; i<samplesPerTriangle ; i++ )
				{
					int ii = t*samplesPerTriangle + i;
					samples[ ii ] = triIntegrator[i];
					weights[ ii ] = triIntegrator.weights[i];
					tValues[ ii ] = HighPrecisionV( functions[idx]( Point3D< typename V::R >( samples[ii]) ) ) * typename HighPrecisionV::R( weights[ii] );
				}
			}
		}

		// Evaluate the per-axis polynomials at the samples.
		// Note the inclusive upper bound, unlike the accumulation loop above: polynomials skipped there
		// are still evaluated here in their default-constructed state.
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
				if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
					for( int s=0 ; s<sampleCount ; s++ ) subValues[o][c][s] = polys[o][c]( samples[s][c] );

		// Integrate the weighted function values against each indexed neighbor function
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( neighbors.neighbors[i+1][j+1][k+1] && neighbors.neighbors[i+1][j+1][k+1]->nodeData.index!=-1 )
					{
						HighPrecisionV temp;
						temp *= 0;
						for( int s=0 ; s<sampleCount ;s++ )
							temp += typename HighPrecisionV::R(subValues[i][0][s] * subValues[j][1][s] * subValues[k][2][s]) * tValues[s];
						sums[i][j][k] += temp;
					}
		// Add the node's contributions to the output vector
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( neighbors.neighbors[i+1][j+1][k+1] && neighbors.neighbors[i+1][j+1][k+1]->nodeData.index!=-1 )
						b[ neighbors.neighbors[i+1][j+1][k+1]->nodeData.index ] += V( sums[i][j][k] );
	}
	if( progress ) delete bar;

	MemoryUsage();
}
// Computes the vector of weighted dot-products between the gradients of the primal functions at
// level depth+1 and the given per-primitive gradient fields.
//
// For every indexed node at that level, gradientFunctions[ _parents[primitive] ] is evaluated at the
// node's samples, optionally has its component along the sample normal removed, and is multiplied by
// the sample weight; the result is dotted with the gradient of each indexed [0,1]^3 neighbor function
// and added to the neighbor's entry of b. Accumulation is done in HighPrecisionV and converted to V
// when added to b.
//
// gradientFunctions : one gradient field per parent primitive, indexed through _parents.
// depth             : the level is depth+1 (the argument is incremented on entry).
// b                 : output; resized to _dimension[depth+1] and zeroed before accumulation.
// progress          : if true, a progress bar is updated once per processed node.
// normalize         : if true, the normal component of each evaluated gradient is removed.
// Returns 1 unconditionally.
template< class Real , bool Primal >
template< class V , class HighPrecisionV , class Function >
int MeshOctree< Real , Primal >::_getPrimalPreciseGradientDotGradientVector( const std::vector< Function >& gradientFunctions , int depth , std::vector< V >& b , bool progress , bool normalize ) const
{
	const int samplesPerTriangle = TriangleIntegrator10< double >::SampleNum;

	depth++;
	b.resize(_dimension[depth]);
	for( int i=0 ; i<_dimension[depth] ; i++ ) b[i] *= 0;

	ProgressBar* bar=NULL;
	if( progress ) bar = new ProgressBar( 20 , _dimension[depth] , "Setting Constraints" );

	// Find the largest number of primitives attached to any indexed node, to size the scratch buffers
	int maxSamples = 0;
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
		if( node->depth()==depth && node->nodeData.index!=-1 && node->nodeData.tEnd-node->nodeData.tStart>maxSamples )
			maxSamples = node->nodeData.tEnd-node->nodeData.tStart;
	if( !_fromSamples ) maxSamples *= samplesPerTriangle;

	// Scratch buffers, released automatically when the function returns
	std::vector< Point3D< double > > samples( maxSamples );
	std::vector< double > weights( maxSamples );
	std::vector< double > subValues[2][3] , subDValues[2][3];
	std::vector< Point3D< double > > fullValues[2][2][2];
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) subValues[i][j].resize( maxSamples ) , subDValues[i][j].resize( maxSamples );
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) fullValues[i][j][k].resize( maxSamples );
	std::vector< Gradient3D< HighPrecisionV > > grads( maxSamples );

	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;
		if( progress ) bar->update();

		typename TreeOctNode::Neighbors3& neighbors = neighborKey3.getNeighbors( node );

		Polynomial< 1 >  polys[2][3];
		Polynomial< 0 > dPolys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );
		// Accumulate the polynomials corresponding to the node, and their derivatives
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				int idx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[idx].polys[j].p;
				dPolys[o][c] = polys[o][c].derivative();
			}

		HighPrecisionV sums[2][2][2];
		for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) sums[i][j][k] *= 0;

		const int primitiveCount = node->nodeData.tEnd-node->nodeData.tStart;
		const int sampleCount = _fromSamples ? primitiveCount : primitiveCount * samplesPerTriangle;

		// Gather the sample positions and weights, and the weighted (optionally tangential) gradient at each sample
		for( int t=0 ; t<primitiveCount ; t++ )
		{
			int idx = _parents[ t+node->nodeData.tStart ];
			if( _fromSamples )
			{
				Point3D< double > normal = Point3D< double >( _samples[node->nodeData.tStart+t].normal );
				samples[t] = Point3D< double >( _samples[node->nodeData.tStart+t].position );
				weights[t] = Length( normal );
				// A zero-length normal is left as is (rather than divided by zero) so that the sample
				// contributes nothing instead of injecting NaNs into the result.
				if( weights[t]>0 ) normal = normal / weights[t];

				Gradient3D< V >  g = gradientFunctions[idx]( Point3D< typename V::R >( samples[t] ) );
				for( int c=0 ; c<3 ; c++ ) grads[t][c] = HighPrecisionV( g[c] );
				if( normalize )
				{
					HighPrecisionV dot;
					dot *= 0;
					for( int c=0 ; c<3 ; c++ ) dot += grads[t][c] * normal[c];
					for( int c=0 ; c<3 ; c++ ) grads[t][c] -= dot * normal[c];
				}
				grads[t] *= weights[t];
			}
			else
			{
				Point3D< double > normal = Point3D< double >( _baseNormals[ idx ] );
				const TriangleIndex& tIndex = _triangles[t+node->nodeData.tStart];
				TriangleIntegrator10< double > triIntegrator( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				for( int i=0 ; i<samplesPerTriangle ; i++ )
				{
					int ii = t*samplesPerTriangle + i;
					samples[ ii ] = triIntegrator[i];
					weights[ ii ] = triIntegrator.weights[i];

					Gradient3D< V > g = gradientFunctions[idx]( Point3D< typename V::R >( samples[ii] ) );
					for( int c=0 ; c<3 ; c++ ) grads[ii][c] = HighPrecisionV( g[c] );
					if( normalize )
					{
						HighPrecisionV dot;
						dot *= 0;
						for( int c=0 ; c<3 ; c++ ) dot += grads[ii][c] * normal[c];
						for( int c=0 ; c<3 ; c++ ) grads[ii][c] -= dot * normal[c];
					}
					grads[ii] *= weights[ii];
				}
			}
		}

		// Evaluate the per-axis polynomials and their derivatives at the samples.
		// Note the inclusive upper bound, unlike the accumulation loop above: polynomials skipped there
		// are still evaluated here in their default-constructed state.
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
				if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
					for( int s=0 ; s<sampleCount ; s++ )
					{
						subValues [o][c][s] =  polys[o][c]( samples[s][c] );
						subDValues[o][c][s] = dPolys[o][c]( samples[s][c] );
					}

		// Compute the gradient of each indexed neighbor function at the samples
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( neighbors.neighbors[i+1][j+1][k+1] && neighbors.neighbors[i+1][j+1][k+1]->nodeData.index!=-1 )
						for( int s=0 ; s<sampleCount ; s++ )
						{
							fullValues[i][j][k][s][0] = subDValues[i][0][s] *  subValues[j][1][s] *  subValues[k][2][s];
							fullValues[i][j][k][s][1] =  subValues[i][0][s] * subDValues[j][1][s] *  subValues[k][2][s];
							fullValues[i][j][k][s][2] =  subValues[i][0][s] *  subValues[j][1][s] * subDValues[k][2][s];
						}
		// Dot the weighted gradients with each indexed neighbor's gradient
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( neighbors.neighbors[i+1][j+1][k+1] && neighbors.neighbors[i+1][j+1][k+1]->nodeData.index!=-1 )
					{
						HighPrecisionV temp;
						temp *= 0;
						for( int s=0 ; s<sampleCount ; s++ ) for( int c=0 ; c<3 ; c++ ) temp += grads[s][c] * fullValues[i][j][k][s][c];
						sums[i][j][k] += temp;
					}
		// Add the node's contributions to the output vector
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
					if( neighbors.neighbors[i+1][j+1][k+1] && neighbors.neighbors[i+1][j+1][k+1]->nodeData.index!=-1 )
						b[ neighbors.neighbors[i+1][j+1][k+1]->nodeData.index ] += V( sums[i][j][k] );
	}
	if( progress ) delete bar;
	MemoryUsage();

	return 1;
}

// Assembles the constraint vector for the indexed nodes at depth+1 from a set of input functions.
//
// For each such node, the quadrature samples of its elements [tStart,tEnd) are gathered: the stored point
// samples when _fromSamples is set, otherwise the TriangleIntegrator10 samples of each triangle.
// At every sample the function selected through _parents is evaluated for its value and its gradient,
// both scaled by the sample weight. For each indexed node in the 2x2x2 neighbor block the quantity
//     dotWeight * sum_s( basis(s) * value(s) ) + lapWeight * sum_s( < gradient(basis)(s) , gradient(s) > )
// is added to the entry of b addressed by that neighbor's index.
//
//   functions  Input functions, addressed by the entries of _parents.
//   dotWeight  Scale applied to the value term.
//   lapWeight  Scale applied to the gradient term.
//   depth      The nodes processed are those at depth+1.
//   b          Output; resized to _dimension[depth+1] and cleared before accumulation.
//   progress   When true, a ProgressBar is updated once per processed node.
//   normalize  When true, the component of each gradient along the normal is removed first.
// Returns 1.
template< class Real , bool Primal >
template< class V , class HighPrecisionV , class Function >
int MeshOctree< Real , Primal >::_getPrimalPreciseConstrainedGradientDotGradientVector( const std::vector< Function >& functions , Real dotWeight , Real lapWeight , int depth , std::vector< V >& b , bool progress , bool normalize ) const
{
	const int SamplesPerTriangle = TriangleIntegrator10< double >::SampleNum;
	TriangleIntegrator10< double > triIntegrator;

	depth++;
	b.resize( _dimension[depth] );
	for( int i=0 ; i<_dimension[depth] ; i++ ) b[i] *= 0;

	ProgressBar* bar=NULL;
	if( progress ) bar = new ProgressBar( 20 , _dimension[depth] , "Setting Constraints" );

	// The scratch buffers are sized for the node that holds the most elements
	int max = 0;
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
		if( node->depth()==depth && node->nodeData.index!=-1 && node->nodeData.tEnd-node->nodeData.tStart>max )
			max = node->nodeData.tEnd-node->nodeData.tStart;
	if( !_fromSamples ) max *= SamplesPerTriangle;

	// Vector-backed scratch storage is released automatically on every exit path
	std::vector< Point3D< double > > samples( max );
	std::vector< double > weights( max );
	std::vector< HighPrecisionV > tValues( max );
	std::vector< Gradient3D< HighPrecisionV > > grads( max );
	std::vector< double > subValues[2][3] , subDValues[2][3];
	std::vector< Point3D< double > > fullValues[2][2][2];
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<3 ; j++ ) subValues[i][j].resize( max ) , subDValues[i][j].resize( max );
	for( int i=0 ; i<2 ; i++ ) for( int j=0 ; j<2 ; j++ ) for( int k=0 ; k<2 ; k++ ) fullValues[i][j][k].resize( max );

	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;
		if( progress ) bar->update();

		typename TreeOctNode::Neighbors3& neighbors = neighborKey3.getNeighbors( node );

		Polynomial< 1 >  polys[2][3];
		Polynomial< 0 > dPolys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );

		// Accumulate the polynomials corresponding to the node
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				int idx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[idx].polys[j].p;
				dPolys[o][c] = polys[o][c].derivative();
			}

		const int tStart       = node->nodeData.tStart;
		const int elementCount = node->nodeData.tEnd - tStart;
		const int sampleCount  = _fromSamples ? elementCount : elementCount * SamplesPerTriangle;

		// Gather the samples of the node together with the weighted function values and gradients
		for( int t=0 ; t<elementCount ; t++ )
		{
			int idx = _parents[ tStart+t ];
			if( _fromSamples )
			{
				samples[t] = Point3D< double >( _samples[tStart+t].position );
				weights[t] = Length( Point3D< double >( _samples[tStart+t].normal ) );

				Gradient3D< V >  g = functions[idx].gradient( Point3D< typename V::R >( samples[t] ) );
				for( int c=0 ; c<3 ; c++ ) grads[t][c] = HighPrecisionV( g[c] );
				if( normalize )
				{
					// The unit normal is only needed for the projection
					Point3D< double > normal = Point3D< double >( _samples[tStart+t].normal ) / weights[t];
					HighPrecisionV dot;
					dot *= 0;
					for( int c=0 ; c<3 ; c++ ) dot += grads[t][c] * normal[c];
					for( int c=0 ; c<3 ; c++ ) grads[t][c] -= dot * normal[c];
				}
				grads[t] *= weights[t];
				tValues[t] = HighPrecisionV( functions[idx]( Point3D< typename V::R >( samples[t]) ) ) * weights[t];
			}
			else
			{
				Point3D< double > normal = Point3D< double >( _baseNormals[ idx ] );
				const TriangleIndex& tIndex = _triangles[tStart+t];
				triIntegrator = TriangleIntegrator10< double >( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				for( int i=0 ; i<SamplesPerTriangle ; i++ )
				{
					int ii = t*SamplesPerTriangle + i;
					samples[ ii ] = triIntegrator[i];
					weights[ ii ] = triIntegrator.weights[i];

					Gradient3D< V >  g = functions[idx].gradient( Point3D< typename V::R >( samples[ii] ) );
					for( int c=0 ; c<3 ; c++ ) grads[ii][c] = HighPrecisionV( g[c] );
					if( normalize )
					{
						HighPrecisionV dot;
						dot *= 0;
						for( int c=0 ; c<3 ; c++ ) dot += grads[ii][c] * normal[c];
						for( int c=0 ; c<3 ; c++ ) grads[ii][c] -= dot * normal[c];
					}
					grads[ii] *= weights[ii];
					tValues[ii] = HighPrecisionV( functions[idx]( Point3D< typename V::R >( samples[ii]) ) ) * weights[ii];
				}
			}
		}

		// Evaluate the per-axis polynomials and their derivatives at every sample.
		// NOTE(review): the accumulation above skips corner (1<<dd) whereas this test admits it -- confirm the asymmetry is intended.
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
				if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
					for( int s=0 ; s<sampleCount ; s++ )
					{
						subValues [o][c][s] =  polys[o][c]( samples[s][c] );
						subDValues[o][c][s] = dPolys[o][c]( samples[s][c] );
					}

		// Gradients of the tensor-product basis functions of all indexed neighbors
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
				{
					const TreeOctNode* neighbor = neighbors.neighbors[i+1][j+1][k+1];
					if( !neighbor || neighbor->nodeData.index==-1 ) continue;
					for( int s=0 ; s<sampleCount ; s++ )
					{
						fullValues[i][j][k][s][0] = subDValues[i][0][s] *  subValues[j][1][s] *  subValues[k][2][s];
						fullValues[i][j][k][s][1] =  subValues[i][0][s] * subDValues[j][1][s] *  subValues[k][2][s];
						fullValues[i][j][k][s][2] =  subValues[i][0][s] *  subValues[j][1][s] * subDValues[k][2][s];
					}
				}

		// Integrate against each neighbor's basis function and scatter the result into b
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
				{
					const TreeOctNode* neighbor = neighbors.neighbors[i+1][j+1][k+1];
					if( !neighbor || neighbor->nodeData.index==-1 ) continue;

					HighPrecisionV dotTemp , lapTemp , sum;
					dotTemp *= 0;
					lapTemp *= 0;
					sum *= 0;
					for( int s=0 ; s<sampleCount ; s++ ) dotTemp += subValues[i][0][s] * subValues[j][1][s] * subValues[k][2][s] * tValues[s];
					for( int s=0 ; s<sampleCount ; s++ ) for( int c=0 ; c<3 ; c++ ) lapTemp += grads[s][c] * fullValues[i][j][k][s][c];
					sum += dotTemp * dotWeight + lapTemp * lapWeight;
					b[ neighbor->nodeData.index ] += V( sum );
				}
	}
	if( progress ) delete bar;
	// Report memory usage while the scratch buffers are still allocated
	MemoryUsage();

	return 1;
}

// Integrates the function described by the given basis coefficients over the stored geometry.
//
// For every indexed node at depth+1 and every element in its range [tStart,tEnd), the quadrature samples are
// either the single stored point sample (weighted by the length of its normal) when _fromSamples is set, or
// the TriangleIntegrator10 samples and weights of the triangle. For each indexed node in the 2x2x2 neighbor
// block, the weighted sum of its tensor-product basis function over the samples is multiplied by that
// neighbor's coefficient. The products are summed per element, then per node, then overall, in HighPrecisionV.
//
//   coefficients  Basis coefficients, addressed by nodeData.index.
//   depth         The nodes visited are those at depth+1.
// Returns the accumulated integral converted to V.
template< class Real , bool Primal >
template< class V , class HighPrecisionV >
V MeshOctree< Real , Primal >::_getPrimalPreciseCoefficientIntegral( const std::vector< V >& coefficients, int depth ) const
{
	TriangleIntegrator10< double > triIntegrator;
	Point3D< double > scratchSample;
	double scratchWeight;
	const Point3D< double >* samples;
	double* weights;
	int sampleCount;
	if( _fromSamples ) sampleCount = 1;
	else               sampleCount = TriangleIntegrator10< double >::SampleNum;

	depth++;
	HighPrecisionV integral;
	integral *= 0;
	double subValues[ TriangleIntegrator10< double >::SampleNum ][2][3];

	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth || node->nodeData.index==-1 ) continue;

		typename TreeOctNode::Neighbors3& neighbors = neighborKey3.getNeighbors( node );

		Polynomial< 1 > polys[2][3];
		int dd , off[3];
		node->depthAndOffset( dd , off );

		// Accumulate the polynomials corresponding to the node
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( off[c]+o<0 || off[c]+o>=(1<<dd) )	continue;
				int cornerIdx = BinaryNode<Real>::CornerIndex( dd , off[c]+o );
				for( int j=0 ; j<=1-o ; j++ ) polys[o][c] += fData.baseFunctions[cornerIdx].polys[j].p;
			}

		HighPrecisionV nodeSum;
		nodeSum *= 0;
		// Iterate over all the triangles in the node
		for( int t=node->nodeData.tStart ; t<node->nodeData.tEnd ; t++ )
		{
			// Point the sample/weight cursors at either the single point sample or the triangle quadrature
			if( _fromSamples )
			{
				scratchSample = Point3D< double >( _samples[t].position );
				scratchWeight = Length( Point3D< double >( _samples[t].normal ) );
				samples = &scratchSample;
				weights = &scratchWeight;
			}
			else
			{
				const TriangleIndex& tIndex = _triangles[t];
				triIntegrator = TriangleIntegrator10< double > ( _vertices[ tIndex[0] ] , _vertices[ tIndex[1] ] , _vertices[ tIndex[2] ] );
				samples = &triIntegrator[0];
				weights = triIntegrator.weights;
			}

			// Evaluate the per-axis polynomials at every sample
			for( int o=0 ; o<=1 ; o++ )
				for( int c=0 ; c<3 ; c++ )
					if( off[c]+o>=0 && off[c]+o<=(1<<dd) )
						for( int s=0 ; s<sampleCount ; s++ ) subValues[s][o][c] = polys[o][c]( samples[s][c] );

			HighPrecisionV triangleSum;
			triangleSum *= 0;
			for( int i=0 ; i<2 ; i++ )
				for( int j=0 ; j<2 ; j++ )
					for( int k=0 ; k<2 ; k++ )
					{
						const TreeOctNode* neighbor = neighbors.neighbors[i+1][j+1][k+1];
						if( !neighbor || neighbor->nodeData.index==-1 ) continue;

						// Weighted sum of the neighbor's basis function over the samples of this element
						double basisIntegral = 0;
						for( int s=0 ; s<sampleCount ; s++ ) basisIntegral += subValues[s][i][0] * subValues[s][j][1] * subValues[s][k][2] * weights[s];
						triangleSum += HighPrecisionV( coefficients[ neighbor->nodeData.index ] ) * basisIntegral;
					}
			nodeSum += triangleSum;
		}
		integral += nodeSum;
	}
	return V( integral );
}

// Evaluates the function described by the given basis coefficients at a set of points.
//
// For each point, the neighborhood returned by _getFinestNodeNeighbors (queried with tree.maxDepth()) is used:
// the per-axis basis functions are evaluated at the point, and the coefficients of the indexed nodes in the
// 2x2x2 neighbor block are summed, each weighted by the product of the three per-axis values.
//
//   points       Query positions (converted to Point3D< Real >).
//   coefficients Basis coefficients, addressed by nodeData.index.
//   pointValues  Output; resized to points.size(), one value per point.
template< class Real , bool Primal >
template< class Vertex , class C >
void MeshOctree< Real , Primal >::_getPrimalValues( const std::vector<Vertex>& points , const std::vector< C >& coefficients , std::vector< C >& pointValues )
{
	int maxDepth = tree.maxDepth();
	const int pointCount = int( points.size() );
	pointValues.resize( points.size() );

	for( int p=0 ; p<pointCount ; p++ )
	{
		Point3D< Real > point = Point3D< Real >( points[p] );

		int depth , offset[3];
		depth = maxDepth;
		typename TreeOctNode::Neighbors3 neighbors = _getFinestNodeNeighbors( point , maxDepth , offset );

		// Start from zero so that a corner skipped below contributes nothing instead of an indeterminate value
		double values[2][3] = { { 0 , 0 , 0 } , { 0 , 0 , 0 } };

		C value;
		value *= Real( 0 );

		// Per-axis basis values at the point
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( offset[c]+o<0 || offset[c]+o>=(1<<depth) )	continue;
				int idx = BinaryNode<Real>::CornerIndex( depth , offset[c]+o );
				values[o][c] = fData.baseFunctions[idx]( point[c] );
			}

		// Combine the coefficients of the indexed neighbors
		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
				{
					const TreeOctNode* neighbor = neighbors.neighbors[i+1][j+1][k+1];
					if( !neighbor || neighbor->nodeData.index==-1 ) continue;
					value += coefficients[ neighbor->nodeData.index ]*Real( values[i][0]*values[j][1]*values[k][2] );
				}
		pointValues[ p ] = value;
	}
}
// Evaluates, at a set of oriented points, the gradient of the function described by the given coefficients,
// with the component along each point's normal removed.
//
// For each point, the per-axis basis functions and their derivatives are evaluated, the gradient of every
// indexed neighbor's tensor-product basis function is formed, its component along the unit normal is
// subtracted, and the result is accumulated scaled by that neighbor's coefficient.
//
//   points          Query positions; each must expose a `normal` member.
//   coefficients    Basis coefficients, addressed by nodeData.index.
//   pointGradients  Output; resized to points.size(), one gradient per point.
template< class Real , bool Primal >
template< class OrientedVertex , class V >
void MeshOctree< Real , Primal >::_getPrimalGradientValues( const std::vector< OrientedVertex >& points , const std::vector< V >& coefficients , std::vector< Gradient3D< V > >& pointGradients )
{
	int maxDepth = tree.maxDepth();
	const int pointCount = int( points.size() );
	pointGradients.resize( points.size() );
	for( int p=0 ; p<pointCount ; p++ )
	{
		pointGradients[p] *= 0;
		Point3D< Real > point = Point3D< Real >( points[p] );
		// NOTE(review): a zero-length normal makes this a division by zero -- confirm callers never pass one
		Point3D< double > normal = Point3D< double >( points[p].normal ) / Length( Point3D< double >( points[p].normal ) );

		int depth , offset[3];
		depth = maxDepth;
		typename TreeOctNode::Neighbors3 neighbors = _getFinestNodeNeighbors( point , maxDepth , offset );

		// Clear the accumulator explicitly, the same way _getPrimalValues clears its running value
		Gradient3D< V > gradient;
		gradient *= 0;

		// Start from zero so that a corner skipped below contributes nothing instead of an indeterminate value
		double  values[2][3] = { { 0 , 0 , 0 } , { 0 , 0 , 0 } };
		double dValues[2][3] = { { 0 , 0 , 0 } , { 0 , 0 , 0 } };
		for( int o=0 ; o<=1 ; o++ )
			for( int c=0 ; c<3 ; c++ )
			{
				if( offset[c]+o<0 || offset[c]+o>=(1<<depth) )	continue;
				int idx = BinaryNode<Real>::CornerIndex( depth , offset[c]+o );
				values [o][c] = fData.baseFunctions [idx]( point[c] );
				dValues[o][c] = fData.dBaseFunctions[idx]( point[c] );
			}

		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
			{
				// The factors that do not depend on k are shared by both neighbors along the third axis
				double d0 = dValues[i][0]* values[j][1];
				double d1 =  values[i][0]*dValues[j][1];
				double d2 =  values[i][0]* values[j][1];
				for( int k=0 ; k<2 ; k++ )
				{
					const TreeOctNode* neighbor = neighbors.neighbors[i+1][j+1][k+1];
					if( !neighbor || neighbor->nodeData.index==-1 ) continue;

					Point3D< double > grad;
					grad[0] = d0* values[k][2];
					grad[1] = d1* values[k][2];
					grad[2] = d2*dValues[k][2];
					// Remove the component of the basis gradient along the normal
					grad -= normal * Point3D< double >::Dot( normal , grad );

					const V& coefficient = coefficients[ neighbor->nodeData.index ];
					gradient[0] += coefficient * Real(grad[0]);
					gradient[1] += coefficient * Real(grad[1]);
					gradient[2] += coefficient * Real(grad[2]);
				}
			}
		for( int c=0 ; c<3 ; c++ ) pointGradients[ p ][c] = gradient[c];
	}
}
// Down-samples a vector of values at `depth` to a vector of values at `depth-1`.
//
// For every indexed node at depth-1, the indexed children of the neighbors at positions 0 and 1 along each axis
// are collected into a 3x3x3 array, and the entry of `low` for the node accumulates the entries of `high`
// for those children, weighted by the product of the per-axis prolongation stencil values (caseTable[1]).
//
//   high   Values at `depth`, addressed by nodeData.index.
//   depth  Depth of the fine level.
//   low    Output; resized to _dimension[depth-1]. The accumulation assumes Resize leaves the entries zeroed.
template< class Real , bool Primal >
template< class C >
void MeshOctree< Real , Primal >::_primalDownSample(const Vector< C >& high,int depth , Vector< C >& low ) const
{
	TreeOctNode* childNodes[3][3][3];

	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullProlongationStencil pStencil;
	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullProlongationStencil::ProlongationStencil pStencils[3];
	// Modify this in case depth = 1 in which case there are no interior stencil values
	int tempD;
	FiniteElements1D<Real,ZERO_DERIVATIVE,1>::ProlongationStencil( 5 , pStencil , tempD );
	// The same stencil case is used for every node and every axis, so it is selected once
	for( int c=0 ; c<3 ; c++ ) pStencils[c] = pStencil.caseTable[1];

	low.Resize(_dimension[depth-1]);
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
	{
		if( node->depth()!=depth-1 || node->nodeData.index==-1 ) continue;

		typename TreeOctNode::Neighbors3& neighbors = neighborKey3.getNeighbors( node );

		// Set the array of children that could be affected by a prolongation
		memset( childNodes , 0 , sizeof( childNodes ) );

		for( int i=0 ; i<2 ; i++ )
			for( int j=0 ; j<2 ; j++ )
				for( int k=0 ; k<2 ; k++ )
				{
					TreeOctNode* neighbor = neighbors.neighbors[i][j][k];
					if( !neighbor || !neighbor->children ) continue;
					for( int x=0 ; x<2 ; x++ )
					{
						// Position of the child within the 3x3x3 window; children outside the window are skipped
						int ii = 2*i+x-1;
						if( ii<0 || ii>=3 ) continue;
						for( int y=0 ; y<2 ; y++ )
						{
							int jj = 2*j+y-1;
							if( jj<0 || jj>=3 ) continue;
							for( int z=0 ; z<2 ; z++ )
							{
								int kk = 2*k+z-1;
								if( kk<0 || kk>=3 ) continue;

								int corner = Cube::CornerIndex( x , y , z );
								if( neighbor->children[corner].nodeData.index!=-1 )
									childNodes[ii][jj][kk] = &neighbor->children[corner];
							}
						}
					}
				}

		for( int i=0 ; i<3 ; i++ )
			for( int j=0 ; j<3 ; j++ )
				for( int k=0 ; k<3 ; k++ )
					if( childNodes[i][j][k] )
						low[node->nodeData.index] += high[childNodes[i][j][k]->nodeData.index]*pStencils[0].values[i]*pStencils[1].values[j]*pStencils[2].values[k];
	}
}

// Up-samples a vector of values at `depth` to a vector of values at `depth+1`.
//
// Every indexed node at depth+1 receives the weighted sum of the entries of `low` belonging to the indexed
// nodes in a 2x2x2 block of its parent's neighborhood. The block starts at the child's corner position
// (x,y,z) within the parent, and the per-axis weights come from restriction stencil case 1 when the node's
// offset along that axis is odd and case 2 otherwise.
//
//   low    Values at `depth`, addressed by nodeData.index.
//   depth  Depth of the coarse level.
//   high   Output; resized to _dimension[depth+1] and cleared before accumulation.
template< class Real , bool Primal>
template< class C >
void MeshOctree< Real , Primal >::_primalUpSample( const std::vector< C >& low , int depth , std::vector< C >& high ) const
{
	typedef FiniteElements1D< Real , ZERO_DERIVATIVE , 1 > Elements;

	typename Elements::FullRestrictionStencil fullStencil;
	typename Elements::FullRestrictionStencil::RestrictionStencil axisStencils[3];
	int stencilDepth;
	// Modify this in case depth = 1 in which case there are no interior stencil values
	Elements::RestrictionStencil( 9 , fullStencil , stencilDepth );
	TreeOctNode* coarseNodes[2][2][2];

	high.resize( _dimension[depth+1] );
	for( int n=0 ; n<high.size() ; n++ ) high[n] *= 0;

	for( const TreeOctNode* fine=tree.nextNode() ; fine ; fine=tree.nextNode(fine) )
	{
		if( fine->depth()!=depth+1 ) continue;
		if( fine->nodeData.index==-1 ) continue;

		int fineDepth , fineOff[3];
		typename TreeOctNode::Neighbors3& parentNeighbors = neighborKey3.getNeighbors( fine->parent );
		fine->depthAndOffset( fineDepth , fineOff );

		// Corner position of the node within its parent
		int cx , cy , cz;
		Cube::FactorCornerIndex( int( fine-fine->parent->children ) , cx , cy , cz );

		// WARNING!!! Need to check that the indexing here is correct:
		// 1] In the case that x = 0, who are the two parents?
		// 2] Is caseTable[1] the odds?
		memset( coarseNodes , 0 , sizeof( coarseNodes ) );
		for( int a=0 ; a<2 ; a++ )
			for( int b=0 ; b<2 ; b++ )
				for( int c=0 ; c<2 ; c++ )
				{
					TreeOctNode* candidate = parentNeighbors.neighbors[cx+a][cy+b][cz+c];
					if( candidate && candidate->nodeData.index!=-1 ) coarseNodes[a][b][c] = candidate;
				}

		// Odd offsets use stencil case 1, even offsets use case 2
		for( int axis=0 ; axis<3 ; axis++ )
		{
			if( fineOff[axis]%2 ) axisStencils[axis] = fullStencil.caseTable[1];
			else                  axisStencils[axis] = fullStencil.caseTable[2];
		}

		for( int a=0 ; a<2 ; a++ )
			for( int b=0 ; b<2 ; b++ )
				for( int c=0 ; c<2 ; c++ )
				{
					if( !coarseNodes[a][b][c] ) continue;
					high[fine->nodeData.index] += low[coarseNodes[a][b][c]->nodeData.index]*axisStencils[0].values[a]*axisStencils[1].values[b]*axisStencils[2].values[c];
				}
	}
}

// Computes the coarse matrix low = R * M * P from the fine matrix M (`high`) and the down-sampling matrix R (`highToLow`).
//
// The transpose of `highToLow` is built explicitly and used as P. Both products are accumulated into rows of
// 27 slots. The slot of a column is derived from the difference between the linearized offsets
// (3*3*x + 3*y + z) of the row node and the column node, centered at slot 13. After each product the rows
// are compacted to the slots that were actually written.
// This addressing presumes that row and column nodes differ by at most one along each axis -- it is not checked here.
//
//   high       Matrix at `depth`.
//   highToLow  Down-sampling matrix whose rows are indexed by nodes at depth-1 and whose entries refer to nodes at `depth`.
//   depth      Depth of the fine level.
//   low        Output matrix at depth-1.
template< class Real , bool Primal >
void MeshOctree< Real , Primal >::_primalDownSample( const SparseMatrix< Real >& high , const DownSampleMatrix< Real >& highToLow , int depth , SparseMatrix< Real >& low ) const
{
	const int StencilSize   = 3*3*3;
	const int StencilCenter = 3*3*1 + 3*1 + 1;

	std::vector< int > lowIndices ( _dimension[depth-1] );
	std::vector< int > highIndices( _dimension[depth  ] );

	// Get the linearized offsets of the coarse nodes, and of the fine nodes expressed at the coarse resolution
	for( const TreeOctNode* node=tree.nextNode() ; node ; node=tree.nextNode(node) )
		if( node->depth()==depth-1 && node->nodeData.index!=-1 )
		{
			int d , idx[3];
			node->depthAndOffset( d , idx );
			lowIndices[ node->nodeData.index ] = 3 * 3 * idx[0] + 3 * idx[1] + idx[2];
		}
		else if( node->depth()==depth && node->nodeData.index!=-1 )
		{
			int d , idx[3];
			node->depthAndOffset( d , idx );
			highIndices[ node->nodeData.index ] = 3 * 3 * (idx[0]>>1) + 3 * (idx[1]>>1) + (idx[2]>>1);
		}

	// Transpose the restriction operator: first count the entries of every row, then size the rows, then fill them
	SparseMatrix< Real > lowToHigh;
	lowToHigh.Resize( _dimension[depth] );
	for( int i=0 ; i<_dimension[depth] ; i++ ) lowToHigh.groupSizes[i] = 0;
	for( int i=0 ; i<highToLow.ds.groups ; i++ )
	{
		const MatrixEntry< Real >* _highToLow = highToLow.ds[i];
		for( int j=0 ; j<highToLow.ds.groupSizes[i] ; j++ ) if( _highToLow[j].N>=0 ) lowToHigh.groupSizes[ _highToLow[j].N ]++;
	}
	for( int i=0 ; i<lowToHigh.groups ; i++ )
	{
		// The size is reset around the call so that it can be reused as the fill cursor below
		int dim = lowToHigh.groupSizes[i];
		lowToHigh.groupSizes[i] = 0;
		lowToHigh.SetGroupSize( i , dim );
		lowToHigh.groupSizes[i] = 0;
	}
	for( int i=0 ; i<highToLow.ds.groups ; i++ )
	{
		const MatrixEntry< Real >* _highToLow = highToLow.ds[i];
		for( int j=0 ; j<highToLow.ds.groupSizes[i] ; j++ )
		{
			int ii = _highToLow[j].N;
			if( ii<0 ) continue;
			lowToHigh[ ii ][ lowToHigh.groupSizes[ii] ].N = i;
			lowToHigh[ ii ][ lowToHigh.groupSizes[ii] ].Value = _highToLow[j].Value;
			lowToHigh.groupSizes[ii]++;
		}
	}

	// Iterate over the finer elements to construct
	// the matrix M * P
	SparseMatrix< Real > halfWay;
	halfWay.Resize( _dimension[depth] );		// For each finer node, what is the dot-product with the coarser node?
	for( int i=0; i<halfWay.groups ; i++ )
	{
		halfWay.SetGroupSize( i , StencilSize );
		MatrixEntry< Real >* _halfWay = halfWay[i];
		for( int j=0 ; j<StencilSize ; j++ ) _halfWay[j] = MatrixEntry< Real >( -1 , 0 );
	}
	for( int high1=0 ; high1<_dimension[depth] ; high1++ )
	{
		MatrixEntry< Real >* _halfWay = halfWay[high1];
		// The slot is computed as an integer so that no pointer outside the row is ever formed
		const int slotBase = StencilCenter - highIndices[high1];
		const MatrixEntry< Real >* _high = high[high1];
		for( int j=0 ; j<high.groupSizes[high1] ; j++ )
		{
			int high2 = _high[j].N;
			Real value = _high[j].Value;
			const MatrixEntry< Real >* _lowToHigh = lowToHigh[high2];
			for( int k=0 ; k<lowToHigh.groupSizes[high2] ; k++ )
			{
				int coarse = _lowToHigh[k].N;
				MatrixEntry< Real >& entry = _halfWay[ slotBase + lowIndices[coarse] ];
				entry.N = coarse;
				entry.Value += value * _lowToHigh[k].Value;
			}
		}
	}

	// Clean up the half-way matrix
	for( int i=0 ; i<halfWay.groups ; i++ )
	{
		int count = 0;
		MatrixEntry< Real >* _halfWay = halfWay[i];
		for( int j=0 ; j<StencilSize ; j++ ) if( _halfWay[j].N !=-1 ) _halfWay[count++] = _halfWay[j];
		halfWay.groupSizes[i] = count;
	}

	low.Resize    ( _dimension[depth-1] );	// For each coarser node, what is the dot-product with the coarser node?
	for( int i=0; i<_dimension[depth-1] ; i++ )
	{
		low.SetGroupSize( i , StencilSize );
		MatrixEntry< Real >* _low = low[i];
		for( int j=0 ; j<StencilSize ; j++ ) _low[j] = MatrixEntry< Real >( -1 , 0 );
	}
	// Now construct the matrix R * M * P
	for( int low1=0 ; low1<_dimension[depth-1] ; low1++ )
	{
		MatrixEntry< Real >* _low = low[low1];
		const int slotBase = StencilCenter - lowIndices[low1];
		const MatrixEntry< Real >* _highToLow = highToLow.ds[low1];
		for( int j=0 ; j<highToLow.ds.groupSizes[low1] ; j++ )
		{
			int fine = _highToLow[j].N;
			// Negative entries are skipped, as they are when the operator is transposed above
			if( fine<0 ) continue;
			Real value = _highToLow[j].Value;
			const MatrixEntry< Real >* _halfWay = halfWay[fine];
			for( int k=0 ; k<halfWay.groupSizes[fine] ; k++ )
			{
				int low2 = _halfWay[k].N;
				MatrixEntry< Real >& entry = _low[ slotBase + lowIndices[low2] ];
				entry.N = low2;
				entry.Value += value * _halfWay[k].Value;
			}
		}
	}

	// Compact every row of the result to the slots that were written.
	// The used entries are staged in a temporary before the row is resized.
	MatrixEntry< Real > tempEntries[3*3*3];
	for( int i=0 ; i<low.groups ; i++ )
	{
		int count = 0;
		for( int j=0 ; j<StencilSize ; j++ ) if( low[i][j].N !=-1 ) tempEntries[count++] = low[i][j];
		low.SetGroupSize( i , count );
		memcpy( low[i] , tempEntries , sizeof( MatrixEntry< Real > ) * count );
	}
}

// Computes the coarse matrix `low` at depth-1 from the fine matrix `high` at `depth` by walking the octree:
// for every indexed node at depth-1, the rows of `high` belonging to the children reached by the prolongation
// stencil are accumulated into a 5x5x5 scratch array, and the scratch values are then distributed to the
// coarse neighbors through the restriction stencil.
//
//   high   Matrix at `depth`. Its rows are read positionally (see the note at the read below).
//   depth  Depth of the fine level.
//   low    Output matrix at depth-1; one row per indexed node, one entry per indexed node in its 3x3x3 neighborhood.
//
// Side effect: nodeData.tempIndex of the neighboring nodes is overwritten, even though the method is const.
template< class Real , bool Primal >
void MeshOctree< Real , Primal >::_primalDownSample( const SparseMatrix<Real>& high , int depth , SparseMatrix<Real>& low ) const
{
	// A fine node together with the value accumulated for it
	class ChildNodeData
	{
	public:
		TreeOctNode* node;
		Real value;
	};
	ChildNodeData childNodeData[5][5][5];
	TreeOctNode* childNodes[3][3][3];
	TreeOctNode* parentNodes[2][2][2];
	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullProlongationStencil pStencil;
	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullRestrictionStencil rStencil;
	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullProlongationStencil::ProlongationStencil pStencils[3];
	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullRestrictionStencil::RestrictionStencil rStencils[3];

	int tempD;
	// Modify this in case depth = 1 in which case there are no interior stencil values
	FiniteElements1D<Real,ZERO_DERIVATIVE,1>::ProlongationStencil( 5 , pStencil , tempD );
	FiniteElements1D<Real,ZERO_DERIVATIVE,1>::RestrictionStencil( 9 , rStencil , tempD );

	low.Resize(_dimension[depth-1]);
	for(const TreeOctNode* node=tree.nextNode();node;node=tree.nextNode(node))
	{
		if(node->depth()!=depth-1 || node->nodeData.index==-1)	continue;
		int d,off[3],start[3];
		node->depthAndOffset(d,off);
		// Fine-level offset that is subtracted from node offsets to index childNodeData
		for(int c=0;c<3;c++)	start[c]=2*off[c]-3;
		// The neighborhood is copied by value here (the sibling routines bind a reference)
		typename TreeOctNode::Neighbors3 neighbors=neighborKey3.getNeighbors(node);

		// Set the row index within the 1-ring neighborhood
		int nCount=0;
		for(int i=0;i<3;i++)
			for(int j=0;j<3;j++)
				for(int k=0;k<3;k++)
					if(neighbors.neighbors[i][j][k] && neighbors.neighbors[i][j][k]->nodeData.index!=-1)
						neighbors.neighbors[i][j][k]->nodeData.tempIndex=nCount++;
		low.SetGroupSize(node->nodeData.index,nCount);
		// Initialize the row entries
		nCount=0;
		for(int i=0;i<3;i++)
			for(int j=0;j<3;j++)
				for(int k=0;k<3;k++)
					if(neighbors.neighbors[i][j][k] && neighbors.neighbors[i][j][k]->nodeData.index!=-1)
					{
						low[node->nodeData.index][nCount].N=neighbors.neighbors[i][j][k]->nodeData.index;
						low[node->nodeData.index][nCount].Value=0;
						nCount++;
					}

		// Set the array of children that could be affected by a prolongation
		memset( childNodes , 0 , sizeof( childNodes ) );

		// Only the neighbors at positions 0 and 1 along each axis are visited; their children map to slots -1..2, of which 0..2 are kept
		for(int i=0;i<2;i++)
			for(int j=0;j<2;j++)
				for(int k=0;k<2;k++)
					if(neighbors.neighbors[i][j][k] && neighbors.neighbors[i][j][k]->children)
						for(int x=0;x<2;x++)
						{
							int ii=2*i+x-1;
							if(ii<0 || ii>=3)	continue;
							for(int y=0;y<2;y++)
							{
								int jj=2*j+y-1;
								if(jj<0 || jj>=3)	continue;
								for(int z=0;z<2;z++)
								{
									int kk=2*k+z-1;
									if(kk<0 || kk>=3)	continue;

									int corner=Cube::CornerIndex(x,y,z);
									if(neighbors.neighbors[i][j][k]->children[corner].nodeData.index!=-1)
										childNodes[ii][jj][kk] = &neighbors.neighbors[i][j][k]->children[corner];
								}
							}
						}
		// Clears both the node pointers and the accumulated values
		memset( childNodeData , 0 , sizeof(childNodeData) );

		// Set the 5x5x5 neighbors at the finer resolution
		// (the children of the 3x3x3 neighbors span slots 0..5 along each axis; slot 5 is dropped by the range test)
		for(int i=0;i<3;i++)
			for(int j=0;j<3;j++)
				for(int k=0;k<3;k++)
					if(neighbors.neighbors[i][j][k] && neighbors.neighbors[i][j][k]->children)
						for(int x=0;x<2;x++)
						{
							int ii=2*i+x;
							if(ii<0 || ii>=5)	continue;
							for(int y=0;y<2;y++)
							{
								int jj=2*j+y;
								if(jj<0 || jj>=5)	continue;
								for(int z=0;z<2;z++)
								{
									int kk=2*k+z;
									if(kk<0 || kk>=5)	continue;

									int corner=Cube::CornerIndex(x,y,z);
									if(neighbors.neighbors[i][j][k]->children[corner].nodeData.index!=-1)
										childNodeData[ii][jj][kk].node=&neighbors.neighbors[i][j][k]->children[corner];
								}
							}
						}
		// Compute the dot-product of the Laplacian of the coarser node with all finer nodes
		for(int c=0;c<3;c++) pStencils[c]=pStencil.caseTable[1];


		for(int i=0;i<3;i++)
			for(int j=0;j<3;j++)
				for(int k=0;k<3;k++)
					// For each child node in the prolongation, search its 1-ring neighborhood
					if(childNodes[i][j][k])
					{
						int childD , childOff[3] , childIndex[3];
						childNodes[i][j][k]->depthAndOffset( childD , childOff );
						for( int c=0 ; c<3 ; c++ ) childIndex[c]=childOff[c]-start[c];
						double pValue=pStencils[0].values[i]*pStencils[1].values[j]*pStencils[2].values[k];
						// Position within the row of `high` belonging to this child
						int tempNCount=0;
						// NOTE(review): childNodeData was filled at slot 2*i+x, whereas the slots used here are offsets relative to start = 2*off-3.
						// If neighbors.neighbors[1][1][1] is the node itself, childIndex ranges over 2..4 and childIndex+x-1 can reach 5,
						// one past the 5-wide array -- verify that the two indexings agree and stay in range.
						for( int x=0 ; x<3 ; x++ )
							for( int y=0 ; y<3 ; y++ )
								for( int z=0 ; z<3 ; z++ )
									if(	childNodeData[childIndex[0]+x-1][childIndex[1]+y-1][childIndex[2]+z-1].node &&
										childNodeData[childIndex[0]+x-1][childIndex[1]+y-1][childIndex[2]+z-1].node->nodeData.index!=-1)
									{
										int tempD,tempOff[3];
										// For nodes in the 1-ring neighborhood of the child, find the coarser nodes they restrict to
										TreeOctNode* childNeighborNode=childNodeData[childIndex[0]+x-1][childIndex[1]+y-1][childIndex[2]+z-1].node;
										childNeighborNode->depthAndOffset(tempD,tempOff);
										for(int c=0;c<3;c++) tempOff[c]-=start[c];
										// The node pointer is written back at the slot derived from the node's own offset
										childNodeData[tempOff[0]][tempOff[1]][tempOff[2]].node=childNeighborNode;
										// NOTE(review): the row of `high` is read by position, which presumes its entries are stored in exactly the
										// order in which this x,y,z scan meets existing neighbors -- confirm against the code that builds `high`.
										// NOTE(review): the cast to float is hard-coded and discards precision when Real is double; Real(pValue) may have been intended.
										childNodeData[tempOff[0]][tempOff[1]][tempOff[2]].value+=
											high[childNodes[i][j][k]->nodeData.index][tempNCount].Value*float(pValue);
										tempNCount++;
									}
					}
		// Use the dot-products of the finer nodes to set the dot-products of the coarser nodes
		for( int i=0 ; i<5 ; i++ )
			for( int j=0 ; j<5 ; j++ )
				for( int k=0 ; k<5 ; k++ )
					if( childNodeData[i][j][k].node )
					{
						TreeOctNode* childNeighborNode=childNodeData[i][j][k].node;
						double matrixValue=childNodeData[i][j][k].value;

						int tempD1 , tempOff1[3];
						int tempD2 , tempOff2[3] , dOff[3];
						childNeighborNode->depthAndOffset(tempD1,tempOff1);
						childNeighborNode->parent->depthAndOffset(tempD2,tempOff2);
						// Offset of the fine node's parent relative to the current coarse node
						for( int c=0 ; c<3 ; c++ ) dOff[c]=tempOff2[c]-off[c];
						// Corner position of the fine node within its parent
						int idx=int( childNeighborNode-childNeighborNode->parent->children );
						int xx,yy,zz;
						Cube::FactorCornerIndex(idx,xx,yy,zz);

						memset( parentNodes , 0 , sizeof(parentNodes) );
						// These i,j,k shadow the indices of the enclosing 5x5x5 loops
						for( int i=0 ; i<2 ; i++ )
							for( int j=0 ; j<2 ; j++ )
								for( int k=0 ; k<2 ; k++ )
									if(neighbors.neighbors[xx+i+dOff[0]][yy+j+dOff[1]][zz+k+dOff[2]] && neighbors.neighbors[xx+i+dOff[0]][yy+j+dOff[1]][zz+k+dOff[2]]->nodeData.index!=-1)
										parentNodes[i][j][k]=neighbors.neighbors[xx+i+dOff[0]][yy+j+dOff[1]][zz+k+dOff[2]];

						// Odd fine offsets use restriction stencil case 1, even offsets use case 2
						for(int c=0;c<3;c++)
							if	(tempOff1[c]%2)	rStencils[c]=rStencil.caseTable[1];
							else				rStencils[c]=rStencil.caseTable[2];
						// Distribute the accumulated value to the coarse neighbors; tempIndex selects the entry within the row of `low`
						for(int i=0;i<2;i++)
						{
							double temp1=matrixValue*rStencils[0].values[i];
							for(int j=0;j<2;j++)
							{
								double temp2=temp1*rStencils[1].values[j];
								for(int k=0;k<2;k++)
									if(parentNodes[i][j][k])
										low[node->nodeData.index][parentNodes[i][j][k]->nodeData.tempIndex].Value+=Real( temp2*rStencils[2].values[k] );
							}
						}
				}
	}
}
// Builds the down-sampling matrix that maps values at `depth` to values at `depth-1`.
//
// The matrix has one row per indexed node at depth-1. The row holds one entry for every indexed child found
// in the 3x3x3 window of children surrounding the node; the entry's column is the child's index and its
// value is the product of the per-axis prolongation stencil values (caseTable[1]) at the child's window position.
//
//   depth      Depth of the fine level.
//   highToLow  Output; its dimensions, row-major flag and rows are all set here.
template< class Real , bool Primal >
void MeshOctree< Real , Primal >::_getPrimalDownSampleMatrix( int depth , DownSampleMatrix< Real > &highToLow ) const
{
	highToLow.inDim  = _dimension[depth  ];
	highToLow.outDim = _dimension[depth-1];
	highToLow.ds.rowMajor =  true;
	highToLow.ds.Resize( _dimension[depth-1] );
	TreeOctNode* childNodes[3][3][3];

	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullProlongationStencil pStencil;
	typename FiniteElements1D<Real,ZERO_DERIVATIVE,1>::FullProlongationStencil::ProlongationStencil pStencils[3];
	int tempD;
	// Modify this in case depth = 1 in which case there are no interior stencil values
	FiniteElements1D<Real,ZERO_DERIVATIVE,1>::ProlongationStencil( 5 , pStencil , tempD );
	// The same stencil case is used for every node and every axis, so it is selected once
	for( int c=0 ; c<3 ; c++ ) pStencils[c] = pStencil.caseTable[1];

	for( const TreeOctNode* node = tree.nextNode() ; node ; node = tree.nextNode(node) )
	{
		if( node->depth()!=depth-1 || node->nodeData.index==-1 ) continue;

		const int idx = node->nodeData.index;
		typename TreeOctNode::Neighbors3& neighbors = neighborKey3.getNeighbors( node );

		// Set the array of children that could be affected by a prolongation
		memset( childNodes , 0 , sizeof(childNodes) );

		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ )
		{
			TreeOctNode* neighbor = neighbors.neighbors[i][j][k];
			if( !neighbor || !neighbor->children ) continue;
			for( int x=0 ; x<2 ; x++ )
			{
				// Position of the child within the 3x3x3 window; children outside the window are skipped
				int ii = 2*i+x-1;
				if( ii<0 || ii>=3 ) continue;
				for( int y=0 ; y<2 ; y++ )
				{
					int jj = 2*j+y-1;
					if( jj<0 || jj>=3 ) continue;
					for( int z=0 ; z<2 ; z++ )
					{
						int kk = 2*k+z-1;
						if( kk<0 || kk>=3 ) continue;

						int corner = Cube::CornerIndex( x , y , z );
						if( neighbor->children[corner].nodeData.index!=-1 )
							childNodes[ii][jj][kk] = &neighbor->children[corner];
					}
				}
			}
		}

		// Size the row to the number of children found
		int cCount = 0;
		for( int i=0 ; i<3 ;i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) if( childNodes[i][j][k] ) cCount++;
		highToLow.ds.SetGroupSize( idx , cCount );

		// Every entry is written exactly once, so it is assigned rather than accumulated:
		// the result does not depend on SetGroupSize having cleared the row
		cCount = 0;
		for( int i=0 ; i<3 ; i++ ) for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ )
			if( childNodes[i][j][k] )
			{
				highToLow.ds[idx][cCount].N = childNodes[i][j][k]->nodeData.index;
				highToLow.ds[idx][cCount].Value = Real( pStencils[0].values[i]*pStencils[1].values[j]*pStencils[2].values[k] );
				cCount++;
			}
	}
}
