/*
Copyright (c) 2009, Michael Kazhdan and Ming Chuang
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution. 

Neither the name of the Johns Hopkins University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "Util.h"
#include "RangeGrid.h"
#include "Ply.h"
#include "Mesh.h"
#include "Octree.h"
#include "SparseMatrix.h"
#include "CmdLineParser.h"
#include "MemoryUsage.h"
#include "PoissonMeshData.h"
#include "KDtree.h"
#include "Multigrid.h"

template< class Vertex , class Real >
struct KDMesh
{
	Point3D< Real > DoP;
	std::vector< Vertex > v;
	std::vector< TriangleIndex > f;
	mutable std::vector< std::vector< int > > af;
	KDtree< Real > *kd;

	KDMesh() : kd( NULL ) {}
	~KDMesh() { if (kd) delete kd; }

	void build_kd(void)
	{
		if (kd) delete kd;
		kd = new KDtree< Real >( &v[0].point[0] , v.size() , sizeof( Vertex ) );
	}

	void need_adjacent_faces() const
	{
		if( af.size() == v.size() ) return;

		af.clear();
		af.resize( v.size() );
		for( size_t i = 0 ; i<f.size() ; i++ )
			for( int j=0 ; j<3 ; j++ ) 
				af[ f[i][j] ].push_back((int) i);
	}
	template< class Real >
	int closest_vertex( Point3D< Real > p ) const
	{
		assert( kd );
		Real maxDist2 = -1;
		Point3D< Real > *nearest = (Point3D<Real> *) kd->closest_to_pt((const Real *) &p[0], maxDist2 );
		if( nearest ) return (int)( ( size_t( nearest ) - size_t( &v[0].point[0] ) ) / sizeof( Vertex ) );
		else return -1;
	}
	Point3D< Real > normal( int face ) const
	{
		return Point3D< Real >::CrossProduct( Point3D< Real >( v[ f[face][1] ] ) - Point3D< Real >( v[ f[face][0] ] ) , Point3D< Real >( v[ f[face][2] ] ) - Point3D< Real >( v[ f[face][0] ] ) );
	}
	void setDirectionOfProjection( void )
	{
		std::vector< Point3D< Real > > normals;
		DoP *= 0;
		normals.resize( v.size() );
		for( int i=0 ; i<v.size() ; i++ ) normals[i] *= 0;
		for( int i=0 ; i<f.size() ; i++ )
		{
			Point3D< Real > normal = this->normal( i );
			for( int j=0 ; j<3 ; j++ ) normals[ f[i][j] ] += normal;
		}
		for( int i=0 ; i<v.size() ; i++ )
		{
			Real w = (Real)Length( normals[i] );
			if( w ) DoP += normals[i] / w / w;
		}
	}
};

// Command-line parameters. Each object is constructed with the name of its switch
// (and, where given, its initial value). The declaration order is kept as it was.

// File names
cmdLineString In( "in" );				// The input triangle mesh
cmdLineString Out( "out" );				// The output triangle mesh
cmdLineString Scans( "scans" );			// The file listing the scans
cmdLineString XForms( "xForms" );		// The file listing the per-scan transformations

// Integer parameters
cmdLineInt Depth( "depth" , 8 );
cmdLineInt Iters( "iters" , 10 );
cmdLineInt Subdivide( "subdivide" , 0 );
cmdLineInt Cycles( "cycles" , 1 );
cmdLineInt ParallelDepth( "parallelDepth" , 5 );
cmdLineInt Threads( "threads" , omp_get_num_procs() );
cmdLineInt GSBlockSize( "gsBlockSize" , 2 );

// Floating point parameters
cmdLineFloat CleanTree( "cutOff" , 0.f );
cmdLineFloat VWeight( "vWeight" , 0.f );
cmdLineFloat GWeight( "gWeight" , 1.f );
cmdLineFloat GrazingFallOff( "grazingFallOff" , 2.f );

// Flags
cmdLineReadable Verbose( "verbose" );
cmdLineReadable ASCII( "ascii" );
cmdLineReadable UseKD( "useKD" );
cmdLineReadable Progress( "progress" );
cmdLineReadable Dual( "dual" );
cmdLineReadable VCycle( "vCycle" );
cmdLineReadable Elements( "elements" );

// The table of every parameter declared above
cmdLineReadable* params[]=
{
	&In ,
	&Out ,
	&Scans ,
	&XForms ,
	&Depth ,
	&Iters ,
	&Subdivide ,
	&Cycles ,
	&ParallelDepth ,
	&Threads ,
	&GSBlockSize ,
	&CleanTree ,
	&VWeight ,
	&GWeight ,
	&GrazingFallOff ,
	&Verbose ,
	&ASCII ,
	&UseKD ,
	&Progress ,
	&Dual ,
	&VCycle ,
	&Elements ,
};

// Prints the usage message to stdout.
//	ex:	the name of the executable, echoed on the first line
// The current value of each parameter is printed as its default, so this is meant to be
// called before the values have been modified.
// NOTE: not every parameter registered in params[] is listed here
// (e.g. parallelDepth, gsBlockSize, cutOff, gWeight, ascii and vCycle are omitted).
void ShowUsage( char* ex )
{
	printf( "Usage: %s\n" , ex );
	printf( "\t --%s <input triangle mesh>\n" , In.name );
	printf( "\t\t The triangle mesh on which the texture will be stitched.\n" );

	printf( "\t --%s <input range scans>\n" , Scans.name );
	printf( "\t\t The list of scans from which the texture will be read.\n" );

	printf( "\t --%s <output triangle mesh>\n" , Out.name );
	printf( "\t\t The triangle mesh to which the stitched texture will be output.\n" );

	printf( "\t[--%s]\n" , UseKD.name );
	printf( "\t\t If specified, this argument indicates that the input scans should\n" );
	printf( "\t\t be treated as meshes and a kD-tree should be used for finding nearest\n" );
	printf( "\t\t samples. Otherwise, it is assumed that the scans are in range-grid format.\n" );

	printf( "\t[--%s <range grid transformations>]\n" , XForms.name );
	printf( "\t\t If there are transformations that need to be applied to place the scans\n" );
	printf( "\t\t into a global coordinate system, this file holds the names of the transformations\n" );
	printf( "\t\t associated with each of the scans.\n" );

	printf( "\t[--%s <maximum reconstruction depth>=%d]\n" , Depth.name , Depth.value );
	printf( "\t\t Running at depth d corresponds to solving on a 2^d x 2^d x 2^d\n" );
	printf( "\t\t voxel grid.\n" );

	printf( "\t[--%s <solver iterations>=%d]\n" , Iters.name , Iters.value );
	printf( "\t\t The number of solver iterations within each resolution\n" );

	printf( "\t[--%s <multigrid cycles>=%d]\n" , Cycles.name , Cycles.value );
	printf( "\t\t The number of solver cycles to be performed\n" );

	printf( "\t[--%s <value weight>=%f]\n" , VWeight.name , VWeight.value );
	printf( "\t[--%s <subdivision iterations>=%d]\n" , Subdivide.name , Subdivide.value );

	printf( "\t[--%s <fall-off exponent>=%f]\n" , GrazingFallOff.name , GrazingFallOff.value );
	printf( "\t\t Specifies the exponent used to modulate the fall-off at grazing angles\n" );

	printf( "\t[--%s <number of threads>=%d]\n" , Threads.name , Threads.value );

	printf( "\t[--%s]\n" , Dual.name );
	printf( "\t[--%s]\n" , Verbose.name );
	printf( "\t[--%s]\n" , Progress.name );
	printf( "\t[--%s]\n" , Elements.name );
}

// Computes, for every triangle of the mesh, a linear color function sampled from a set of range-grid scans.
//	vertices / triangles:	the mesh, with positions in unit-cube coordinates
//	center / scale:			define the unit-cube -> 3D global map (scale on the diagonal of xForm, center in its (3,c) entries)
//	confidence:				exponent applied to the cosine of the viewing angle when ranking the scans (only used if >0)
//	rGrids / gridCount:		the scans and their number
//	functions:				[out] resized to triangles.size(); entry i is the function assigned to triangle i
// For each triangle the scan is chosen using the triangle's center, and the colors are then read
// from that scan at the three vertices. A triangle for which no scan gives a valid sample at the
// center keeps the function obtained by scaling its entry by zero.
// The work is split by hand into Threads.value contiguous slices, one per iteration of the
// outer (parallel) loops.
template< class Real , class Vertex >
void GetRangeFunctions( const std::vector<Vertex>& vertices , const std::vector< TriangleIndex >& triangles , 
					    const Point3D< Real >& center , Real scale , Real confidence ,
						const RangeGrid< PlyOrientedColorVertex< Real > , Real > *rGrids , int gridCount ,
						std::vector< LinearFunction3D< Point3D< Real > > >& functions )
{
	typedef typename RangeGrid< PlyOrientedColorVertex< Real > , Real >::Sample GridSample;
	XForm4x4<Real> xForm		= XForm4x4<Real>::Identity();

	XForm4x4<Real>* xForms		= new XForm4x4<Real>	[gridCount];
	XForm4x4<Real>* iXForms		= new XForm4x4<Real>	[gridCount];
	Point3D<Real>* centers		= new Point3D<Real>		[gridCount];
	// Transformations:
	// 1] Unit-cube to 3D global:					3D -> 3D	xForm
	// 2] Unit-cube to camera coordinates:			3D -> 3D	iXForms
	// 3] Camera coordinates to scan grid:			3D -> 2D	xForms
	// 4] Camera centers in 3D global:				3D			centers

	for(int c=0;c<3;c++)
	{
		xForm(3,c) = Real( center[c] );
		xForm(c,c) = Real( scale );
	}

	// Pre-compute the per-scan transformations and camera centers
	ProgressBar* bar = NULL;
	if( Progress.set ) bar = new ProgressBar( 20 , gridCount , "Transforming Grids" );
#pragma omp parallel for num_threads( Threads.value )
	for( int t=0 ; t<Threads.value ; t++ )
		for( int i=(gridCount*t)/Threads.value ; i<(gridCount*(t+1))/Threads.value ; i++ )
		{
			// NOTE(review): the bar is shared by all threads; the flag passed is true only for the first slice -- confirm its meaning against ProgressBar
			if( Progress.set ) bar->update( t==0 );
			iXForms[i]	= rGrids[i].xForm.inverse()*xForm;
			xForms[i]	= rGrids[i].backProjectionXForm( false );
			centers[i]	= rGrids[i].xForm( Point3D<Real>( ) );		// The image of the origin under the scan's transformation
		}
	if( Progress.set ) delete bar;

	functions.resize( triangles.size() );

	if( Progress.set ) bar = new ProgressBar( 20 , triangles.size() , "Setting Functions" );
#pragma omp parallel for num_threads( Threads.value )
	for( int thread=0 ; thread<Threads.value ; thread++ )
	{
		// Per-thread scratch space: one sample per scan (at the triangle center) and one per triangle vertex
		GridSample* gridSamples = new GridSample[gridCount];
		GridSample vSamples[3];

		for( int i=(triangles.size()*thread)/Threads.value ; i<(triangles.size()*(thread+1))/Threads.value ; i++ )
		{
			if( Progress.set ) bar->update( thread==0 );
			functions[i] *= 0;
			// Get the index of the grid closest to the triangle center
			Point3D< Real > verts[] = { Point3D< Real >( vertices[ triangles[i][0] ] ) , Point3D< Real >( vertices[ triangles[i][1] ] ) , Point3D< Real >( vertices[ triangles[i][2] ] ) };
			Point3D< double > dVerts[] = { Point3D< double >( vertices[ triangles[i][0] ] ) , Point3D< double >( vertices[ triangles[i][1] ] ) , Point3D< double >( vertices[ triangles[i][2] ] ) };
			Point3D< Real > tCenter = ( verts[0] + verts[1] + verts[2] ) / 3;
			int idx = -1;
			{
				Point3D< Real > p = xForm( tCenter );							// Unit-cube -> 3D global
				for( int j=0 ; j<gridCount ; j++ )
				{
					Point3D< Real > q = xForms[j]( iXForms[j]( tCenter ) );		// Unit-cube -> grid
					// Perspective divide (q[2] is not checked against zero)
					double x = q[0] / q[2];
					double y = q[1] / q[2];
					gridSamples[j] = rGrids[j].sample( x , y );					// Sample the scan at the projected position (in 3D global)
				}
				double dist = 0;
				for( int j=0 ; j<gridCount ; j++ )	// Iterate over the valid back-projected samples
					// in all the scans and find the closest one
					if( gridSamples[j].validSample )
					{
						// The score of a scan is the squared distance from the triangle center to the sample...
						Real temp = Point3D< Real >::SquareDistance( p , gridSamples[j].v.point );
						// ...divided by a power of the cosine of the angle between the direction from the
						// triangle center to the camera center and the sample's normal (when that cosine is positive)
						double len = sqrt( Point3D<Real>::SquareNorm( centers[j]-Point<Real, 3>(p) ) * Point3D<Real>::SquareNorm( gridSamples[j].v.normal ) );
						double dot = Point3D<Real>::Dot( centers[j]-Point<Real, 3>(p) ,gridSamples[j].v.normal ) / len;
						if( confidence>0 && dot>0 ) temp /= Real( pow( double(dot) , double(confidence) ) );
						if( idx==-1 || temp<dist ) idx = j , dist = temp;
					}
			}

			if( idx>=0 )
			{
				// Sample the chosen scan at the three vertices of the triangle
				bool validVertices = true;
				for( int j=0 ; j<3 ; j++ )
				{
					Point3D< Real > p = xForm( verts[j] );							// Unit-cube -> 3D global (not used below)
					Point3D< Real > q = xForms[idx]( iXForms[idx]( verts[j] ) );	// Unit-cube -> grid
					double x = q[0] / q[2];
					double y = q[1] / q[2];
					vSamples[j] = rGrids[idx].sample( x , y );			// Sample the scan at the projected position (in 3D global)
					validVertices &= vSamples[j].validSample;
				}
				// If some vertex has no valid sample, fall back to the color sampled at the triangle center
				// (only the offset is assigned; the sign is flipped when PAN_FIX is enabled)
#if PAN_FIX
				if( !validVertices ) functions[i].offset = -Point3D< Real >( gridSamples[idx].v.color );
#else // !PAN_FIX
				if( !validVertices ) functions[i].offset =  Point3D< Real >( gridSamples[idx].v.color );
#endif // PAN_FIX
				else
				{
					// Otherwise use the interpolant of the three vertex colors, computed in double precision
					Point3D< double > values[3];
					for( int j=0 ; j<3 ; j++ ) values[j] = Point3D< double >( vSamples[j].v.color );
					functions[i] = LinearFunction3D< Point3D< Real > >( LinearFunction3D< Point3D< double > >::GetInterpolant( dVerts , values ) );
					// A value that differs from itself is a NaN: fall back to the average of the three colors.
					// NOTE(review): if "gradients *= 0" multiplies entry-wise, a NaN entry stays NaN and the message
					// is printed once per remaining entry -- confirm against LinearFunction3D.
					for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) if( functions[i].gradients[j][k]!=functions[i].gradients[j][k] )
					{
						printf( "Bad gradients[%d]...\n", i );
						functions[i].gradients *= 0;
#if PAN_FIX
						functions[i].offset = -Point3D< Real >( values[0] + values[1] + values[2] ) / 3;
#else // !PAN_FIX
						functions[i].offset =  Point3D< Real >( values[0] + values[1] + values[2] ) / 3;
#endif // PAN_FIX
					}
				}
			}
		}
		delete[] gridSamples;
	}
	if( Progress.set ) delete bar;
	delete[] xForms;
	delete[] iXForms;
	delete[] centers;
}

// Finds the point on the segment from v[0] to v[1] that is closest to p.
//	a:	[out] the weights of v[0] and v[1] defining the closest point ( a[1] is clamped to [0,1] and a[0] = 1 - a[1] )
// Returns the squared distance from p to the closest point.
template< class Real >
Real ClosestPointOnEdge( const Point3D< Real >& p , const Point3D< Real > v[2] , Real a[2] )
{
	if( !Point3D< Real >::SquareNorm( v[0]-v[1] ) )
	{
		// Degenerate edge: the end-points coincide, so give all the weight to the one that tests as nearer
		const bool firstIsNearer = Point3D< Real >::SquareNorm( p-v[0] ) < Point3D< Real >::SquareNorm( p-v[1] );
		a[0] = firstIsNearer ? (Real)1 : (Real)0;
		a[1] = firstIsNearer ? (Real)0 : (Real)1;
	}
	else
	{
		// Minimizing
		//		E(s) = || ( p - v0 ) - s * ( v1 - v0 ) ||^2
		// over s gives
		//		s = < p - v0 , v1 - v0 > / || v1 - v0 ||^2
		// which is then clamped to the segment.
		Real s = Point3D< Real >::Dot( p - v[0] , v[1] - v[0] ) / Point3D< Real >::SquareNorm( v[1] - v[0] );
		s = ( s<(Real)1 ) ? s : (Real)1;
		s = ( (Real)0<s ) ? s : (Real)0;
		a[1] = s;
		a[0] = (Real)1. - a[1];
	}
	return Point3D< Real >::SquareNorm( ( v[0] * a[0] + v[1] * a[1] ) - p );
}
template< class Real >
Real ClosestPointOnEdge( const Point3D< Real >& p , const Point3D< Real >& v0 , const Point3D< Real >& v1 , Real& a0 , Real& a1 )
{
	Point3D< Real > _v[] = { v0 , v1 };
	Real _a[2];
	Real d2 = ClosestPointOnEdge( p , _v , _a );
	a0 = _a[0] , a1 = _a[1];
	return d2;
}
// Finds the point on the triangle ( v[0] , v[1] , v[2] ) that is closest to p.
//	a:	[out] the barycentric coordinates of the closest point
// Returns the squared distance from p to the closest point.
// If the projection of p onto the plane of the triangle falls inside the triangle, that projection
// is the answer. Otherwise (or if the triangle is degenerate) the closest point lies on one of the
// three edges.
template< class Real >
Real ClosestPointOnTriangle( const Point3D< Real >& p , const Point3D< Real > v[3] , Real a[3] )
{
	Point3D< Real > n = Point3D< Real >::CrossProduct( v[1]-v[0] , v[2]-v[0] );
	if( Point3D< Real >::SquareNorm( n ) )
	{
		// Solve for (s,t) minimizing:
		//		E(s,t) = || p - ( v0 + s * ( v1 - v0 ) + t * ( v2 - v0 ) ) ||^2
		//             = || ( p - v0 ) - s * ( v1 - v0 ) - t * ( v2 - v0 ) ||^2
		//		       = || p - v0 ||^2 - 2 * s * < p - v0 , v1 - v0 > + s^2 * || v1 - v0 ||^2 - 2 * t * < p - v0 , v2 - v0 > + t^2 * || v2 - v0 ||^2 + 2 * st * < v1 - v0 , v2 - v0 >
		// Setting the derivative w.r.t. to s and t to zero gives:
		//		   0 = - 2 * < p - v0 , v1 - v0 > + 2 * s * || v1 - v0 ||^2 + 2 * t * < v1 - v0 , v2 - v0 >
		//		   0 = - 2 * < p - v0 , v2 - v0 > + 2 * t * || v2 - v0 ||^2 + 2 * s * < v1 - v0 , v2 - v0 >
		// <=>	   | < v1 - v0 , v1 - v0 >  < v1 - v0 , v2 - v0 > | * | s | = | < p - v0 , v1 - v0 > |
		//		   | < v1 - v0 , v2 - v0 >  < v2 - v0 , v2 - v0 > |   | t |   | < p - v0 , v2 - v0 > |
		SquareMatrix< Real , 2 > M;
		M(0,0) = Point3D< Real >::SquareNorm( v[1]-v[0] ) , M(1,1) = Point3D< Real >::SquareNorm( v[2]-v[0] );
		M(0,1) = M(1,0) = Point3D< Real >::Dot( v[1]-v[0] , v[2]-v[0] );
		Point2D< Real > b( Point3D< Real >::Dot( p-v[0] , v[1]-v[0] ) , Point3D< Real >::Dot( p-v[0] , v[2]-v[0] ) );
		if( M.determinant()>0 )
		{
			Point2D< Real > x = M.inverse() * b;
			a[0] = (Real)1. - x[0]  - x[1] , a[1] = x[0] , a[2] = x[1];
			// The projection is inside the triangle exactly when all the barycentric coordinates are non-negative
			if( a[0]>=0 && a[1]>=0 && a[2]>=0 ) return Point3D< Real >::SquareNorm( p - ( v[0]*a[0] + v[1]*a[1] + v[2]*a[2] ) );
		}
	}
	// Fall back to the closest point on the boundary: edge i joins v[i] to v[(i+1)%3]
	Real _a[3][2] , _d[3];
	for( int i=0 ; i<3 ; i++ ) _d[i] = ClosestPointOnEdge( p , v[i] , v[(i+1)%3] , _a[i][0] , _a[i][1] );
	int idx = 0;
	if( _d[1]<_d[idx] ) idx = 1;
	if( _d[2]<_d[idx] ) idx = 2;
	// The vertex opposite the chosen edge gets no weight
	a[idx] = _a[idx][0] , a[(idx+1)%3] = _a[idx][1] , a[(idx+2)%3] = 0;
	return _d[idx];
}
// Convenience overload taking the vertices and the output barycentric coordinates individually.
// Returns the squared distance from p to the closest point on the triangle ( v0 , v1 , v2 ).
template< class Real >
Real ClosestPointOnTriangle( const Point3D< Real >& p , const Point3D< Real >& v0 , const Point3D< Real >& v1 , const Point3D< Real >& v2 , Real& a0 , Real& a1 , Real& a2 )
{
	Point3D< Real > _v[] = { v0 , v1 , v2 };
	Real _a[3];
	Real d2 = ClosestPointOnTriangle( p , _v , _a );
	a0 = _a[0] , a1 = _a[1] , a2 = _a[2];
	return d2;
}

// Returns the closest face on the mesh when the nearest vertex is known
template< class Vertex , class Real >
static int closest_point_on_face( 
								 const KDMesh< Vertex , Real > &m ,	// The Mesh
								 int vert ,							// Index of the nearest point to pin
								 const Point3D<Real> &pIn ,			// The point of interest
								 Vertex &vOut ,						// The point on the triangle that is closest
								 Real &a0 , Real &a1 , Real &a2		// The barycentric coordinates of the point on the face
								 )
{
	if( vert<0 ) return -1;
	int f = -1;
	Real dst = 0;
	Real a[3];
#pragma omp critical
	{
		m.need_adjacent_faces();
	}
	for( size_t i=0; i<m.af[vert].size() ; i++ )				// Iterate over all faces adjacent to the nearest vertex
	{
		const TriangleIndex &face = m.f[ m.af[vert][i] ];		// The current face of interest
		ClosestPointOnTriangle( pIn , m.v[ face[0] ].point , m.v[ face[1] ].point , m.v[ face[2] ].point , a[0] , a[1] , a[2] );

		Real sum = 0;
		for( int d=0 ; d<3 ; d++ )
		{
			if( a[d]<0 ) a[d] = 0;
			sum += a[d];
		}
		for( int d=0 ; d<3 ; d++ ) a[d] /= sum;
		Vertex v = m.v[face[0]] * a[0] + m.v[face[1]] * a[1] +  m.v[face[2]] * a[2];
		Real dist = Point3D< Real >::SquareNorm( Point3D< Real >( v ) - pIn);
		if( f<0 || dist<dst )
		{
			f = (int) m.af[vert][i];
			dst = dist;
			vOut = v;
			a0 = a[0] , a1 = a[1] , a2 = a[2];
		}
	}
	return f;
}
template< class Vertex , class Real >
static int closest_point_on_face( 
								 const KDMesh< Vertex , Real > &m ,		// The Mesh
								 int vert ,							// Index of the nearest point to pin
								 const Point3D<Real> &pIn ,		// The point of interest
								 Real &a0 , Real &a1 , Real &a2	// The barycentric coordinates of the point on the face
								 )
{
	Vertex vOut;
	return closest_point_on_face( m , vert , pIn , vOut , a0 , a1 , a2 );
}

// Returns the closest face on the mesh when the nearest vertex is unknown:
// the nearest vertex is first looked up through the mesh's kD-tree.
template< class Vertex , class Real >
static int closest_point_on_face( 
								 const KDMesh< Vertex , Real > &m ,	// The Mesh
								 const Point3D<Real> &pIn ,			// The point of interest
								 Vertex &vOut ,						// The point on the triangle that is closest
								 Real &a0 , Real &a1 , Real &a2		// The barycentric coordinates of the point on the face
						)
{
	const int nearestVertex = m.closest_vertex(pIn);
	return closest_point_on_face( m , nearestVertex , pIn , vOut , a0 , a1 , a2 );
}
// Same as the overload above, for callers that only need the face index and the
// barycentric coordinates: the closest vertex record is computed and discarded.
template< class Vertex , class Real >
static int closest_point_on_face( 
								 const KDMesh< Vertex , Real > &m ,	// The Mesh
								 const Point3D<Real> &pIn ,			// The point of interest
								 Real &a0 , Real &a1 , Real &a2		// The barycentric coordinates of the point on the face
						)
{
	Vertex discarded;
	const int face = closest_point_on_face( m , pIn , discarded , a0 , a1 , a2 );
	return face;
}
// The result of looking up a point on a mesh.
template< class Vertex , class Real >
struct MeshSample
{
	int mesh;	// Index of a mesh
	int f;		// Index of a face (the lookups store -1 here when no face was found)
	Vertex v;	// The vertex record at the sampled position
	Real a0;	// Barycentric coordinates of the sample on the face
	Real a1;
	Real a2;
};
// Computes, for every triangle of the mesh, a linear color function sampled from a set of scans
// that are given as meshes with kD-trees.
//	vertices / triangles:	the mesh, with positions in unit-cube coordinates
//	center / scale:			define the unit-cube -> 3D global map (scale on the diagonal of xForm, center in its (3,c) entries)
//	confidence:				exponent applied to the cosine between a scan's DoP and the normal of the sampled face when ranking the scans (only used if >0)
//	rMeshes / gridCount:	the scans and their number
//	functions:				[out] resized to triangles.size(); entry t is the function assigned to triangle t
// For each triangle the scan is chosen using the triangle's center, and the colors are then read
// from that scan at the three vertices. A triangle for which no scan returns a face keeps the
// function obtained by scaling its entry by zero.
// The work is split by hand into Threads.value contiguous slices, one per iteration of the
// outer (parallel) loop.
// If a sampled color is a NaN the details are reported on stderr and the program terminates
// with a failure status.
template< class Real , class Vertex , class MeshVertex >
void GetMeshFunctions(
					  const std::vector< Vertex >& vertices , const std::vector< TriangleIndex >& triangles , 
					  const Point3D< Real >& center , Real scale , Real confidence ,
					  const KDMesh< MeshVertex , Real >* rMeshes , int gridCount ,
					  std::vector< LinearFunction3D< Point3D< Real > > >& functions )
{
	XForm4x4< Real > xForm = XForm4x4< Real >::Identity();

	for( int c=0 ; c<3 ; c++ )
	{
		xForm( 3 , c ) = Real( center[c] );
		xForm( c , c ) = Real( scale );
	}
	functions.resize( triangles.size() );

	ProgressBar* bar = NULL;
	if( Progress.set ) bar = new ProgressBar( 20 , triangles.size() , "Setting Functions" );
#pragma omp parallel for num_threads( Threads.value )
	for( int thread=0 ; thread<Threads.value ; thread++ )
	{
		// Per-thread scratch space: one sample per scan (at the triangle center) and one per triangle vertex
		MeshSample< MeshVertex , Real >* meshSamples = new MeshSample< MeshVertex , Real >[gridCount];
		MeshSample< MeshVertex , Real > vSamples[3];
		for( int t=(triangles.size()*thread)/Threads.value ; t<(triangles.size()*(thread+1))/Threads.value ; t++ )
		{
			// NOTE(review): the bar is shared by all threads; the flag passed is true only for the first slice -- confirm its meaning against ProgressBar
			if( Progress.set ) bar->update( thread==0 );
			functions[t] *= 0;
			// Get the index of the grid closest to the triangle center
			Point3D< Real   >  verts[] = { Point3D< Real   >( vertices[ triangles[t][0] ] ) , Point3D< Real   >( vertices[ triangles[t][1] ] ) , Point3D< Real   >( vertices[ triangles[t][2] ] ) };
			Point3D< double > dVerts[] = { Point3D< double >( vertices[ triangles[t][0] ] ) , Point3D< double >( vertices[ triangles[t][1] ] ) , Point3D< double >( vertices[ triangles[t][2] ] ) };

			Point3D< Real > tCenter = ( verts[0] + verts[1] + verts[2] ) / 3;	// The center of the triangle , defining the mesh of interest
			int idx = -1;
			{
				Point3D< Real > p = xForm( tCenter );							// Unit-cube -> 3D global
				// Find the closest point on each mesh to the center of the triangle
				for( int g=0 ; g<gridCount ; g++ )
					meshSamples[g].f = closest_point_on_face( rMeshes[g] , p , meshSamples[g].v , meshSamples[g].a0 , meshSamples[g].a1 , meshSamples[g].a2 );
				double dist = 0;
				for( int g=0 ; g<gridCount ; g++ )	// Iterate over the scans that returned a face
					// and find the one with the best score
					if( meshSamples[g].f!=-1 )
					{
						// The score of a scan is the squared distance from the triangle center to the sample...
						Real temp = Point3D< Real >::SquareDistance( p , meshSamples[g].v.point );
						// ...divided by a power of the cosine of the angle between the scan's DoP and the
						// normal of the sampled face (when that cosine is positive)
						Point3D< Real > n = Point3D< Real >( rMeshes[g].normal( meshSamples[g].f ) );
						double len = sqrt( Point3D<Real>::SquareNorm( rMeshes[g].DoP ) * Point3D<Real>::SquareNorm( n ) );
						double dot = Point3D<Real>::Dot( rMeshes[g].DoP , n ) / len;
						if( confidence>0 && dot>0 ) temp /= Real( pow( double(dot) , double(confidence) ) );
						if( idx==-1 || temp<dist ) idx = g , dist = temp;
					}
			}
			if( idx>=0 )
			{
				// Sample the chosen scan at the three vertices of the triangle
				bool validVertices = true;
				for( int v=0 ; v<3 ; v++ )
				{
					Point3D< Real > p = xForm( verts[v] );						// Unit-cube -> 3D global
					vSamples[v].f = closest_point_on_face( rMeshes[idx] , p , vSamples[v].v , vSamples[v].a0 , vSamples[v].a1 , vSamples[v].a2 );
					// A value that differs from itself is a NaN: report it and give up
					for( int j=0 ; j<3 ; j++ ) if( vSamples[v].v.color[j]!=vSamples[v].v.color[j] )
					{
						fprintf( stderr , "[ERROR] Bad color values: (%d,%d) %g %g %g\n" , t , v , vSamples[v].v.color[0] , vSamples[v].v.color[1] , vSamples[v].v.color[2] );
						fprintf( stderr , "\tBarycentric Coordinates: %g %g %g\n" , vSamples[v].a0 , vSamples[v].a1 , vSamples[v].a2 );
						fprintf( stderr , "\tFace: %d\n" , vSamples[v].f );
						fprintf( stderr , "\tMesh: %d\n" , idx );

						// This is an error, so do not report success to the caller of the program
						exit( EXIT_FAILURE );
					}

					if( vSamples[v].f==-1 ) validVertices=false;
				}
				// If some vertex has no sample, fall back to the color sampled at the triangle center
				// (only the offset is assigned; the sign is flipped when PAN_FIX is enabled)
#if PAN_FIX
				if( !validVertices ) functions[t].offset = -Point3D< Real >( meshSamples[idx].v.color );
#else // !PAN_FIX
				if( !validVertices ) functions[t].offset =  Point3D< Real >( meshSamples[idx].v.color );
#endif // PAN_FIX
				else
				{
					// Otherwise use the interpolant of the three vertex colors, computed in double precision
					Point3D< double > values[3];
					for( int v=0 ; v<3 ; v++ ) values[v] = Point3D< double >( vSamples[v].v.color );
					functions[t] = LinearFunction3D< Point3D< Real > >( LinearFunction3D< Point3D< double > >::GetInterpolant( dVerts , values ) );
					// If the interpolant has a NaN entry, fall back to the average of the three colors.
					// NOTE(review): if "gradients *= 0" multiplies entry-wise, a NaN entry stays NaN and the message
					// is printed once per remaining entry -- confirm against LinearFunction3D.
					for( int j=0 ; j<3 ; j++ ) for( int k=0 ; k<3 ; k++ ) if( functions[t].gradients[j][k]!=functions[t].gradients[j][k] )
					{
						printf( "Bad gradients[%d]...\n", t );
						printf( "\t%f %f %f\t%f %f %f\n" , dVerts[0][0] , dVerts[0][1] , dVerts[0][2] , values[0][0] , values[0][1] , values[0][2] );
						printf( "\t%f %f %f\t%f %f %f\n" , dVerts[1][0] , dVerts[1][1] , dVerts[1][2] , values[1][0] , values[1][1] , values[1][2] );
						printf( "\t%f %f %f\t%f %f %f\n" , dVerts[2][0] , dVerts[2][1] , dVerts[2][2] , values[2][0] , values[2][1] , values[2][2] );
						functions[t].gradients *= 0;
#if PAN_FIX
						functions[t].offset = -Point3D< Real >( values[0] + values[1] + values[2] ) / 3;
#else // !PAN_FIX
						functions[t].offset =  Point3D< Real >( values[0] + values[1] + values[2] ) / 3;
#endif // PAN_FIX
					}
				}
			}
		}
		delete[] meshSamples;
	}
	if( Progress.set ) delete bar;
}

// Reads the scans and computes the per-triangle color functions for the mesh.
//	vertices / triangles:	the mesh
//	gridCount / gridNames:	the number of scans and their file names
//	xFormNames:				the file names of the per-scan transformations, or NULL if there are none
//	center / scale / confidence:	forwarded unchanged to GetMeshFunctions / GetRangeFunctions
//	functions:				[out] the function assigned to each triangle
// If the "useKD" flag is set the scans are read as triangle meshes and sampled through kD-trees.
// Otherwise they are read as range grids.
// A transformation file that cannot be parsed terminates the program with a failure status.
// One that cannot be opened is reported and replaced by the identity.
template< class Real , class Vertex>
void GetFunctions( const std::vector< Vertex >& vertices , const std::vector< TriangleIndex >& triangles ,
				   int gridCount , char** gridNames , char** xFormNames , Point3D< Real > center , Real scale , Real confidence ,
				   std::vector< LinearFunction3D< Point3D< Real > > >& functions )
{
	if( UseKD.set )
	{
		int fileType;
		KDMesh< PlyColorVertex< Real > , Real >* rMeshes = new KDMesh< PlyColorVertex< Real > , Real >[gridCount];
		ProgressBar* bar=NULL;
		if( Progress.set ) bar = new ProgressBar( 20 , gridCount , "Setting kD-tree" );
		for( int idx=0 ; idx<gridCount ; idx++ )
		{
			if( Progress.set ) bar->update();
			PlyReadTriangles( gridNames[idx] , rMeshes[idx].v , rMeshes[idx].f , PlyColorVertex< Real >::ReadProperties , PlyColorVertex< Real >::ReadComponents , fileType );

			if( xFormNames )
			{
				FILE* fp = fopen( xFormNames[idx] , "r" );
				XForm4x4< Real > xForm;
				if( fp )
				{
					// The sixteen coefficients are read in file order into coords[i][j], with i varying fastest
					for( int j = 0; j < 4; j++ )
						for(int i = 0; i < 4; i++)
						{
							// Read into a double with the matching conversion specifier and convert afterwards:
							// scanning "%f" directly into a Real is only valid when Real is float.
							double d;
							if( fscanf( fp , " %lf " , &d )!=1 )
							{
								fprintf( stderr , "Failed to read matrix coefficient ( %d , %d ) from %s\n" , i , j , xFormNames[idx] );
								exit( EXIT_FAILURE );
							}
							xForm.coords[i][j] = Real( d );
						}
					fclose(fp);
				}
				else
				{
					xForm = XForm4x4< Real >::Identity( );
					fprintf( stderr , "Failed to read xForm from %s\n" , xFormNames[idx] );
				}
				// Apply the transformation to every vertex of the scan
				for( size_t i = 0; i < rMeshes[idx].v.size(); i++) rMeshes[idx].v[i] = rMeshes[idx].v[i].xForm( xForm );
			}
			// The kD-tree and the direction of projection depend on the (transformed) vertex positions
			rMeshes[idx].build_kd();
			rMeshes[idx].setDirectionOfProjection();
		}
		// WARNING: Have to add a function for computing the meshes 
		if( Progress.set ) delete bar;
 		GetMeshFunctions( vertices , triangles , center , scale , confidence , rMeshes , gridCount , functions );
		delete[] rMeshes;
	}
	else
	{
		RangeGrid< PlyOrientedColorVertex< Real > , Real > *rGrids = new RangeGrid< PlyOrientedColorVertex< Real > , Real >[gridCount];
		ProgressBar* bar = NULL;
		if( Progress.set ) bar = new ProgressBar( 20 , gridCount , "Reading Grids" );
		// The grids are read in parallel, each thread handling a contiguous slice of the list
#pragma omp parallel for num_threads( Threads.value )
		for( int t=0 ; t<Threads.value ; t++ )
			for( int i=(gridCount*t)/Threads.value ; i<(gridCount*(t+1))/Threads.value ; i++ )
			{
				if( Progress.set ) bar->update( t==0 );
				if( xFormNames ) rGrids[i].Read( gridNames[i] , xFormNames[i] );
				else			 rGrids[i].Read( gridNames[i] );
			}
		if( Progress.set ) delete bar;
		GetRangeFunctions( vertices , triangles , center , scale , confidence , rGrids , gridCount , functions );
		delete[] rGrids;
	}
}
template< class Real , bool Primal >
int Execute(int argc,char* argv[])
{
	int paramNum=sizeof(params)/sizeof(cmdLineReadable*);

	int commentNum=0;
	char **comments;

	comments=new char*[paramNum+8];
	for(int i=0;i<paramNum+8;i++){comments[i]=new char[1024];}

	DumpOutput( Verbose.set , comments[commentNum++] , "Running Texture Stitcher\n" );
	char valueString[1024];
	for(int i=0;i<paramNum;i++)
		if(params[i]->set)
		{
			params[i]->writeValue(valueString);
			DumpOutput( Verbose.set , comments[commentNum++] , "\t--%s %s\n" , params[i]->name , valueString );
		}
	if( Threads.value<1 ) Threads.value = 1;
	if( ParallelDepth.value>Depth.value )
	{
		ParallelDepth.value = Depth.value;
		Threads.value = 1;
	}

	double t;
	double tt=Time();
	int fileType;
	Point3D< Real > center;
	Real scale;
	std::vector< PlyVertex< Real > > vertices;
	std::vector< TriangleIndex > triangles;
	MeshOctree< Real , Primal > tree;
	char **scanNames,**xFormNames=NULL;
	int scanCount,xFormCount=0;


	if( !In.set || !Out.set || !Scans.set )
	{
		ShowUsage(argv[0]);
		return 0;
	}
	VWeight.value *= 1<<( 2*Depth.value );

	scanNames = ReadWords( Scans.value , scanCount );
	if( !scanNames )
	{
		fprintf( stderr , "Failed to read grid names from: %s\n" , Scans.value );
		return EXIT_FAILURE;
	}
	if( XForms.set )
	{
		xFormNames = ReadWords( XForms.value , xFormCount );
		if( !xFormNames )
		{
			fprintf( stderr , "Failed to read xForm names from: %s\n" , XForms.value );
			return EXIT_FAILURE;
		}
		if( scanCount!=xFormCount )
		{
			fprintf( stderr , "Grid count and xForm count differ: %d != %d\n" , scanCount , xFormCount );
			return EXIT_FAILURE;
		}
	}

	// Read in the mesh
	{
		PlyReadTriangles( In.value , vertices , triangles , PlyVertex< Real >::ReadProperties , PlyVertex< Real >::ReadComponents , fileType );
		FitVertices< Real , PlyVertex< Real > >( vertices , Point3D< Real >( 0.5 , 0.5 , 0.5 ) , 1. , center , scale );
		for( int s=0 ; s<Subdivide.value ; s++ ) SubdivideTriangle( vertices , triangles );
	}
	// Set up the octree
	{
		t=Time();

		tree.setTree( vertices , triangles , Depth.value , CleanTree.value , Progress.set , Threads.value );
		DumpOutput( Verbose.set , comments[commentNum++],"#             Tree set in: %9.1f (s), %9.1f (MB)\n" , Time()-t , tree.maxMemoryUsage );
		if( Verbose.set )
		{
			printf( "    Dimension/Elements: %d / %d\n" , tree.basisDimension() , tree.elements() );
			printf( "    Vertices/Triangles: %d / %d\n"  , (int)vertices.size() , (int)triangles.size() );
			printf( "            Tree Depth: %d\n" , Depth.value );
		}
	}

	std::vector< PlyColorVertex< Real > > outVertices;

	Point3D< Real > inAverage;
	{
		// We define two different solvers, but will only use one of them,
		// depending on whether the "--elements" flag is set.
		MeshOctree< Real , Primal >::ParallelSolver< FastPoint3D< Real > > femSolver;
		ParallelMultigridSolver< Real , FastPoint3D< Real > , int > basisSolver;
		if( Elements.set ) tree.setSolver(   femSolver , ParallelDepth.value , Threads.value ,  GSBlockSize.value , true ) ,   femSolver.threads = Threads.value;
		else               tree.setSolver( basisSolver , ParallelDepth.value , Threads.value ,  GSBlockSize.value        ) , basisSolver.threads = Threads.value;

		Vector< FastPoint3D< Real > >* ptrX;
		Vector< FastPoint3D< Real > >* ptrB;
		SparseMatrix< Real , int >*    ptrM;

		// Set the system matrix
		double mTime = Time();
		if( Elements.set )
		{
			MeshOctree< Real , Primal >::ElementMatrix& eM = femSolver.eM();
			if		( !GWeight.value )	tree.getDotProductMatrix          ( eM ,                                 Progress.set );
			else if ( !VWeight.value )	tree.getLaplacianMatrix           ( eM ,                                 Progress.set );
			else						tree.getConstrainedLaplacianMatrix( eM , VWeight.value , GWeight.value , Progress.set );
			femSolver.ResetMatrix( Threads.value );
			ptrX = &( femSolver.x() );
			ptrB = &( femSolver.b() );
			ptrM = &( femSolver.M() );
		}
		else
		{
			SparseMatrix< Real , int >& M = basisSolver.M();
			if		( !GWeight.value )	tree.getDotProductMatrix          ( M ,                                 Progress.set );
			else if ( !VWeight.value )	tree.getLaplacianMatrix           ( M ,                                 Progress.set );
			else						tree.getConstrainedLaplacianMatrix( M , VWeight.value , GWeight.value , Progress.set );
			basisSolver.ResetMatrix( );
			ptrX = &( basisSolver.x() );
			ptrB = &( basisSolver.b() );
			ptrM = &( basisSolver.M() );
		}
		Vector< FastPoint3D< Real > >& _X = *ptrX;
		Vector< FastPoint3D< Real > >& _B = *ptrB;
		SparseMatrix< Real , int >&     M = *ptrM;
		mTime = Time()-mTime;
		double mMem = tree.MemoryUsage();

		// Set the RHS
		double bTime = Time();
		{
			std::vector< LinearFunction3D< Point3D< Real > > > functions;

			GetFunctions( vertices , triangles , scanCount , scanNames , xFormNames , center , scale , Real( GrazingFallOff.value ) , functions );
			for( int i=0 ; i<triangles.size() ; i++ )
			{
				Point3D< Real > normal = Point3D< Real >::CrossProduct( vertices[triangles[i][1]]-vertices[triangles[i][0]] , vertices[triangles[i][2]]-vertices[triangles[i][0]] );
				functions[i] = functions[i].fitToHyperplane( vertices[triangles[i][0]].point , normal );
			}

			inAverage = tree.template getPreciseIntegral< Point3D< Real > , Point3D< double > >( functions ) / tree.getArea();

			if( Elements.set )
			{
				typename MeshOctree< Real , Primal >::ElementVector< FastPoint3D< Real > > B;
				if( !GWeight.value )
					tree.template getPreciseDotVector< FastPoint3D< Real > , FastPoint3D< double > , LinearFunction3D< Point3D< Real > > >( functions , B , Progress.set );
				else if( !VWeight.value )
					tree.template getPreciseGradientDotGradientVector< FastPoint3D< Real > , FastPoint3D< double > , LinearFunction3D< Point3D< Real > > >( functions , B , Progress.set );
				else
					tree.template getPreciseConstrainedGradientDotGradientVector< FastPoint3D< Real > , FastPoint3D< double > , LinearFunction3D< Point3D< Real > > >( functions , B , VWeight.value , GWeight.value , Progress.set );
				femSolver.e2b.back().Multiply( B , _B , Threads.value , true );
			}
			else
			{
				std::vector< FastPoint3D< Real > > B;
				if( !GWeight.value )
					tree.template getPreciseDotVector< FastPoint3D< Real > , FastPoint3D< double > , LinearFunction3D< Point3D< Real > > >( functions , B , Progress.set );
				else if( !VWeight.value )
					tree.template getPreciseGradientDotGradientVector< FastPoint3D< Real > , FastPoint3D< double > , LinearFunction3D< Point3D< Real > > >( functions , B , Progress.set );
				else
					tree.template getPreciseConstrainedGradientDotGradientVector< FastPoint3D< Real > , FastPoint3D< double > , LinearFunction3D< Point3D< Real > > >( functions , B , VWeight.value , GWeight.value , Progress.set );
				_B.Resize( B.size() );
				for( int i=0 ; i<B.size() ; i++ ) _B[i] = B[i];
			}
		}
		bTime = Time()-bTime;
		double bMem = tree.MemoryUsage();


		_X.Resize( _B.Dimensions() );
		double normB = 0 , normR = 0;
		for( int i=0 ; i<_B.Dimensions() ; i++ ) normB += Point3D< Real >::SquareNorm( _B[i] );

		// Solve the linear system
		double sTime = Time();
		if( Elements.set )
		{
			femSolver.cycles = Cycles.value;
			femSolver.gsIters = Iters.value;
			femSolver.vCycle = VCycle.set;
			femSolver.cascadic = false;
#if 1
#pragma message( "[WARNING] Forcing MultigridSolver::Solve" )
			( (MultigridSolver< Real , FastPoint3D< Real > , int >*)&femSolver )->Solve();
#else
			femSolver.Solve( );
#endif
		}
		else
		{
			basisSolver.cycles = Cycles.value;
			basisSolver.gsIters = Iters.value;
			basisSolver.vCycle = VCycle.set;
			basisSolver.cascadic = false;
#if 1
#pragma message( "[WARNING] Forcing MultigridSolver::Solve" )
			( (MultigridSolver< Real , FastPoint3D< Real > , int >*)&basisSolver )->Solve();
#else
			basisSolver.Solve( );
#endif
		}
		sTime = Time()-sTime;

		Vector< FastPoint3D< Real > > temp;
		if( Elements.set ) temp =   femSolver * _X;
		else               temp = basisSolver * _X;
		for( int i=0 ; i<_B.Dimensions() ; i++ ) normR += Point3D< Real >::SquareNorm( _B[i]-temp[i] );
		double sMem = tree.MemoryUsage();
		if( Verbose.set )
		{
			printf( "         Set initial constraints in: %9.1f (s), %9.1f (MB)\n" , bTime , bMem );
			printf( "              Set initial matrix in: %9.1f (s), %9.1f (MB), %d entries\n" , mTime , mMem , M.Entries() );
		}
		DumpOutput( Verbose.set , comments[commentNum++] , "Solver Time:\t%f (s)\n" , sTime );
		DumpOutput( Verbose.set , comments[commentNum++] , "Solver Error:\t%f -> %f = %f\n" , sqrt(normB) , sqrt(normR) , sqrt( normR / normB ) );
		DumpOutput( Verbose.set , comments[commentNum++] , "#     Solved system in: %9.1f (s), %9.1f (MB)\n" , sTime , sMem );

		std::vector< FastPoint3D< Real > > values;
		outVertices.resize( vertices.size() );
		for( int i=0 ; i<vertices.size() ; i++ ) outVertices[i].point = vertices[i].point;
		tree.getValues( outVertices , _X , values );
		Point3D< Real > outAverage = Point3D< Real >( tree.template getPreciseCoefficientIntegral< FastPoint3D< Real > , FastPoint3D< double > >( _X ) ) / tree.getArea();
		Point3D< Real > offset;
		if( !VWeight.value ) offset = inAverage - outAverage;
		else offset *= 0;

		for( int i=0 ; i<outVertices.size() ; i++ )
		{
			outVertices[i].color = Point3D< Real >( values[i] ) + offset;
			outVertices[i].point = outVertices[i].point*scale + center;
			for( int c=0 ; c<3 ; c++ )
				if		( outVertices[i].color[c]<0   )	outVertices[i].color[c] =   0;
				else if ( outVertices[i].color[c]>255 )	outVertices[i].color[c] = 255;
		}
	}
	if( ASCII.set ) fileType = PLY_ASCII;
	PlyWriteTriangles( Out.value , outVertices , triangles , PlyColorVertex< Real >::WriteProperties , PlyColorVertex< Real >::WriteComponents , fileType , comments , commentNum );
	return 1;
}

int main( int argc , char* argv[] )
{
	srand( 0 );
	int paramNum = sizeof( params ) / sizeof( cmdLineReadable* );
	cmdLineParse( argc-1 , &argv[1] , paramNum , params , 0 );

	if( Dual.set ) Execute< double , false >( argc , argv );
	else           Execute< float  , true  >( argc , argv );
	return EXIT_SUCCESS;
 }