/*
Copyright (c) 2011, Michael Kazhdan and Ming Chuang
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution. 

Neither the name of the Johns Hopkins University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/
#include "mgui.h"
#include "shaders.inl"

// Placement of the knotted plot relative to the stage (stored in showPlot).
// DisplayFunction draws the plot before the stage for PLOT_BACK and after the
// stage for PLOT_FRONT.
enum
{
	PLOT_BACK  ,
	PLOT_FRONT ,
	PLOT_COUNT	// number of placements; used as the modulus when cycling with 'p'
};
// Vertex coloring modes (stored in colorMode).
// NO_COLOR uploads a color value of 0 for every vertex, PAINT_COLOR derives the
// value from the per-element gradient scale, and FLEETING_COLOR (in gpuUpdateVBO)
// does so only while the fleeting_color flag is set.
enum
{
	NO_COLOR,
	PAINT_COLOR ,
	FLEETING_COLOR ,
	COLOR_COUNT	// number of modes; used as the modulus when cycling with 'C'
};

// Values locked on the host
// coefficientMemSize / colorMemSize are the byte counts passed to cudaMemcpy in
// gpuUpdateVBO when uploading the two host buffers below.
// NOTE(review): the buffers are allocated outside this section -- presumably as
// page-locked memory, given the comment above; confirm at the allocation site.
int coefficientMemSize , colorMemSize;
float* host_coefficientValues;
float* host_colorValues;
// Values stored on the GPU
// These are the device-side arguments handed to Tree::cuKernel in gpuUpdateVBO.
float* gpu_evaluationValues;
float* gpu_coefficientValues;
float* gpu_colorValues;
unsigned int* gpu_evaluationIndices;
unsigned int* gpu_colorIndices;

// Frame-rate bookkeeping used by drawTimer: fps is recomputed every
// FPS_FRAMES_TO_REFRESH frames from the time elapsed since lastTime.
// NOTE(review): `timer` is not referenced in the visible part of the file.
unsigned int timer = 0;
const int FPS_FRAMES_TO_REFRESH = 20;
double fps;
double currentTime, lastTime;
int frameCount=0;

// Number of samples taken from the knotted plot, and the buffer that receives
// them in UpdateAnisotropicWeights.
const int Samples = 100;
float* plotValues = new float[ Samples ];

// clickedPt is where the spray-can ring is drawn (DisplayFunction); mappedPt is the
// point against which element distances are measured when painting (ProcessSprayCan).
Point3D<double> clickedPt , mappedPt;
// Brush radius and the limits the arrow keys clamp it to (SpecialFunction).
float minBrushRadius = 0.001f;
float maxBrushRadius = 0.2f;
float brushRadius = 0.07;
// Number of angular segments used to draw the brush ring.
const int brushRes = 50;
// One of NO_COLOR / PAINT_COLOR / FLEETING_COLOR.
unsigned int colorMode;
// Set while the user is dragging (MotionFunction), cleared on every mouse
// button event (MouseFunction).
bool fleeting_color = false;

/////////////////////////
// GL Shading Language //
/////////////////////////
// Program and shader objects created in setShaders: one program for regular
// drawing and one used by ProcessSprayCan for picking.
GLuint ShaderProgram;
GLuint fShader;
GLuint vShader;
GLuint ShaderProgram_Picking;
GLuint fShader_Picking;
GLuint vShader_Picking;

// On-screen GUI widgets.
Platform<float> guiPlatform;
MessageBoard<float> messageStatus;
MessageBoard<float> messageColorMode;
MessageBoard<float> messageFPS;
MessageBoard<float> messageState;
MessageBoard< float > messageResidual;
Histogram< float > curvatureHistogram;
KnottedPlot< float > knottedPlot;


// showPlot holds PLOT_BACK or PLOT_FRONT; showHistogram toggles the curvature histogram.
int showPlot = PLOT_BACK;
bool showHistogram = false;
// sliderGS sets the global gradient scale; sliderSprayGS sets the value painted
// by the spray can.
Slider<float> sliderGS;
Slider< float > sliderSprayGS;
// Per-element flags telling UpdateConstraints which constraint rows to rebuild.
std::vector< bool > needsUpdating;

// Current window size and GUI layout offsets (used in ReshapeFunction).
int window_width;
int window_height;
int sliderOffsetLeft = 30 , sliderOffsetRight = 90 , fpsOffset;
// GUI colors.
Point3D< float > clearColor( 217./255 , 217./255 , 217./255 );
Point3D< float > sliderFillColor( 229./255 , 76./255 , 153./255 ) , sliderLineColor( 127./255 , 127./255 , 229./255 );
Point3D< float > functionCurveColor( 0. , 1. , 0. ) , functionFrameColor( 0. , 0. , 0. );
Point3D< float > backgroundColor( 1. , 1. , 1. );
float transparency = 0.5;

// True when the spray-can ring should be drawn at clickedPt.
bool pointUnderMouse = false;
int viewerThickness = 8;

Mouse mouse;

// Pending-work flags set by the input callbacks and consumed (then cleared) at
// the start of DisplayFunction.
bool updateWeights     = false;
bool updateConstraints = false;
bool updateSystem      = false;
// Work that DisplayFunction performs on every frame regardless of the flags
// above; cycled with F10 (SpecialFunction).
enum
{
	NO_UPDATE ,
	UPDATE_SOLUTION ,
	UPDATE_CONSTRAINTS ,
	UPDATE_SYSTEM ,
	UPDATE_COUNT
};
int ForceUpdate;

// Endpoints of the color ramp used by ColorFunction.
Point3D< double > gray( 0.8 , 0.8 , 0.8 );
Point3D< double > red ( 1.0 , 0.0 , 0.0 );
Point3D< double > blue( 0.0 , 0.0 , 1.0 );

// Maps a signed scalar onto the gray/blue/red ramp: negative values blend from
// gray toward blue, all other values blend from gray toward red. The blend
// weight is -value on the negative side and value otherwise.
template< class Real >
Point3D< Real > ColorFunction( Real value )
{
	const bool isNegative = value<0;
	const Point3D< double >& tint = isNegative ? blue : red;
	const Real weight = isNegative ? -value : value;
	return Point3D< Real >( tint * weight + gray * ( 1 - weight ) );
}
template< class Real >
void GetRepresentativeColor( const Real& max, const Real& value, Point3D<Real>& color )
{
	if( value<1 ) color = ColorFunction( Real(  value-1             ) );
	else          color = ColorFunction( Real( (value-1) / (max-1.) ) );
	color = ( color + Point3D< Real >( gray ) ) / 2;
}

bool UpdatePointUnderMouse( int mouseX , int mouseY , int width , int height , Point3D< double >& point )
{
	GLint viewport[4];
	GLdouble mvmatrix[16] , projmatrix[16];
	GLfloat	winx , winy , winz;
	glGetIntegerv( GL_VIEWPORT , viewport);			
	glGetDoublev ( GL_MODELVIEW_MATRIX , mvmatrix );		
	glGetDoublev ( GL_PROJECTION_MATRIX , projmatrix );
	winx = mouseX;
	winy = mouseY;
	winz = 0.0;
	glReadPixels( mouseX , mouseY , 1 , 1 , GL_DEPTH_COMPONENT , GL_FLOAT , &winz );
	gluUnProject( winx , winy , winz , mvmatrix , projmatrix , viewport, &point.coords[0] , &point.coords[1] , &point.coords[2] );
	return (winz!=1.f) && (winz!=0.f) && (mouseX>=0 && mouseY>=0 && mouseX<width && mouseY<height);
}

template< class Real , bool Primal >
void cpuUpdateVBO( void )
{
	int threads = Tree< Real , Primal >::solver.pX().threads();
	const int supportCount = Tree< Real , Primal >::supportCount;
	Vector< FastPoint3D< Real > >& X = Tree< Real , Primal >::solver.x();
	Tree< Real , Primal >::solver.pX().SetToArray( X );
	static float* colorValues = new float[ myStage.vertices.size() ];
#pragma omp parallel for num_threads( threads )
	for( int i=0 ; i<myStage.vertices.size() ; i++ )
	{
		myStage.vertices[i] = Point3D< float >( );
		for( int j=0 ; j<supportCount ; j++ )
			if( Tree< Real , Primal >::E[i][j].N>=0 ) myStage.vertices[i] += Point3D< Real >( X[ Tree< Real , Primal >::E[i][j].N ] * Tree< Real , Primal >::E[i][j].Value );

		int ii = Tree< Real , Primal >::nIndices[i];
		float color;
		switch( colorMode )
		{
		case PAINT_COLOR:
			color = Tree< Real , Primal >::gradientScale[ii];
			if( color<1 ) color = (color-1);
			else          color = (color-1)/(MaxGradientScale.value - 1 );
			break;
		case NO_COLOR:
			color = 0;
			break;
		}
		if( color<-1 ) color = -1;
		if( color> 1 ) color =  1;
		colorValues[i] = color;
	}
	myStage.setVertexBufferObject( colorValues );
}

// Uploads the current solution coefficients and per-element color values to the
// GPU and runs Tree::cuKernel on the mapped vertex buffer object.
//
// The host staging is split over `threads` OpenMP iterations. Iteration t
//   (1) copies slice t of solver.pX() into host_coefficientValues, and
//   (2) fills its share of host_colorValues from gradientScale.
template< class Real , bool Primal >
void gpuUpdateVBO( void )
{
	int threads = Tree< Real , Primal >::solver.pX().threads();
#pragma omp parallel for num_threads( threads )
	for( int t=0 ; t<threads ; t++ )
	{
		// Offset of slice t = total size of the slices that precede it
		int ii=0;
		for( int j=0 ; j<t ; j++ ) ii += Tree< Real , Primal >::solver.pX().size( j );
		ConstPointer( FastPoint3D< Real > ) _X = Tree< Real , Primal >::solver.pX()[t];
		for( int j=0 ; j<Tree< Real , Primal >::solver.pX().size( t ) ; j++ , ii++ )
		{
			// Coefficients are stored with a stride of four floats; only the first
			// three are written here.
			// NOTE(review): slot ii*4+3 is never written in this function -- presumably padding; confirm.
			Point3D< Real > x = _X[j];
			host_coefficientValues[ii*4+0] = (float)x[0];
			host_coefficientValues[ii*4+1] = (float)x[1];
			host_coefficientValues[ii*4+2] = (float)x[2];
		}

		// Iteration t handles the t-th of `threads` contiguous chunks of gradientScale
		for( int i=(Tree< Real , Primal >::gradientScale.size()*t)/threads ; i<(Tree< Real , Primal >::gradientScale.size()*(t+1))/threads ; i++ )
		{
			float color;
			// The gradient scale is shown in PAINT_COLOR mode, or in FLEETING_COLOR
			// mode while fleeting_color is set; otherwise the color value is 0.
			if( colorMode==PAINT_COLOR  || ( colorMode==FLEETING_COLOR && fleeting_color ) )
			{
				color = Tree< Real , Primal >::gradientScale[i];
				if( color<1 ) color = (color-1);
				else          color = (color-1)/(MaxGradientScale.value - 1 );
			}
			else color = 0;
			// Clamp to [-1,1]
			if( color<-1 ) color = -1;
			if( color> 1 ) color =  1;
			host_colorValues[i] = color;
		}
	}
	float4 *dptr;
	// Upload the staged values, then map the VBO so the kernel can write into it
	cutilSafeCall( cudaMemcpy( gpu_coefficientValues , host_coefficientValues , coefficientMemSize , cudaMemcpyHostToDevice ) );
	cutilSafeCall( cudaMemcpy( gpu_colorValues       , host_colorValues       , colorMemSize       , cudaMemcpyHostToDevice ) );
	cutilSafeCall( cudaGLMapBufferObject( (void**)&dptr , GLuint( myStage.vbo ) ) );
	Tree< Real , Primal >::cuKernel( dptr , gpu_evaluationIndices , gpu_colorIndices , gpu_evaluationValues , gpu_coefficientValues , gpu_colorValues , (GLuint)Tree< Real , Primal >::fitVertices.size() , Tree< Real , Primal >::solver.pX().size() , Tree< Real , Primal >::gradientScale.size() );
	cutilSafeCall( cudaGLUnmapBufferObject( myStage.vbo ) );
}

// Refreshes the vertex buffer object, on the CPU when the CPUEvaluation option
// is set and on the GPU otherwise.
template< class Real , bool Primal >
void updateVBO( void )
{
	if( !CPUEvaluation.set )
	{
		gpuUpdateVBO< Real , Primal >( );
		return;
	}
	cpuUpdateVBO< Real , Primal >( );
}

// Writes the edited mesh to `filename` as a PLY file.
//
// Vertex positions are evaluated from the current solution exactly as in
// cpuUpdateVBO (sum of X[N]*Value over the support entries with N>=0).
// When outputColor is true each vertex also gets a color derived from the
// gradient scale of its element: the scale is mapped to [-1,1], turned into a
// gray/blue/red ramp color, averaged with gray and multiplied by 256.
//
// The mapped scale is clamped to [-1,1], matching cpuUpdateVBO and gpuUpdateVBO,
// so that out-of-range gradient scales cannot produce color channels outside the
// ramp.
template< bool Primal , class Real >
void OutputEditedPLY( char* filename , bool outputColor )
{
	int threads = Tree< Real , Primal >::solver.pX().threads();
	const int supportCount = Tree< Real , Primal >::supportCount;
	Vector< FastPoint3D< Real > >& X = Tree< Real , Primal >::solver.x();
	Tree< Real , Primal >::solver.pX().SetToArray( X );

	if( outputColor )
	{
		Point3D< Real > gray( 0.8 , 0.8 , 0.8 ) , blue( 0.0 , 0.0 , 1.0 ) , red( 1.0 , 0.0 , 0.0 );
		std::vector< PlyColorVertex< Real > > outVertices;
		outVertices.resize( myStage.vertices.size() );
		const int vertexCount = (int)outVertices.size();
#pragma omp parallel for num_threads( threads )
		for( int i=0 ; i<vertexCount ; i++ )
		{
			outVertices[i].point = Point3D< Real >();
			for( int j=0 ; j<supportCount ; j++ )
				if( Tree< Real , Primal >::E[i][j].N>=0 ) outVertices[i].point += X[ Tree< Real , Primal >::E[i][j].N ] * Tree< Real , Primal >::E[i][j].Value;

			// Map the gradient scale to [-1,1] the same way the viewer does
			float color = Tree< Real , Primal >::gradientScale[ Tree< Real , Primal >::nIndices[i] ];
			if( color<1 ) color = (color-1);
			else          color = (color-1)/(MaxGradientScale.value - 1 );
			if( color<-1 ) color = -1;
			if( color> 1 ) color =  1;

			if( color<0 ) outVertices[i].color = blue * (-color) + gray * ( 1. + color );
			else          outVertices[i].color = red  * color + gray * ( 1. - color );
			outVertices[i].color = ( outVertices[i].color+gray ) / 2;
			outVertices[i].color *= 256;
		}
		PlyWriteTriangles( filename , outVertices , myStage.triangles , PlyColorVertex< Real >::ReadProperties , PlyColorVertex< Real >::ReadComponents , file_type , comments , commentNum );
	}
	else
	{
		std::vector< PlyVertex< Real > > outVertices;
		outVertices.resize( myStage.vertices.size() );
		const int vertexCount = (int)outVertices.size();
#pragma omp parallel for num_threads( threads )
		for( int i=0 ; i<vertexCount ; i++ )
		{
			outVertices[i].point = Point3D< Real >();
			for( int j=0 ; j<supportCount ; j++ )
				if( Tree< Real , Primal >::E[i][j].N>=0 ) outVertices[i].point += X[ Tree< Real , Primal >::E[i][j].N ] * Tree< Real , Primal >::E[i][j].Value;
		}
		PlyWriteTriangles( filename , outVertices , myStage.triangles , PlyVertex< Real >::ReadProperties , PlyVertex< Real >::ReadComponents , file_type , comments , commentNum );
	}
	printf( "Done writing to: %s\n" , filename );
}

void drawTimer( void )
{
	frameCount++;
	if( frameCount==FPS_FRAMES_TO_REFRESH )
	{
		currentTime=Time();
		unsigned int free, total;
		cuMemGetInfo(&free,&total);
		free /=1<<20;
		total/=1<<20;
		char message[64];
		fps = float(frameCount)/(currentTime-lastTime);
		sprintf( message , "%u/%u mb, %.2f fps", total-free, total, fps );
		glutSetWindowTitle(message);
		frameCount=0;
		lastTime=Time();
	}
	char message[ 64 ];
	sprintf( message , "FPS: %.2f" , fps );
	messageFPS.setColor( 1.0, 1.0, 1.0 );
	messageFPS.setMessage( message );
}

template< class Real , bool Primal >
void SolveSystem( void )
{
	double vectorSetTime, matrixSetTime, solveTime;
	vectorSetTime=matrixSetTime=solveTime=0;
	bool unitDiagonal=true , strippedDiagonal=true;
	double t1;
	double normX = 0 , normB = 0 , normR = 0 , _normB = 0;
	if( ResetSolution.set )
	{
		Vector< FastPoint3D< Real > > temp;
		temp.Resize( Tree< Real , Primal >::solver.b().Dimensions() );
		for( int i=0 ; i<Tree< Real , Primal >::solver.b().Dimensions() ; i++ )  temp[i] *= 0;
		Tree< Real , Primal >::solver.pX().SetFromArray( temp );
	}
	if( ShowError.set )
	{
		Vector< FastPoint3D< Real > > X;
		X.Resize( Tree< Real , Primal >::solver.b().Dimensions() );
		Tree< Real , Primal >::solver.pX().SetToArray( X );
		Vector< FastPoint3D< Real > > temp = Tree< Real , Primal >::solver * X;
		for( int i=0 ; i<Tree< Real , Primal >::solver.b().Dimensions() ; i++ ) normX += FastPoint3D< Real >::SquareNorm( X[i] );
		for( int i=0 ; i<Tree< Real , Primal >::solver.b().Dimensions() ; i++ )
		{
			normB  += FastPoint3D< Real >::SquareNorm( Tree< Real , Primal >::solver.b()[i] );
			_normB += FastPoint3D< Real >::SquareNorm( Tree< Real , Primal >::solver.b()[i]-temp[i] );
		}
		t1 = Time();
	}
	Tree< Real , Primal >::solver.cascadic = Cascadic.set;
	Tree< Real , Primal >::solver.vCycle   =  !WCycle.set;
	Tree< Real , Primal >::solver.cycles   =  VCycles.value;
	Tree< Real , Primal >::solver.gsIters  =    Iters.value;
	// If we are not solving in verbose mode, then we will have
	// pre-divided the constraint vector whenever it is updated
	// so that the solver does not have to.
	Tree< Real , Primal >::solver.Solve( false , !ShowError.set );
	if( ShowError.set )
	{
		t1 = Time() - t1;
		Vector< FastPoint3D< Real > > X;
		X.Resize( Tree< Real , Primal >::solver.b().Dimensions( ) );
		Tree< Real , Primal >::solver.pX().SetToArray( X );
		Vector< FastPoint3D< Real > > temp = Tree< Real , Primal >::solver * X;
		for( int i=0 ; i<Tree< Real , Primal >::solver.b().Dimensions() ; i++ ) normR += FastPoint3D< Real >::SquareNorm( Tree< Real , Primal >::solver.b()[i]-temp[i] );
		printf( "                                                                                              \r" );
		printf( "Error: (%g)\t%g -> %g -> %g ( %g = %g * %g )\t(%.3f seconds)\r" , sqrt( normX ) , sqrt( normB ) , sqrt( _normB ) , sqrt( normR ) , sqrt( normR/normB ) , sqrt( _normB/normB ) , sqrt( normR/_normB) , t1 );

		char message[1024];
		sprintf( message , "Residual: %.2g\n" , sqrt(normR/normB ) );
		messageResidual.setMessage( message );
	}
}
// GLUT display callback.
//
// 1. Performs the pending work recorded in updateWeights / updateSystem /
//    updateConstraints (or forced by ForceUpdate), optionally solves the system,
//    and refreshes the vertex buffer unless CUDAOffline is set.
// 2. Draws, in order: the histogram and (for PLOT_BACK) the plot without a
//    shader program, the stage with the program that was current on entry, then
//    (for PLOT_FRONT) the plot, the GUI platform and the spray-can ring, again
//    without a shader program.
// 3. Updates the FPS display and swaps buffers.
template< class Real , bool Primal >
void DisplayFunction( void )
{
	{
		if( ForceUpdate==UPDATE_CONSTRAINTS ) for( int i=0 ; i<(int)needsUpdating.size() ; i++ ) needsUpdating[i] = true;
		// Each update subsumes the ones below it, so only the most general one runs
		if     ( updateWeights                                        ) UpdateAnisotropicWeights< Real , Primal >( );
		else if( updateSystem      || ForceUpdate==UPDATE_SYSTEM      ) UpdateLinearSystem      < Real , Primal >( );
		else if( updateConstraints || ForceUpdate==UPDATE_CONSTRAINTS ) UpdateConstraints       < Real , Primal >( true );
		if( ForceUpdate!=NO_UPDATE ) SolveSystem< Real , Primal >( );
		if( !CUDAOffline.set ) updateVBO< Real , Primal >( );
		updateWeights = updateSystem = updateConstraints = false;
	}

	glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

	// Draw the background widgets with the fixed-function pipeline (program 0),
	// then restore the program that was in use
	GLint cp;
	glGetIntegerv(GL_CURRENT_PROGRAM,&cp);
	glUseProgram( 0 );
	if( showHistogram ) curvatureHistogram.drawOpenGL( );
	if( showPlot==PLOT_BACK ) knottedPlot.drawOpenGL( );
	glUseProgram( cp );

	char updateName[512];
	char geometryName[512];
	char message[1024];
	switch( ForceUpdate )
	{
	case NO_UPDATE:          sprintf( updateName , "None"        ) ; break;
	case UPDATE_SOLUTION:    sprintf( updateName , "Solution"    ) ; break;
	case UPDATE_CONSTRAINTS: sprintf( updateName , "Constraints" ) ; break;
	case UPDATE_SYSTEM:      sprintf( updateName , "System"      ) ; break;
	// Keep updateName defined even if ForceUpdate holds an unexpected value
	default:                 sprintf( updateName , "Unknown"     ) ; break;
	}
	if     (   CUDAOffline.set ) sprintf( geometryName , "Off" );
	else if( CPUEvaluation.set ) sprintf( geometryName , "CPU" );
	else                         sprintf( geometryName , "GPU" );

	sprintf( message , "Linear Update (%s) , Geometry Update (%s)" , updateName , geometryName );
	messageState.setMessage( message );
	myStage.drawOpenGL();

	glGetIntegerv( GL_CURRENT_PROGRAM , &cp );
	glUseProgram( 0 );

	if( showPlot==PLOT_FRONT ) knottedPlot.drawOpenGL( );

	guiPlatform.drawOpenGL();

	// Draw the Spraycan
	if( pointUnderMouse )
	{
		Point3D< float > color;
		GetRepresentativeColor( MaxGradientScale.value, sliderSprayGS.getValue() , color );
		glColor3f( color[0] , color[1] , color[2] );
		glLineWidth(2.0);
		glDisable( GL_DEPTH_TEST );
		glPushMatrix();
		glTranslatef( clickedPt[0] , clickedPt[1] , clickedPt[2] );
		// Orient the ring with the camera frame (right, up, normalized view direction)
		Point3D<float> up = myStage.camera->up;
		Point3D<float> rt = myStage.camera->right;
		Point3D<float> di = myStage.camera->position - myStage.camera->ref;
		di = di / sqrt( di.InnerProduct(di) );
		float matrix[] = {  rt[0], rt[1], rt[2], 0.0,
			up[0], up[1], up[2], 0.0,
			di[0], di[1], di[2], 0.0,
			0.0,   0.0,	  0.0,   1.0 };
		glMultMatrixf(matrix);
		// The ring spans from 80% of the brush radius to the full brush radius
		float brushSubRadius = brushRadius * .8;
		glBegin( GL_QUAD_STRIP );
		for( int i=0 ; i<=brushRes ; i++ )
		{
			float angle = float(i)/brushRes*PI*2;
			glVertex2f( brushSubRadius*cos(angle) , brushSubRadius*sin(angle) );
			glVertex2f( brushRadius*cos(angle) , brushRadius*sin(angle) );
		}
		// Exactly one glEnd per glBegin: a second one raises GL_INVALID_OPERATION
		glEnd();
		glEnable( GL_DEPTH_TEST );
		glPopMatrix();
	} // end of drawing spraycan

	glUseProgram(cp);
	drawTimer();
	glutSwapBuffers();
}
// Applies one spray-can stroke at window position (x,y).
//
// The stage is rendered through a 1x1 pick matrix with the picking shader and the
// pixel under the cursor is decoded (R + 256*G + 65536*B) into a triangle index.
// If that index names a triangle of the stage, clickedPt is set to the centroid
// of the triangle in the current solution, mappedPt to the centroid of the same
// triangle in fitVertices, and pointUnderMouse is set; otherwise pointUnderMouse
// is cleared.
//
// Afterwards the gradient scale of every element within brushRadius of mappedPt
// is blended toward the spray slider's value, using a Gaussian falloff divided by
// GaussianModulator, and the element is flagged in needsUpdating.
//
// NOTE(review): the painting pass also runs when no triangle was picked, in which
// case it uses the previous mappedPt -- confirm that this is intended.
template< class Real , bool Primal >
void ProcessSprayCan( int x , int y )
{
	// Render the stage for picking, then restore the projection and the program
	GLint cp;
	glGetIntegerv( GL_CURRENT_PROGRAM , &cp );
	glUseProgram( ShaderProgram_Picking );
	GLint viewport[4];
	glGetIntegerv ( GL_VIEWPORT , viewport );
	const float d = myStage.radius + sqrt( Point3D<float>::SquareNorm(myStage.camera->position - myStage.center) );
	glMatrixMode( GL_PROJECTION );
	glPushMatrix();
	glLoadIdentity ();
	gluPickMatrix( x , y , 1.0, 1.0, viewport);
	gluPerspective( myStage.camera->heightAngle, float(window_width)/window_height, .1*d, 1*d );
	glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
	myStage.drawOpenGL();
	GLubyte uPixel[3];
	glReadPixels( x , y , 1, 1, GL_RGB, GL_UNSIGNED_BYTE,(void *)uPixel );
	glMatrixMode( GL_PROJECTION );
	glPopMatrix();
	glUseProgram( cp );

	unsigned int tID = uPixel[2]*(1<<16) + uPixel[1]*(1<<8) + uPixel[0];
	if( tID<myStage.triangles.size() )
	{
		int supportCount = Tree< Real , Primal >::supportCount;
		clickedPt *= 0;
		Vector< FastPoint3D< Real > >& X = Tree< Real , Primal >::solver.x();
		Tree< Real , Primal >::solver.pX().SetToArray( X );
		for( int v=0 ; v<3 ; v++ )
		{
			int vID = myStage.triangles[tID][v];
			for( int s=0 ; s<supportCount ; s++ )
			{
				// Negative indices mark unused support entries (cpuUpdateVBO and
				// OutputEditedPLY skip them the same way)
				if( Tree< Real , Primal >::E[vID][s].N<0 ) continue;
				for( int c=0 ; c<3 ; c++ ) clickedPt[c] += X[ Tree< Real , Primal >::E[vID][s].N][c] * Tree< Real , Primal >::E[vID][s].Value;
			}
		}
		clickedPt /= 3.0;
		mappedPt = Point3D<double>
			( (
			Tree< Real , Primal >::fitVertices[ myStage.triangles[tID][0] ] +
			Tree< Real , Primal >::fitVertices[ myStage.triangles[tID][1] ] +
			Tree< Real , Primal >::fitVertices[ myStage.triangles[tID][2] ] 
			) / 3.0 );
		pointUnderMouse = true;
	}
	else pointUnderMouse = false;

	messageStatus.setColor( 1.0, 0.0, 1.0 );
	messageStatus.setMessage( "Painting with Spraycan" );
	float sprayValue = sliderSprayGS.getValue();

	// needsUpdating is a std::vector< bool >, whose elements share storage words,
	// so it must not be written from several threads at once. The parallel pass
	// records the touched elements in a byte-per-element buffer and the flags are
	// merged serially afterwards.
	const int elementCount = (int)Tree< Real , Primal >::elementPosition.size();
	std::vector< char > touched( elementCount , 0 );
#pragma omp parallel for num_threads( Threads.value )
	for( int i=0 ; i<elementCount ; i++ )
	{
		Point3D<double> pt = Point3D<double>( Tree< Real , Primal >::elementPosition[i] );
		double len = Length<double>( mappedPt - pt );
		if(len>brushRadius ) continue;
		// Gaussian falloff of the blend weight with the distance from the brush center
		len /= brushRadius;
		len  = exp( -2*len*len );			
		len /= GaussianModulator.value;
		Tree< Real , Primal >::gradientScale[i] = Tree< Real , Primal >::gradientScale[i]*(1-len) + sprayValue*len;
		touched[i] = 1;
	}
	for( int i=0 ; i<elementCount ; i++ ) if( touched[i] ) needsUpdating[i] = true;
}

// Forward declaration: IdleFunction calls MotionFunction, which is defined further down.
template< class Real , bool Primal >
void MotionFunction( int , int );

// GLUT idle callback: redraws continuously.
// Unless the plot is in front, a held right button is treated as continued
// motion at the last mouse position, so that the spray can keeps painting while
// the mouse is stationary.
template< class Real , bool Primal >
void IdleFunction( void )
{
	const bool plotInFront = ( showPlot==PLOT_FRONT );
	// MotionFunction flips its y argument, so the stored y is flipped back first
	if( !plotInFront && mouse.rightDown ) MotionFunction< Real , Primal >( mouse.endX , window_height-mouse.endY );
	DisplayFunction< Real , Primal >( );
}

// GLUT mouse-button callback.
// The event is offered first to the knotted plot, then to the two sliders. A
// change of the gradient-scale slider flags every element for a constraint
// update and, when the left button is not down, resets the gradient scale of
// every element to the slider value.
template< class Real , bool Primal >
void MouseFunction( int button, int state, int x, int y )
{
	// GLUT's y grows downward; the GUI uses a bottom-left origin
	const int flippedY = window_height-y;

	fleeting_color = false;
	mouse.update( button , state , x , flippedY );

	switch( state )
	{
	case GLUT_DOWN:
		if( knottedPlot.MouseDown( button , x , flippedY ) ) return;
		break;
	case GLUT_UP:
		if( knottedPlot.MouseUp( button , x , flippedY ) )
		{
			// The plot changed: rebuild the weights and redraw right away
			updateWeights = true;
			DisplayFunction< Real , Primal >( );
			return;
		}
		break;
	}

	sliderSprayGS.mouseFunc( x, flippedY );

	const bool gradientSliderChanged = sliderGS.mouseFunc( x, flippedY );
	if( gradientSliderChanged )
	{
		needsUpdating.assign( needsUpdating.size() , true );
		updateConstraints = true;
		if( !mouse.leftDown )
		{
			Point3D<float> color;
			GradientScale.value = sliderGS.getValue();
			GetRepresentativeColor( MaxGradientScale.value, GradientScale.value, color );
			Tree< Real , Primal >::gradientScale.clear();
			Tree< Real , Primal >::gradientScale.assign( Tree< Real , Primal >::weightsMin.size() , GradientScale.value );
		}
	}

	// Clear the status line when no modifier is held, or when only shift is held
	// and the right button is not down
	const int mod = glutGetModifiers();
	const bool noModifier       = ( mod==0 );
	const bool shiftNotPainting = ( mod==GLUT_ACTIVE_SHIFT ) && !mouse.rightDown;
	if( noModifier || shiftNotPainting ) messageStatus.setMessage("");	 
	glutPostRedisplay();
}
template< class Real , bool Primal >
void MotionFunction( int x, int y )
{
	if( knottedPlot.DepressedMotion( x , window_height-y ) )
	{
		updateWeights = true;
		DisplayFunction< Real , Primal >( );
		return;
	}

	mouse.move( x , window_height-y );

	// Process the spray can event
	if( mouse.rightDown )
	{
		fleeting_color = true;
		ProcessSprayCan< Real , Primal >( mouse.endX , mouse.endY );

		messageStatus.setColor( 1.0, 0.0, 1.0 );
		messageStatus.setMessage( "Painting with Spraycan" );

		updateConstraints = true;
	}
	// Process the gradient slider
	else if( sliderGS.motionFunc( mouse ) )
	{
		fleeting_color = true;
		GradientScale.value = sliderGS.getValue();
		Tree< Real , Primal >::gradientScale.clear();
		Tree< Real , Primal >::gradientScale.assign( Tree< Real , Primal >::weightsMin.size(), GradientScale.value );
		for( int i=0 ; i<needsUpdating.size() ; i++ ) needsUpdating[i] = true;
		updateConstraints = true;
	}
	// Process the spray's alpha slider event
	else if( sliderSprayGS.motionFunc( mouse ) )
	{
		fleeting_color = true;
		float sprayValue = sliderSprayGS.getValue();
		Point3D<float> color;
		GetRepresentativeColor( MaxGradientScale.value, sprayValue, color );
		sliderSprayGS.setColor( sliderFillColor , color , color );
	}
	else if( mouse.leftDown && mouse.ctrlDown )
	{
		knottedPlot.IncreaseHeight( 0.002 * mouse.shiftY );
		updateWeights = true;
		DisplayFunction< Real , Primal >( );
	}
	else myStage.MotionFunction( mouse );
	glutPostRedisplay();
}
// GLUT passive-motion callback (mouse moved with no button held).
// While showPlot is non-zero the event is forwarded to the knotted plot;
// otherwise the mouse state is updated and the point under the cursor is looked up.
// NOTE(review): the plot receives the raw y here, whereas the other callbacks
// pass window_height-y to it -- confirm which convention PassiveMotion expects.
template< bool Primal >
void PassiveMotionFunction( int x, int y )
{
	if( !showPlot )
	{
		mouse.move( x , window_height-y );
		pointUnderMouse = UpdatePointUnderMouse( mouse.endX , mouse.endY , window_width , window_height , clickedPt );
	}
	else knottedPlot.PassiveMotion( x , y );

	glutPostRedisplay( );
}

// Recomputes the per-element anisotropic weights from the knotted plot and then
// rebuilds the linear system.
//
// The plot is sampled at `Samples` positions and each sample v is replaced by
// exp(-v). For every element, the minimum and maximum curvature are mapped from
// [-1,1] to [0,Samples], the two neighbouring samples are linearly interpolated
// (indices clamped to the valid range), and the weights are set to
//   weightsMin = scale / maxWeight ,  weightsMax = scale / minWeight
// with scale = 1/sqrt( Alpha ).
//
// The preprocessor switch below now encloses only the two alternative
// assignments, so that toggling it can no longer drop the loop's closing brace.
template< class Real , bool Primal >
void UpdateAnisotropicWeights( void )
{
	double t = Time();
	Real scale = Real( 1. / sqrt( double( Alpha.value ) ) );
	knottedPlot.getValues( plotValues , Samples );
	for( int i=0 ; i<Samples ; i++ ) plotValues[i] = exp( -plotValues[i] );
	for( int i=0 ; i<Tree< Real , Primal >::maxCurvature.Dimensions() ; i++ )
	{
		int x1 , x2;
		Real x , dx;

		// Interpolated plot value at the minimum curvature
		x = ( 1 + Tree< Real , Primal >::minCurvature[i] ) / 2. * Samples;
		x1 = int( x );
		x2 = x1 + 1;
		dx = x - x1;
		if( x1<0 ) x1 = 0;
		if( x2<0 ) x2 = 0;
		if( x1>=Samples ) x1 = Samples-1;
		if( x2>=Samples ) x2 = Samples-1;
		Real minWeight = plotValues[x1] * ( 1.-dx ) + plotValues[x2] * dx;

		// Interpolated plot value at the maximum curvature
		x = ( 1 + Tree< Real , Primal >::maxCurvature[i] ) / 2 * Samples;
		x1 = int( x );
		x2 = x1 + 1;
		dx = x - x1;
		if( x1<0 ) x1 = 0;
		if( x2<0 ) x2 = 0;
		if( x1>=Samples ) x1 = Samples-1;
		if( x2>=Samples ) x2 = Samples-1;
		Real maxWeight = plotValues[x1] * ( 1.-dx ) + plotValues[x2] * dx;

#if 0
		Tree< Real , Primal >::weightsMin[i] = scale / minWeight;
		Tree< Real , Primal >::weightsMax[i] = scale / maxWeight;
#else
		Tree< Real , Primal >::weightsMin[i] = scale / maxWeight;
		Tree< Real , Primal >::weightsMax[i] = scale / minWeight;
#endif
	}
	if( FullVerbose.set ) printf( "Updated weights in: %f\n" , Time()-t );
	UpdateLinearSystem< Real , Primal >( );
}


template< class Real , bool Primal >
void UpdateLinearSystem( void )
{
	double t = Time();
	MeshOctree< Real , Primal >::ElementMatrix& eM = Tree< Real , Primal >::solver.eM();
#pragma omp parallel for num_threads( Threads.value )
	for( int i=0 ; i<eM.size() ; i++ )
	{
		Real wDot = Tree< Real , Primal >::weightsMin[i] * Tree< Real , Primal >::weightsMax[i];
		Real wMin = Tree< Real , Primal >::weightsMax[i] / Tree< Real , Primal >::weightsMin[i];
		Real wMax = Tree< Real , Primal >::weightsMin[i] / Tree< Real , Primal >::weightsMax[i];
		eM[i].setScaled( Tree< Real , Primal >::dot[i]    , wDot );
		eM[i].addScaled( Tree< Real , Primal >::lapMin[i] , wMin );
		eM[i].addScaled( Tree< Real , Primal >::lapMax[i] , wMax );
		for( int j=0 ; j<(Primal?8:27) ; j++ )
		{
			Tree< Real , Primal >::dotB[i][j] = Tree< Real , Primal >::initialB[i][j] * wDot;
			Tree< Real , Primal >::lapB[i][j] = Tree< Real , Primal >::initialLapMinB[i][j] * wMin + Tree< Real , Primal >::initialLapMaxB[i][j] * wMax;
		}
	}
	Tree< Real , Primal >::solver.ResetMatrix( Threads.value );

	// Next, construct the constraints
	if( FullVerbose.set ) printf( "Updated linear system in: %f\n" , Time() - t );
	for( int i=0 ; i<needsUpdating.size() ; i++ ) needsUpdating[i] = true;
	UpdateConstraints< Real , Primal >( true );
}
// Rebuilds the constraint vector of the solver.
//
// For every element flagged in needsUpdating, B = dotB + lapB * gradientScale
// (8 entries per element when Primal is true, 27 otherwise). The element
// constraints are then gathered into solver.b() through the last e2b operator,
// all flags are cleared, and -- unless ShowError is set -- the solver pre-divides
// the constraint vector.
//
// NOTE(review): diagonalRescale is not used by this function.
template< class Real , bool Primal >
void UpdateConstraints( bool diagonalRescale )
{
	double t = Time();
	// needsUpdating is only read inside the parallel loop, which is safe
#pragma omp parallel for
	for( int i=0 ; i<Tree< Real , Primal >::B.size() ; i++ )
		if( needsUpdating[i] )
		{
			Real w = Tree< Real , Primal >::gradientScale[i];
			for( int j=0 ; j<(Primal?8:27) ; j++ ) Tree< Real , Primal >::B[i][j] = Tree< Real , Primal >::dotB[i][j] + Tree< Real , Primal >::lapB[i][j] * w;
		}
	Tree< Real , Primal >::solver.e2b.back().Multiply( Tree< Real , Primal >::B , Tree< Real , Primal >::solver.b() , Threads.value , false );
	// Clear the flags serially: std::vector< bool > packs its elements into shared
	// words, so writing neighbouring elements from different threads is a data race.
	needsUpdating.assign( needsUpdating.size() , false );
	if( !ShowError.set ) Tree< Real , Primal >::solver.PreDivide();
	if( FullVerbose.set ) printf( "Updated constraints in: %f\n" , Time() - t );
}

template< bool Primal >
void ReshapeFunction( int width , int height )
{
	window_width = width , window_height = height;
	myStage.Reshape( width , height );

	if( Minimal.set )
	{
		sliderGS.setSize     ( window_width-2*sliderOffsetLeft , 12 );
		sliderSprayGS.setSize( window_width-2*sliderOffsetLeft , 12 );
	}
	else
	{
		sliderGS.setSize     ( window_width-sliderOffsetLeft-sliderOffsetRight , 12 );
		sliderSprayGS.setSize( window_width-sliderOffsetLeft-sliderOffsetRight , 12 );
	}
	sliderGS.setPosition     ( sliderOffsetLeft , window_height-24 );
	sliderSprayGS.setPosition( sliderOffsetLeft , window_height-48 );

	curvatureHistogram.SetPosition( window_width , window_height , Point2D< float >( 0. , 0. ) , Point2D< float >( 1. , 1. ) );
	knottedPlot.SetPosition       ( window_width , window_height , Point2D< float >( 0. , 0. ) , Point2D< float >( 1. , 1. ) );

	messageFPS.setPosition( window_width-sliderOffsetLeft-fpsOffset, 22 );
	glutPostRedisplay( );
}
// Reads one line from stdin into `buffer` (at most size-1 characters) and strips
// the trailing line break. If the line is longer than the buffer, the remainder
// of the line is discarded. Returns false if nothing could be read or the
// resulting string is empty.
static bool ReadLineFromStdin( char* buffer , int size )
{
	if( !buffer || size<=0 ) return false;
	buffer[0] = 0;
	if( !fgets( buffer , size , stdin ) ) return false;

	bool sawLineBreak = false;
	for( int i=0 ; buffer[i] ; i++ )
		if( buffer[i]=='\n' || buffer[i]=='\r' )
		{
			buffer[i] = 0;
			sawLineBreak = true;
			break;
		}
	// Drop the rest of an over-long line so it is not taken as the next answer
	if( !sawLineBreak )
	{
		int c;
		while( ( c=fgetc( stdin ) )!=EOF && c!='\n' ) ;
	}
	return buffer[0]!=0;
}

// GLUT keyboard callback.
//   ESC   : exit
//   'p'   : cycle the plot placement (and lock the plot unless it is in front)
//   'C'   : cycle the color mode
//   'O'   : prompt for a file name and write the edited mesh, with colors, as PLY
//   'o'   : prompt for a file name and write the camera transform
//   ' '   : reset the spray value to 1
//   other : forwarded to the stage
// The knotted plot gets the first chance to consume the key.
template< class Real , bool Primal >
void KeyboardFunction( unsigned char c, int x, int y )
{
	if( knottedPlot.Keyboard( c , x , y ) )
	{
		updateWeights = true;
		DisplayFunction< Real , Primal >( );
		return;
	}

	switch( c )
	{
		case '\033':
			exit( 0 );
			break;
		case 'p':
			showPlot = (showPlot+1)%PLOT_COUNT;
			knottedPlot.lockPlot = (showPlot!=PLOT_FRONT);
			break;
		case 'C':
		{
			colorMode = (colorMode+1)%COLOR_COUNT;
			switch( colorMode )
			{
			case NO_COLOR:
				messageColorMode.setMessage( "Color Mode: None" );
				break;
			case PAINT_COLOR:
				messageColorMode.setMessage( "Color Mode: Paint" );
				break;
			case FLEETING_COLOR:
				messageColorMode.setMessage( "Color Mode: Fleeting" );
				break;
			}
		}
		break;
		case 'O':
		{
			char filename[256];
			printf( "PLY File Name: " );
			// Bounded read: gets() would overflow the buffer on long input
			if( !ReadLineFromStdin( filename , sizeof( filename ) ) ) fprintf( stderr , "Could not read PLY file name\n" );
			else OutputEditedPLY< Primal , Real >( filename , true );
		}
		break;
		case 'o':
		{
			char filename[256];
			printf( "XForm File Name: " );
			if( !ReadLineFromStdin( filename , sizeof( filename ) ) ) fprintf( stderr , "Could not read xForm file name\n" );
			else
			{
				Matrix4D m = myStage.camera->get();
				FILE* fp = fopen( filename , "w" );
				if( !fp ) fprintf( stderr , "Could not write xForm: %s\n" , filename );
				else
				{
					// The 16 entries are written with j as the outer index of m.m[i][j]
					for( int j=0 ; j<4 ; j++ ) for( int i=0 ; i<4 ; i++ ) fprintf( fp , "%f " , m.m[i][j] );
					fclose( fp );
				}
			}
		}
		break;
		case ' ':
		{
		   sliderSprayGS.setValue( 1. );
		   Point3D< float > color;
		   GetRepresentativeColor( MaxGradientScale.value , 1.f , color );
		   sliderSprayGS.setColor( sliderFillColor , color , color );
		}
		break;
		default: myStage.KeyboardFunction( c );
	}
	pointUnderMouse = false;
}
template< class Real , bool Primal >
void SpecialFunction( int key, int x, int y )
{
	if( knottedPlot.SpecialFunction( key , x , y ) )
	{
		updateWeights = true;
		DisplayFunction< Real , Primal >( );
		return;
	}
	switch( key )
	{
		case GLUT_KEY_F9:
			CPUEvaluation.set = !CPUEvaluation.set;
			break;
		case GLUT_KEY_F10:
			ForceUpdate = (ForceUpdate+1)%UPDATE_COUNT;
			break;
		case GLUT_KEY_UP:
		{
			float value = sliderSprayGS.getValue() + MaxGradientScale.value/20.0;
			if(value>MaxGradientScale.value) value=MaxGradientScale.value;
			sliderSprayGS.setValue( value );
			Point3D<float> color;
			GetRepresentativeColor( MaxGradientScale.value, value, color );
			sliderSprayGS.setColor( clearColor , color , color );
		}
		break;
		case GLUT_KEY_DOWN:
		{
			float value = sliderSprayGS.getValue() - MaxGradientScale.value/10.0;
			if(value<0) value=0;
			sliderSprayGS.setValue( value );
			Point3D<float> color;
			GetRepresentativeColor( MaxGradientScale.value, value, color );
			sliderSprayGS.setColor( clearColor , color , color );
		}
		break;
		case GLUT_KEY_LEFT:
        {
			brushRadius -= ( maxBrushRadius-minBrushRadius ) / 20.;
			if( brushRadius<minBrushRadius ) brushRadius = minBrushRadius;
        }
        break;
        case GLUT_KEY_RIGHT:
        {
			brushRadius += ( maxBrushRadius-minBrushRadius ) / 20.;
			if( brushRadius>maxBrushRadius ) brushRadius = maxBrushRadius;
        }
        break;
		default: myStage.SpecialFunction( key );
	}
	glutPostRedisplay();
}

void setShaders( void )
{	
	const char * vv = vertex_code;
	const char * ff = fragment_code;
	vShader = glCreateShader(GL_VERTEX_SHADER);
	fShader = glCreateShader(GL_FRAGMENT_SHADER);	
	glShaderSource(vShader, 1, &vv, NULL);
	glShaderSource(fShader, 1, &ff, NULL);

	GLint success;
	GLchar infoLog[1024];
	glCompileShader(vShader);
	glGetShaderiv(vShader, GL_COMPILE_STATUS, &success);
	if( !success )
	{
	   glGetShaderInfoLog(vShader, 1024, NULL, infoLog);
	   fprintf(stderr, "Error in the vertex shader compilation:\n %s\n", infoLog);
	}

	glCompileShader(fShader);
	glGetShaderiv(fShader, GL_COMPILE_STATUS, &success);
	if( !success )
	{
	   glGetShaderInfoLog(fShader, 1024, NULL, infoLog);
	   fprintf(stderr, "Error in the fragment shader compilation:\n %s\n", infoLog);
	}

	ShaderProgram = glCreateProgram();		
	glAttachShader(ShaderProgram,vShader);
	glAttachShader(ShaderProgram,fShader);

	glLinkProgram(ShaderProgram);
	glGetProgramiv(ShaderProgram, GL_LINK_STATUS, &success);
	if (!success)
	{
		glGetProgramInfoLog(ShaderProgram, 1024, NULL, infoLog);
		fprintf(stderr, "Error in th shader linking:\n %s\n", infoLog);
	}
	glUseProgram(ShaderProgram);

	const char * vv_p = selection_vertex_code;
	const char * ff_p = selection_fragment_code;
	vShader_Picking = glCreateShader(GL_VERTEX_SHADER);	
	fShader_Picking = glCreateShader(GL_FRAGMENT_SHADER);	
	glShaderSource(vShader_Picking, 1, &vv_p, NULL);
	glShaderSource(fShader_Picking, 1, &ff_p, NULL);
	glCompileShader(vShader_Picking);
	glGetShaderiv(vShader_Picking, GL_COMPILE_STATUS, &success);
	if( !success )
	{
	   glGetShaderInfoLog(vShader_Picking, 1024, NULL, infoLog);
	   fprintf(stderr, "Error in the vertex shader (for picking) compilation:\n %s\n", infoLog);
	}
	glCompileShader(fShader_Picking);
	glGetShaderiv(fShader_Picking, GL_COMPILE_STATUS, &success);
	if( !success )
	{
	   glGetShaderInfoLog(fShader_Picking, 1024, NULL, infoLog);
	   fprintf(stderr, "Error in the fragment shader (for picking) compilation:\n %s\n", infoLog);
	}
	ShaderProgram_Picking = glCreateProgram();		
	glAttachShader(ShaderProgram_Picking, vShader_Picking);
	glAttachShader(ShaderProgram_Picking, fShader_Picking);
	glLinkProgram(ShaderProgram_Picking);
	glGetProgramiv(ShaderProgram_Picking, GL_LINK_STATUS, &success);
	if (!success)
	{
		glGetProgramInfoLog(ShaderProgram_Picking, 1024, NULL, infoLog);
		fprintf(stderr, "Error in the (picking) shader linking:\n %s\n", infoLog);
	}
}

// Entry point of the interactive viewer.
//
// Creates the GLUT window and registers the callbacks, initializes the GUI widgets
// (histogram, plot, messages, sliders), configures camera and fixed-function lighting,
// registers the stage's buffer objects with CUDA, uploads the evaluation values/indices
// and color indices to the GPU, allocates the page-locked host and device buffers for
// the coefficients and colors, compiles the shaders and finally enters the GLUT main loop.
//
// Template parameters:
//   Real , Primal -- forwarded to Tree< Real , Primal > and to the callback templates.
// Parameters:
//   argc , argv -- the command line; handed to glutInit() and cutilExit().
// Returns:
//   1, once (if ever) glutMainLoop() returns.
template< class Real , bool Primal >
int Visualize(int argc,char* argv[])
{
	window_width  = Width.value;
	window_height = Height.value;
	/////////////////////////
	// GLUT Initialization //
	/////////////////////////
	glutInit( &argc , argv ); 
	glutInitDisplayMode( GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH );
	glutInitWindowSize( window_width , window_height );
	glutCreateWindow( "Screened PoissonMesh" ); 
	glClearColor( clearColor[0] , clearColor[1] , clearColor[2] , 1.0 );

	glutDisplayFunc			( DisplayFunction< Real , Primal >	);
	glutIdleFunc			( IdleFunction< Real , Primal >		);
	glutMouseFunc			( MouseFunction< Real , Primal >	);
	glutMotionFunc			( MotionFunction< Real , Primal >	);
	glutPassiveMotionFunc	( PassiveMotionFunction< Primal >	);
	glutKeyboardFunc		( KeyboardFunction< Real , Primal >	);
	glutSpecialFunc			( SpecialFunction< Real , Primal >	);
	glutReshapeFunc			( ReshapeFunction< Primal >			);

	glEnable( GL_DEPTH_TEST );
	glEnable( GL_NORMALIZE );
	glDepthMask( GL_TRUE );
	glDisable( GL_BLEND );
	glPolygonMode( GL_FRONT_AND_BACK , GL_FILL );
	glCullFace( GL_BACK );
	glEnable( GL_CULL_FACE );


	////////////////////////
	// GUI Initialization //
	////////////////////////
	curvatureHistogram.Initialize( -1 , 1 , 100 );
	curvatureHistogram.colorFunction = ColorFunction< float >;
	curvatureHistogram.SetPosition( window_width , window_height , Point2D< float >( 0. , 0. ) , Point2D< float >( 1. , 1. ) );

	knottedPlot.Initialize( -1 , 1 , -log( fabs( PlotRange.value) ) , log( fabs( PlotRange.value ) ) );
	knottedPlot.SetDefault( 0 );
	knottedPlot.lockPlot = (showPlot!=PLOT_FRONT);
	knottedPlot.setLineWidth( PlotWidth.value );
	knottedPlot.SetPosition( window_width , window_height , Point2D< float >( 0. , 0. ) , Point2D< float >( 1. , 1. ) );

	// Scratch color, re-used for each widget that is initialized below.
	Point3D<float> sColor;

	sColor[0]=1.0;	sColor[1]=1.0;	sColor[2]=1.0;
	messageColorMode.initialize( 20, 46, sColor );
	messageColorMode.setMessage( "Color Mode: Painting" );

	sColor[0]=1.0;	sColor[1]=1.0;	sColor[2]=1.0;
	messageState.initialize( 20 , 22 , sColor );

	sColor[0]=1.0;	sColor[1]=0.0;	sColor[2]=1.0;
	messageStatus.initialize( 20 , 70 , sColor );

	if( ShowError.set )
	{
		sColor[0]=1.0;	sColor[1]=1.0;	sColor[2]=1.0;
		messageResidual.initialize( window_width/2 , 22 , sColor );
	}

	// The FPS message is offset from the right edge by the pixel width of a sample string.
	sColor = Point3D< float >( 1. , 1. , 1. );
	const unsigned char message[] = "FPS: 99.99";
	fpsOffset = glutBitmapLength( GLUT_BITMAP_HELVETICA_18 , message );
	messageFPS.initialize( window_width-sliderOffsetLeft-fpsOffset, 22, sColor );
	messageFPS.setMessage( "FPS:" );
	
	// One gradient-scale entry per curvature sample, all starting at the configured value.
	Tree< Real , Primal >::gradientScale.clear();
	Tree< Real , Primal >::gradientScale.assign( Tree< Real , Primal >::minCurvature.Dimensions(), GradientScale.value );

	sColor[0]=0.5;	sColor[1]=0.5;	sColor[2]=0.9;
	sliderGS.initialize( sliderOffsetLeft, window_height-48, 360, 12 , 20 , 2 , sliderFillColor , sliderLineColor , Point3D< float >( 255 , 126 , 0 ) / 255 , 0.0f , MaxGradientScale.value, GradientScale.value , true , false , !Minimal.set );
	
	GetRepresentativeColor( MaxGradientScale.value, GradientScale.value, sColor );
	sliderSprayGS.initialize( sliderOffsetLeft , window_height-72, 360, 12 , 20 , 2 , sliderFillColor , sColor , sColor , 0.0, MaxGradientScale.value, GradientScale.value , true , false , !Minimal.set );

	guiPlatform.add( &sliderGS );
	if( Verbose.set )
	{
		guiPlatform.add( &messageStatus      );
		guiPlatform.add( &messageColorMode   );
		guiPlatform.add( &messageState       );
	}
	if( ShowError.set) guiPlatform.add( &messageResidual );
//	if( !Minimal.set ) guiPlatform.add( &sliderSprayGS );
	if( ShowFPS.set ) guiPlatform.add( &messageFPS );

	/////////////////////////
	// Camera and Lighting //
	/////////////////////////
	if( XForm.set )
	{
		Matrix4D m;
		FILE* fp = fopen( XForm.value , "r" );
		if( !fp ) fprintf( stderr , "Could not read xForm: %s\n" , XForm.value );
		else
		{
			// The file stores 16 floats; the inner loop walks the first index of m.m.
			int entriesRead = 0;
			for( int j=0 ; j<4 ; j++ ) for( int i=0 ; i<4 ; i++ ) if( fscanf( fp , " %f " , &m.m[i][j] )==1 ) entriesRead++;
			fclose( fp );
			// Only hand the matrix to the camera if it was read completely; otherwise it
			// would contain whatever a default-constructed Matrix4D happens to hold.
			if( entriesRead==16 ) myStage.camera->set( m );
			else fprintf( stderr , "Could not read xForm: %s\n" , XForm.value );
		}
	}
	myStage.center[0] = myStage.center[1] = myStage.center[2] = 0.5;
	myStage.radius = 1.28;
	myStage.Reshape( window_width , window_height );

	GLfloat lightPosition[] = { 1.0f, 100.0f, 100.0f, 1.0 };
	GLfloat LightColor[] = { 0.8f, 0.8f, 0.8f, 1.0 };
	GLfloat White[] = { 1.0f, 1.0f, 1.0f, 1.0 }; 
	GLfloat Gray[]  = { 0.8f, 0.8f, 0.8f, 1.0 };
	glLightfv(GL_LIGHT0, GL_POSITION, lightPosition);
	glLightfv(GL_LIGHT0, GL_DIFFUSE,  LightColor);
	glLightfv(GL_LIGHT0, GL_SPECULAR, LightColor);
	glColorMaterial(GL_FRONT, GL_DIFFUSE);
	glMaterialfv(GL_FRONT, GL_SPECULAR, White);
	glMaterialfv(GL_FRONT, GL_DIFFUSE,  White);
	glMateriali (GL_FRONT, GL_SHININESS,100);
	glEnable(GL_LIGHTING);
	glEnable(GL_LIGHT0);
	glEnable(GL_COLOR_MATERIAL);
	GetRepresentativeColor( MaxGradientScale.value , 1.f , myStage.color );

	///////////////////////////////////
	// CUDA and Vertex Object Buffer //
	///////////////////////////////////
	glewInit();
	if( !glewIsSupported( "GL_VERSION_2_0 " ) ) printf( "ERROR: Support for necessary OpenGL extensions missing." );
	cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
	cutilCheckError( cutCreateTimer(&timer) );

	Point3D<float> color;
	GetRepresentativeColor( MaxGradientScale.value, GradientScale.value, color );

	myStage.setTriangleBufferObject( );
	cutilSafeCall( cudaGLRegisterBufferObject( myStage.tbo ) );
	myStage.setVertexBufferObject( );
	cutilSafeCall( cudaGLRegisterBufferObject( myStage.vbo ) );

	// Byte count of the buffer currently being uploaded (size_t so large meshes do not overflow an int).
	size_t memSize;

	const size_t supportCount = (size_t)Tree< Real , Primal >::supportCount;
	const size_t groupEntries = (size_t)Tree< Real , Primal >::E.groups * supportCount;

	// Evaluation values: flattened as [group][support] and copied to the device.
	// The device buffer is sized from fitVertices (as before); the loop below is driven by
	// E.groups, so the host staging buffer is made large enough for both and zero-filled.
	// NOTE(review): presumably E.groups == fitVertices.size() -- confirm.
	const size_t vertexEntries = (size_t)Tree< Real , Primal >::fitVertices.size() * supportCount;
	memSize = vertexEntries * sizeof(float);
	float* eValues = new float[ vertexEntries>groupEntries ? vertexEntries : groupEntries ]();
	for( int i=0 ; i<Tree< Real , Primal >::E.groups ; i++ )
		for( int j=0 ; j<Tree< Real , Primal >::supportCount ; j++ )
			eValues[ (size_t)i*supportCount + j ] = Tree< Real , Primal >::E[i][j].Value;
	cutilSafeCall( cudaMalloc((void**) &gpu_evaluationValues, memSize) );
	cutilSafeCall( cudaMemcpy(gpu_evaluationValues, eValues, memSize, cudaMemcpyHostToDevice) );
	delete[] eValues;

	// Evaluation indices: same [group][support] layout, holding the .N member of each entry.
	memSize = groupEntries * sizeof( unsigned int );
	unsigned int* eIndicesTemp = new unsigned int[ groupEntries ];
	for( int i=0 ; i<Tree< Real , Primal >::E.groups ; i++ )
		for( int j=0 ; j<Tree< Real , Primal >::supportCount ; j++ )
			eIndicesTemp[ (size_t)i*supportCount + j ] = static_cast< unsigned int >( Tree< Real , Primal >::E[i][j].N );

	cutilSafeCall( cudaMalloc((void**) &gpu_evaluationIndices, memSize) );
	cutilSafeCall( cudaMemcpy(gpu_evaluationIndices, eIndicesTemp, memSize, cudaMemcpyHostToDevice) );
	delete[] eIndicesTemp;

	// Color indices: a straight copy of Tree::nIndices converted to unsigned int.
	const size_t nIndexCount = (size_t)Tree< Real , Primal >::nIndices.size();
	memSize = nIndexCount * sizeof(unsigned int);
	unsigned int* nIndicesTemp = new unsigned int[ nIndexCount ];
	for( size_t i=0 ; i<nIndexCount ; i++ )
		nIndicesTemp[i] = static_cast< unsigned int >( Tree< Real , Primal >::nIndices[i] );

	cutilSafeCall( cudaMalloc((void**) &gpu_colorIndices, memSize) );
	cutilSafeCall( cudaMemcpy( gpu_colorIndices, nIndicesTemp , memSize, cudaMemcpyHostToDevice ) );
	delete[] nIndicesTemp;

	// Page-locked host buffer plus device buffer for the solver coefficients (4 floats per entry of pX()).
	coefficientMemSize = sizeof(float)*4*Tree< Real , Primal >::solver.pX().size( );
	cutilSafeCall( cudaMallocHost( (void**)&host_coefficientValues , coefficientMemSize ) );
	cutilSafeCall( cudaMalloc((void**) &gpu_coefficientValues , coefficientMemSize ) );

	// Page-locked host buffer plus device buffer for the per-entry gradient scales.
	colorMemSize = sizeof( float )*Tree< Real , Primal >::gradientScale.size();
	cutilSafeCall( cudaMallocHost( (void**)&host_colorValues , colorMemSize ) );
	cutilSafeCall( cudaMalloc((void**) &gpu_colorValues , colorMemSize ) );

	// Size the per-sample state and feed both curvature arrays into the histogram.
	needsUpdating.resize( Tree< Real , Primal >::minCurvature.Dimensions() );
	Tree< Real , Primal >::weightsMin.resize( Tree< Real , Primal >::minCurvature.Dimensions() );
	Tree< Real , Primal >::weightsMax.resize( Tree< Real , Primal >::maxCurvature.Dimensions() );
	for( int i=0 ; i<Tree< Real , Primal >::minCurvature.Dimensions() ; i++ )
	{
		curvatureHistogram.AddEntry( Tree< Real , Primal >::minCurvature[i] , 1 , false );
		curvatureHistogram.AddEntry( Tree< Real , Primal >::maxCurvature[i] , 1 , false );
	}
	curvatureHistogram.Normalize( );
	curvatureHistogram.SetMax( 0.9 );
	UpdateAnisotropicWeights< Real , Primal >( );

	if( Offline.set ) ForceUpdate = NO_UPDATE;
	else              ForceUpdate = UPDATE_SOLUTION;
	setShaders();

	if( NoColor.set ) colorMode = NO_COLOR;
	else              colorMode = FLEETING_COLOR;


	glutMainLoop();
	cudaThreadExit();
	cutilExit( argc , argv );

	return 1;
}