#include <cstdlib>
#include "Util/MultiStreamIO.h"

//////////////////////
// ProcessPartition //
//////////////////////
// Decides whether a band of the given dimensions is acceptable.
//   width   : band width, tested against 16*(iters+2)*WordPerDegree and against minSize
//   height  : band height, must be at least 2*Degree
//   iters   : iteration count entering the width bound
//   minSize : caller-supplied lower bound on the width
// Returns true only if every test passes.
template< class PData >
bool ProcessPartition< PData >::IsValidBandSize( int width , int height , int iters , int minSize )
{
	// Height test first, exactly as before: too few rows rejects the band outright.
	const bool tooShort = height<2*Degree;
	if( tooShort ) return false;

	// Width must satisfy both the iteration-dependent bound and the explicit minimum.
	const bool belowIterationBound = width<16*(iters+2)*WordPerDegree;
	if( belowIterationBound || width<minSize ) return false;

#if 1 // QUESTION: Don't we require that the row width be a multiple of four?
	const bool multipleOfFour = ( width&3 )==0;
	if( !multipleOfFour ) return false;
#endif
	return true;
}
// Computes the half-open column range [start,stop) of entry idx by summing
// the widths of all entries that precede it.
//   idx   : index of the entry whose bounds are requested (not range-checked here)
//   start : receives the sum of widths of entries 0..idx-1
//   stop  : receives start plus the width of entry idx
template< class PData >
void ProcessPartition< PData >::setBounds( int idx , int& start , int& stop )
{
	int offset = 0;
	for( int band=0 ; band<idx ; band++ ) offset += processData[band].width;

	start = offset;
	stop  = offset + processData[idx].width;
}
// Number of entries held in processData, narrowed to int.
template< class PData >
int ProcessPartition< PData >::size( void ) const
{
	return static_cast< int >( processData.size() );
}
// Resizes processData so that it holds sz entries.
template< class PData >
void ProcessPartition< PData >::resize( int sz )
{
	processData.resize( sz );
}
// Mutable access to entry idx of processData; this accessor adds no range check of its own.
template< class PData >
typename ProcessPartition< PData >::MyData& ProcessPartition< PData >::operator[] ( const int& idx )
{
	return processData[idx];
}
// Read-only access to entry idx of processData; this accessor adds no range check of its own.
template< class PData >
const typename ProcessPartition< PData >::MyData& ProcessPartition< PData >::operator[] ( const int& idx ) const
{
	return processData[idx];
}

///////////////////////////////
// ProcessPartitionHierarchy //
///////////////////////////////

// Number of partitions stored in levels, narrowed to int.
template< class PData >
int ProcessPartitionHierarchy< PData >::size( void ) const
{
	return static_cast< int >( levels.size() );
}
// Resizes levels so that it holds sz partitions.
template< class PData >
void ProcessPartitionHierarchy< PData >::resize( int sz )
{
	levels.resize( sz );
}
// Mutable access to the partition at position idx of levels; this accessor adds no range check of its own.
template< class PData >
ProcessPartition< PData >& ProcessPartitionHierarchy< PData >::operator[] ( const int& idx )
{
	return levels[idx];
}
// Read-only access to the partition at position idx of levels; this accessor adds no range check of its own.
template< class PData >
const ProcessPartition< PData >& ProcessPartitionHierarchy< PData >::operator[] ( const int& idx ) const
{
	return levels[idx];
}

// Builds this hierarchy from an initial partition of the image into bands.
//
// A scratch hierarchy is filled one level at a time.  Level 0 copies the widths of
// initialPartition.  Every further level is derived from the previous one:
//   * if every band of the previous level is still valid after halving its width
//     (tested at the next height), the level keeps the same bands with halved widths;
//   * otherwise adjacent bands are merged in pairs (a trailing odd band is folded into
//     the last pair).  With repeat==true the merged level keeps the summed widths and an
//     extra level is appended to the schedule; with repeat==false the summed widths are
//     halved immediately and the merge itself counts as a depth step.
// The scratch levels are then compacted into `levels`: one ProcessPartition per run of
// consecutive scratch levels with the same band count, annotated with startDepth/endDepth.
//
//   initialPartition : starting bands; only size() and [j].width are read
//   height           : height tested for level 0; later levels test height>>(depth+1)
//   iters , minSize  : forwarded unchanged to ProcessPartition::IsValidBandSize
//   depths           : number of levels requested beyond level 0
//   repeat           : selects the merge behaviour described above
//
// Returns false (leaving `levels` untouched) if an initial band or a merged band fails
// IsValidBandSize.  Terminates the process with a failure status if a merge is required
// while fewer than two bands remain.
template< class PData >
template< class TData >
bool ProcessPartitionHierarchy< PData >::Initialize( const ProcessPartition< TData >& initialPartition , int height , int iters , int depths , bool repeat , int minSize )
{
	// Scratch hierarchy; in repeat mode every merge adds a level, so twice the room is reserved.
	ProcessPartitionHierarchy temp;
	if( repeat ) temp.resize( (depths+1)*2 );
	else		 temp.resize( (depths+1) );
	temp[0].resize( initialPartition.size() );
#if SHOW_PARTITION
	{
		IOServer::StdoutLock lock;
		printf( "Process Partition:\n" ) , fflush( stdout );
		for( int j=0 ; j<initialPartition.size() ; j++ )
		{
			printf( "[%6d" , initialPartition[j].width );
			int spaces = 6*1 + 2*(1-1) - 6;
			for( int k=0 ; k<spaces ; k++ ) printf( " " );
			printf( "]" );
		}
		printf( "\n" ) , fflush( stdout );
	}
#endif // SHOW_PARTITION


	// Level 0: every initial band must already be valid at full height.
	for( int j=0 ; j<initialPartition.size() ; j++ )
	{
		if( !ProcessPartition< PData >::IsValidBandSize( initialPartition[j].width , height , iters , minSize ) ) return false;
		temp[0][j].width = initialPartition[j].width;
	}
	int myDepth = depths;	// index of the last scratch level; grows with each merge in repeat mode
	int depth = 0;			// number of halvings applied so far (selects the height to test against)
	for( int i=1 ; i<=myDepth ; i++ )
	{
		// A merge is needed as soon as one band of the previous level would become invalid when halved.
		bool downSample = false;
		for( int j=0 ; j<temp[i-1].size() ; j++ ) if( !ProcessPartition< PData >::IsValidBandSize( temp[i-1][j].width>>1 , height>>(depth+1) , iters , minSize ) ) downSample = true;
		if( downSample )
		{
			if( repeat ) myDepth++;
			else		 depth++;

			// Merging needs at least two bands; this is unrecoverable, so exit with a failure status.
			if( temp[i-1].size()<2 )
			{
				fprintf( stderr , "[ERROR] Attempting to merge with only one thread\n" );
				exit( EXIT_FAILURE );
			}
			temp[i].resize( temp[i-1].size()>>1 );

			for( int j=0 ; j<temp[i  ].size() ; j++ ) temp[i][j].width =  0;
			for( int j=0 ; j<temp[i-1].size() ; j++ )
			{
				// Bands 2k and 2k+1 go to merged band k; a trailing odd band joins the last merged band.
				int jj = j>>1;
				if( jj>=temp[i].size() ) jj = temp[i].size()-1;
				temp[i][jj].width += temp[i-1][j].width;
				temp[i][jj].children.push_back( j );
			}
			for( int j=0 ; j<temp[i].size() ; j++ )
			{
				if( repeat )
				{
					// Widths stay un-halved at this level; only check that the next halving will be valid.
					if( !ProcessPartition< PData >::IsValidBandSize( temp[i][j].width>>1 , height>>(depth+1) , iters , minSize ) )
					{
						IOServer::printfID( "ProcessPartition::IsValidBandSize( %d , %d , %d , %d ) failed\n" , temp[i][j].width>>1 , height>>(depth+1) , iters , minSize ) , fflush( stdout );
						return false;
					}
				}
				else
				{
					// Merge and halve in one step, then validate the halved width.
					temp[i][j].width >>= 1;
					if( !ProcessPartition< PData >::IsValidBandSize( temp[i][j].width , height>>(depth+1) , iters , minSize ) )
					{
						IOServer::printfID( "ProcessPartition::IsValidBandSize( %d , %d , %d , %d ) failed\n" , temp[i][j].width , height>>(depth+1) , iters , minSize ) , fflush( stdout );
						return false;
					}
				}
			}
		}
		else
		{
			// No merge needed: same bands, half the width, each band has itself as its only child.
			depth++;
			temp[i].resize( temp[i-1].size() );
			for( int j=0 ; j<temp[i-1].size() ; j++ )
			{
				temp[i][j].width = temp[i-1][j].width>>1;
				temp[i][j].children.push_back( j );
			}
		}
#if SHOW_PARTITION
		{
			IOServer::StdoutLock lock;
			for( int j=0 ; j<temp[i].size() ; j++ )
			{
				printf( "[%6d" , temp[i][j].width );
				int spaces = 6*temp.leaves(i,j) + 2*(temp.leaves(i,j)-1) - 6;
				for( int k=0 ; k<spaces ; k++ ) printf( " " );
				printf( "]" );
			}
			printf("\n") , fflush( stdout );
		}
#endif // SHOW_PARTITION
	}

	// Compaction: count the runs of scratch levels that share a band count ...
	int count = 1;

	for( int i=1 ; i<=myDepth ; i++ ) if( temp[i].size() != temp[i-1].size() ) count++;
	levels.resize( count );
	levels[0].startDepth = 0;
	levels[0].resize( temp[0].size() );
	for( int j=0 ; j<levels[0].size() ; j++ )
	{
		levels[0][j].width = temp[0][j].width;
		levels[0][j].children.resize( temp[0][j].children.size() );
		for( size_t k=0 ; k<(*this)[0][j].children.size() ; k++ ) (*this)[0][j].children[k] = temp[0][j].children[k];
	}

	// ... and emit one output level at every scratch level where the band count changes.
	count = 1;
	for( int i=1 ; i<=myDepth ; i++ )
		if( temp[i].size() != temp[i-1].size() )
		{
			if( repeat )
			{
				// Each earlier merge added one scratch level, so i-count is the depth shared by both sides.
				(*this)[count-1].endDepth   = i-count;
				(*this)[count  ].startDepth = i-count;
			}
			else
			{
				(*this)[count-1].endDepth   = i-1;
				(*this)[count  ].startDepth = i;
			}
			(*this)[count].resize( temp[i].size() );
			for( int j=0 ; j<(*this)[count].size() ; j++ )
			{
				(*this)[count][j].width = temp[i][j].width;
				(*this)[count][j].children.resize( temp[i][j].children.size() );
				for( size_t k=0 ; k<(*this)[count][j].children.size() ; k++ ) (*this)[count][j].children[k] = temp[i][j].children[k];
			}
			count++;
		}
	// The last output level always extends to the requested depth.
	(*this)[count-1].endDepth = depths;
	return true;
}
// Counts the level-0 entries reachable from entry `offset` of level `depth`
// by following the children lists downwards.  An entry at level 0 counts as one leaf.
template< class PData >
int ProcessPartitionHierarchy< PData >::leaves( const int& depth , const int& offset ) const
{
	if( depth==0 ) return 1;

	const typename ProcessPartition< PData >::MyData& node = (*this)[depth][offset];
	int total = 0;
	for( size_t c=0 ; c<node.children.size() ; c++ ) total += leaves( depth-1 , node.children[c] );
	return total;
}

/////////////////////
// MultigridThread //
/////////////////////
// Default constructor: puts every pointer member touched here into its null state.
// No solver storage is allocated until _init runs.
template< int PixelChannels , int LabelChannels , class StorageType , class SyncType , class PixelType , class LabelType >
MultigridThread< PixelChannels , LabelChannels , StorageType , SyncType , PixelType , LabelType >::MultigridThread( void )
{
	// Input streams
	labels    = NULL;
	pixels    = labels;
	lowPixels = pixels;

	// Solver bookkeeping
	_solvers = NullPointer< Pointer( SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType > ) >();
	_solverInfo = NullPointer< SolverInfo< PixelChannels > >( );

	// Restriction source
	_sDivergence = NULL;
	_sRestriction = NULL;
}
// Destructor: releases the solver-info array, every per-block solver array and the
// array of solver arrays, then the divergence object.  _sRestriction is only reset,
// never deleted here.
template< int PixelChannels , int LabelChannels , class StorageType , class SyncType , class PixelType , class LabelType >
MultigridThread< PixelChannels , LabelChannels , StorageType , SyncType , PixelType , LabelType >::~MultigridThread( void  )
{
	FreePointer( _solverInfo );

	if( _solvers )
	{
		int block = 0;
		while( block<_blockCount )
		{
			DeletePointer( _solvers[block] );
			block++;
		}
		DeletePointer( _solvers );
	}

	if( _sDivergence )
	{
		delete _sDivergence;
		_sDivergence = NULL;
	}
	_sRestriction = NULL;
}
// Stores the per-thread configuration, allocates one solver array per block and wires
// the solvers of each block into a chain.
//   blockData / blockCount : blocks handled by this thread (stored, not copied)
//   globalData             : source of vCycles, verbose, periodicType, lanes and unknownType
//   showProgress           : assigned to the last solver of block 0
//   inCore                 : negated into _outOfCore
//   pixels                 : when true, a divergence object is created and attached as the
//                            restriction child of the last solver of block 0
//   lowPixels , labels     : not read in this function
template< int PixelChannels , int LabelChannels , class StorageType , class SyncType , class PixelType , class LabelType >
void MultigridThread< PixelChannels , LabelChannels , StorageType , SyncType , PixelType , LabelType >::_init
(
	Pointer( ProcessingBlockData ) blockData , int blockCount , const GlobalData& globalData , bool showProgress , bool inCore , bool lowPixels , bool pixels , bool labels
)
{
	// Plain configuration copies
	_blockData    = blockData;
	_blockCount   = blockCount;
	_outOfCore    = !inCore;
	_periodicType = globalData.periodicType;
	_verbose      = globalData.verbose;
	_vCycles      = globalData.vCycles;

	// One SolverInfo slot per solver, summed over all blocks
	int totalLevels = 0;
	for( int b=0 ; b<_blockCount ; b++ ) totalLevels += _blockData[b].pData.depths();
	_solverInfo = AllocPointer< SolverInfo< PixelChannels > >( totalLevels );

	_solvers = NewPointer< Pointer( SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType > ) >( _blockCount );
	for( int b=0 ; b<_blockCount ; b++ )
	{
		const int levelCount = _blockData[b].pData.depths();
		_solvers[b] = NewPointer< SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType > >( levelCount );
		for( int l=0 ; l<levelCount ; l++ )
		{
			// parent points at the previous index, both child links at the next index
			if( l>0 ) _solvers[b][l].parent = &_solvers[b][l-1];
			if( l<levelCount-1 )
			{
				_solvers[b][l].pChild = &_solvers[b][l+1];
				_solvers[b][l].rChild = _solvers[b][l].pChild;
			}
			_solvers[b][l].laneNum = globalData.lanes;
		}
		// The last solver of a block starts without a restriction child
		_solvers[b][levelCount-1].rChild = NULL;
	}

	SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >& topSolver = _solvers[0][ _blockData[0].pData.depths()-1 ];
	topSolver.showProgress = showProgress;

	if( pixels )
	{
		// The divergence object feeds the last solver of block 0 and doubles as the restriction source
		_sDivergence = new SocketedStreamingDivergence< PixelChannels , LabelChannels , PixelType , LabelType , StorageType , SyncType >();
		_sDivergence->unknownType = globalData.unknownType;
		_sDivergence->parent = &topSolver;
		_sRestriction = _sDivergence;

		topSolver.rChild = _sRestriction;
	}
}

// Configures the thread and initializes the head of every block's solver chain.
// After _init has allocated and wired the solvers, the input streams are attached to
// the I/O server.  Then, per block:
//   * block 0 with a pixel stream initializes the divergence object;
//   * any other case initializes the last solver of the block, with or without the
//     iWeight argument depending on whether globalData.iWeight is non-zero.
// After each block the four stencils are overwritten with those of that block's solver 0,
// so the caller and the next block see the updated values.  Finally consecutive blocks
// are linked: solver 0 of block b-1 gets the last solver of block b as parent, and that
// solver gets solver 0 of block b-1 as both child links.
template< int PixelChannels , int LabelChannels , class StorageType , class SyncType , class PixelType , class LabelType >
void MultigridThread< PixelChannels , LabelChannels , StorageType , SyncType , PixelType , LabelType >::Initialize
(
	const std::vector< std::pair< LabelData< LabelType , LabelChannels > , GradientAverage< PixelChannels > > >& gradientAverage ,
	MultiStreamIOServer* multiStreamIOServer ,
	Pointer( ProcessingBlockData ) blockData , int blockCount ,
	const GlobalData& globalData , bool showProgress , bool inCore ,
	DotProductStencil& dotMajor , DotProductStencil& d2DotMajor ,
	DotProductStencil& dotMinor , DotProductStencil& d2DotMinor
)
{
	double iWeight = globalData.iWeight;
	double gScale = globalData.gScale;
	int iters = globalData.iters;

	_init( blockData , blockCount , globalData , showProgress , inCore , lowPixels!=NULL , pixels!=NULL , labels!=NULL );

	// Attach whichever input streams exist to the server
	this->multiStreamIOServer = multiStreamIOServer;
	if( lowPixels ) lowPixels->SetServer( multiStreamIOServer ); // QUESTION
	if( pixels ) pixels->SetServer( multiStreamIOServer ); // QUESTION
	if( labels ) labels->SetServer( multiStreamIOServer ); // QUESTION

	for( int b=0 ; b<_blockCount ; b++ )
	{
		if( pixels && b==0 )
		{
			// Block 0 is driven by the divergence object that _init stored in _sRestriction
			SocketedStreamingDivergence< PixelChannels , LabelChannels , PixelType , LabelType , StorageType , SyncType >* divergence =
				(SocketedStreamingDivergence< PixelChannels , LabelChannels , PixelType , LabelType , StorageType , SyncType >*)_sRestriction;
			divergence->Initialize(
				lowPixels , pixels , labels , iWeight , gScale , blockData[0].pData.start , blockData[0].pData.stop , blockData[0].pData.width , blockData[0].pData.height , iters ,
				blockData[0].leftStream , blockData[0].syncSockets , blockData[0].rightStream ,
				_outOfCore , _periodicType , multiStreamIOServer , &gradientAverage
				);
		}
		else
		{
			SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >& topSolver = _solvers[b][ _blockData[b].pData.depths()-1 ];
			if( globalData.iWeight )
			{
				topSolver.Initialize( dotMajor , d2DotMajor , dotMinor , d2DotMinor , iWeight , blockData[b].pData.start , blockData[b].pData.stop , blockData[b].pData.width , blockData[b].pData.height , globalData.iters ,
					blockData[b].leftStream , blockData[b].syncSockets , blockData[b].rightStream ,
					_outOfCore , _periodicType
					, multiStreamIOServer
					);
			}
			else
			{
				topSolver.Initialize( dotMajor , d2DotMajor , dotMinor , d2DotMinor, blockData[b].pData.start , blockData[b].pData.stop , blockData[b].pData.width , blockData[b].pData.height , globalData.iters ,
					blockData[b].leftStream , blockData[b].syncSockets , blockData[b].rightStream ,
					_outOfCore , _periodicType , multiStreamIOServer
					);
			}
		}

		// Hand the stencils of this block's solver 0 on to the next block (and back to the caller)
		SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >& bottomSolver = _solvers[b][0];
		dotMajor   = bottomSolver.dotMajor;
		dotMinor   = bottomSolver.dotMinor;
		d2DotMajor = bottomSolver.d2DotMajor;
		d2DotMinor = bottomSolver.d2DotMinor;
	}

	// Chain consecutive blocks together
	for( int b=1 ; b<_blockCount ; b++ )
	{
		SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >& prevBottom = _solvers[b-1][0];
		SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >& nextTop    = _solvers[b][ _blockData[b].pData.depths()-1 ];
		prevBottom.parent = &nextTop;
		nextTop.pChild = &prevBottom;
		nextTop.rChild = nextTop.pChild;
	}
}
// Thread entry point: runs thread->_vCycles V-cycles over the solver chain of the
// MultigridThread passed in through vparams.
//
// Every cycle consists of a restriction pass followed by a prolongation pass.  Before each
// pass the per-solver statistics are reset and the stream endpoints (inX/inB/outX/outB/outR)
// of the first and last solver of every block are rewired.  After each pass the per-solver
// norms and averaged solution sums are copied into thread->_solverInfo, optionally printed,
// and sent to each block's server socket.
//
// When more than one cycle runs and a restriction source (_sRestriction) exists, two scratch
// streams are created: _X carries the solution and _B the constraints of the last solver of
// block 0 from one cycle to the next.  They are released in the final cycle, once nothing
// refers to them any more.
//
// Always returns 0.
template< int PixelChannels , int LabelChannels , class StorageType , class SyncType , class PixelType , class LabelType >
THREAD_FUNCTION_OUT MultigridThread< PixelChannels , LabelChannels , StorageType , SyncType , PixelType , LabelType >::RunThread( THREAD_FUNCTION_IN vparams )
{
#if MISHA_DENORMAL_CONTROL
	_MM_SET_FLUSH_ZERO_MODE( _MM_FLUSH_ZERO_ON );
#endif // MISHA_DENORMAL_CONTROL

	MultigridThread* thread = ( MultigridThread* )vparams;

	double t;

	// Scratch streams that persist the block-0 solution / constraints between V-cycles
	MultiStreamIOClient* _X = NULL;
	MultiStreamIOClient* _B = NULL;

	// [Q] Don't we need to store the solution even if we don't have use _sRestriction?
	// [Q] Why does the usage of _X depend on whether on the block index is 0?
	// [A] Because block index 0 corresponds to the component of depths containing the highest resolution.
	//     In contrast, block thread->blockCount-1 is the component with the coarsest resolution.
	if( thread->_vCycles>1 && thread->_sRestriction )
	{
		_X = new MultiStreamIOClient( (thread->_blockData->pData.stop-thread->_blockData->pData.start) * sizeof( SyncType )*PixelChannels , thread->_blockData->pData.height , STREAMING_GRID_BUFFER_MULTIPLIER , NULL , true );
		_B = new MultiStreamIOClient( (thread->_blockData->pData.stop-thread->_blockData->pData.start) * sizeof( SyncType )*PixelChannels , thread->_blockData->pData.height , STREAMING_GRID_BUFFER_MULTIPLIER , NULL , true );
	}

	// The two ends of the whole chain: last solver of block 0 and solver 0 of the last block
	SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* highSolver = &thread->_solvers[0][thread->_blockData[0].pData.depths()-1];
	SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* lowSolver  = &thread->_solvers[thread->_blockCount-1][0];
	for( int ii=0 ; ii<thread->_vCycles ; ii++ )
	{
		// Only the first cycle is fed by the restriction source; later cycles start from highSolver itself
		if( !ii ) thread->_solvers[0][thread->_blockData[0].pData.depths()-1].rChild = thread->_sRestriction;
		else      thread->_solvers[0][thread->_blockData[0].pData.depths()-1].rChild = NULL;
		/////////////////
		// RESTRICTION //
		/////////////////
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* solvers = thread->_solvers[b];
			// Reset the statistics gathered during the pass
			for( int i=0 ; i<thread->_blockData[b].pData.depths() ; i++ )
			{
				solvers[i].bSquareNorm = solvers[i].rSquareNorm = solvers[i].xSquareNorm = 0;
#if TIME_IO
				solvers[i].vSync = solvers[i].hSync = solvers[i].rSync = 0;
#endif // TIME_IO
				for ( int c = 0 ; c < PixelChannels ; c++ )	solvers[i].solutionSum[c] = 0;
				solvers[i].setResidual = true;
			}
			solvers[0].inX = NULL;
			solvers[0].inB = NULL;
			if( thread->_sRestriction ) // This is the bottom of the tree so the data can't come in from anywhere...
			{
				// From the second cycle on, block 0 reads back the solution saved in _X
				if( ii && !b ) solvers[thread->_blockData[b].pData.depths()-1].inX = _X;
				else		   solvers[thread->_blockData[b].pData.depths()-1].inX = NULL;
			}
			else               solvers[thread->_blockData[b].pData.depths()-1].inX = NULL;
			solvers[thread->_blockData[b].pData.depths()-1].inB = thread->_blockData[b].inHighB;
			solvers[thread->_blockData[b].pData.depths()-1].outR = NULL;
			solvers[thread->_blockData[b].pData.depths()-1].outX = NULL;
			solvers[thread->_blockData[b].pData.depths()-1].outB = NULL;
			solvers[0].outX = NULL;
			solvers[0].outB = NULL;
			solvers[0].outR = thread->_blockData[b].outLowR;
			// Block 0 with a restriction source: the first cycle writes its constraints to _B,
			// every later cycle reads them back from _B
			if( thread->_sRestriction && !b )
			{
				if( ii ) solvers[thread->_blockData[b].pData.depths()-1].inB   = _B;
				else     solvers[thread->_blockData[b].pData.depths()-1].outB  = _B;
			}
		}
		// Data for the interleaved streaming multigrid
		t=Time();
		// Initialize
		if( ii || !thread->_sRestriction ) highSolver->InitRestriction() , highSolver->SetRestriction();
		else thread->_sRestriction->InitRestriction() , thread->_sRestriction->SetRestriction();
		// Solve
		// BADNESS!!! Why do I have to comment this out?
//		if( ii ) in->SetServer(&SocketedStreamingSolver<Channels>::server);
		if( ii || !thread->_sRestriction ) highSolver->SolveRestriction();
		else							   thread->_sRestriction->SolveRestriction();

		t = Time()-t;
		// Collect the per-solver statistics, blocks laid out back to back in _solverInfo
		int idx = 0;
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* solvers = thread->_solvers[b];
			for( int i=0 ; i<thread->_blockData[b].pData.depths() ; i++ )
			{
				thread->_solverInfo[idx].bSquareNorm = solvers[i].bSquareNorm;
				thread->_solverInfo[idx].rSquareNorm = solvers[i].rSquareNorm;
				thread->_solverInfo[idx].xSquareNorm = solvers[i].xSquareNorm;
				for( int c=0 ; c<PixelChannels ; c++ )
				{
					// Turn the accumulated sum into an average over major x minor
					thread->_solverInfo[idx].solutionSum[c]  = solvers[i].solutionSum[c];
					thread->_solverInfo[idx].solutionSum[c] /= solvers[i].major;
					thread->_solverInfo[idx].solutionSum[c] /= solvers[i].minor;
				}
				idx++;
			}
		}
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* solvers = thread->_solvers[b];
			char id[512];
			SetThisThreadID( id );
			{
				IOServer::StdoutLock lock;
				if( thread->_verbose )
				{
					printf( "Thread Restriction [%s]:    %f\n" , id , t ) , fflush( stdout );
#if TIME_IO
					if( !ii && thread->_sRestriction && !b )
						printf( "\tImage Synchronization Time ( vertical / horizontal ): %.3f / %.3f\n" , thread->_sDivergence->vSync , thread->_sDivergence->hSync ) , fflush( stdout );
#endif // TIME_IO
					for( int i=thread->_blockData[b].pData.depths()-1 ; i>=0 ; i-- )
					{
						printf( "\tError[%d x %d] %g -> %g\n" , solvers[i].size() , solvers[i].minor , sqrt( solvers[i].bSquareNorm ) , sqrt( solvers[i].rSquareNorm ) ) , fflush( stdout );
#if TIME_IO
						printf( "\t      Synchronization Time ( vertical / horizontal / depth ): %.3f / %.3f / %.3f\n" , solvers[i].vSync , solvers[i].hSync , solvers[i].rSync ) , fflush( stdout );
#endif // TIME_IO
					}
				}
			}
		}

		// Report the restriction statistics of every block to its server
		Pointer( SolverInfo< PixelChannels > ) solverInfo = thread->_solverInfo;
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SendOnSocket ( thread->_blockData[b].serverSocket , ( ConstPointer( SolverInfo< PixelChannels > ) )solverInfo , sizeof( SolverInfo<PixelChannels> )*thread->_blockData[b].pData.depths() , "Failed to send restriction info to server" );
			solverInfo += thread->_blockData[b].pData.depths();
		}

		if( !ii && thread->_sRestriction )
		{
			{
				// The first cycle additionally reports the average computed by the divergence object
				SendOnSocket ( thread->_blockData[0].serverSocket , ( ConstPointer( AverageColor< PixelChannels > ) )GetPointer( thread->_sDivergence->average ) , sizeof(thread->_sDivergence->average) , "Failed to send average to server" );
				thread->_sDivergence->UnSetRestriction();
			}
		}
		else highSolver->UnSetRestriction();
		//////////////////
		// PROLONGATION //
		//////////////////
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType>* solvers = thread->_solvers[b];
			for( int i=0 ; i<thread->_blockData[b].pData.depths() ; i++ )
			{
				solvers[i].bSquareNorm = solvers[i].rSquareNorm = solvers[i].xSquareNorm = 0;
#if TIME_IO
				solvers[i].vSync = solvers[i].hSync = solvers[i].rSync = 0;
#endif // TIME_IO
				for ( int c=0 ; c<PixelChannels ; c++ )	solvers[i].solutionSum[c] = 0;
				// Residuals are only needed here when they are going to be printed
				solvers[i].setResidual = thread->_verbose;
			}
			// Clear everything first
			solvers[thread->_blockData[b].pData.depths()-1].inX = NULL;
			solvers[thread->_blockData[b].pData.depths()-1].inB = NULL;
			solvers[thread->_blockData[b].pData.depths()-1].outB = NULL;
			solvers[thread->_blockData[b].pData.depths()-1].outR = NULL;
			solvers[0].outX = NULL;
			solvers[0].outB = NULL;
			solvers[0].outR = NULL;

			// Then overwrite if need be
			solvers[0].inX = thread->_blockData[b].inLowX;
			solvers[0].inB = NULL;
			if( ii!=thread->_vCycles-1 && thread->_sRestriction && !b ) solvers[thread->_blockData[b].pData.depths()-1].outX = _X;
			else
			{
				solvers[thread->_blockData[b].pData.depths()-1].outR = thread->_blockData[b].outHighR;
				solvers[thread->_blockData[b].pData.depths()-1].outX = thread->_blockData[b].outHighX;
				// Release the scratch streams only in the final cycle.  In earlier cycles this
				// branch is also reached for blocks other than 0, at which point block 0 has just
				// been pointed at _X and the next cycle still needs both _X and _B.
				if( ii==thread->_vCycles-1 )
				{
					if( _X ) delete _X , _X = NULL;
					if( _B ) delete _B , _B = NULL;
				}
			}

		}
		// Solve the prolongation
		t=Time();
		// Set the child dependencies
		// Initialize
		highSolver->InitProlongation();
		lowSolver->SetProlongation();
		// Solve
		// BADNESS!!! Why do I have to comment this out?
//		if(ii<_vCycles-1)	_X->SetServer(&StreamingSolver<Real,Type,Degree,Channels>::server);
		lowSolver->SolveProlongation();

		t = Time() - t;
		idx = 0;
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* solvers = thread->_solvers[b];
			for( int i=0 ; i<thread->_blockData[b].pData.depths() ; i++ )
			{
				thread->_solverInfo[idx].bSquareNorm = solvers[i].bSquareNorm;
				thread->_solverInfo[idx].rSquareNorm = solvers[i].rSquareNorm;
				thread->_solverInfo[idx].xSquareNorm = solvers[i].xSquareNorm;
				for( int c=0 ; c<PixelChannels ; c++ )
				{
					thread->_solverInfo[idx].solutionSum[c]  = solvers[i].solutionSum[c];
					thread->_solverInfo[idx].solutionSum[c] /= solvers[i].major;
					thread->_solverInfo[idx].solutionSum[c] /= solvers[i].minor;
				}
				idx++;
			}
		}
		{
			char id[512];
			SetThisThreadID( id );
			{
				IOServer::StdoutLock lock;
				if( thread->_verbose )
				{
					printf( "Thread Prolongation [%s]:    %f\n" , id , t ) , fflush( stdout );
					for( int b=thread->_blockCount-1 ; b>=0 ; b-- )
					{
						SocketedMultiGridStreamingSolver< PixelChannels , StorageType , SyncType >* solvers = thread->_solvers[b];
						for( int i=0 ; i<thread->_blockData[b].pData.depths() ; i++ )
						{
							printf( "\tError[%d x %d] %g -> %g\n" , solvers[i].size() , solvers[i].minor , sqrt( solvers[i].bSquareNorm ) , sqrt( solvers[i].rSquareNorm ) ) , fflush( stdout );
#if TIME_IO
							printf( "\t      Synchronization Time ( vertical / horizontal / depth ): %.3f / %.3f / %.3f\n" , solvers[i].vSync , solvers[i].hSync , solvers[i].rSync ) , fflush( stdout );
#endif // TIME_IO
						}
					}
				}
			}
		}
		// Report the prolongation statistics of every block to its server
		solverInfo = thread->_solverInfo;
		for( int b=0 ; b<thread->_blockCount ; b++ )
		{
			SendOnSocket ( thread->_blockData[b].serverSocket , ( ConstPointer( SolverInfo< PixelChannels > ) )solverInfo , sizeof( SolverInfo< PixelChannels > )*thread->_blockData[b].pData.depths() , "Failed to send prolongation info to server" );
			solverInfo += thread->_blockData[b].pData.depths();
		}
		lowSolver->UnSetProlongation();
	}
	return 0;
}
