/*
	Challenge6_TestHarness.cpp

	Build a Win32 Console app with this code.
*/

#define WIN32_LEAN_AND_MEAN
#include <math.h>
#include <stdio.h>
#include <windows.h>
#include <algorithm>
#include <emmintrin.h>
using namespace std;

// !!Replace with your machine speed!!
// Clock frequency in GHz. HDEntryC::CycleToMs multiplies it by 1e9 to turn
// a cycle count into milliseconds, so a wrong value skews the reported times
// (the cycles-per-element figure is unaffected).
#define CPU_GHZ 3.6

/*
	Function: Choose64_6_jhejl

		Write a brief summary of what your entry does, and how...
		And, please, replace "jhejl" with your name.

		What the harness expects (see HDEntryC::CheckResults): the entry
		writes NUM_ELEMENTS values to pDest, every value has exactly 4 bits
		set, and no value appears twice.  HDEntryC::Execute passes the global
		outdata buffer as pDest, zeroed before the first (validation) call.

		As shipped the body is empty, so the buffer stays all-zero and the
		harness reports the entry as failed.
*/

void Choose64_6_jhejl(unsigned __int64 *pDest)
{
    // your code here
}

// Number of values every entry must produce.  635376 = C(64,4), i.e. the
// number of distinct 64-bit values with exactly 4 bits set, which is the
// property HDEntryC::CheckResults verifies.
#define NUM_ELEMENTS 635376

// for qsort
//
// Three-way comparator for the unsigned 64-bit result array.
//   arg1, arg2 - pointers to the two elements being compared.
//   returns    - -1 if *arg1 < *arg2, +1 if *arg1 > *arg2, 0 if they are equal.
//
// The elements are read as unsigned (the type of the array being sorted) and
// through pointers-to-const, so values with bit 63 set sort last instead of
// being treated as negative numbers.
int compare( const void *arg1, const void *arg2 )
{
	const unsigned __int64 a = *static_cast<const unsigned __int64 *>(arg1);
	const unsigned __int64 b = *static_cast<const unsigned __int64 *>(arg2);
	return (a < b) ? -1 : ((a > b) ? +1 : 0);
}

// Output buffer shared by all entries: HDEntryC::Execute clears it, hands it to
// the entry under test and then to CheckResults.  Declared with 32-byte alignment.
__declspec(align(32)) unsigned __int64 outdata[NUM_ELEMENTS];

// One benchmark entry: the function under test, its display name and its
// measured cost.  main() creates instances with brace initialization, so the
// three data members are filled in declaration order (mFunc, mName,
// m_fCyclesPerElement); keep that order and do not add constructors.
class HDEntryC
{
public:

	// Reads the CPU time-stamp counter.  The body is a bare RDTSC with no
	// return statement.
	// NOTE(review): this presumably relies on the 32-bit MSVC convention of
	// returning 64-bit integers in EDX:EAX, which is where RDTSC leaves its
	// result -- confirm before building for any other compiler or target.
	__forceinline unsigned __int64 GetCycle() { __asm rdtsc }

	// Validates mFunc once, then times numIterations calls and stores the
	// average number of cycles per output element in m_fCyclesPerElement.
	void Execute(long numIterations);

	// Returns nonzero when results holds NUM_ELEMENTS values that each have
	// exactly 4 bits set and are all different; prints the first violation
	// otherwise.  Sorts results in place as a side effect.
	int CheckResults(unsigned __int64 *results);
	// Prints the timing line for this entry; prints nothing when
	// m_fCyclesPerElement is not positive.
    void PrintStats();

    // Function under test; a null pointer marks the end of the list in main().
    void		(*mFunc)(unsigned __int64 *);
    // Name used in the printed report.
    char        mName[512];
    // Average cycles per output element; reset to 0 at the start of Execute
    // and left at 0 when the entry fails validation.
    float       m_fCyclesPerElement;

private:
	// Number of set bits in val.
	int CountBits(__int64 val);
	// Converts a cycle count to milliseconds using CPU_GHZ.
	float CycleToMs(__int64 cycle);
};

// Returns the number of set bits in val.
//
// Each loop step clears the lowest set bit (x &= x - 1), so the loop runs once
// per set bit.  The arithmetic is done on the unsigned bit pattern: with the
// signed type, "val - 1" overflows (undefined behaviour) as soon as bit 63 is
// the only bit left, which happens for every result that has the top bit set.
int HDEntryC::CountBits(__int64 val)
{
	unsigned __int64 bits = static_cast<unsigned __int64>(val);
	int count = 0;
	for (; bits != 0; bits &= bits - 1)
	{
		++count;
	}
	return count;
}

// Converts a cycle count into milliseconds, taking CPU_GHZ as the clock rate.
//   cycle   - number of CPU cycles.
//   returns - the equivalent time in milliseconds, narrowed to float.
float HDEntryC::CycleToMs(__int64 cycle)
{
	// Clock rate in cycles per second.
	const double cyclesPerSecond = CPU_GHZ * 1000000000.0;
	const double seconds = static_cast<double>(cycle) / cyclesPerSecond;
	const double milliseconds = seconds * 1000.0;
	return static_cast<float>(milliseconds);
}

void HDEntryC::Execute(long numIterations)
{
	unsigned __int64 cycle;

	// init
	m_fCyclesPerElement = 0;
	memset(outdata,0,635376*8);

	// check result
	mFunc(outdata);
	if (CheckResults(outdata)==false)
	{
		printf("%s failed.\n",mName);
		return;
	}

	// time
	for (int iCnt=0; iCnt<numIterations; iCnt++)
	{
		cycle = GetCycle();
		mFunc(outdata);
		cycle = GetCycle() - cycle;

		m_fCyclesPerElement += ((float)cycle)/((float)NUM_ELEMENTS);
	}
	m_fCyclesPerElement /= numIterations;
}

// Verifies an entry's output.
//   results - array of NUM_ELEMENTS values; it is sorted in place.
//   returns - nonzero (true) when every value has exactly 4 bits set and no
//             value occurs twice, zero (false) otherwise.  The first
//             violation found is printed.
int HDEntryC::CheckResults(unsigned __int64 *results)
{
	// Check the values all have exactly 4 bits set
	for (int i = 0; i < NUM_ELEMENTS; i++)
	{
		const int bitsSet = CountBits(results[i]);
		if (bitsSet != 4)
		{
			printf("Error: result %d (%I64x) has %d bits set\n", i, results[i], bitsSet);
			return false;
		}
	}

	// Sort the array so that equal values end up next to each other
	qsort(results, NUM_ELEMENTS, sizeof(results[0]), compare);

	// Check there are no duplicates (...and number theory takes care of the rest ;) )
	// There are exactly NUM_ELEMENTS 64-bit values with 4 bits set, so
	// NUM_ELEMENTS distinct ones must be all of them.  The bit count needs no
	// re-checking here: the loop above already proved it for every element.
	for (int i = 0; i < (NUM_ELEMENTS - 1); i++)
	{
		if (results[i] == results[i + 1])
		{
			printf("Error: duplicated result (%I64x)\n", results[i]);
			return false;
		}
	}
	return true;
}

void HDEntryC::PrintStats()
{
	if (m_fCyclesPerElement > 0.0f)
	{
		unsigned long uiCycles =(unsigned long)(m_fCyclesPerElement*NUM_ELEMENTS);
		float fMs = CycleToMs(uiCycles);
		printf("%s\tCycles per element: %f (%f Ms)\n",mName,m_fCyclesPerElement,fMs);
	}
	else
	{
		// entry was failure
	}
}

int main()
{
    HDEntryC    entryList[] = 
    {   
		// add new entries here

        {Choose64_6_jhejl,"Choose64_6_jhejl",0.0f},

		{0,0}, // list terminator
    };

    HDEntryC    *pEntry;

    SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_TIME_CRITICAL);

    for (pEntry = entryList; pEntry->mFunc; pEntry++)
    {
		// number of executions to run (timing is an average)
		// increase this number until you get stable results
        pEntry->Execute(50);  
    }
    for (pEntry = entryList; pEntry->mFunc; pEntry++)
    {
        pEntry->PrintStats();
    }
    return 0;
}
