///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2004 Intel Corporation 
// All rights reserved. 
//
// Redistribution and use in source and binary forms, with or without 
// modification, are permitted provided that the following conditions are met: 
//
// * Redistributions of source code must retain the above copyright notice, 
// this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above copyright notice, 
// this list of conditions and the following disclaimer in the documentation 
// and/or other materials provided with the distribution. 
// * Neither name of Intel Corporation nor the names of its contributors 
// may be used to endorse or promote products derived from this software 
// without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR 
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////

//
// LUT.cpp : Time table look-ups done with ippiLUT against hand-written C loops
//

#include "stdafx.h"
#include "ipp.h"

// Benchmarks defined later in this file.
// w and h are the width and height of the test image, in pixels.
void CompareLUT_8u(int w, int h);
void CompareLUT_16s(int w, int h);

int main(int argc, char* argv[])
{
	// The command line is not consulted; both benchmarks always run on a
	// fixed 256 x 256 image.
	(void)argc;
	(void)argv;

	CompareLUT_8u(256, 256);	// 8-bit unsigned pixels
	CompareLUT_16s(256, 256);	// 16-bit signed pixels

	return 0;
}

// Run on Ipp8u data, using a dummy look-up function
void CompareLUT_8u(int w, int h)
{
	Ipp64u start, end;

	int dstStep;
	IppiSize size = { w, h };
	int i, j, k, loops=100;
	const int len=257;
	Ipp32s *pLevels = ippsMalloc_32s(len);
	Ipp32s *pValues = ippsMalloc_32s(len);
	Ipp8u *pDst = ippiMalloc_8u_C1(w, h, &dstStep);
	Ipp8u *pTmp;

	// Initialize a do-nothing value/level
	for (i =0; i<len; i++)
	{
		pValues[i] = i;
		pLevels[i] = i;
	}

	// Create a test image
	ippiImageJaehne_8u_C1R(pDst, dstStep, size);

	// Use ippiLUT to look up values
	start = ippCoreGetCpuClocks();
	for (k=0; k<loops; k++)
		ippiLUT_8u_C1R(pDst, dstStep, pDst, dstStep, size, pValues, pLevels, 256);
	end = ippCoreGetCpuClocks();
	printf("Clocks using IPP: %d (%d cpe)\n",(Ipp32s)(end - start)/loops,
		(end-start)/loops/(h*w));

	// Create a test image
	ippiImageJaehne_8u_C1R(pDst, dstStep, size);

	// Look up values by hand
	start = ippCoreGetCpuClocks();
	for (k=0; k<loops; k++)
	{
		pTmp=pDst;
		for (i =0; i<h; i++)
		{
			for (j =0; j<w; j++)
			{
				*pTmp++ = pLevels[*pTmp];
			}
			pTmp+= dstStep-w;
		}
	}
	end = ippCoreGetCpuClocks();
	printf("Clocks using C: %d (%d cpe)\n",(Ipp32s)(end - start)/loops,
		(end-start)/(h*w)/loops);
}

// Run on Ipp16s data, using a real look-up function (Sine)
void CompareLUT_16s(int w, int h)
{
	Ipp64u start, end;

	int dstStep;
	IppiSize size = { w, h };
	int i, j, k, loops=100;
	int i1, i2, w1, w2;
	const int len=257;
	Ipp32s *pLevels = ippsMalloc_32s(len);
	Ipp32f *pLevels32f = ippsMalloc_32f(len);
	Ipp32s *pValues = ippsMalloc_32s(len);
	Ipp16s *pDst = ippiMalloc_16s_C1(w, h, &dstStep);
	Ipp16s *pTmp;

	for (i =0; i<len; i++)
	{
		pValues[i] = i*256;
		pLevels[i] = i*256;
	}
	ippsConvert_32s32f(pLevels, pLevels32f, len);
	ippsDivC_32f_I(65536.0f/3.141592654f/2.0f, pLevels32f, len);
	ippsSin_32f_A21(pLevels32f,pLevels32f,len);
	ippsConvert_32f32s_Sfs(pLevels32f, pLevels, len, ippRndNear, -16);

	// Create a test image
	ippiImageJaehne_16s_C1R(pDst, dstStep, size);

	// Use look-up table by hand
	start = ippCoreGetCpuClocks();
	for (k=0; k<loops; k++)
	{
		pTmp=pDst;
		for (i =0; i<h; i++)
		{
			for (j =0; j<w; j++)
			{
				i1 = (*pTmp+32768+255)>>8;
				i2 = (*pTmp+32768)>>8;

				*pTmp++ = (pLevels[i1] +
					pLevels[i2])/2;
			}
			pTmp+= dstStep-w*2;
		}
	}
	end = ippCoreGetCpuClocks();
	printf("Clocks using C: %d (%d cpe)\n",(Ipp32s)(end - start)/loops,
		(end-start)/(h*w)/loops);

	// Use ippiLUT to look up values in a table.
	start = ippCoreGetCpuClocks();
	for (k=0; k<loops; k++)
		ippiLUT_Linear_16s_C1R(pDst, dstStep, pDst, dstStep, size, pValues, pLevels, 256);
	end = ippCoreGetCpuClocks();
	printf("Clocks using IPP: %d (%d cpe)\n",(Ipp32s)(end - start)/loops,
		(end-start)/(h*w)/loops);
}
