vm/jitrino/src/optimizer/optarithmetic.h - harmony-drlvm - Git at Google

 /*
  *  Licensed to the Apache Software Foundation (ASF) under one or more
  *  contributor license agreements.  See the NOTICE file distributed with
  *  this work for additional information regarding copyright ownership.
  *  The ASF licenses this file to You under the Apache License, Version 2.0
  *  (the "License"); you may not use this file except in compliance with
  *  the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  */

 /**
  * @author Intel, Pavel A. Ozhdikhin
  *
  */

 #ifndef _OPTARITHMETIC_H
 #define _OPTARITHMETIC_H

 namespace Jitrino {
 /*
  * Implemented algorithms are similar to the ones described in
  * [T.Granlund and P.L.Montgomery. Division by Invariant Integers using
  * Multiplication. PLDI, 1994]
  */

 template <typename inttype, int width> inline int popcount(inttype x);
 template <>
 inline
 int popcount<I_32, 32> (I_32 x)
 {
 #ifdef _USE_ITANIUM_INTRINSICS_
     U_32 y = x; // avoid sign extension
     __m64 z = _m_from_int(int(y));
     __m64 r = __m64_popcnt(z);
     return _m_to_int(r);
 #else
     x = x - ((x >>1) & 0x55555555);
     x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
     x = (x + (x >> 4)) & 0x0f0f0f0f;
     x = x + (x >> 8);
     x = x + (x >> 16);
     return x & 0x0000003F;
 #endif
 }
 template <>
 inline
 int popcount<int64, 64> (int64 x)
 {
 #ifdef _USE_ITANIUM_INTRINSICS_
     __m64 y = _m_fron_int(int(x));
     __m64 r = __m64_popcnt(y);
     return _m_to_int(r);
 #else
     int64 tmp5 = 0x55555555;
     tmp5 = tmp5 | (tmp5 << 32);
     x = x - ((x >>1) & tmp5);

     int64 tmp3 = 0x33333333;
     tmp3 = tmp3 | (tmp3 << 32);
     x = (x & tmp3) + ((x >> 2) & tmp3);

     int64 tmp0f = 0x0f0f0f0f;
     tmp0f = tmp0f | (tmp0f << 32);
     x = (x + (x >> 4)) & tmp0f;

     x = x + (x >> 8);
     x = x + (x >> 16);
     x = x + (x >> 32);
     return int(x & 0x0000007F);
 #endif
 }

 template <typename inttype>
 bool isPowerOf2(inttype d) {
     if (d < 0) {
         d = -d;
     }
     // turn trailing 0s into 1s, others become 0
     inttype justrightzeros = (~d) & (d - 1);
     inttype bitandleftbits = ~justrightzeros;
     // rightmost 1-bit and trailing 0s:
     inttype bitandrightzeros = d ^ (d - 1);
     inttype justrightbit = bitandrightzeros & bitandleftbits;
     if (d == justrightbit) {
         return true;
     } else {
         return false;
     }
 }

 template <typename inttype, typename uinttype,
           int width>
 void
 getMagic(inttype d, inttype *magicNum, inttype *shiftBy)
 {
     // d must not be -1, 0, 1
     assert((d < -1) || (d > 1));

     const uinttype hiBitSet = (uinttype)1 << (width-1);

     uinttype ad = abs(d);                         // ad = |d|
     uinttype t = hiBitSet + (((uinttype)d) >> (width-1));
     uinttype anc = t - 1 - (t%ad);                // anc = |nc|

     // initial value, we try values from [width-1, 2*width]
     uinttype p = width-1;                 // power of 2 to divide by

     // these are maintained incrementally in loop as p is changed
     uinttype q1 = hiBitSet / anc;             // q1 = 2**p/|nc|
     uinttype r1 = hiBitSet - q1*anc;              // r1 = rem(2**p, |nc|)
     uinttype q2 = hiBitSet / ad;                  // q2 = 2**p /|d|
     uinttype r2 = hiBitSet - q2*ad;               // r2 = rem(2**p, |d|)

     uinttype delta = 0;

     do {
         p = p + 1;

         // increment q1, r1, q2, r2
         q1 = q1 << 1;
         r1 = r1 << 1;
         q2 = q2 << 1;
         r2 = r2 << 1;
         // r1 overflows into q1
         if (r1 >= anc) {
             q1 = q1 + 1;
             r1 = r1 - anc;
         }
         // r2 overflows into q2
         if (r2 >= ad) {
             q2 = q2 + 1;
             r2 = r2 - ad;
         }

         delta = ad - r2;

     } while ((q1 < delta) || ((q1 == delta) && (r1 == 0)));

     inttype mag1 = q2 + 1;
     *magicNum = (d < 0) ? -mag1 : mag1;
     *shiftBy = p - width;
 }

 // isolate leftmost 1
 template <typename inttype>
 inline
 inttype leftmost1(inttype x);

 template <>
 inline
 I_32 leftmost1<I_32>(I_32 x)
 {
     if (x == 1) return 1;
     if (x == 0) return 0;
     x = x | (x >> 1);
     x = x | (x >> 2);
     x = x | (x >> 4);
     x = x | (x >> 8);
     x = x | (x >> 16);
     x = x >> 1;
     x = x + 1;
     return x;
 }

 template <>
 inline
 int64 leftmost1<int64>(int64 x)
 {
     if (x == 1) return 1;
     if (x == 0) return 0;
     x = x | (x >> 1);
     x = x | (x >> 2);
     x = x | (x >> 4);
     x = x | (x >> 8);
     x = x | (x >> 16);
     x = x | (x >> 32);
     x = x >> 1;
     x = x + 1;
     return x;
 }

 // number of leading (leftmost) zeros
 template <typename inttype, int width>
 inline
 int nlz(inttype x);

 template <>
 inline
 int nlz<I_32, 32>(I_32 x)
 {
     x = x | (x >> 1);
     x = x | (x >> 2);
     x = x | (x >> 4);
     x = x | (x >> 8);
     x = x | (x >> 16);
     return popcount<I_32, 32>(~x);
 }

 template <>
 inline
 int nlz<int64, 64>(int64 x)
 {
     x = x | (x >> 1);
     x = x | (x >> 2);
     x = x | (x >> 4);
     x = x | (x >> 8);
     x = x | (x >> 16);
     x = x | (x >> 32);
     return popcount<int64, 64>(~x);
 }

 // number of trailing (rightmost) zeros
 template <typename inttype, int width>
 inline
 int ntz(inttype x)
 {
     inttype trailingzeromask = (~x) & (x - 1);
     return popcount<inttype, width>(trailingzeromask);
 }

 template <typename inttype, int width>
 inline
 int whichPowerOf2(inttype d) {
     bool isNegative = ((d < 0) && (d != -d));
     if (isNegative) {
         d = -d;
     }
     int i = ntz<inttype, width>(d);
     if (isNegative)
         return -i;
     else
         return i;
 }

 template <typename inttype>
 inline
 int shifttomakeOdd(inttype d, inttype &newd)
 {
     assert(d != 0);
     int i = 0;
     while ((d & 1) == 0) {
         ++i;
         d = d >> 1;
     }
     newd = d;
     return i;
 }

 template <typename uinttype>
 inline uinttype isqrt_iter(uinttype n, uinttype approx)
 {
     uinttype q = n / approx;
     uinttype newapprox = (approx + q) / 2;
     while (q != newapprox) {
         if (newapprox == approx) return approx;
         approx = newapprox;
         q = n / approx;
         newapprox = (approx + q) / 2;
     }
     return newapprox;
 }

 template <typename uinttype> inline uinttype isqrt(uinttype n);

 template <> inline uint64 isqrt(uint64 n) { return isqrt_iter(n, (uint64) 65536); }
 template <> inline U_32 isqrt(U_32 n) { return isqrt_iter(n, (U_32) 256); }
 template <> inline uint16 isqrt(uint16 n) { return isqrt_iter(n, (uint16) 16); }
 template <> inline U_8 isqrt(U_8 n) { return isqrt_iter(n, (U_8) 4); }

 } //namespace Jitrino

 #endif // _OPTARITHMETIC_H
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/**
	* @author Intel, Pavel A. Ozhdikhin
	*
	*/

	#ifndef _OPTARITHMETIC_H
	#define _OPTARITHMETIC_H

	namespace Jitrino {
	/*
	* Implemented algorithms are similar to the ones described in
	* [T.Granlund and P.L.Montgomery. Division by Invariant Integers using
	* Multiplication. PLDI, 1994]
	*/

	template <typename inttype, int width> inline int popcount(inttype x);
	template <>
	inline
	int popcount<I_32, 32> (I_32 x)
	{
	#ifdef _USE_ITANIUM_INTRINSICS_
	U_32 y = x; // avoid sign extension
	__m64 z = _m_from_int(int(y));
	__m64 r = __m64_popcnt(z);
	return _m_to_int(r);
	#else
	x = x - ((x >>1) & 0x55555555);
	x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
	x = (x + (x >> 4)) & 0x0f0f0f0f;
	x = x + (x >> 8);
	x = x + (x >> 16);
	return x & 0x0000003F;
	#endif
	}
	template <>
	inline
	int popcount<int64, 64> (int64 x)
	{
	#ifdef _USE_ITANIUM_INTRINSICS_
	__m64 y = _m_fron_int(int(x));
	__m64 r = __m64_popcnt(y);
	return _m_to_int(r);
	#else
	int64 tmp5 = 0x55555555;
	tmp5 = tmp5 \| (tmp5 << 32);
	x = x - ((x >>1) & tmp5);

	int64 tmp3 = 0x33333333;
	tmp3 = tmp3 \| (tmp3 << 32);
	x = (x & tmp3) + ((x >> 2) & tmp3);

	int64 tmp0f = 0x0f0f0f0f;
	tmp0f = tmp0f \| (tmp0f << 32);
	x = (x + (x >> 4)) & tmp0f;

	x = x + (x >> 8);
	x = x + (x >> 16);
	x = x + (x >> 32);
	return int(x & 0x0000007F);
	#endif
	}

	template <typename inttype>
	bool isPowerOf2(inttype d) {
	if (d < 0) {
	d = -d;
	}
	// turn trailing 0s into 1s, others become 0
	inttype justrightzeros = (~d) & (d - 1);
	inttype bitandleftbits = ~justrightzeros;
	// rightmost 1-bit and trailing 0s:
	inttype bitandrightzeros = d ^ (d - 1);
	inttype justrightbit = bitandrightzeros & bitandleftbits;
	if (d == justrightbit) {
	return true;
	} else {
	return false;
	}
	}

	template <typename inttype, typename uinttype,
	int width>
	void
	getMagic(inttype d, inttype magicNum, inttype shiftBy)
	{
	// d must not be -1, 0, 1
	assert((d < -1) \|\| (d > 1));

	const uinttype hiBitSet = (uinttype)1 << (width-1);

	uinttype ad = abs(d); // ad = \|d\|
	uinttype t = hiBitSet + (((uinttype)d) >> (width-1));
	uinttype anc = t - 1 - (t%ad); // anc = \|nc\|

	// initial value, we try values from [width-1, 2*width]
	uinttype p = width-1; // power of 2 to divide by

	// these are maintained incrementally in loop as p is changed
	uinttype q1 = hiBitSet / anc; // q1 = 2**p/\|nc\|
	uinttype r1 = hiBitSet - q1anc; // r1 = rem(2*p, \|nc\|)
	uinttype q2 = hiBitSet / ad; // q2 = 2**p /\|d\|
	uinttype r2 = hiBitSet - q2ad; // r2 = rem(2*p, \|d\|)

	uinttype delta = 0;

	do {
	p = p + 1;

	// increment q1, r1, q2, r2
	q1 = q1 << 1;
	r1 = r1 << 1;
	q2 = q2 << 1;
	r2 = r2 << 1;
	// r1 overflows into q1
	if (r1 >= anc) {
	q1 = q1 + 1;
	r1 = r1 - anc;
	}
	// r2 overflows into q2
	if (r2 >= ad) {
	q2 = q2 + 1;
	r2 = r2 - ad;
	}

	delta = ad - r2;

	} while ((q1 < delta) \|\| ((q1 == delta) && (r1 == 0)));

	inttype mag1 = q2 + 1;
	*magicNum = (d < 0) ? -mag1 : mag1;
	*shiftBy = p - width;
	}

	// isolate leftmost 1
	template <typename inttype>
	inline
	inttype leftmost1(inttype x);

	template <>
	inline
	I_32 leftmost1<I_32>(I_32 x)
	{
	if (x == 1) return 1;
	if (x == 0) return 0;
	x = x \| (x >> 1);
	x = x \| (x >> 2);
	x = x \| (x >> 4);
	x = x \| (x >> 8);
	x = x \| (x >> 16);
	x = x >> 1;
	x = x + 1;
	return x;
	}

	template <>
	inline
	int64 leftmost1<int64>(int64 x)
	{
	if (x == 1) return 1;
	if (x == 0) return 0;
	x = x \| (x >> 1);
	x = x \| (x >> 2);
	x = x \| (x >> 4);
	x = x \| (x >> 8);
	x = x \| (x >> 16);
	x = x \| (x >> 32);
	x = x >> 1;
	x = x + 1;
	return x;
	}

	// number of leading (leftmost) zeros
	template <typename inttype, int width>
	inline
	int nlz(inttype x);

	template <>
	inline
	int nlz<I_32, 32>(I_32 x)
	{
	x = x \| (x >> 1);
	x = x \| (x >> 2);
	x = x \| (x >> 4);
	x = x \| (x >> 8);
	x = x \| (x >> 16);
	return popcount<I_32, 32>(~x);
	}

	template <>
	inline
	int nlz<int64, 64>(int64 x)
	{
	x = x \| (x >> 1);
	x = x \| (x >> 2);
	x = x \| (x >> 4);
	x = x \| (x >> 8);
	x = x \| (x >> 16);
	x = x \| (x >> 32);
	return popcount<int64, 64>(~x);
	}

	// number of trailing (rightmost) zeros
	template <typename inttype, int width>
	inline
	int ntz(inttype x)
	{
	inttype trailingzeromask = (~x) & (x - 1);
	return popcount<inttype, width>(trailingzeromask);
	}

	template <typename inttype, int width>
	inline
	int whichPowerOf2(inttype d) {
	bool isNegative = ((d < 0) && (d != -d));
	if (isNegative) {
	d = -d;
	}
	int i = ntz<inttype, width>(d);
	if (isNegative)
	return -i;
	else
	return i;
	}

	template <typename inttype>
	inline
	int shifttomakeOdd(inttype d, inttype &newd)
	{
	assert(d != 0);
	int i = 0;
	while ((d & 1) == 0) {
	++i;
	d = d >> 1;
	}
	newd = d;
	return i;
	}

	template <typename uinttype>
	inline uinttype isqrt_iter(uinttype n, uinttype approx)
	{
	uinttype q = n / approx;
	uinttype newapprox = (approx + q) / 2;
	while (q != newapprox) {
	if (newapprox == approx) return approx;
	approx = newapprox;
	q = n / approx;
	newapprox = (approx + q) / 2;
	}
	return newapprox;
	}

	template <typename uinttype> inline uinttype isqrt(uinttype n);

	template <> inline uint64 isqrt(uint64 n) { return isqrt_iter(n, (uint64) 65536); }
	template <> inline U_32 isqrt(U_32 n) { return isqrt_iter(n, (U_32) 256); }
	template <> inline uint16 isqrt(uint16 n) { return isqrt_iter(n, (uint16) 16); }
	template <> inline U_8 isqrt(U_8 n) { return isqrt_iter(n, (U_8) 4); }

	} //namespace Jitrino

	#endif // _OPTARITHMETIC_H