summaryrefslogtreecommitdiffstats
path: root/external/include/glm/detail/func_integer_simd.inl
blob: 690671a37024b8b9c3f58c6c5eab8e742703fda7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/// @ref core
/// @file glm/detail/func_integer_simd.inl

#include "../simd/integer.h"

#if GLM_ARCH & GLM_ARCH_SSE2_BIT

namespace glm{
namespace detail
{
	/// SSE2 specialization of a single bit-reversal step for `vec<4, uint32, Q>`.
	///
	/// For each 32-bit lane this computes
	///     ((v & Mask) << Shift) | ((v & ~Mask) >> Shift)
	/// which exchanges the bit groups selected by `Mask` with the complementary
	/// groups located `Shift` bits higher.
	/// NOTE(review): the two trailing `true` template arguments presumably select
	/// the aligned / enabled case of the primary template -- confirm there.
	template<qualifier Q>
	struct compute_bitfieldReverseStep<4, uint32, Q, true, true>
	{
		/// @param v     Input vector; its `data` member is consumed as an `__m128i`.
		/// @param Mask  Bit mask selecting the groups that move towards the high bits;
		///              it is broadcast to all four lanes.
		/// @param Shift Number of bit positions each group is moved by.
		/// @return      The vector with the masked and unmasked bit groups swapped.
		GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift)
		{
			__m128i const set0 = v.data;

			// Low half of the swap: (v & Mask) << Shift
			__m128i const set1 = _mm_set1_epi32(static_cast<int>(Mask));
			__m128i const and1 = _mm_and_si128(set0, set1);
			__m128i const sft1 = _mm_slli_epi32(and1, Shift);

			// High half of the swap: (v & ~Mask) >> Shift.
			// _mm_andnot_si128(a, b) evaluates (~a) & b, so the mask must be the first
			// operand; complementing the value itself would always produce zero.
			__m128i const and2 = _mm_andnot_si128(set1, set0);
			// The lanes are unsigned, so the shift has to be logical: an arithmetic
			// shift would replicate bit 31 into the vacated high bits.
			__m128i const sft2 = _mm_srli_epi32(and2, Shift);

			__m128i const or0 = _mm_or_si128(sft1, sft2);

			return or0;
		}
	};

	/// SSE2 specialization of a single parallel bit-count step for `vec<4, uint32, Q>`.
	///
	/// For each 32-bit lane this computes
	///     (v & Mask) + ((v >> Shift) & Mask)
	/// i.e. it adds each bit group selected by `Mask` to the neighbouring group
	/// located `Shift` bits higher, leaving the partial sums in the masked positions.
	/// NOTE(review): the two trailing `true` template arguments presumably select
	/// the aligned / enabled case of the primary template -- confirm there.
	template<qualifier Q>
	struct compute_bitfieldBitCountStep<4, uint32, Q, true, true>
	{
		/// @param v     Input vector; its `data` member is consumed as an `__m128i`.
		/// @param Mask  Bit mask selecting the low group of every pair; it is
		///              broadcast to all four lanes.
		/// @param Shift Distance in bits between the two groups of a pair.
		/// @return      Per-lane sums of the paired bit groups.
		GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift)
		{
			__m128i const set0 = v.data;

			__m128i const set1 = _mm_set1_epi32(static_cast<int>(Mask));
			__m128i const and0 = _mm_and_si128(set0, set1);
			// The upper group must be brought DOWN onto the masked positions, so this
			// is a logical right shift; shifting left would pair each group with the
			// wrong neighbour and discard the top bits.
			__m128i const sft0 = _mm_srli_epi32(set0, Shift);
			__m128i const and1 = _mm_and_si128(sft0, set1);
			__m128i const add0 = _mm_add_epi32(and0, and1);

			return add0;
		}
	};
}//namespace detail

#	if GLM_ARCH & GLM_ARCH_AVX_BIT
	/// Specialization of bitCount for 32-bit unsigned integers.
	/// Returns the number of set bits in `x`, obtained from the
	/// `_mm_popcnt_u32` intrinsic.
	template<>
	GLM_FUNC_QUALIFIER int bitCount(uint32 x)
	{
		int const SetBits = static_cast<int>(_mm_popcnt_u32(x));
		return SetBits;
	}

#	if(GLM_MODEL == GLM_MODEL_64)
	/// Specialization of bitCount for 64-bit unsigned integers.
	/// Returns the number of set bits in `x`, obtained from the
	/// `_mm_popcnt_u64` intrinsic.
	template<>
	GLM_FUNC_QUALIFIER int bitCount(uint64 x)
	{
		// The intrinsic's result is narrowed explicitly to the declared int return type.
		int const SetBits = static_cast<int>(_mm_popcnt_u64(x));
		return SetBits;
	}
#	endif//GLM_MODEL
#	endif//GLM_ARCH

}//namespace glm

#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT