/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __PMMINTRIN_H
#define __PMMINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

#include <emmintrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))

/// Loads data from an unaligned memory location to elements in a 128-bit
///    vector.
///
///    If the address of the data is not 16-byte aligned, the instruction may
///    read two adjacent aligned blocks of memory to retrieve the requested
///    data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
///
/// \param __p
///    A pointer to a 128-bit integer vector containing integer values.
/// \returns A 128-bit vector containing the moved values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_lddqu_si128(__m128i_u const *__p)
{
  return (__m128i)__builtin_ia32_lddqu((char const *)__p);
}
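
/* Usage sketch (illustrative, not part of the upstream header): loading 16
 * bytes of integer data from a buffer that may not be 16-byte aligned. The
 * buffer and offset below are hypothetical.
 *
 *   unsigned char buf[32];
 *   __m128i v = _mm_lddqu_si128((__m128i_u const *)(buf + 3));
 */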

/// Subtracts the even-indexed values and adds the odd-indexed values of
///    two 128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float] containing the left source operand.
/// \param __b
///    A 128-bit vector of [4 x float] containing the right source operand.
/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
///    differences of both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_addsub_ps(__m128 __a, __m128 __b)
{
  return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
}
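
/* Lane-by-lane sketch (illustrative, not part of the upstream header): with
 * __a = {1, 2, 3, 4} and __b = {10, 20, 30, 40} (element 0 listed first),
 * even-indexed lanes hold differences and odd-indexed lanes hold sums.
 *
 *   __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);    // {1, 2, 3, 4}
 *   __m128 b = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f); // {10, 20, 30, 40}
 *   __m128 r = _mm_addsub_ps(a, b);  // {1-10, 2+20, 3-30, 4+40} = {-9, 22, -27, 44}
 */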

/// Horizontally adds the adjacent pairs of values contained in two
///    128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float] containing one of the source operands.
///    The horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [4 x float] containing one of the source operands.
///    The horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
///    both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_hadd_ps(__m128 __a, __m128 __b)
{
  return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
}
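
/* Lane-by-lane sketch (illustrative, not part of the upstream header):
 * _mm_hadd_ps(a, b) packs the pairwise sums of a into the low half of the
 * result and the pairwise sums of b into the high half:
 * {a0+a1, a2+a3, b0+b1, b2+b3}.
 *
 *   __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // {1, 2, 3, 4}
 *   __m128 b = _mm_set_ps(8.0f, 7.0f, 6.0f, 5.0f);  // {5, 6, 7, 8}
 *   __m128 r = _mm_hadd_ps(a, b);                   // {3, 7, 11, 15}
 */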

/// Horizontally subtracts the adjacent pairs of values contained in two
///    128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float] containing one of the source operands.
///    The horizontal differences between the values are stored in the lower
///    bits of the destination.
/// \param __b
///    A 128-bit vector of [4 x float] containing one of the source operands.
///    The horizontal differences between the values are stored in the upper
///    bits of the destination.
/// \returns A 128-bit vector of [4 x float] containing the horizontal
///    differences of both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_hsub_ps(__m128 __a, __m128 __b)
{
  return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
}
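
/* Lane-by-lane sketch (illustrative, not part of the upstream header):
 * _mm_hsub_ps(a, b) yields {a0-a1, a2-a3, b0-b1, b2-b3}, i.e. each pair's
 * first element minus its second element.
 *
 *   __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // {1, 2, 3, 4}
 *   __m128 b = _mm_set_ps(5.0f, 7.0f, 6.0f, 8.0f);  // {8, 6, 7, 5}
 *   __m128 r = _mm_hsub_ps(a, b);                   // {-1, -1, 2, 2}
 */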

/// Moves and duplicates odd-indexed values from a 128-bit vector
///    of [4 x float] to float values stored in a 128-bit vector of
///    [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float]. \n
///    Bits [127:96] of the source are written to bits [127:96] and [95:64] of
///    the destination. \n
///    Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
///    destination.
/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
///    values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movehdup_ps(__m128 __a)
{
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
}
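
/* Lane-by-lane sketch (illustrative, not part of the upstream header):
 * _mm_movehdup_ps duplicates the odd-indexed elements into the adjacent
 * even lanes.
 *
 *   __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // {1, 2, 3, 4}
 *   __m128 r = _mm_movehdup_ps(a);                  // {2, 2, 4, 4}
 */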

/// Duplicates even-indexed values from a 128-bit vector of
///    [4 x float] to float values stored in a 128-bit vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float]. \n
///    Bits [95:64] of the source are written to bits [127:96] and [95:64] of
///    the destination. \n
///    Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
///    destination.
/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
///    values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_moveldup_ps(__m128 __a)
{
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
}
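
/* Lane-by-lane sketch (illustrative, not part of the upstream header):
 * _mm_moveldup_ps duplicates the even-indexed elements into the adjacent
 * odd lanes.
 *
 *   __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // {1, 2, 3, 4}
 *   __m128 r = _mm_moveldup_ps(a);                  // {1, 1, 3, 3}
 */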

/// Subtracts the even-indexed values and adds the odd-indexed values of
///    two 128-bit vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing the left source operand.
/// \param __b
///    A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
///    and differences of both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_addsub_pd(__m128d __a, __m128d __b)
{
  return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}

/// Horizontally adds the pairs of values contained in two 128-bit
///    vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the source operands.
///    The horizontal sum of the values is stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the source operands.
///    The horizontal sum of the values is stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
///    both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_hadd_pd(__m128d __a, __m128d __b)
{
  return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
}

/// Horizontally subtracts the pairs of values contained in two 128-bit
///    vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double] containing one of the source operands.
///    The horizontal difference of the values is stored in the lower bits of
///    the destination.
/// \param __b
///    A 128-bit vector of [2 x double] containing one of the source operands.
///    The horizontal difference of the values is stored in the upper bits of
///    the destination.
/// \returns A 128-bit vector of [2 x double] containing the horizontal
///    differences of both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_hsub_pd(__m128d __a, __m128d __b)
{
  return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
}
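
/* Lane-by-lane sketch (illustrative, not part of the upstream header) for the
 * [2 x double] forms above, with a = {1, 2} and b = {10, 20} (element 0
 * listed first).
 *
 *   __m128d a = _mm_set_pd(2.0, 1.0);    // {1, 2}
 *   __m128d b = _mm_set_pd(20.0, 10.0);  // {10, 20}
 *   _mm_addsub_pd(a, b);                 // {1 - 10, 2 + 20} = {-9, 22}
 *   _mm_hadd_pd(a, b);                   // {1 + 2, 10 + 20} = {3, 30}
 *   _mm_hsub_pd(a, b);                   // {1 - 2, 10 - 20} = {-1, -10}
 */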

/// Moves and duplicates one double-precision value to double-precision
///    values stored in a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to a double-precision value to be moved and duplicated.
/// \returns A 128-bit vector of [2 x double] containing the moved and
///    duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
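
/* Usage sketch (illustrative, not part of the upstream header): broadcasting
 * one double into both lanes. The variable name is hypothetical.
 *
 *   double x = 3.5;
 *   __m128d v = _mm_loaddup_pd(&x);   // {3.5, 3.5}
 */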

/// Moves and duplicates the double-precision value in the lower bits of
///    a 128-bit vector of [2 x double] to double-precision values stored in a
///    128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
///    [127:64] and [63:0] of the destination.
/// \returns A 128-bit vector of [2 x double] containing the moved and
///    duplicated values.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_movedup_pd(__m128d __a)
{
  return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
}
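
/* Lane-by-lane sketch (illustrative, not part of the upstream header):
 * _mm_movedup_pd broadcasts the low element of the source into both lanes
 * of the result.
 *
 *   __m128d a = _mm_set_pd(2.0, 1.0);  // {1, 2}
 *   __m128d r = _mm_movedup_pd(a);     // {1, 1}
 */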

/// Establishes a linear address memory range to be monitored and puts
///    the processor in the monitor event pending state. Data stored in the
///    monitored address range causes the processor to exit the pending state.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MONITOR </c> instruction.
///
/// \param __p
///    The memory range to be monitored. The size of the range is determined by
///    CPUID function 0000_0005h.
/// \param __extensions
///    Optional extensions for the monitoring state.
/// \param __hints
///    Optional hints for the monitoring state.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
{
  __builtin_ia32_monitor(__p, __extensions, __hints);
}

/// Used with the MONITOR instruction to wait while the processor is in
///    the monitor event pending state. Data stored in the monitored address
///    range causes the processor to exit the pending state.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MWAIT </c> instruction.
///
/// \param __extensions
///    Optional extensions for the monitoring state, which may vary by
///    processor.
/// \param __hints
///    Optional hints for the monitoring state, which may vary by processor.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mwait(unsigned __extensions, unsigned __hints)
{
  __builtin_ia32_mwait(__extensions, __hints);
}
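
/* Usage sketch (illustrative, not part of the upstream header; privilege
 * requirements for MONITOR/MWAIT vary by OS and processor): the two
 * intrinsics are typically paired, arming a monitored address range and then
 * waiting until it is written or another wake event occurs. The flag variable
 * is hypothetical, and both extension and hint arguments are passed as zero.
 *
 *   volatile int flag = 0;
 *   _mm_monitor((const void *)&flag, 0, 0);
 *   if (flag == 0)
 *     _mm_mwait(0, 0);
 */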

#undef __DEFAULT_FN_ATTRS

#endif /* __PMMINTRIN_H */