WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – /llvm-build/x86_64/lib/clang/16/include/velintrin_approx.h

Rev	Author	Line No.	Line
14	pmbaty	1	/*===---- velintrin_approx.h - VEL intrinsics helper for VE ----------------===
		2	*
		3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	* See https://llvm.org/LICENSE.txt for license information.
		5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	*
		7	*===-----------------------------------------------------------------------===
		8	*/
		9	#ifndef __VEL_INTRIN_APPROX_H__
		10	#define __VEL_INTRIN_APPROX_H__
		11
		12	static inline __vr _vel_approx_vfdivs_vvvl(__vr v0, __vr v1, int l) { /* NOTE(review): by its name, an approximate single-precision divide of vector v0 by vector v1; l is passed unchanged to every intrinsic below. Confirm intrinsic semantics against the VE reference. */
		13	float s0; /* scalar constant operand, set to 1.0 below */
		14	__vr v2, v3, v4, v5; /* vector temporaries */
		15	v5 = _vel_vrcps_vvl(v1, l); /* v5 depends on the divisor v1 only; presumably a reciprocal estimate -- TODO confirm */
		16	s0 = 1.0;
		17	v4 = _vel_vfnmsbs_vsvvl(s0, v1, v5, l); /* combines 1.0, v1 and v5; presumably the error of the estimate -- TODO confirm operand roles */
		18	v3 = _vel_vfmads_vvvvl(v5, v5, v4, l); /* v3: v5 combined with v4 (looks like one refinement step) */
		19	v2 = _vel_vfmuls_vvvl(v0, v3, l); /* v2: first candidate result, from v0 and v3 */
		20	v4 = _vel_vfnmsbs_vvvvl(v0, v2, v1, l); /* v4 is reused: now derived from v0, the candidate v2 and v1 */
		21	v2 = _vel_vfmads_vvvvl(v2, v5, v4, l); /* candidate updated using the unrefined v5, not v3 */
		22	v0 = _vel_vfnmsbs_vvvvl(v0, v2, v1, l); /* overwrites parameter v0; the incoming v0 value is not read after this line */
		23	v0 = _vel_vfmads_vvvvl(v2, v3, v0, l); /* final update of v2 using v3 and the value just stored in v0 */
		24	return v0;
		25	}
		26
		27	static inline __vr _vel_approx_pvfdiv_vvvl(__vr v0, __vr v1, int l) { /* Same call sequence as _vel_approx_vfdivs_vvvl but using the pv-prefixed intrinsics; NOTE(review): presumably the packed variant of the v0 / v1 approximation -- confirm. l is passed unchanged to every intrinsic. */
		28	float s0; /* scalar constant operand, set to 1.0 below */
		29	__vr v2, v3, v4, v5; /* vector temporaries */
		30	v5 = _vel_pvrcp_vvl(v1, l); /* v5 depends on the divisor v1 only; presumably a reciprocal estimate -- TODO confirm */
		31	s0 = 1.0;
		32	v4 = _vel_pvfnmsb_vsvvl(s0, v1, v5, l); /* NOTE(review): s0 is a float holding 1.0; verify against the intrinsic's prototype that its scalar operand accepts a plain float here (a packed form may expect a different scalar type) */
		33	v3 = _vel_pvfmad_vvvvl(v5, v5, v4, l); /* v3: v5 combined with v4 (looks like one refinement step) */
		34	v2 = _vel_pvfmul_vvvl(v0, v3, l); /* v2: first candidate result, from v0 and v3 */
		35	v4 = _vel_pvfnmsb_vvvvl(v0, v2, v1, l); /* v4 is reused: now derived from v0, the candidate v2 and v1 */
		36	v2 = _vel_pvfmad_vvvvl(v2, v5, v4, l); /* candidate updated using the unrefined v5, not v3 */
		37	v0 = _vel_pvfnmsb_vvvvl(v0, v2, v1, l); /* overwrites parameter v0; the incoming v0 value is not read after this line */
		38	v0 = _vel_pvfmad_vvvvl(v2, v3, v0, l); /* final update of v2 using v3 and the value just stored in v0 */
		39	return v0;
		40	}
		41
		42	static inline __vr _vel_approx_vfdivs_vsvl(float s0, __vr v0, int l) { /* NOTE(review): by its name, an approximate single-precision divide of scalar s0 by vector v0; l is passed unchanged to every intrinsic below. Confirm intrinsic semantics against the VE reference. */
		43	float s1; /* scalar constant operand, set to 1.0 below */
		44	__vr v1, v2, v3, v4; /* vector temporaries */
		45	v4 = _vel_vrcps_vvl(v0, l); /* v4 depends on the divisor v0 only; presumably a reciprocal estimate -- TODO confirm */
		46	s1 = 1.0;
		47	v2 = _vel_vfnmsbs_vsvvl(s1, v0, v4, l); /* combines 1.0, v0 and v4; presumably the error of the estimate -- TODO confirm operand roles */
		48	v2 = _vel_vfmads_vvvvl(v4, v4, v2, l); /* v2 now holds v4 combined with the previous v2 (looks like one refinement step) */
		49	v1 = _vel_vfmuls_vsvl(s0, v2, l); /* v1: first candidate result, from scalar s0 and v2 */
		50	v3 = _vel_vfnmsbs_vsvvl(s0, v1, v0, l); /* v3 derived from s0, the candidate v1 and the divisor v0 */
		51	v1 = _vel_vfmads_vvvvl(v1, v4, v3, l); /* candidate updated using the unrefined v4, not v2 */
		52	v3 = _vel_vfnmsbs_vsvvl(s0, v1, v0, l); /* same call as two lines above, re-evaluated with the updated v1 */
		53	v0 = _vel_vfmads_vvvvl(v1, v2, v3, l); /* final update; overwrites parameter v0 with the result */
		54	return v0;
		55	}
		56
		57	static inline __vr _vel_approx_vfdivs_vvsl(__vr v0, float s0, int l) { /* NOTE(review): by its name, an approximate single-precision divide of vector v0 by scalar s0; l is passed unchanged to every intrinsic below. Confirm intrinsic semantics against the VE reference. */
		58	float s1; /* holds 1.0f / s0 */
		59	__vr v1, v2; /* vector temporaries */
		60	s1 = 1.0f / s0; /* reciprocal of the scalar divisor, computed with an ordinary float division rather than an intrinsic; s0 is not checked for zero */
		61	v1 = _vel_vfmuls_vsvl(s1, v0, l); /* v1: first candidate result, from the scalar reciprocal and v0 */
		62	v2 = _vel_vfnmsbs_vvsvl(v0, s0, v1, l); /* v2 derived from v0, s0 and the candidate v1; presumably the remaining error -- TODO confirm operand roles */
		63	v0 = _vel_vfmads_vvsvl(v1, s1, v2, l); /* update of v1 using s1 and v2; overwrites parameter v0 with the result */
		64	return v0;
		65	}
		66
		67	static inline __vr _vel_approx_vfdivd_vsvl(double s0, __vr v0, int l) { /* NOTE(review): by its name, an approximate double-precision divide of scalar s0 by vector v0; l is passed unchanged to every intrinsic below. Confirm intrinsic semantics against the VE reference. */
		68	__vr v1, v2, v3; /* vector temporaries */
		69	v2 = _vel_vrcpd_vvl(v0, l); /* v2 depends on the divisor v0 only; presumably a reciprocal estimate -- TODO confirm */
		70	double s1 = 1.0; /* scalar constant operand for the vfnmsbd calls below */
		71	v3 = _vel_vfnmsbd_vsvvl(s1, v0, v2, l); /* combines 1.0, v0 and v2; presumably the error of the estimate -- TODO confirm operand roles */
		72	v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); /* first update of v2 */
		73	v1 = _vel_vfnmsbd_vsvvl(s1, v0, v2, l); /* same call as two lines above, re-evaluated with the updated v2 */
		74	v1 = _vel_vfmadd_vvvvl(v2, v2, v1, l); /* second update, stored in v1 */
		75	v1 = _vel_vaddul_vsvl(1, v1, l); /* NOTE(review): integer literal 1 passed to a differently named (vaddul) intrinsic; by its name this looks like an integer add on the raw element bits rather than adding 1.0 -- confirm intent */
		76	v3 = _vel_vfnmsbd_vsvvl(s1, v0, v1, l); /* same vfnmsbd call again, now evaluated with the adjusted v1 */
		77	v3 = _vel_vfmadd_vvvvl(v1, v1, v3, l); /* third update, stored in v3 */
		78	v1 = _vel_vfmuld_vsvl(s0, v3, l); /* v1: candidate result, from scalar s0 and v3 */
		79	v0 = _vel_vfnmsbd_vsvvl(s0, v1, v0, l); /* overwrites parameter v0; the incoming divisor is not read after this line */
		80	v0 = _vel_vfmadd_vvvvl(v1, v3, v0, l); /* final update of v1 using v3 and the value just stored in v0 */
		81	return v0;
		82	}
		83
		84	static inline __vr _vel_approx_vfsqrtd_vvl(__vr v0, int l) { /* NOTE(review): by its name, an approximate double-precision square root of vector v0; l is passed unchanged to every intrinsic below. Confirm intrinsic semantics against the VE reference. */
		85	double s0, s1; /* scalar constants, set to 1.0 and 0.5 below */
		86	__vr v1, v2, v3; /* vector temporaries */
		87	v2 = _vel_vrsqrtdnex_vvl(v0, l); /* v2 depends on the input only; presumably a reciprocal-square-root estimate -- TODO confirm, including what the nex suffix means */
		88	v1 = _vel_vfmuld_vvvl(v0, v2, l); /* v1: product of the input and the estimate */
		89	s0 = 1.0;
		90	s1 = 0.5;
		91	v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); /* combines 1.0, v1 and v2; presumably the error of the estimate -- TODO confirm operand roles */
		92	v3 = _vel_vfmuld_vsvl(s1, v3, l); /* scales v3 by the scalar 0.5 */
		93	v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); /* v2 updated with the scaled v3 (looks like one refinement step) */
		94	v1 = _vel_vfmuld_vvvl(v0, v2, l); /* v1 recomputed from the input and the updated v2 */
		95	v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); /* same vfnmsbd call as before, re-evaluated with the updated v1 and v2 */
		96	v3 = _vel_vfmuld_vsvl(s1, v3, l); /* scales v3 by 0.5 again */
		97	v0 = _vel_vfmadd_vvvvl(v1, v1, v3, l); /* final update applied to v1 (not v2); overwrites parameter v0 with the result */
		98	return v0;
		99	}
		100
		101	static inline __vr _vel_approx_vfsqrts_vvl(__vr v0, int l) { /* NOTE(review): by its name, an approximate single-precision square root of vector v0. The body wraps the same d-suffixed call sequence as _vel_approx_vfsqrtd_vvl between a vcvtds call and a vcvtsd call; l is passed unchanged to every intrinsic. Confirm intrinsic semantics against the VE reference. */
		102	float s0, s1; /* scalar constants 1.0 and 0.5; declared float here although they are passed to d-suffixed intrinsics (the vfsqrtd sibling declares them double) */
		103	__vr v1, v2, v3; /* vector temporaries */
		104	v0 = _vel_vcvtds_vvl(v0, l); /* overwrites parameter v0; presumably converts the single-precision input to double -- TODO confirm */
		105	v2 = _vel_vrsqrtdnex_vvl(v0, l); /* v2 depends on the converted input only; presumably a reciprocal-square-root estimate -- TODO confirm */
		106	v1 = _vel_vfmuld_vvvl(v0, v2, l); /* v1: product of the converted input and the estimate */
		107	s0 = 1.0;
		108	s1 = 0.5;
		109	v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); /* combines 1.0, v1 and v2; presumably the error of the estimate -- TODO confirm operand roles */
		110	v3 = _vel_vfmuld_vsvl(s1, v3, l); /* scales v3 by the scalar 0.5 */
		111	v2 = _vel_vfmadd_vvvvl(v2, v2, v3, l); /* v2 updated with the scaled v3 (looks like one refinement step) */
		112	v1 = _vel_vfmuld_vvvl(v0, v2, l); /* v1 recomputed from the converted input and the updated v2 */
		113	v3 = _vel_vfnmsbd_vsvvl(s0, v1, v2, l); /* same vfnmsbd call as before, re-evaluated with the updated v1 and v2 */
		114	v3 = _vel_vfmuld_vsvl(s1, v3, l); /* scales v3 by 0.5 again */
		115	v0 = _vel_vfmadd_vvvvl(v1, v1, v3, l); /* final update applied to v1 (not v2) */
		116	v0 = _vel_vcvtsd_vvl(v0, l); /* presumably converts the double result back to single precision -- TODO confirm */
		117	return v0;
		118	}
		119
		120	#endif

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite/llvm-build/x86_64/lib/clang/16/include/velintrin_approx.h – Rev 14