9 static const __m128
one_o_10 = {0.1,0.1,0.1,0.1};
10 static const __m128
one_o_2 = {0.5,0.5,0.5,0.5};
11 static const __m128
close_one = {0.999,0.999,0.999,0.999};
12 static const __m128
two = {2., 2., 2., 2.};
13 static const __m128
four = {4., 4., 4., 4.};
14 static const __m128
one_o_3 = {0.3333333333333333333, 0.3333333333333333333, 0.3333333333333333333, 0.3333333333333333333};
15 static const __m128
_3_o_20 = {0.15, 0.15, 0.15, 0.15};
16 static const __m128
_5_o_56 = {8.92857142857142877e-02, 8.92857142857142877e-02, 8.92857142857142877e-02, 8.92857142857142877e-02};
17 static const __m128
SIGNMASK = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
19 void HelixHough::z0Range_sse(
const SimpleHit3D& hit,
float cosphi1,
float sinphi1,
float cosphi2,
float sinphi2,
float min_k,
float max_k,
float min_phi,
float max_phi,
float min_d,
float max_d,
float min_theta,
float max_theta,
float& min_z0,
float& max_z0)
23 __m128 v_z0_1 = _mm_load_ps(z0_1);
25 float x_arr[4]
__attribute__((aligned(16))) = {hit.x,hit.x,hit.x,hit.x};
26 float y_arr[4]
__attribute__((aligned(16))) = {hit.y,hit.y,hit.y,hit.y};
27 float z_arr[4]
__attribute__((aligned(16))) = {hit.z,hit.z,hit.z,hit.z};
29 __m128 v_x = _mm_load_ps(x_arr);
30 __m128 v_y = _mm_load_ps(y_arr);
31 __m128 v_z = _mm_load_ps(z_arr);
36 float cosphi_1[4]
__attribute__((aligned(16))) = {cosphi1, cosphi1, cosphi2, cosphi2}; __m128 v_cosphi_1 = _mm_load_ps(cosphi_1);
37 float sinphi_1[4]
__attribute__((aligned(16))) = {sinphi1, sinphi1, sinphi2, sinphi2}; __m128 v_sinphi_1 = _mm_load_ps(sinphi_1);
38 float d_1[4]
__attribute__((aligned(16))) = {min_d, min_d, max_d, max_d}; __m128 v_d_1 = _mm_load_ps(
d_1);
39 float k_1[4]
__attribute__((aligned(16))) = {min_k, min_k, max_k, max_k}; __m128 v_k_1 = _mm_load_ps(k_1);
40 float t_1[4]
__attribute__((aligned(16))) = {min_theta, max_theta, min_theta, max_theta}; __m128 v_t_1 = _mm_load_ps(t_1);
44 __m128 v_dx = _mm_mul_ps(v_cosphi_1, v_d_1);
45 __m128 v_dy = _mm_mul_ps(v_sinphi_1, v_d_1);
46 __m128
tmp1 = _mm_sub_ps(v_x, v_dx);
47 tmp1 = _mm_mul_ps(tmp1, tmp1);
48 __m128 v_D = _mm_sub_ps(v_y, v_dy);
49 v_D = _mm_mul_ps(v_D, v_D);
50 v_D = _mm_add_ps(v_D, tmp1);
51 v_D = _mm_sqrt_ps(v_D);
52 __m128 v_v = _mm_mul_ps(
one_o_2, v_k_1);
53 v_v = _mm_mul_ps(v_v, v_D);
57 __m128
tmp3 = _mm_andnot_ps(tmp1, v_v);
58 v_v = _mm_xor_ps(tmp2, tmp3);
61 __m128
temp1 = _mm_mul_ps(v_v, v_v);
63 tmp1 = _mm_mul_ps(
two, temp2);
64 v_s = _mm_add_ps(v_s, tmp1);
65 temp2 = _mm_mul_ps(temp2, temp1);
66 tmp1 = _mm_mul_ps(temp2,
one_o_3);
67 v_s = _mm_add_ps(v_s, tmp1);
68 temp2 = _mm_mul_ps(temp2, temp1);
69 tmp1 = _mm_mul_ps(temp2,
_3_o_20);
70 v_s = _mm_add_ps(v_s, tmp1);
71 temp2 = _mm_mul_ps(temp2, temp1);
72 tmp1 = _mm_mul_ps(temp2,
_5_o_56);
73 v_s = _mm_add_ps(v_s, tmp1);
78 tmp1 = _mm_mul_ps(v_v, v_v);
79 tmp1 = _mm_sub_ps(
one, tmp1);
80 tmp1 = _mm_sqrt_ps(tmp1);
81 tmp1 = _mm_add_ps(
one, tmp1);
82 tmp1 = _mm_div_ps(v_v, tmp1);
83 tmp2 = _vec_atan_ps(tmp1);
84 tmp2 = _mm_mul_ps(
four, tmp2);
85 tmp2 = _mm_div_ps(tmp2, v_k_1);
89 tmp3 = _mm_and_ps(tmp1, tmp2);
90 tmp2 = _mm_andnot_ps(tmp1, v_s);
91 v_s = _mm_xor_ps(tmp3, tmp2);
93 tmp1 = _mm_mul_ps(v_s, v_s);
94 tmp2 = _mm_mul_ps(v_t_1, v_t_1);
95 tmp3 = _mm_mul_ps(tmp1, tmp2);
96 __m128
tmp4 = _mm_sub_ps(
one, tmp2);
97 tmp4 = _mm_div_ps(tmp3, tmp4);
98 tmp4 = _mm_sqrt_ps(tmp4);
102 tmp1 = _mm_cmpgt_ps(v_t_1,
zero);
103 tmp2 = _mm_and_ps(tmp1, tmp3);
104 __m128 v_dz = _mm_andnot_ps(tmp1, tmp4);
105 v_dz = _mm_xor_ps(tmp2, v_dz);
106 v_z0_1 = _mm_add_ps(v_dz, v_z);
110 _mm_store_ps(z0_a, v_z0_1);
113 if (z0_a[1] < min_z0) min_z0 = z0_a[1] ;
114 if (z0_a[2] < min_z0) min_z0 = z0_a[2] ;
115 if (z0_a[3] < min_z0) min_z0 = z0_a[3] ;
118 if (z0_a[1] > max_z0) max_z0 = z0_a[1] ;
119 if (z0_a[2] > max_z0) max_z0 = z0_a[2] ;
120 if (z0_a[3] > max_z0) max_z0 = z0_a[3] ;