EIC Software
sPHENIXTrackerTpc.cpp
Go to the documentation of this file, or view the newest version of sPHENIXTrackerTpc.cpp in the sPHENIX GitHub repository.
1 #include "sPHENIXTrackerTpc.h"
2 #include "CylinderKalman.h"
3 #include "Pincushion.h"
4 #include "SimpleTrack3D.h"
5 #include "vector_math_inline.h"
6 
7 #include <Eigen/Core>
8 #include <Eigen/Dense>
9 #include <Eigen/LU>
10 
11 #include <algorithm>
12 #include <cfloat>
13 #include <cmath>
14 #include <cstddef>
15 #include <iostream>
16 #include <sys/time.h>
17 #include <utility>
18 #include <xmmintrin.h>
19 
20 class HelixResolution;
21 
22 using namespace std;
23 using namespace Eigen;
24 using namespace SeamStress;
25 
26 class hitTriplet {
27 public:
28  hitTriplet(unsigned int h1, unsigned int h2, unsigned int h3, unsigned int t,
29  float c)
30  : hit1(h1), hit2(h2), hit3(h3), track(t), chi2(c) {}
32 
33  bool operator<(const hitTriplet& other) const {
34  return (hit1 < other.hit1) ||
35  ((hit2 < other.hit2) && (hit1 == other.hit1)) ||
36  ((hit3 < other.hit3) && (hit1 == other.hit1) &&
37  (hit2 == other.hit2));
38  }
39 
40  bool operator==(const hitTriplet& other) const {
41  return ((hit1 == other.hit1) && (hit2 == other.hit2) &&
42  (hit3 == other.hit3));
43  }
44 
45  unsigned int hit1, hit2, hit3, track;
46  float chi2;
47 };
48 
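// Ghost-track rejection: when cut_on_dca is off, every 3-hit combination on each
// candidate track is recorded as a hitTriplet.  After sorting, runs of identical
// triplets identify tracks that share hits; within each run only the candidate
// with the most hits (or deepest outer layer, then lowest chi2) keeps its
// usetrack flag, and the runner-up chi2 is stored in next_best_chi2.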
49 void sPHENIXTrackerTpc::tripletRejection(vector<SimpleTrack3D>& input,
50  vector<SimpleTrack3D>& /*output*/,
51  vector<bool>& usetrack,
52  vector<float>& next_best_chi2) {
53  vector<hitTriplet> trips;
54  for (unsigned int i = 0; i < input.size(); ++i) {
55  for (unsigned int h1 = 0; h1 < input[i].hits.size(); ++h1) {
56  for (unsigned int h2 = (h1 + 1); h2 < input[i].hits.size(); ++h2) {
57  for (unsigned int h3 = (h2 + 1); h3 < input[i].hits.size(); ++h3) {
58  if (cut_on_dca == false) {
59  trips.push_back(
60  hitTriplet(input[i].hits[h1].get_id(), input[i].hits[h2].get_id(),
61  input[i].hits[h3].get_id(), i, track_states[i].chi2));
62  }
63  }
64  }
65  }
66  }
67  if (trips.size() == 0) {
68  return;
69  }
70  sort(trips.begin(), trips.end());
71  unsigned int pos = 0;
72  // unsigned int cur_h1 = trips[pos].hit1;
73  // unsigned int cur_h2 = trips[pos].hit2;
74  while (pos < trips.size()) {
75  unsigned int next_pos = pos + 1;
76  if (next_pos >= trips.size()) {
77  break;
78  }
79  while (trips[pos] == trips[next_pos]) {
80  next_pos += 1;
81  if (next_pos >= trips.size()) {
82  break;
83  }
84  }
85  if ((next_pos - pos) > 1) {
86  float best_chi2 = trips[pos].chi2;
87  float next_chi2 = trips[pos + 1].chi2;
88  unsigned int best_pos = pos;
89  for (unsigned int i = (pos + 1); i < next_pos; ++i) {
90  if (input[trips[i].track].hits.size() <
91  input[trips[best_pos].track].hits.size()) {
92  continue;
93  } else if ((input[trips[i].track].hits.size() >
94  input[trips[best_pos].track].hits.size()) ||
95  (input[trips[i].track].hits.back().get_layer() >
96  input[trips[best_pos].track].hits.back().get_layer())) {
97  next_chi2 = best_chi2;
98  best_chi2 = trips[i].chi2;
99  best_pos = i;
100  continue;
101  }
102  if ((trips[i].chi2 < best_chi2) ||
103  (usetrack[trips[best_pos].track] == false)) {
104  next_chi2 = best_chi2;
105  best_chi2 = trips[i].chi2;
106  best_pos = i;
107  } else if (trips[i].chi2 < next_chi2) {
108  next_chi2 = trips[i].chi2;
109  }
110  }
111  for (unsigned int i = pos; i < next_pos; ++i) {
112  if (i != best_pos) {
113  usetrack[trips[i].track] = false;
114  } else {
115  next_best_chi2[trips[i].track] = next_chi2;
116  }
117  }
118  }
119  pos = next_pos;
120  // cur_h1 = trips[pos].hit1;
121  // cur_h2 = trips[pos].hit2;
122  }
123 }
124 
125 sPHENIXTrackerTpc::sPHENIXTrackerTpc(unsigned int n_phi, unsigned int n_d,
126  unsigned int n_k, unsigned int n_dzdl,
127  unsigned int n_z0,
128  HelixResolution& min_resolution,
129  HelixResolution& max_resolution,
130  HelixRange& range, vector<float>& material,
131  vector<float>& radius, float Bfield)
132  : HelixHough(n_phi, n_d, n_k, n_dzdl, n_z0, min_resolution, max_resolution,
133  range),
134  fast_chi2_cut_par0(12.),
135  fast_chi2_cut_par1(0.),
136  fast_chi2_cut_max(FLT_MAX),
137  chi2_cut(3.),
138  chi2_removal_cut(1.),
139  n_removal_hits(0),
140  seeding(false),
141  verbosity(0),
142  cut_on_dca(false),
143  dca_cut(0.01),
144  vertex_x(0.),
145  vertex_y(0.),
146  vertex_z(0.),
147  required_layers(0),
148  reject_ghosts(false),
149  nfits(0),
150  findtracksiter(0),
151  prev_max_k(0.),
152  prev_max_dzdl(0.),
153  prev_p_inv(0.),
154  seed_layer(0),
155  ca_chi2_cut(2.0),
156  cosang_cut(0.985),
157  require_pixels(false) {
158  vector<float> detector_material;
159 
160  for (unsigned int i = 0; i < radius.size(); ++i) {
161  detector_radii.push_back(radius[i]);
162  }
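// Per-layer multiple-scattering angle, following the Highland formula
// theta0 ~ 0.0136/p * sqrt(x/X0); the momentum factor is applied later in
// phiError()/dzdlError(), and the sqrt(2) and factor-of-3 scalings of the
// material budget look like empirical tuning constants.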
163  for (unsigned int i = 0; i < material.size(); ++i) {
164  detector_scatter.push_back(1.41421356237309515 * 0.0136 *
165  sqrt(3. * material[i]));
166  detector_material.push_back(3. * material[i]);
167  }
168 
169  detector_B_field = Bfield;
170 
171  integrated_scatter.assign(detector_scatter.size(), 0.);
172  float total_scatter_2 = 0.;
173  for (unsigned int l = 0; l < detector_scatter.size(); ++l) {
174  total_scatter_2 += detector_scatter[l] * detector_scatter[l];
175  integrated_scatter[l] = sqrt(total_scatter_2);
176  }
177 
178  kalman =
179  new CylinderKalman(detector_radii, detector_material, detector_B_field);
180 
181  vector<SimpleHit3D> one_layer;
182  layer_sorted.assign(n_layers, one_layer);
183  for (unsigned int i = 0; i < 4; ++i) {
184  layer_sorted_1[i].assign(n_layers, one_layer);
185  }
186  temp_comb.assign(n_layers, 0);
187 }
188 
189 sPHENIXTrackerTpc::sPHENIXTrackerTpc(
190  vector<vector<unsigned int> >& zoom_profile, unsigned int minzoom,
191  HelixRange& range, vector<float>& material, vector<float>& radius,
192  float Bfield, bool parallel, unsigned int num_threads)
193  : HelixHough(zoom_profile, minzoom, range),
194  fast_chi2_cut_par0(12.),
195  fast_chi2_cut_par1(0.),
196  fast_chi2_cut_max(FLT_MAX),
197  chi2_cut(3.),
198  chi2_removal_cut(1.),
199  n_removal_hits(0),
200  seeding(false),
201  verbosity(0),
202  cut_on_dca(false),
203  dca_cut(0.01),
204  vertex_x(0.),
205  vertex_y(0.),
206  vertex_z(0.),
207  required_layers(0),
208  reject_ghosts(false),
209  nfits(0),
210  findtracksiter(0),
211  prev_max_k(0.),
212  prev_max_dzdl(0.),
213  prev_p_inv(0.),
214  seed_layer(0),
215  nthreads(num_threads),
216  vssp(NULL),
217  pins(NULL),
218  is_parallel(parallel),
219  is_thread(false),
220  ca_chi2_cut(2.0),
221  cosang_cut(0.985),
222  require_pixels(false){
223  vector<float> detector_material;
224 
225  for (unsigned int i = 0; i < radius.size(); ++i) {
226  detector_radii.push_back(radius[i]);
227  }
228  for (unsigned int i = 0; i < material.size(); ++i) {
229  detector_scatter.push_back(1.41421356237309515 * 0.0136 *
230  sqrt(3. * material[i]));
231  detector_material.push_back(3. * material[i]);
232  }
233 
234  detector_B_field = Bfield;
235 
236  integrated_scatter.assign(detector_scatter.size(), 0.);
237  float total_scatter_2 = 0.;
238  for (unsigned int l = 0; l < detector_scatter.size(); ++l) {
239  total_scatter_2 += detector_scatter[l] * detector_scatter[l];
240  integrated_scatter[l] = sqrt(total_scatter_2);
241  }
242  kalman =
243  new CylinderKalman(detector_radii, detector_material, detector_B_field);
244 
245  vector<SimpleHit3D> one_layer;
246  layer_sorted.assign(n_layers, one_layer);
247  for (unsigned int i = 0; i < 4; ++i) {
248  layer_sorted_1[i].assign(n_layers, one_layer);
249  }
250  temp_comb.assign(n_layers, 0);
251 
252  if (is_parallel == true) {
253  Seamstress::init_vector(num_threads, vss);
254 
255  vssp = new vector<Seamstress*>();
256  for (unsigned int i = 0; i < vss.size(); i++) {
257  vssp->push_back(&(vss[i]));
258  }
259 
260  pins = new Pincushion<sPHENIXTrackerTpc>(this, vssp);
261 
262  vector<vector<unsigned int> > zoom_profile_new;
263  for (unsigned int i = 1; i < zoom_profile.size(); ++i) {
264  zoom_profile_new.push_back(zoom_profile[i]);
265  }
266 
267  for (unsigned int i = 0; i < nthreads; ++i) {
268  thread_trackers.push_back(new sPHENIXTrackerTpc(
269  zoom_profile, minzoom, range, material, radius, Bfield));
270  thread_trackers.back()->setThread();
271  thread_trackers.back()->setStartZoom(1);
272  thread_tracks.push_back(vector<SimpleTrack3D>());
273  thread_ranges.push_back(HelixRange());
274  thread_hits.push_back(vector<SimpleHit3D>());
275  split_output_hits.push_back(new vector<vector<SimpleHit3D> >());
276  split_ranges.push_back(new vector<HelixRange>());
277  split_input_hits.push_back(vector<SimpleHit3D>());
278  }
279  }
280 }
281 
282 sPHENIXTrackerTpc::~sPHENIXTrackerTpc() {
283  if (kalman != NULL) delete kalman;
284  for (unsigned int i = 0; i < vss.size(); i++) {
285  vss[i].stop();
286  }
287  for (unsigned int i = 0; i < thread_trackers.size(); ++i) {
288  delete thread_trackers[i];
289  delete split_output_hits[i];
290  delete split_ranges[i];
291  }
292 
293  if (pins != NULL) delete pins;
294  if (vssp != NULL) delete vssp;
295 }
296 
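// Curvature <-> transverse momentum: pT [GeV/c] = 0.3 * B [T] * R [m]
// = B / (333.6 * kappa), assuming B in tesla and kappa = 1/R in cm^-1.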
297 float sPHENIXTrackerTpc::kappaToPt(float kappa) {
298  return detector_B_field / 333.6 / kappa;
299 }
300 
301 float sPHENIXTrackerTpc::ptToKappa(float pt) {
302  return detector_B_field / 333.6 / pt;
303 }
304 
305 
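// finalize(): apply ghost rejection, drop candidates whose chi2 is NaN, and, if
// smooth_back is set, refit each surviving track, re-run the Kalman filter from
// the outermost hit inward with inflated covariance, and refit with a beam-line
// point at the origin when |d| < 0.01 cm, before copying the smoothed helix
// parameters back into the output tracks.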
306 void sPHENIXTrackerTpc::finalize(vector<SimpleTrack3D>& input,
307  vector<SimpleTrack3D>& output) {
308 
309  if (is_thread == true) {
310  for (unsigned int i = 0; i < input.size(); ++i) {
311  output.push_back(input[i]);
312  }
313  return;
314  }
315 
316  unsigned int nt = input.size();
317  vector<bool> usetrack;
318  usetrack.assign(input.size(), true);
319  vector<float> next_best_chi2;
320  next_best_chi2.assign(input.size(), 99999.);
321 
322  if (reject_ghosts == true) {
323  tripletRejection(input, output, usetrack, next_best_chi2);
324  }
325 
326  vector<HelixKalmanState> states_new;
327 
328  for (unsigned int i = 0; i < nt; ++i) {
329  if (usetrack[i] == true) {
330  if (!(track_states[i].chi2 == track_states[i].chi2)) {
331  continue;
332  }
333 
334  output.push_back(input[i]);
335  output.back().index = (output.size() - 1);
336  states_new.push_back(track_states[i]);
337  isolation_variable.push_back(next_best_chi2[i]);
338  }
339  }
340 
341  track_states = states_new;
342  if (smooth_back == true) {
343  for (unsigned int i = 0; i < output.size(); ++i) {
344 
345  SimpleTrack3D temp_track = output[i];
346  vector<SimpleHit3D> temp_hits;
347  vector<float> chi2_hit;
348  fitTrack(temp_track, chi2_hit);
349  for (unsigned int i = 0; i < chi2_hit.size(); ++i) {
350  if (chi2_hit[i] < 10. || temp_track.hits[i].get_layer() < 2) {
351  temp_hits.push_back(temp_track.hits[i]);
352  }
353  }
354 
355  temp_track.hits = temp_hits;
356 
357  fitTrack(temp_track, chi2_hit);
358 
359  if (temp_track.kappa == temp_track.kappa) {
360  track_states[i].kappa = temp_track.kappa;
361  track_states[i].nu = sqrt(temp_track.kappa);
362  }
363 
364  HelixKalmanState state = track_states[i];
365 
366  track_states[i].C *= 10.;
367 
368  track_states[i].chi2 = 0.;
369  track_states[i].x_int = 0.;
370  track_states[i].y_int = 0.;
371  track_states[i].z_int = 0.;
372  track_states[i].position = output[i].hits.size();
373  for (int h = (temp_track.hits.size() - 1); h >= 0; --h) {
374  SimpleHit3D hit = temp_track.hits[h];
375  kalman->addHit(hit, track_states[i]);
376  }
377 
378  if (fabs(track_states[i].d) < 0.01) {
379  SimpleHit3D vertex_hit;
380  vertex_hit.set_x(0.0);
381  vertex_hit.set_y(0.0);
382  vertex_hit.set_z(0.0);
383  //vertex_hit.set_ex(0.0001);
384  //vertex_hit.set_ey(0.0001);
385  //vertex_hit.set_ez(0.0001);
386  temp_track.hits.push_back(vertex_hit);
387 
388  fitTrack(temp_track, chi2_hit);
389 
390  temp_track.hits.pop_back();
391  }
392 
393  if (temp_track.kappa == temp_track.kappa) {
394  track_states[i].kappa = temp_track.kappa;
395  track_states[i].nu = sqrt(temp_track.kappa);
396  }
397 
398  if (!(track_states[i].kappa == track_states[i].kappa)) {
399  track_states[i] = state;
400  if (temp_track.kappa == temp_track.kappa) {
401  track_states[i].kappa = temp_track.kappa;
402  track_states[i].nu = sqrt(temp_track.kappa);
403  }
404  }
405 
406  if (output[i].phi < 0.) {
407  output[i].phi += 2. * M_PI;
408  }
409  output[i].phi = track_states[i].phi;
410  output[i].d = track_states[i].d;
411  output[i].kappa = track_states[i].kappa;
412  output[i].z0 = track_states[i].z0;
413  output[i].dzdl = track_states[i].dzdl;
414  }
415 
416  }
417 
418  if (verbosity > 0) {
419  cout << "# fits = " << nfits << endl;
420  cout << "findTracks called " << findtracksiter << " times" << endl;
421  cout << "CAtime = " << CAtime << endl;
422  cout << "KALtime = " << KALtime << endl;
423  }
424 }
425 
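// breakRecursion(): abort a Hough zoom branch when its hits cannot form an
// acceptable track -- they span fewer distinct layers than required_layers, or
// (with require_pixels) miss either of the two innermost layers.  A 128-bit
// layer mask plus popcount counts the distinct layers.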
426 bool sPHENIXTrackerTpc::breakRecursion(const vector<SimpleHit3D>& hits,
427  const HelixRange& /*range*/) {
428  if (seeding == true) {
429  return false;
430  }
431  unsigned int layer_mask[4] = {0, 0, 0, 0};
432  for (unsigned int i = 0; i < hits.size(); ++i) {
433  if (hits[i].get_layer() < 32) {
434  layer_mask[0] = layer_mask[0] | (1 << hits[i].get_layer());
435  } else if (hits[i].get_layer() < 64) {
436  layer_mask[1] = layer_mask[1] | (1 << (hits[i].get_layer() - 32));
437  } else if (hits[i].get_layer() < 96) {
438  layer_mask[2] = layer_mask[2] | (1 << (hits[i].get_layer() - 64));
439  } else if (hits[i].get_layer() < 128) {
440  layer_mask[3] = layer_mask[3] | (1 << (hits[i].get_layer() - 96));
441  }
442  }
443  unsigned int nlayers =
444  __builtin_popcount(layer_mask[0]) + __builtin_popcount(layer_mask[1]) +
445  __builtin_popcount(layer_mask[2]) + __builtin_popcount(layer_mask[3]);
446 
447  if (require_pixels == true) {
448  if (((layer_mask[0] & 1) == 0) || ((layer_mask[0] & 2) == 0)) {
449  return true;
450  }
451  }
452 
453  return (nlayers < required_layers);
454 }
455 
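// phiError()/dzdlError(): multiple-scattering contribution to the Hough voting
// window at this hit's layer.  The momentum scale p_inv comes from the maximum
// curvature of the current bin, the scattering is integrated from seed_layer+1
// up to the hit's layer, and the geometric bin size in d (or z0) is subtracted
// unless pair voting is in use.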
456 float sPHENIXTrackerTpc::phiError(SimpleHit3D& hit, float /*min_k*/, float max_k,
457  float min_d, float max_d, float /*min_z0*/,
458  float /*max_z0*/, float /*min_dzdl*/, float max_dzdl,
459  bool pairvoting) {
460  float Bfield_inv = 1. / detector_B_field;
461  float p_inv = 0.;
462 
463  if ((prev_max_k == max_k) && (prev_max_dzdl == max_dzdl)) {
464  p_inv = prev_p_inv;
465  } else {
466  prev_max_k = max_k;
467  prev_max_dzdl = max_dzdl;
468  prev_p_inv = 3.33333333333333314e+02 * max_k * Bfield_inv *
469  sqrt(1. - max_dzdl * max_dzdl);
470  p_inv = prev_p_inv;
471  }
472  float total_scatter_2 = 0.;
473  for (int i = seed_layer + 1; i <= (hit.get_layer()); ++i) {
474  float this_scatter = detector_scatter[i - 1] *
475  (detector_radii[i] - detector_radii[i - 1]) /
476  detector_radii[i];
477  total_scatter_2 += this_scatter * this_scatter;
478  }
479  float angle = p_inv * sqrt(total_scatter_2) * 1.0;
480  float dsize = 0.5 * (max_d - min_d);
481  float angle_from_d = dsize / detector_radii[hit.get_layer()];
482  float returnval = 0.;
483  if (pairvoting == false) {
484  if (angle_from_d > angle) {
485  returnval = 0.;
486  } else {
487  returnval = (angle - angle_from_d);
488  }
489  } else {
490  returnval = angle;
491  }
492 
493  return returnval;
494 }
495 
496 float sPHENIXTrackerTpc::dzdlError(SimpleHit3D& hit, float /*min_k*/, float max_k,
497  float /*min_d*/, float /*max_d*/, float min_z0,
498  float max_z0, float /*min_dzdl*/, float max_dzdl,
499  bool pairvoting) {
500  float Bfield_inv = 1. / detector_B_field;
501  float p_inv = 0.;
502 
503  if ((prev_max_k == max_k) && (prev_max_dzdl == max_dzdl)) {
504  p_inv = prev_p_inv;
505  } else {
506  prev_max_k = max_k;
507  prev_max_dzdl = max_dzdl;
508  prev_p_inv = 3.33333333333333314e+02 * max_k * Bfield_inv *
509  sqrt(1. - max_dzdl * max_dzdl);
510  p_inv = prev_p_inv;
511  }
512  float total_scatter_2 = 0.;
513  for (int i = seed_layer + 1; i <= (hit.get_layer()); ++i) {
514  float this_scatter = detector_scatter[i - 1] *
515  (detector_radii[i] - detector_radii[i - 1]) /
516  detector_radii[i];
517  total_scatter_2 += this_scatter * this_scatter;
518  }
519  float angle = p_inv * sqrt(total_scatter_2) * 1.0;
520  float z0size = 0.5 * (max_z0 - min_z0);
521  float angle_from_z0 = z0size / detector_radii[hit.get_layer()];
522  float returnval = 0.;
523  if (pairvoting == false) {
524  if (angle_from_z0 > angle) {
525  returnval = 0.;
526  } else {
527  returnval = (angle - angle_from_z0);
528  }
529  } else {
530  returnval = angle;
531  }
532 
533  return returnval;
534 }
535 
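// Entry point called for each Hough bin: few-layer geometries use the
// segment-based (cellular automaton) finder, while TPC-like geometries with
// 10 or more layers use the combinatorial Kalman filter.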
536 void sPHENIXTrackerTpc::findTracks(vector<SimpleHit3D>& hits,
537  vector<SimpleTrack3D>& tracks,
538  const HelixRange& range) {
539  cout << "findTracks " << endl;
540  findtracksiter += 1;
541  if( n_layers < 10 ) {
542  findTracksBySegments(hits, tracks, range);
543  } else {
544  findTracksByCombinatorialKalman(hits, tracks, range);
545  }
546 }
547 
548 float sPHENIXTrackerTpc::fitTrack(SimpleTrack3D& track,
549  float scale) {
550  vector<float> chi2_hit;
551  return sPHENIXTrackerTpc::fitTrack(track, chi2_hit, scale);
552 }
553 
554 float sPHENIXTrackerTpc::fitTrack(SimpleTrack3D& track,
555  vector<float>& chi2_hit,
556  float scale) {
557 
558  chi2_hit.clear();
559  vector<float> xyres;
560  vector<float> xyres_inv;
561  vector<float> zres;
562  vector<float> zres_inv;
563  for (unsigned int i = 0; i < track.hits.size(); i++) {
564 
565  float ex = (2.0*sqrt(track.hits[i].get_size(0,0))) * scale;
566  float ey = (2.0*sqrt(track.hits[i].get_size(1,1))) * scale;
567  float ez = (2.0*sqrt(track.hits[i].get_size(2,2))) * scale;
568 
569  if (track.hits[i].get_layer() < 0) {
570  ex = 0.0001 * scale;
571  ey = 0.0001 * scale;
572  ez = 0.0001 * scale;
573  }
574 
575  xyres.push_back(sqrt( ex * ex + ey * ey ));
576  xyres_inv.push_back(1. / xyres.back());
577  zres.push_back( ez );
578  zres_inv.push_back(1. / zres.back());
579  }
580 
581  chi2_hit.resize(track.hits.size(), 0.);
582 
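// Algebraic circle fit in the transverse plane: x^2 + y^2 = 2*cx*x + 2*cy*y
// - (cx^2 + cy^2 - r^2) is linear in the parameters (2*cx, 2*cy, cx^2+cy^2-r^2),
// so the weighted normal equations are solved directly with an LDLT decomposition.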
583  MatrixXf y = MatrixXf::Zero(track.hits.size(), 1);
584  for (unsigned int i = 0; i < track.hits.size(); i++) {
585  y(i, 0) = (pow(track.hits[i].get_x(), 2) + pow(track.hits[i].get_y(), 2));
586  y(i, 0) *= xyres_inv[i];
587  }
588 
589  MatrixXf X = MatrixXf::Zero(track.hits.size(), 3);
590  for (unsigned int i = 0; i < track.hits.size(); i++) {
591  X(i, 0) = track.hits[i].get_x();
592  X(i, 1) = track.hits[i].get_y();
593  X(i, 2) = -1.;
594  X(i, 0) *= xyres_inv[i];
595  X(i, 1) *= xyres_inv[i];
596  X(i, 2) *= xyres_inv[i];
597  }
598 
599  MatrixXf Xt = X.transpose();
600 
601  MatrixXf prod = Xt * X;
602 
603  MatrixXf Xty = Xt * y;
604  MatrixXf beta = prod.ldlt().solve(Xty);
605 
606  float cx = beta(0, 0) * 0.5;
607  float cy = beta(1, 0) * 0.5;
608  float r = sqrt(cx * cx + cy * cy - beta(2, 0));
609 
610  float phi = atan2(cy, cx);
611  float d = sqrt(cx * cx + cy * cy) - r;
612  float k = 1. / r;
613 
614  MatrixXf diff = y - (X * beta);
615  MatrixXf chi2 = (diff.transpose()) * diff;
616 
617  float dx = d * cos(phi);
618  float dy = d * sin(phi);
619 
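// Longitudinal fit: the arc length s to each hit is computed from its chord
// length D (exact asin form when k*D is large, series expansion otherwise),
// then z is fit linearly as z = beta0*s + z0 and dzdl = beta0/sqrt(1+beta0^2).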
620  MatrixXf y2 = MatrixXf::Zero(track.hits.size(), 1);
621  for (unsigned int i = 0; i < track.hits.size(); i++) {
622  y2(i, 0) = track.hits[i].get_z();
623  y2(i, 0) *= zres_inv[i];
624  }
625 
626  MatrixXf X2 = MatrixXf::Zero(track.hits.size(), 2);
627  for (unsigned int i = 0; i < track.hits.size(); i++) {
628  float D = sqrt(pow(dx - track.hits[i].get_x(), 2) + pow(dy - track.hits[i].get_y(), 2));
629  float s = 0.0;
630 
631  if (0.5 * k * D > 0.1) {
632  float v = 0.5 * k * D;
633  if (v >= 0.999999) {
634  v = 0.999999;
635  }
636  s = 2. * asin(v) / k;
637  } else {
638  float temp1 = k * D * 0.5;
639  temp1 *= temp1;
640  float temp2 = D * 0.5;
641  s += 2. * temp2;
642  temp2 *= temp1;
643  s += temp2 / 3.;
644  temp2 *= temp1;
645  s += (3. / 20.) * temp2;
646  temp2 *= temp1;
647  s += (5. / 56.) * temp2;
648  }
649 
650  X2(i, 0) = s;
651  X2(i, 1) = 1.0;
652 
653  X2(i, 0) *= zres_inv[i];
654  X2(i, 1) *= zres_inv[i];
655  }
656 
657  MatrixXf Xt2 = X2.transpose();
658  MatrixXf prod2 = Xt2 * X2;
659 
660  MatrixXf Xty2 = Xt2 * y2;
661  MatrixXf beta2 = prod2.ldlt().solve(Xty2);
662 
663  MatrixXf diff2 = y2 - (X2 * beta2);
664  MatrixXf chi2_z = (diff2.transpose()) * diff2;
665 
666  float z0 = beta2(1, 0);
667  float dzdl = beta2(0, 0) / sqrt(1. + beta2(0, 0) * beta2(0, 0));
668 
669  track.phi = phi;
670  track.d = d;
671  track.kappa = k;
672  track.dzdl = dzdl;
673  track.z0 = z0;
674 
675  if (track.kappa != 0.) {
676  r = 1. / track.kappa;
677  } else {
678  r = 1.0e10;
679  }
680 
681  cx = (track.d + r) * cos(track.phi);
682  cy = (track.d + r) * sin(track.phi);
683 
684  float chi2_tot = 0.;
685  for (unsigned int h = 0; h < track.hits.size(); h++) {
686  float dx1 = track.hits[h].get_x() - cx;
687  float dy1 = track.hits[h].get_y() - cy;
688 
689  float dx2 = track.hits[h].get_x() + cx;
690  float dy2 = track.hits[h].get_y() + cy;
691 
692  float xydiff1 = sqrt(dx1 * dx1 + dy1 * dy1) - r;
693  float xydiff2 = sqrt(dx2 * dx2 + dy2 * dy2) - r;
694  float xydiff = xydiff2;
695  if (fabs(xydiff1) < fabs(xydiff2)) {
696  xydiff = xydiff1;
697  }
698 
699  float ls_xy = xyres[h];
700 
701  chi2_hit[h] = 0.;
702  chi2_hit[h] += xydiff * xydiff / (ls_xy * ls_xy);
703  chi2_hit[h] += diff2(h, 0) * diff2(h, 0);
704 
705  chi2_tot += chi2_hit[h];
706  }
707 
708  unsigned int deg_of_freedom = 2 * track.hits.size() - 5;
709 
710  return (chi2_tot) / ((float)(deg_of_freedom));
711 }
712 
713 void sPHENIXTrackerTpc::initSplitting(vector<SimpleHit3D>& hits,
714  unsigned int min_hits,
715  unsigned int /*max_hits*/) {
716  initEvent(hits, min_hits);
717  (*(hits_vec[0])) = hits;
718  zoomranges.clear();
719  for (unsigned int z = 0; z <= max_zoom; z++) {
720  zoomranges.push_back(top_range);
721  }
722 }
723 
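// Helper for the combinatorial Kalman path: repeatedly refit the track and strip
// hits whose per-hit chi2 exceeds the cut until all remaining hits pass; gives
// up if fewer than 3 hits survive.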
724 static bool remove_bad_hits(SimpleTrack3D& track, float cut, float scale = 1.0) {
725  SimpleTrack3D temp_track = track;
726  vector<float> chi2_hit;
727  vector<float> temp_hits;
728  while (true) {
729  temp_track = track;
730  sPHENIXTrackerTpc::fitTrack(temp_track, chi2_hit, scale);
731  bool all_good = true;
732  track.hits.clear();
733  for (unsigned int h = 0; h < temp_track.hits.size(); h += 1) {
734  if (chi2_hit[h] < cut) {
735  track.hits.push_back(temp_track.hits[h]);
736  } else {
737  all_good = false;
738  }
739  }
740  if (track.hits.size() < 3) {
741  return false;
742  }
743  if (all_good == true) {
744  return true;
745  }
746  }
747 }
748 
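// One annealing step of the all-hit fit: project the current helix through every
// candidate hit, keep at most one hit per layer (the one with the best combined
// r-phi/z chi2, provided it is below 5), and require at least 8 surviving hits.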
749 static bool fit_all_update(vector<vector<int> >& layer_indexes,
750  SimpleTrack3D& temp_track, vector<int>& best_ind,
751  vector<float>& best_chi2, SimpleTrack3D& track,
752  float& /*chi2*/, int iter = 0,
753  float tempscale = 1.0, float scale = 1.0) {
754  if (iter != 0) {
755  if (remove_bad_hits(track, 5., scale) == false) {
756  return false;
757  }
758  sPHENIXTrackerTpc::fitTrack(track, scale);
759  }
760 
761  vector<float> chi2_hit;
762  float r = 1. / track.kappa;
763  float cx = (track.d + r) * cos(track.phi);
764  float cy = (track.d + r) * sin(track.phi);
765 
766  float phi = atan2(cy, cx);
767  float d = sqrt(cx * cx + cy * cy) - r;
768  float dx = d * cos(phi);
769  float dy = d * sin(phi);
770 
771  float k = track.kappa;
772 
773  best_ind.assign(layer_indexes.size(), -1);
774  best_chi2.assign(layer_indexes.size(), -1.);
775  for (unsigned int i = 0; i < temp_track.hits.size(); i += 1) {
776  float dx1 = temp_track.hits[i].get_x() - cx;
777  float dy1 = temp_track.hits[i].get_y() - cy;
778  float dx2 = temp_track.hits[i].get_x() + cx;
779  float dy2 = temp_track.hits[i].get_y() + cy;
780  float xydiff1 = sqrt(dx1 * dx1 + dy1 * dy1) - r;
781  float xydiff2 = sqrt(dx2 * dx2 + dy2 * dy2) - r;
782  float xydiff = xydiff2;
783  if (fabs(xydiff1) < fabs(xydiff2)) {
784  xydiff = xydiff1;
785  }
786 
787  float tex = (2.0*sqrt(temp_track.hits[i].get_size(0,0))) * tempscale;
788  float tey = (2.0*sqrt(temp_track.hits[i].get_size(1,1))) * tempscale;
789  float tez = (2.0*sqrt(temp_track.hits[i].get_size(2,2))) * tempscale;
790 
791  float dr2 = tex * tex + tey * tey;
792  float chi2_xy = xydiff * xydiff / dr2;
793 
794  float D = sqrt(pow(dx - temp_track.hits[i].get_x(), 2) +
795  pow(dy - temp_track.hits[i].get_y(), 2));
796  float s = 0.0;
797 
798  if (0.5 * k * D > 0.1) {
799  float v = 0.5 * k * D;
800  if (v >= 0.999999) {
801  v = 0.999999;
802  }
803  s = 2. * asin(v) / k;
804  } else {
805  float temp1 = k * D * 0.5;
806  temp1 *= temp1;
807  float temp2 = D * 0.5;
808  s += 2. * temp2;
809  temp2 *= temp1;
810  s += temp2 / 3.;
811  temp2 *= temp1;
812  s += (3. / 20.) * temp2;
813  temp2 *= temp1;
814  s += (5. / 56.) * temp2;
815  }
816 
817  float x2beta =
818  s * track.dzdl / (sqrt(1. - track.dzdl * track.dzdl)) + track.z0;
819  float diff = temp_track.hits[i].get_z() - x2beta;
820 
821  float chi2_z =
822  diff * diff / (tez * tez);
823 
824  chi2_xy += chi2_z;
825 
826  if (chi2_xy > 5.0) {
827  continue;
828  }
829 
830  if ((best_chi2[temp_track.hits[i].get_layer()] < -0.) ||
831  (best_chi2[temp_track.hits[i].get_layer()] > chi2_xy)) {
832  best_chi2[temp_track.hits[i].get_layer()] = chi2_xy;
833  best_ind[temp_track.hits[i].get_layer()] = i;
834  }
835  }
836 
837  track.hits.clear();
838  for (unsigned int i = 0; i < layer_indexes.size(); i += 1) {
839  if (best_ind[i] < 0) {
840  continue;
841  }
842  track.hits.push_back(temp_track.hits[best_ind[i]]);
843  }
844  if (track.hits.size() < 8) {
845  return false;
846  }
847  return true;
848 }
849 
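// Fit a single helix to essentially all hits in the bin: seed from layers beyond
// the innermost three that contain exactly one real hit, then run
// fit_all_update() 20 times while the hit-error inflation is annealed from a
// factor of 32 down to 1.  Success requires a final chi2 below 10 and hits on
// more than half of the layers.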
850 static bool fit_all(vector<SimpleHit3D>& hits,
851  vector<vector<int> >& layer_indexes, SimpleTrack3D& track,
852  float& chi2) {
853  float scale1 = 32.;
854  float scale2 = sqrt(sqrt(0.5));
855 
856  SimpleTrack3D temp_track;
857  vector<float> chi2_hit;
858  for (unsigned int i = 0; i < hits.size(); i += 1) {
859  if (hits[i].get_layer() < 3) {
860  continue;
861  }
862  temp_track.hits.push_back(hits[i]);
863  }
864 
865  vector<int> best_ind;
866  vector<float> best_chi2;
867  track.hits.clear();
868  for (unsigned int i = 0; i < hits.size(); i += 1) {
869  if (hits[i].get_layer() < 3) {
870  continue;
871  }
872  if (layer_indexes[hits[i].get_layer()].size() != 2) {
873  continue;
874  }
875  track.hits.push_back(hits[i]);
876  }
877  sPHENIXTrackerTpc::fitTrack(track, chi2_hit, scale1);
878 
879  float tempscale = scale1;
880  for (int i = 0; i < 20; ++i) {
881  if (fit_all_update(layer_indexes, temp_track, best_ind, best_chi2, track,
882  chi2, i, tempscale, tempscale) == false) {
883  return false;
884  }
885  tempscale *= scale2;
886 
887  sPHENIXTrackerTpc::fitTrack(track, chi2_hit, tempscale);
888  }
889 
890  if (fit_all_update(layer_indexes, temp_track, best_ind, best_chi2, track,
891  chi2, 1.0, 1.0) == false) {
892  return false;
893  }
894 
895  chi2 = sPHENIXTrackerTpc::fitTrack(track, chi2_hit, 1.0);//scale1); // maybe this should be one
896  if ((chi2 < 10.0) && (track.hits.size() > ((layer_indexes.size() * 1) / 2))) {
897  return true;
898  }
899  return false;
900 }
901 
902 void sPHENIXTrackerTpc::findTracksByCombinatorialKalman(
903  vector<SimpleHit3D>& hits, vector<SimpleTrack3D>& tracks,
904  const HelixRange& /*range*/) {
905 
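// TPC strategy: fit one helix through the whole bin with fit_all(), run the
// Kalman filter over its hits from the outside in (two passes with different
// chi2-increment cuts) to select a clean hit set, then try every combination of
// hits in the three innermost layers and attach the best one.  The candidate is
// kept only if the total chi2 passes chi2_cut per degree of freedom, more than
// half of the layers are used, and at least one inner hit was attached.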
906  timeval t1, t2;
907  double time1 = 0.;
908  double time2 = 0.;
909 
910  gettimeofday(&t1, NULL);
911 // float CHI2_CUT = chi2_cut + 2.;
912 
913  vector<vector<int> > layer_indexes;
914  layer_indexes.assign(n_layers, vector<int>());
915  for (unsigned int i = 0; i < hits.size(); ++i) {
916  layer_indexes[hits[i].get_layer()].push_back(i);
917  }
918  for (int i = 0; i < (int)n_layers; ++i) {
919  layer_indexes[i].push_back(-(i + 1));
920  }
921 
922  SimpleTrack3D fit_all_track;
923  float fit_all_chi2 = -1.;
924  bool fit_all_success =
925  fit_all(hits, layer_indexes, fit_all_track, fit_all_chi2);
926  if (fit_all_success == true) {
927  HelixKalmanState state;
928  state.phi = fit_all_track.phi;
929  if (state.phi < 0.) {
930  state.phi += 2. * M_PI;
931  }
932  state.d = fit_all_track.d;
933  state.kappa = fit_all_track.kappa;
934  state.nu = sqrt(state.kappa);
935  state.z0 = fit_all_track.z0;
936  state.dzdl = fit_all_track.dzdl;
937  state.C = Matrix<float, 5, 5>::Zero(5, 5);
938  state.C(0, 0) = pow(0.01, 2.);
939  state.C(1, 1) = pow(0.01, 2.);
940  state.C(2, 2) = pow(0.01 * state.nu, 2.);
941  state.C(3, 3) = pow(0.01, 2.);
942  state.C(4, 4) = pow(0.01, 2.);
943  state.chi2 = 0.;
944  state.position = n_layers;
945  state.x_int = 0.;
946  state.y_int = 0.;
947  state.z_int = 0.;
948 
949  state.C *= 3.;
950 
951  SimpleTrack3D temp_track;
952  vector<SimpleHit3D> temp_hits;
953 
954  for (int h = ((int)(fit_all_track.hits.size() - 1)); h >= 0; h -= 1) {
955  HelixKalmanState temp_state(state);
956  kalman->addHit(fit_all_track.hits[h], temp_state);
957  if ((temp_state.chi2 - state.chi2) < 4.) {
958  state = temp_state;
959  temp_hits.push_back(fit_all_track.hits[h]);
960  }
961  }
962 
963  state.chi2 = 0.;
964  state.position = n_layers;
965  state.x_int = 0.;
966  state.y_int = 0.;
967  state.z_int = 0.;
968 
969  state.C *= 3.;
970  temp_hits.clear();
971  for (int h = ((int)(fit_all_track.hits.size() - 1)); h >= 0; h -= 1) {
972  HelixKalmanState temp_state(state);
973  kalman->addHit(fit_all_track.hits[h], temp_state);
974  if ((temp_state.chi2 - state.chi2) < 10.) {
975  state = temp_state;
976  temp_hits.push_back(fit_all_track.hits[h]);
977  }
978  }
979  for (int h = ((int)(temp_hits.size() - 1)); h >= 0; h -= 1) {
980  temp_track.hits.push_back(temp_hits[h]);
981  }
982 
983  state.C *= 10.;
984 
985  vector<vector<int> > inner_combos;
986  for (int l = 0; l < 2; ++l) {
987  for (unsigned int i0 = 0; i0 < layer_indexes[0].size(); ++i0) {
988  for (unsigned int i1 = 0; i1 < layer_indexes[1].size(); ++i1) {
989  for (unsigned int i2 = 0; i2 < layer_indexes[2].size(); ++i2) {
990  inner_combos.push_back(vector<int>(3, 0));
991  inner_combos.back()[0] = layer_indexes[0][i0];
992  inner_combos.back()[1] = layer_indexes[1][i1];
993  inner_combos.back()[2] = layer_indexes[2][i2];
994  }
995  }
996  }
997  }
998 
999  float best_chi2_p[4] = {-1., -1., -1., -1.};
1000  int best_ind_p[4] = {-1, -1, -1, -1};
1001 
1002  for (unsigned int c = 0; c < inner_combos.size(); ++c) {
1003  HelixKalmanState temp_state(state);
1004  int n_p = 0;
1005  for (int l = 2; l >= 0; l -= 1) {
1006  if (inner_combos[c][l] >= 0) {
1007  n_p += 1;
1008  kalman->addHit(hits[inner_combos[c][l]], temp_state);
1009  }
1010  }
1011  float chi2 = temp_state.chi2 - state.chi2;
1012  if ((best_chi2_p[n_p] == -1) || (chi2 < best_chi2_p[n_p])) {
1013  best_chi2_p[n_p] = chi2;
1014  best_ind_p[n_p] = c;
1015  }
1016  }
1017 
1018  int best_np = 0;
1019  for (int n = 3; n >= 0; n -= 1) {
1020  if ((best_chi2_p[n] > 0.) && (best_chi2_p[n] < 25.)) {
1021  best_np = n;
1022  break;
1023  }
1024  }
1025 
1026  vector<SimpleHit3D> inner_hits;
1027  if (best_np > 0) {
1028  for (int l = 2; l >= 0; l -= 1) {
1029  int c = best_ind_p[best_np];
1030  if (inner_combos[c][l] >= 0) {
1031  inner_hits.push_back(hits[inner_combos[c][l]]);
1032  kalman->addHit(hits[inner_combos[c][l]], state);
1033  }
1034  }
1035  }
1036 
1037  vector<SimpleHit3D> new_hits;
1038  for (int i = (((int)(inner_hits.size())) - 1); i >= 0; i -= 1) {
1039  new_hits.push_back(inner_hits[i]);
1040  }
1041  for (unsigned int i = 0; i < temp_track.hits.size(); ++i) {
1042  new_hits.push_back(temp_track.hits[i]);
1043  }
1044  temp_track.hits = new_hits;
1045 
1046  if ((state.chi2 < chi2_cut * (2. * (temp_track.hits.size()) - 5.)) &&
1047  (temp_track.hits.size() > (n_layers / 2)) && best_np > 0) {
1048  tracks.push_back(temp_track);
1049  track_states.push_back(state);
1050  if (remove_hits == true) {
1051  for (unsigned int i = 0; i < tracks.back().hits.size(); ++i) {
1052  (*hit_used)[tracks.back().hits[i].get_id()] = true;
1053  }
1054  }
1055  gettimeofday(&t2, NULL);
1056  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
1057  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
1058  CAtime += (time2 - time1);
1059  return;
1060  } else {
1061 
1062  gettimeofday(&t2, NULL);
1063  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
1064  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
1065  CAtime += (time2 - time1);
1066  return;
1067  }
1068  } else {
1069 
1070  gettimeofday(&t2, NULL);
1071  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
1072  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
1073  CAtime += (time2 - time1);
1074  return;
1075  }
1076 }
1077 
1078 void sPHENIXTrackerTpc::findTracksBySegments(vector<SimpleHit3D>& hits,
1079  vector<SimpleTrack3D>& tracks,
1080  const HelixRange& /*range*/) {
1081 
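// Cellular-automaton style finder for few-layer geometries: build 3-hit seed
// segments from the first three logical layers, extend them layer by layer while
// cutting on curvature, bending-angle and dzdl consistency
// (calculateKappaTangents), then turn completed segments into tracks, skip
// duplicate hit combinations via the combos set, and accept each candidate only
// after a helix fit and a Kalman filter pass the chi2 and DCA cuts.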
1082  vector<TrackSegment>* cur_seg = &segments1;
1083  vector<TrackSegment>* next_seg = &segments2;
1084  unsigned int curseg_size = 0;
1085  unsigned int nextseg_size = 0;
1086 
1087  vector<TrackSegment> complete_segments;
1088 
1089  unsigned int allowed_missing = n_layers - req_layers;
1090  for (unsigned int l = 0; l < n_layers; ++l) {
1091  layer_sorted[l].clear();
1092  }
1093  for (unsigned int i = 0; i < hits.size(); ++i) {
1094  unsigned int min = (hits[i].get_layer() - allowed_missing);
1095  if (allowed_missing > (unsigned int) hits[i].get_layer()) {
1096  min = 0;
1097  }
1098  for (int l = min; l <= hits[i].get_layer(); l += 1) {
1099  layer_sorted[l].push_back(hits[i]);
1100  }
1101  }
1102  for (unsigned int l = 0; l < n_layers; ++l) {
1103  if (layer_sorted[l].size() == 0) {
1104  return;
1105  }
1106  }
1107 
1108  timeval t1, t2;
1109  double time1 = 0.;
1110  double time2 = 0.;
1111 
1112  gettimeofday(&t1, NULL);
1113 
1114  float cosang_diff = 1. - cosang_cut;
1115  float cosang_diff_inv = 1. / cosang_diff;
1116  float sinang_cut = sqrt(1. - cosang_cut * cosang_cut);
1117  float easy_chi2_cut = ca_chi2_cut;
1118 
1119  vector<float> inv_layer;
1120  inv_layer.assign(n_layers, 1.);
1121  for (unsigned int l = 3; l < n_layers; ++l) {
1122  inv_layer[l] = 1. / (((float)l) - 2.);
1123  }
1124 
1125  unsigned int hit_counter = 0;
1126  float x1_a[4] __attribute__((aligned(16)));
1127  float x2_a[4] __attribute__((aligned(16)));
1128  float x3_a[4] __attribute__((aligned(16)));
1129  float y1_a[4] __attribute__((aligned(16)));
1130  float y2_a[4] __attribute__((aligned(16)));
1131  float y3_a[4] __attribute__((aligned(16)));
1132  float z1_a[4] __attribute__((aligned(16)));
1133  float z2_a[4] __attribute__((aligned(16)));
1134  float z3_a[4] __attribute__((aligned(16)));
1135  float dx1_a[4] __attribute__((aligned(16)));
1136  float dx2_a[4] __attribute__((aligned(16)));
1137  float dx3_a[4] __attribute__((aligned(16)));
1138  float dy1_a[4] __attribute__((aligned(16)));
1139  float dy2_a[4] __attribute__((aligned(16)));
1140  float dy3_a[4] __attribute__((aligned(16)));
1141  float dz1_a[4] __attribute__((aligned(16)));
1142  float dz2_a[4] __attribute__((aligned(16)));
1143  float dz3_a[4] __attribute__((aligned(16)));
1144 
1145  float kappa_a[4] __attribute__((aligned(16)));
1146  float dkappa_a[4] __attribute__((aligned(16)));
1147 
1148  float ux_mid_a[4] __attribute__((aligned(16)));
1149  float uy_mid_a[4] __attribute__((aligned(16)));
1150  float ux_end_a[4] __attribute__((aligned(16)));
1151  float uy_end_a[4] __attribute__((aligned(16)));
1152 
1153  float dzdl_1_a[4] __attribute__((aligned(16)));
1154  float dzdl_2_a[4] __attribute__((aligned(16)));
1155  float ddzdl_1_a[4] __attribute__((aligned(16)));
1156  float ddzdl_2_a[4] __attribute__((aligned(16)));
1157 
1158  float cur_kappa_a[4] __attribute__((aligned(16)));
1159  float cur_dkappa_a[4] __attribute__((aligned(16)));
1160  float cur_ux_a[4] __attribute__((aligned(16)));
1161  float cur_uy_a[4] __attribute__((aligned(16)));
1162  float cur_chi2_a[4] __attribute__((aligned(16)));
1163  float chi2_a[4] __attribute__((aligned(16)));
1164 
1165  unsigned int hit1[4];
1166  unsigned int hit2[4];
1167  unsigned int hit3[4];
1168 
1169  TrackSegment temp_segment;
1170  temp_segment.hits.assign(n_layers, 0);
1171  // make segments out of first 3 layers
1172  for (unsigned int i = 0, sizei = layer_sorted[0].size(); i < sizei; ++i) {
1173  for (unsigned int j = 0, sizej = layer_sorted[1].size(); j < sizej; ++j) {
1174  for (unsigned int k = 0, sizek = layer_sorted[2].size(); k < sizek; ++k) {
1175  if ((layer_sorted[0][i].get_layer() >= layer_sorted[1][j].get_layer()) ||
1176  (layer_sorted[1][j].get_layer() >= layer_sorted[2][k].get_layer())) {
1177  continue;
1178  }
1179 
1180  x1_a[hit_counter] = layer_sorted[0][i].get_x();
1181  y1_a[hit_counter] = layer_sorted[0][i].get_y();
1182  z1_a[hit_counter] = layer_sorted[0][i].get_z();
1183 
1185 
1186  dx1_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[0][i].get_size(0,0));
1187  dy1_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[0][i].get_size(1,1));
1188  dz1_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[0][i].get_size(2,2));
1189 
1190  x2_a[hit_counter] = layer_sorted[1][j].get_x();
1191  y2_a[hit_counter] = layer_sorted[1][j].get_y();
1192  z2_a[hit_counter] = layer_sorted[1][j].get_z();
1193 
1194  dx2_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[1][j].get_size(0,0));
1195  dy2_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[1][j].get_size(1,1));
1196  dz2_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[1][j].get_size(2,2));
1197 
1198  x3_a[hit_counter] = layer_sorted[2][k].get_x();
1199  y3_a[hit_counter] = layer_sorted[2][k].get_y();
1200  z3_a[hit_counter] = layer_sorted[2][k].get_z();
1201  dx3_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[2][k].get_size(0,0));
1202  dy3_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[2][k].get_size(1,1));
1203  dz3_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[2][k].get_size(2,2));
1204 
1205  hit1[hit_counter] = i;
1206  hit2[hit_counter] = j;
1207  hit3[hit_counter] = k;
1208 
1209  hit_counter += 1;
1210 
1211  if (hit_counter == 4) {
1212  calculateKappaTangents(x1_a, y1_a, z1_a, x2_a, y2_a, z2_a, x3_a, y3_a,
1213  z3_a, dx1_a, dy1_a, dz1_a, dx2_a, dy2_a, dz2_a,
1214  dx3_a, dy3_a, dz3_a, kappa_a, dkappa_a,
1215  ux_mid_a, uy_mid_a, ux_end_a, uy_end_a,
1216  dzdl_1_a, dzdl_2_a, ddzdl_1_a, ddzdl_2_a);
1217 
1218  for (unsigned int h = 0; h < hit_counter; ++h) {
1219  temp_segment.chi2 =
1220  (dzdl_1_a[h] - dzdl_2_a[h]) /
1221  (ddzdl_1_a[h] + ddzdl_2_a[h] + fabs(dzdl_1_a[h] * sinang_cut));
1222  temp_segment.chi2 *= temp_segment.chi2;
1223  if (temp_segment.chi2 > 2.0) {
1224  continue;
1225  }
1226  temp_segment.ux = ux_end_a[h];
1227  temp_segment.uy = uy_end_a[h];
1228  temp_segment.kappa = kappa_a[h];
1229  if (temp_segment.kappa > top_range.max_k) {
1230  continue;
1231  }
1232  temp_segment.dkappa = dkappa_a[h];
1233  temp_segment.hits[0] = hit1[h];
1234  temp_segment.hits[1] = hit2[h];
1235  temp_segment.hits[2] = hit3[h];
1236  temp_segment.n_hits = 3;
1237  unsigned int outer_layer =
1238  layer_sorted[2][temp_segment.hits[2]].get_layer();
1239  if ((outer_layer - 2) > allowed_missing) {
1240  continue;
1241  }
1242  if ((n_layers - 3) <= allowed_missing) {
1243  complete_segments.push_back(temp_segment);
1244  }
1245  if (next_seg->size() == nextseg_size) {
1246  next_seg->push_back(temp_segment);
1247  nextseg_size += 1;
1248  } else {
1249  (*next_seg)[nextseg_size] = temp_segment;
1250  nextseg_size += 1;
1251  }
1252  }
1253 
1254  hit_counter = 0;
1255  }
1256  }
1257  }
1258  }
1259  if (hit_counter != 0) {
1260  calculateKappaTangents(x1_a, y1_a, z1_a, x2_a, y2_a, z2_a, x3_a, y3_a, z3_a,
1261  dx1_a, dy1_a, dz1_a, dx2_a, dy2_a, dz2_a, dx3_a,
1262  dy3_a, dz3_a, kappa_a, dkappa_a, ux_mid_a, uy_mid_a,
1263  ux_end_a, uy_end_a, dzdl_1_a, dzdl_2_a, ddzdl_1_a,
1264  ddzdl_2_a);
1265 
1266  for (unsigned int h = 0; h < hit_counter; ++h) {
1267  temp_segment.chi2 =
1268  (dzdl_1_a[h] - dzdl_2_a[h]) /
1269  (ddzdl_1_a[h] + ddzdl_2_a[h] + fabs(dzdl_1_a[h] * sinang_cut));
1270  temp_segment.chi2 *= temp_segment.chi2;
1271  if (temp_segment.chi2 > 2.0) {
1272  continue;
1273  }
1274  temp_segment.ux = ux_end_a[h];
1275  temp_segment.uy = uy_end_a[h];
1276  temp_segment.kappa = kappa_a[h];
1277  if (temp_segment.kappa > top_range.max_k) {
1278  continue;
1279  }
1280  temp_segment.dkappa = dkappa_a[h];
1281  temp_segment.hits[0] = hit1[h];
1282  temp_segment.hits[1] = hit2[h];
1283  temp_segment.hits[2] = hit3[h];
1284  temp_segment.n_hits = 3;
1285  unsigned int outer_layer = layer_sorted[2][temp_segment.hits[2]].get_layer();
1286  if ((outer_layer - 2) > allowed_missing) {
1287  continue;
1288  }
1289  if ((n_layers - 3) <= allowed_missing) {
1290  complete_segments.push_back(temp_segment);
1291  }
1292  if (next_seg->size() == nextseg_size) {
1293  next_seg->push_back(temp_segment);
1294  nextseg_size += 1;
1295  } else {
1296  (*next_seg)[nextseg_size] = temp_segment;
1297  nextseg_size += 1;
1298  }
1299  }
1300 
1301  hit_counter = 0;
1302  }
1303 
1304  swap(cur_seg, next_seg);
1305  swap(curseg_size, nextseg_size);
1306  // add hits to segments layer-by-layer, cutting out bad segments
1307  unsigned int whichseg[4];
1308  for (unsigned int l = 3; l < n_layers; ++l) {
1309  if (l == (n_layers - 1)) {
1310  easy_chi2_cut *= 0.25;
1311  }
1312  nextseg_size = 0;
1313  for (unsigned int i = 0, sizei = curseg_size; i < sizei; ++i) {
1314  for (unsigned int j = 0, sizej = layer_sorted[l].size(); j < sizej; ++j) {
1315  if ((layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_layer() >=
1316  layer_sorted[l][j].get_layer())) {
1317  continue;
1318  }
1319 
1320  x1_a[hit_counter] = layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_x();
1321  y1_a[hit_counter] = layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_y();
1322  z1_a[hit_counter] = layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_z();
1323  x2_a[hit_counter] = layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_x();
1324  y2_a[hit_counter] = layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_y();
1325  z2_a[hit_counter] = layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_z();
1326  x3_a[hit_counter] = layer_sorted[l][j].get_x();
1327  y3_a[hit_counter] = layer_sorted[l][j].get_y();
1328  z3_a[hit_counter] = layer_sorted[l][j].get_z();
1329 
1330  dx1_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_size(0,0));
1331  dy1_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_size(1,1));
1332  dz1_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l - 2][(*cur_seg)[i].hits[l - 2]].get_size(2,2));
1333  dx2_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_size(0,0));
1334  dy2_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_size(1,1));
1335  dz2_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l - 1][(*cur_seg)[i].hits[l - 1]].get_size(2,2));
1336  dx3_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l][j].get_size(0,0));
1337  dy3_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l][j].get_size(1,1));
1338  dz3_a[hit_counter] = 0.5*sqrt(12.0)*sqrt(layer_sorted[l][j].get_size(2,2));
1339 
1340  cur_kappa_a[hit_counter] = (*cur_seg)[i].kappa;
1341  cur_dkappa_a[hit_counter] = (*cur_seg)[i].dkappa;
1342  cur_ux_a[hit_counter] = (*cur_seg)[i].ux;
1343  cur_uy_a[hit_counter] = (*cur_seg)[i].uy;
1344  cur_chi2_a[hit_counter] = (*cur_seg)[i].chi2;
1345 
1346  whichseg[hit_counter] = i;
1347  hit1[hit_counter] = j;
1348 
1349  hit_counter += 1;
1350  if (hit_counter == 4) {
1351  calculateKappaTangents(
1352  x1_a, y1_a, z1_a, x2_a, y2_a, z2_a, x3_a, y3_a, z3_a, dx1_a,
1353  dy1_a, dz1_a, dx2_a, dy2_a, dz2_a, dx3_a, dy3_a, dz3_a, kappa_a,
1354  dkappa_a, ux_mid_a, uy_mid_a, ux_end_a, uy_end_a, dzdl_1_a,
1355  dzdl_2_a, ddzdl_1_a, ddzdl_2_a, sinang_cut, cosang_diff_inv,
1356  cur_kappa_a, cur_dkappa_a, cur_ux_a, cur_uy_a, cur_chi2_a,
1357  chi2_a);
1358 
1359  for (unsigned int h = 0; h < hit_counter; ++h) {
1360  if ((chi2_a[h]) * inv_layer[l] < easy_chi2_cut) {
1361  temp_segment.chi2 = chi2_a[h];
1362  temp_segment.ux = ux_end_a[h];
1363  temp_segment.uy = uy_end_a[h];
1364  temp_segment.kappa = kappa_a[h];
1365  if (temp_segment.kappa > top_range.max_k) {
1366  continue;
1367  }
1368  temp_segment.dkappa = dkappa_a[h];
1369  for (unsigned int ll = 0; ll < l; ++ll) {
1370  temp_segment.hits[ll] = (*cur_seg)[whichseg[h]].hits[ll];
1371  }
1372  temp_segment.hits[l] = hit1[h];
1373  unsigned int outer_layer =
1374  layer_sorted[l][temp_segment.hits[l]].get_layer();
1375  temp_segment.n_hits = l + 1;
1376  if ((n_layers - (l + 1)) <= allowed_missing) {
1377  complete_segments.push_back(temp_segment);
1378  }
1379  if ((outer_layer - l) > allowed_missing) {
1380  continue;
1381  }
1382  if (next_seg->size() == nextseg_size) {
1383  next_seg->push_back(temp_segment);
1384  nextseg_size += 1;
1385  } else {
1386  (*next_seg)[nextseg_size] = temp_segment;
1387  nextseg_size += 1;
1388  }
1389  }
1390  }
1391  hit_counter = 0;
1392  }
1393  }
1394  }
1395  if (hit_counter != 0) {
1396  calculateKappaTangents(
1397  x1_a, y1_a, z1_a, x2_a, y2_a, z2_a, x3_a, y3_a, z3_a, dx1_a, dy1_a,
1398  dz1_a, dx2_a, dy2_a, dz2_a, dx3_a, dy3_a, dz3_a, kappa_a, dkappa_a,
1399  ux_mid_a, uy_mid_a, ux_end_a, uy_end_a, dzdl_1_a, dzdl_2_a, ddzdl_1_a,
1400  ddzdl_2_a, sinang_cut, cosang_diff_inv, cur_kappa_a, cur_dkappa_a,
1401  cur_ux_a, cur_uy_a, cur_chi2_a, chi2_a);
1402 
1403  for (unsigned int h = 0; h < hit_counter; ++h) {
1404  if ((chi2_a[h]) * inv_layer[l] < easy_chi2_cut) {
1405  temp_segment.chi2 = chi2_a[h];
1406  temp_segment.ux = ux_end_a[h];
1407  temp_segment.uy = uy_end_a[h];
1408  temp_segment.kappa = kappa_a[h];
1409  if (temp_segment.kappa > top_range.max_k) {
1410  continue;
1411  }
1412  temp_segment.dkappa = dkappa_a[h];
1413  for (unsigned int ll = 0; ll < l; ++ll) {
1414  temp_segment.hits[ll] = (*cur_seg)[whichseg[h]].hits[ll];
1415  }
1416  temp_segment.hits[l] = hit1[h];
1417  unsigned int outer_layer =
1418  layer_sorted[l][temp_segment.hits[l]].get_layer();
1419  temp_segment.n_hits = l + 1;
1420  if ((n_layers - (l + 1)) <= allowed_missing) {
1421  complete_segments.push_back(temp_segment);
1422  }
1423  if ((outer_layer - l) > allowed_missing) {
1424  continue;
1425  }
1426  if (next_seg->size() == nextseg_size) {
1427  next_seg->push_back(temp_segment);
1428  nextseg_size += 1;
1429  } else {
1430  (*next_seg)[nextseg_size] = temp_segment;
1431  nextseg_size += 1;
1432  }
1433  }
1434  }
1435  hit_counter = 0;
1436  }
1437  swap(cur_seg, next_seg);
1438  swap(curseg_size, nextseg_size);
1439  }
1440  for (unsigned int i = 0; i < complete_segments.size(); ++i) {
1441  if (cur_seg->size() == curseg_size) {
1442  cur_seg->push_back(complete_segments[i]);
1443  curseg_size += 1;
1444  } else {
1445  (*cur_seg)[curseg_size] = complete_segments[i];
1446  curseg_size += 1;
1447  }
1448  }
1449 
1450  gettimeofday(&t2, NULL);
1451  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
1452  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
1453  CAtime += (time2 - time1);
1454  SimpleTrack3D temp_track;
1455  temp_track.hits.assign(n_layers, SimpleHit3D());
1456  vector<SimpleHit3D> temp_hits;
1457  for (unsigned int i = 0, sizei = curseg_size; i < sizei; ++i) {
1458  temp_track.hits.assign((*cur_seg)[i].n_hits, SimpleHit3D());
1459 
1460  temp_comb.assign((*cur_seg)[i].n_hits, 0);
1461  for (unsigned int l = 0; l < (*cur_seg)[i].n_hits; ++l) {
1462  temp_comb[l] = layer_sorted[l][(*cur_seg)[i].hits[l]].get_id();
1463  }
1464  sort(temp_comb.begin(), temp_comb.end());
1465  set<vector<unsigned int> >::iterator it = combos.find(temp_comb);
1466  if (it != combos.end()) {
1467  continue;
1468  }
1469  if (combos.size() > 10000) {
1470  combos.clear();
1471  }
1472  combos.insert(temp_comb);
1473 
1474  for (unsigned int l = 0; l < (*cur_seg)[i].n_hits; ++l) {
1475  temp_track.hits[l] = layer_sorted[l][(*cur_seg)[i].hits[l]];
1476  }
1477 
1478  gettimeofday(&t1, NULL);
1479 
1480  float init_chi2 = fitTrack(temp_track);
1481 
1482  if (init_chi2 > fast_chi2_cut_max) {
1483  if (init_chi2 > fast_chi2_cut_par0 +
1484  fast_chi2_cut_par1 / kappaToPt(temp_track.kappa)) {
1485  gettimeofday(&t2, NULL);
1486  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
1487  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
1488  KALtime += (time2 - time1);
1489  continue;
1490  }
1491  }
1492  HelixKalmanState state;
1493  state.phi = temp_track.phi;
1494  if (state.phi < 0.) {
1495  state.phi += 2. * M_PI;
1496  }
1497  state.d = temp_track.d;
1498  state.kappa = temp_track.kappa;
1499  state.nu = sqrt(state.kappa);
1500  state.z0 = temp_track.z0;
1501  state.dzdl = temp_track.dzdl;
1502  state.C = Matrix<float, 5, 5>::Zero(5, 5);
1503  state.C(0, 0) = pow(0.01, 2.);
1504  state.C(1, 1) = pow(0.01, 2.);
1505  state.C(2, 2) = pow(0.01 * state.nu, 2.);
1506  state.C(3, 3) = pow(0.05, 2.);
1507  state.C(4, 4) = pow(0.05, 2.);
1508  state.chi2 = 0.;
1509  state.position = 0;
1510  state.x_int = 0.;
1511  state.y_int = 0.;
1512  state.z_int = 0.;
1513  for (unsigned int h = 0; h < temp_track.hits.size(); ++h) {
1514  kalman->addHit(temp_track.hits[h], state);
1515  nfits += 1;
1516  }
1517 
1518  // fudge factor for non-gaussian hit sizes
1519  state.C *= 3.;
1520  state.chi2 *= 6.;
1521 
1522  gettimeofday(&t2, NULL);
1523  time1 = ((double)(t1.tv_sec) + (double)(t1.tv_usec) / 1000000.);
1524  time2 = ((double)(t2.tv_sec) + (double)(t2.tv_usec) / 1000000.);
1525  KALtime += (time2 - time1);
1526 
1527  if (!(temp_track.kappa == temp_track.kappa)) {
1528  continue;
1529  }
1530  if (temp_track.kappa > top_range.max_k) {
1531  continue;
1532  }
1533  if (!(state.chi2 == state.chi2)) {
1534  continue;
1535  }
1536  if (state.chi2 / (2. * ((float)(temp_track.hits.size())) - 5.) > chi2_cut) {
1537  continue;
1538  }
1539 
1540  if (cut_on_dca == true) {
1541  if (fabs(temp_track.d) > dca_cut) {
1542  continue;
1543  }
1544  if (fabs(temp_track.z0) > dca_cut) {
1545  continue;
1546  }
1547  }
1548  tracks.push_back(temp_track);
1549  track_states.push_back(state);
1550  if ((remove_hits == true) && (state.chi2 < chi2_removal_cut) &&
1551  (temp_track.hits.size() >= n_removal_hits)) {
1552  for (unsigned int i = 0; i < temp_track.hits.size(); ++i) {
1553  (*hit_used)[temp_track.hits[i].get_id()] = true;
1554  }
1555  }
1556  }
1557 
1558 }
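// SSE helper: for four hit triplets at a time, compute the circle curvature
// kappa = 4*Area / (D12*D23*D31) from the three chord lengths (Heron's formula),
// its uncertainty, the tangent directions at the middle and outer hit, and dzdl
// with its uncertainty for both legs of the triplet.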
1559 
1560 void sPHENIXTrackerTpc::calculateKappaTangents(
1561  float* x1_a, float* y1_a, float* z1_a, float* x2_a, float* y2_a,
1562  float* z2_a, float* x3_a, float* y3_a, float* z3_a, float* dx1_a,
1563  float* dy1_a, float* dz1_a, float* dx2_a, float* dy2_a, float* dz2_a,
1564  float* dx3_a, float* dy3_a, float* dz3_a, float* kappa_a, float* dkappa_a,
1565  float* ux_mid_a, float* uy_mid_a, float* ux_end_a, float* uy_end_a,
1566  float* dzdl_1_a, float* dzdl_2_a, float* ddzdl_1_a, float* ddzdl_2_a) {
1567  static const __m128 two = {2., 2., 2., 2.};
1568 
1569  __m128 x1 = _mm_load_ps(x1_a);
1570  __m128 x2 = _mm_load_ps(x2_a);
1571  __m128 x3 = _mm_load_ps(x3_a);
1572  __m128 y1 = _mm_load_ps(y1_a);
1573  __m128 y2 = _mm_load_ps(y2_a);
1574  __m128 y3 = _mm_load_ps(y3_a);
1575  __m128 z1 = _mm_load_ps(z1_a);
1576  __m128 z2 = _mm_load_ps(z2_a);
1577  __m128 z3 = _mm_load_ps(z3_a);
1578 
1579  __m128 dx1 = _mm_load_ps(dx1_a);
1580  __m128 dx2 = _mm_load_ps(dx2_a);
1581  __m128 dx3 = _mm_load_ps(dx3_a);
1582  __m128 dy1 = _mm_load_ps(dy1_a);
1583  __m128 dy2 = _mm_load_ps(dy2_a);
1584  __m128 dy3 = _mm_load_ps(dy3_a);
1585  __m128 dz1 = _mm_load_ps(dz1_a);
1586  __m128 dz2 = _mm_load_ps(dz2_a);
1587  __m128 dz3 = _mm_load_ps(dz3_a);
1588 
1589  __m128 D12 = _mm_sub_ps(x2, x1);
1590  D12 = _mm_mul_ps(D12, D12);
1591  __m128 tmp1 = _mm_sub_ps(y2, y1);
1592  tmp1 = _mm_mul_ps(tmp1, tmp1);
1593  D12 = _mm_add_ps(D12, tmp1);
1594  D12 = _vec_sqrt_ps(D12);
1595 
1596  __m128 D23 = _mm_sub_ps(x3, x2);
1597  D23 = _mm_mul_ps(D23, D23);
1598  tmp1 = _mm_sub_ps(y3, y2);
1599  tmp1 = _mm_mul_ps(tmp1, tmp1);
1600  D23 = _mm_add_ps(D23, tmp1);
1601  D23 = _vec_sqrt_ps(D23);
1602  __m128 D31 = _mm_sub_ps(x1, x3);
1603  D31 = _mm_mul_ps(D31, D31);
1604  tmp1 = _mm_sub_ps(y1, y3);
1605  tmp1 = _mm_mul_ps(tmp1, tmp1);
1606  D31 = _mm_add_ps(D31, tmp1);
1607  D31 = _vec_sqrt_ps(D31);
1608 
1609  __m128 k = _mm_mul_ps(D12, D23);
1610  k = _mm_mul_ps(k, D31);
1611  k = _vec_rec_ps(k);
1612  tmp1 = (D12 + D23 + D31) * (D23 + D31 - D12) * (D12 + D31 - D23) *
1613  (D12 + D23 - D31);
1614  tmp1 = _vec_sqrt_ps(tmp1);
1615  k *= tmp1;
1616 
1617  __m128 tmp2 = _mm_cmpgt_ps(tmp1, zero);
1618  tmp1 = _mm_and_ps(tmp2, k);
1619  tmp2 = _mm_andnot_ps(tmp2, zero);
1620  k = _mm_xor_ps(tmp1, tmp2);
1621 
1622  _mm_store_ps(kappa_a, k);
1623  __m128 k_inv = _vec_rec_ps(k);
1624 
1625  __m128 D12_inv = _vec_rec_ps(D12);
1626  __m128 D23_inv = _vec_rec_ps(D23);
1627  __m128 D31_inv = _vec_rec_ps(D31);
1628 
1629  __m128 dr1 = dx1 * dx1 + dy1 * dy1;
1630  dr1 = _vec_sqrt_ps(dr1);
1631  __m128 dr2 = dx2 * dx2 + dy2 * dy2;
1632  dr2 = _vec_sqrt_ps(dr2);
1633  __m128 dr3 = dx3 * dx3 + dy3 * dy3;
1634  dr3 = _vec_sqrt_ps(dr3);
1635 
1636  __m128 dk1 = (dr1 + dr2) * D12_inv * D12_inv;
1637  __m128 dk2 = (dr2 + dr3) * D23_inv * D23_inv;
1638  __m128 dk = dk1 + dk2;
1639  _mm_store_ps(dkappa_a, dk);
1640 
1641  __m128 ux12 = (x2 - x1) * D12_inv;
1642  __m128 uy12 = (y2 - y1) * D12_inv;
1643  __m128 ux23 = (x3 - x2) * D23_inv;
1644  __m128 uy23 = (y3 - y2) * D23_inv;
1645  __m128 ux13 = (x3 - x1) * D31_inv;
1646  __m128 uy13 = (y3 - y1) * D31_inv;
1647 
1648  __m128 cosalpha = ux12 * ux13 + uy12 * uy13;
1649  __m128 sinalpha = ux13 * uy12 - ux12 * uy13;
1650 
1651  __m128 ux_mid = ux23 * cosalpha - uy23 * sinalpha;
1652  __m128 uy_mid = ux23 * sinalpha + uy23 * cosalpha;
1653  _mm_store_ps(ux_mid_a, ux_mid);
1654  _mm_store_ps(uy_mid_a, uy_mid);
1655 
1656  __m128 ux_end = ux23 * cosalpha + uy23 * sinalpha;
1657  __m128 uy_end = uy23 * cosalpha - ux23 * sinalpha;
1658 
1659  _mm_store_ps(ux_end_a, ux_end);
1660  _mm_store_ps(uy_end_a, uy_end);
1661 
1662  __m128 v = one - sinalpha * sinalpha;
1663  v = _vec_sqrt_ps(v);
1664  v += one;
1665  v = _vec_rec_ps(v);
1666  v *= sinalpha;
1667  __m128 s2 = _vec_atan_ps(v);
1668  s2 *= two;
1669  s2 *= k_inv;
1670  tmp1 = _mm_cmpgt_ps(k, zero);
1671  tmp2 = _mm_and_ps(tmp1, s2);
1672  tmp1 = _mm_andnot_ps(tmp1, D23);
1673 
1674  s2 = _mm_xor_ps(tmp1, tmp2);
1675 
1676  // dz/dl = (dz/ds)/sqrt(1 + (dz/ds)^2)
1677  // = dz/sqrt(s^2 + dz^2)
1678  __m128 del_z_2 = z3 - z2;
1679  __m128 dzdl_2 = s2 * s2 + del_z_2 * del_z_2;
1680  dzdl_2 = _vec_rsqrt_ps(dzdl_2);
1681  dzdl_2 *= del_z_2;
1682  __m128 ddzdl_2 = (dz2 + dz3) * D23_inv;
1683  _mm_store_ps(dzdl_2_a, dzdl_2);
1684  _mm_store_ps(ddzdl_2_a, ddzdl_2);
1685 
1686  sinalpha = ux13 * uy23 - ux23 * uy13;
1687  v = one - sinalpha * sinalpha;
1688  v = _vec_sqrt_ps(v);
1689  v += one;
1690  v = _vec_rec_ps(v);
1691  v *= sinalpha;
1692  __m128 s1 = _vec_atan_ps(v);
1693  s1 *= two;
1694  s1 *= k_inv;
1695  tmp1 = _mm_cmpgt_ps(k, zero);
1696  tmp2 = _mm_and_ps(tmp1, s1);
1697  tmp1 = _mm_andnot_ps(tmp1, D12);
1698  s1 = _mm_xor_ps(tmp1, tmp2);
1699 
1700  __m128 del_z_1 = z2 - z1;
1701  __m128 dzdl_1 = s1 * s1 + del_z_1 * del_z_1;
1702  dzdl_1 = _vec_rsqrt_ps(dzdl_1);
1703  dzdl_1 *= del_z_1;
1704  __m128 ddzdl_1 = (dz1 + dz2) * D12_inv;
1705  _mm_store_ps(dzdl_1_a, dzdl_1);
1706  _mm_store_ps(ddzdl_1_a, ddzdl_1);
1707 
1708 }
1709 
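// Overload used while extending segments: same kinematics as above, plus a chi2
// increment comparing the new triplet's curvature, bending direction and dzdl
// against the values carried by the current segment (the cur_* arrays), scaled
// by the allowances sinang_cut and cosang_diff_inv.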
1710 
1711 void sPHENIXTrackerTpc::calculateKappaTangents(
1712  float* x1_a, float* y1_a, float* z1_a, float* x2_a, float* y2_a,
1713  float* z2_a, float* x3_a, float* y3_a, float* z3_a, float* dx1_a,
1714  float* dy1_a, float* dz1_a, float* dx2_a, float* dy2_a, float* dz2_a,
1715  float* dx3_a, float* dy3_a, float* dz3_a, float* kappa_a, float* dkappa_a,
1716  float* ux_mid_a, float* uy_mid_a, float* ux_end_a, float* uy_end_a,
1717  float* dzdl_1_a, float* dzdl_2_a, float* ddzdl_1_a, float* ddzdl_2_a,
1718  float sinang_cut, float cosang_diff_inv, float* cur_kappa_a,
1719  float* cur_dkappa_a, float* cur_ux_a, float* cur_uy_a, float* cur_chi2_a,
1720  float* chi2_a) {
1721  static const __m128 two = {2., 2., 2., 2.};
1722 
1723  __m128 x1 = _mm_load_ps(x1_a);
1724  __m128 x2 = _mm_load_ps(x2_a);
1725  __m128 x3 = _mm_load_ps(x3_a);
1726  __m128 y1 = _mm_load_ps(y1_a);
1727  __m128 y2 = _mm_load_ps(y2_a);
1728  __m128 y3 = _mm_load_ps(y3_a);
1729  __m128 z1 = _mm_load_ps(z1_a);
1730  __m128 z2 = _mm_load_ps(z2_a);
1731  __m128 z3 = _mm_load_ps(z3_a);
1732 
1733  __m128 dx1 = _mm_load_ps(dx1_a);
1734  __m128 dx2 = _mm_load_ps(dx2_a);
1735  __m128 dx3 = _mm_load_ps(dx3_a);
1736  __m128 dy1 = _mm_load_ps(dy1_a);
1737  __m128 dy2 = _mm_load_ps(dy2_a);
1738  __m128 dy3 = _mm_load_ps(dy3_a);
1739  __m128 dz1 = _mm_load_ps(dz1_a);
1740  __m128 dz2 = _mm_load_ps(dz2_a);
1741  __m128 dz3 = _mm_load_ps(dz3_a);
1742 
1743  __m128 D12 = _mm_sub_ps(x2, x1);
1744  D12 = _mm_mul_ps(D12, D12);
1745  __m128 tmp1 = _mm_sub_ps(y2, y1);
1746  tmp1 = _mm_mul_ps(tmp1, tmp1);
1747  D12 = _mm_add_ps(D12, tmp1);
1748  D12 = _vec_sqrt_ps(D12);
1749 
1750  __m128 D23 = _mm_sub_ps(x3, x2);
1751  D23 = _mm_mul_ps(D23, D23);
1752  tmp1 = _mm_sub_ps(y3, y2);
1753  tmp1 = _mm_mul_ps(tmp1, tmp1);
1754  D23 = _mm_add_ps(D23, tmp1);
1755  D23 = _vec_sqrt_ps(D23);
1756 
1757  __m128 D31 = _mm_sub_ps(x1, x3);
1758  D31 = _mm_mul_ps(D31, D31);
1759  tmp1 = _mm_sub_ps(y1, y3);
1760  tmp1 = _mm_mul_ps(tmp1, tmp1);
1761  D31 = _mm_add_ps(D31, tmp1);
1762  D31 = _vec_sqrt_ps(D31);
1763 
1764  __m128 k = _mm_mul_ps(D12, D23);
1765  k = _mm_mul_ps(k, D31);
1766  k = _vec_rec_ps(k);
1767  tmp1 = (D12 + D23 + D31) * (D23 + D31 - D12) * (D12 + D31 - D23) *
1768  (D12 + D23 - D31);
1769  tmp1 = _vec_sqrt_ps(tmp1);
1770  k *= tmp1;
1771 
1772  __m128 tmp2 = _mm_cmpgt_ps(tmp1, zero);
1773  tmp1 = _mm_and_ps(tmp2, k);
1774  tmp2 = _mm_andnot_ps(tmp2, zero);
1775  k = _mm_xor_ps(tmp1, tmp2);
1776 
1777  _mm_store_ps(kappa_a, k);
1778  __m128 k_inv = _vec_rec_ps(k);
1779 
1780  __m128 D12_inv = _vec_rec_ps(D12);
1781  __m128 D23_inv = _vec_rec_ps(D23);
1782  __m128 D31_inv = _vec_rec_ps(D31);
1783 
1784  __m128 dr1 = dx1 * dx1 + dy1 * dy1;
1785  dr1 = _vec_sqrt_ps(dr1);
1786  __m128 dr2 = dx2 * dx2 + dy2 * dy2;
1787  dr2 = _vec_sqrt_ps(dr2);
1788  __m128 dr3 = dx3 * dx3 + dy3 * dy3;
1789  dr3 = _vec_sqrt_ps(dr3);
1790 
1791  __m128 dk1 = (dr1 + dr2) * D12_inv * D12_inv;
1792  __m128 dk2 = (dr2 + dr3) * D23_inv * D23_inv;
1793  __m128 dk = dk1 + dk2;
1794  _mm_store_ps(dkappa_a, dk);
1795 
1796  __m128 ux12 = (x2 - x1) * D12_inv;
1797  __m128 uy12 = (y2 - y1) * D12_inv;
1798  __m128 ux23 = (x3 - x2) * D23_inv;
1799  __m128 uy23 = (y3 - y2) * D23_inv;
1800  __m128 ux13 = (x3 - x1) * D31_inv;
1801  __m128 uy13 = (y3 - y1) * D31_inv;
1802 
1803  __m128 cosalpha = ux12 * ux13 + uy12 * uy13;
1804  __m128 sinalpha = ux13 * uy12 - ux12 * uy13;
1805 
1806  __m128 ux_mid = ux23 * cosalpha - uy23 * sinalpha;
1807  __m128 uy_mid = ux23 * sinalpha + uy23 * cosalpha;
1808  _mm_store_ps(ux_mid_a, ux_mid);
1809  _mm_store_ps(uy_mid_a, uy_mid);
1810 
1811  __m128 ux_end = ux23 * cosalpha + uy23 * sinalpha;
1812  __m128 uy_end = uy23 * cosalpha - ux23 * sinalpha;
1813 
1814  _mm_store_ps(ux_end_a, ux_end);
1815  _mm_store_ps(uy_end_a, uy_end);
1816 
1817  // asin(x) = 2*atan( x/( 1 + sqrt( 1 - x*x ) ) )
1818  __m128 v = one - sinalpha * sinalpha;
1819  v = _vec_sqrt_ps(v);
1820  v += one;
1821  v = _vec_rec_ps(v);
1822  v *= sinalpha;
1823  __m128 s2 = _vec_atan_ps(v);
1824  s2 *= two;
1825  s2 *= k_inv;
1826  tmp1 = _mm_cmpgt_ps(k, zero);
1827  tmp2 = _mm_and_ps(tmp1, s2);
1828  tmp1 = _mm_andnot_ps(tmp1, D23);
1829  s2 = _mm_xor_ps(tmp1, tmp2);
1830 
1831  // dz/dl = (dz/ds)/sqrt(1 + (dz/ds)^2)
1832  // = dz/sqrt(s^2 + dz^2)
1833  __m128 del_z_2 = z3 - z2;
1834  __m128 dzdl_2 = s2 * s2 + del_z_2 * del_z_2;
1835  dzdl_2 = _vec_rsqrt_ps(dzdl_2);
1836  dzdl_2 *= del_z_2;
1837  __m128 ddzdl_2 = (dz2 + dz3) * D23_inv;
1838  _mm_store_ps(dzdl_2_a, dzdl_2);
1839  _mm_store_ps(ddzdl_2_a, ddzdl_2);
1840 
1841  sinalpha = ux13 * uy23 - ux23 * uy13;
1842  v = one - sinalpha * sinalpha;
1843  v = _vec_sqrt_ps(v);
1844  v += one;
1845  v = _vec_rec_ps(v);
1846  v *= sinalpha;
1847  __m128 s1 = _vec_atan_ps(v);
1848  s1 *= two;
1849  s1 *= k_inv;
1850  tmp1 = _mm_cmpgt_ps(k, zero);
1851  tmp2 = _mm_and_ps(tmp1, s1);
1852  tmp1 = _mm_andnot_ps(tmp1, D12);
1853  s1 = _mm_xor_ps(tmp1, tmp2);
1854 
1855  __m128 del_z_1 = z2 - z1;
1856  __m128 dzdl_1 = s1 * s1 + del_z_1 * del_z_1;
1857  dzdl_1 = _vec_rsqrt_ps(dzdl_1);
1858  dzdl_1 *= del_z_1;
1859  __m128 ddzdl_1 = (dz1 + dz2) * D12_inv;
1860  _mm_store_ps(dzdl_1_a, dzdl_1);
1861  _mm_store_ps(ddzdl_1_a, ddzdl_1);
1862  __m128 c_dk = _mm_load_ps(cur_dkappa_a);
1863  __m128 c_k = _mm_load_ps(cur_kappa_a);
1864  __m128 c_ux = _mm_load_ps(cur_ux_a);
1865  __m128 c_uy = _mm_load_ps(cur_uy_a);
1866  __m128 c_chi2 = _mm_load_ps(cur_chi2_a);
1867  __m128 sinang = _mm_load1_ps(&sinang_cut);
1868  __m128 cosdiff = _mm_load1_ps(&cosang_diff_inv);
1869 
1870  __m128 kdiff = c_k - k;
1871  __m128 n_dk = c_dk + dk + sinang * k;
1872  __m128 chi2_k = kdiff * kdiff / (n_dk * n_dk);
1873  __m128 cos_scatter = c_ux * ux_mid + c_uy * uy_mid;
1874  __m128 chi2_ang =
1875  (one - cos_scatter) * (one - cos_scatter) * cosdiff * cosdiff;
1876  tmp1 = dzdl_1 * sinang;
1877  _vec_fabs_ps(tmp1);
1878  __m128 chi2_dzdl = (dzdl_1 - dzdl_2) / (ddzdl_1 + ddzdl_2 + tmp1);
1879  chi2_dzdl *= chi2_dzdl;
1880  chi2_dzdl *= one_o_2;
1881 
1882  __m128 n_chi2 = c_chi2 + chi2_ang + chi2_k + chi2_dzdl;
1883  _mm_store_ps(chi2_a, n_chi2);
1884 
1885 }