source: HiSusy/trunk/Delphes-3.0.0/external/fastjet/ClusterSequenceAreaBase.cc @ 1

Last change on this file since 1 was 1, checked in by zerwas, 11 years ago

first import of structure, PYTHIA8 and DELPHES

File size: 14.8 KB
Line 
1
2//STARTHEADER
3// $Id: ClusterSequenceAreaBase.cc 859 2012-11-28 01:49:23Z pavel $
4//
5// Copyright (c) 2005-2011, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
6//
7//----------------------------------------------------------------------
8// This file is part of FastJet.
9//
10//  FastJet is free software; you can redistribute it and/or modify
11//  it under the terms of the GNU General Public License as published by
12//  the Free Software Foundation; either version 2 of the License, or
13//  (at your option) any later version.
14//
15//  The algorithms that underlie FastJet have required considerable
16//  development and are described in hep-ph/0512210. If you use
17//  FastJet as part of work towards a scientific publication, please
18//  include a citation to the FastJet paper.
19//
20//  FastJet is distributed in the hope that it will be useful,
21//  but WITHOUT ANY WARRANTY; without even the implied warranty of
22//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23//  GNU General Public License for more details.
24//
25//  You should have received a copy of the GNU General Public License
26//  along with FastJet. If not, see <http://www.gnu.org/licenses/>.
27//----------------------------------------------------------------------
28//ENDHEADER
29
30
31
32
33#include "fastjet/ClusterSequenceAreaBase.hh"
34#include <algorithm>
35
36FASTJET_BEGIN_NAMESPACE
37
38using namespace std;
39
40
41/// allow for warnings
42LimitedWarning ClusterSequenceAreaBase::_warnings;
43LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area;
44LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area;
45
46//----------------------------------------------------------------------
47/// return the total area, within the selector's range, that is free
48/// of jets.
49///
50/// Calculate this as (range area) - \sum_{i in range} A_i
51///
52/// for ClusterSequences with explicit ghosts, assume that there will
53/// never be any empty area, i.e. it is always filled in by pure
54/// ghosts jets. This holds for seq.rec. algorithms
55double ClusterSequenceAreaBase::empty_area(const Selector & selector) const {
56
57  if (has_explicit_ghosts()) {return 0.0;}
58  else { return empty_area_from_jets(inclusive_jets(0.0), selector);}
59
60}
61
62//----------------------------------------------------------------------
63/// return the total area, within range, that is free of jets.
64///
65/// Calculate this as (range area) - \sum_{i in range} A_i
66///
67double ClusterSequenceAreaBase::empty_area_from_jets(
68                      const std::vector<PseudoJet> & all_jets,
69                      const Selector & selector) const {
70  _check_selector_good_for_median(selector);
71
72  double empty = selector.area();
73  for (unsigned i = 0; i < all_jets.size(); i++) {
74    if (selector.pass(all_jets[i])) empty -= area(all_jets[i]);
75  }
76  return empty;
77}
78
79double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const {
80  return median_pt_per_unit_something(selector,false);
81}
82
83double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const {
84  return median_pt_per_unit_something(selector,true);
85}
86
87
88//----------------------------------------------------------------------
89/// the median of (pt/area) for jets contained within range, counting
90/// the empty area as if it were made up of a collection of empty
91/// jets each of area (0.55 * pi R^2).
92double ClusterSequenceAreaBase::median_pt_per_unit_something(
93                const Selector & selector, bool use_area_4vector) const {
94
95  double median, sigma, mean_area;
96  get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
97  return median;
98
99}
100
101
102//----------------------------------------------------------------------
103/// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range.
104/// exclude_above allows one to exclude large values of pt/area from fit.
105/// use_area_4vector = true uses the 4vector areas.
106void ClusterSequenceAreaBase::parabolic_pt_per_unit_area(
107       double & a, double & b, const Selector & selector, 
108       double exclude_above, bool use_area_4vector) const {
109  // sanity check on the selector: we require a finite area and that
110  // it applies jet by jet (see BackgroundEstimator for more advanced
111  // usage)
112  _check_selector_good_for_median(selector);
113
114  int n=0;
115  int n_excluded = 0;
116  double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0; 
117
118  vector<PseudoJet> incl_jets = inclusive_jets();
119
120  for (unsigned i = 0; i < incl_jets.size(); i++) {
121    if (selector.pass(incl_jets[i])) {
122      double this_area;
123      if ( use_area_4vector ) {
124          this_area = area_4vector(incl_jets[i]).perp();     
125      } else {
126          this_area = area(incl_jets[i]);
127      }
128      double f = incl_jets[i].perp()/this_area;
129      if (exclude_above <= 0.0 || f < exclude_above) {
130        double x = incl_jets[i].rap(); double x2 = x*x;
131        mean_f   += f;
132        mean_x2  += x2;
133        mean_x4  += x2*x2;
134        mean_fx2 += f*x2;
135        n++;
136      } else {
137        n_excluded++;
138      }
139    }
140  }
141
142  if (n <= 1) {
143    // meaningful results require at least two jets inside the
144    // area -- mind you if there are empty jets we should be in
145    // any case doing something special...
146    a = 0.0;
147    b = 0.0;
148  } else {
149    mean_f   /= n;
150    mean_x2  /= n;
151    mean_x4  /= n;
152    mean_fx2 /= n;
153   
154    b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
155    a = mean_f - b*mean_x2;
156  }
157  //cerr << "n_excluded = "<< n_excluded << endl;
158}
159
160
161
162void ClusterSequenceAreaBase::get_median_rho_and_sigma(
163            const Selector & selector, bool use_area_4vector,
164            double & median, double & sigma, double & mean_area) const {
165
166  vector<PseudoJet> incl_jets = inclusive_jets();
167  get_median_rho_and_sigma(incl_jets, selector, use_area_4vector,
168                           median, sigma, mean_area, true);
169}
170
171
172void ClusterSequenceAreaBase::get_median_rho_and_sigma(
173            const vector<PseudoJet> & all_jets,
174            const Selector & selector, bool use_area_4vector,
175            double & median, double & sigma, double & mean_area,
176            bool all_are_incl) const {
177
178  _check_jet_alg_good_for_median();
179
180  // sanity check on the selector: we require a finite area and that
181  // it applies jet by jet (see BackgroundEstimator for more advanced
182  // usage)
183  _check_selector_good_for_median(selector);
184
185  vector<double> pt_over_areas;
186  double total_area  = 0.0;
187  double total_njets = 0;
188
189  for (unsigned i = 0; i < all_jets.size(); i++) {
190    if (selector.pass(all_jets[i])) {
191      double this_area;
192      if (use_area_4vector) {
193          this_area = area_4vector(all_jets[i]).perp();
194      } else {
195          this_area = area(all_jets[i]);
196      }
197
198      if (this_area>0) {
199        pt_over_areas.push_back(all_jets[i].perp()/this_area);
200      } else {
201        _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A).");
202      }
203
204      total_area  += this_area;
205      total_njets += 1.0;
206    }
207  }
208
209  // there is nothing inside our region, so answer will always be zero
210  if (pt_over_areas.size() == 0) {
211    median = 0.0;
212    sigma  = 0.0;
213    mean_area = 0.0;
214    return;
215  }
216 
217  // get median (pt/area) [this is the "old" median definition. It considers
218  // only the "real" jets in calculating the median, i.e. excluding the
219  // only-ghost ones; it will be supplemented with more info below]
220  sort(pt_over_areas.begin(), pt_over_areas.end());
221
222  // now get the median & error, accounting for empty jets
223  // define the fractions of distribution at median, median-1sigma
224  double posn[2] = {0.5, (1.0-0.6827)/2.0};
225  double res[2];
226 
227  double n_empty, empty_a;
228  if (has_explicit_ghosts()) {
229    // NB: the following lines of code are potentially incorrect in cases
230    //     where there are unclustered particles (empty_area would do a better job,
231    //     at least for active areas). This is not an issue with kt or C/A, or other
232    //     algorithms that cluster all particles (and the median estimation should in
233    //     any case only be done with kt or C/A!)
234    empty_a = 0.0;
235    n_empty = 0;
236  } else if (all_are_incl) {
237    // the default case
238    empty_a = empty_area(selector);
239    n_empty = n_empty_jets(selector);
240  } else {
241    // this one is intended to be used when e.g. one runs C/A, then looks at its
242    // exclusive jets in order to get an effective smaller R value, and passes those
243    // to this routine.
244    empty_a = empty_area_from_jets(all_jets, selector);
245    mean_area = total_area / total_njets; // temporary value
246    n_empty   = empty_a / mean_area;
247  }
248  //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl;
249  //cout << "*** n_empty = " << n_empty << ", ntotal =  " << total_njets << endl;
250  total_njets += n_empty;
251  total_area  += empty_a;
252
253  // we need an int (rather than an unsigned int) with the size of the
254  // pt_over_areas array, because we'll often be doing subtraction of
255  // -1, negating it, etc. All of these operations go crazy with unsigned ints.
256  int pt_over_areas_size = pt_over_areas.size();
257  if (n_empty < -pt_over_areas_size/4.0)
258    _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
259
260  for (int i = 0; i < 2; i++) {
261    double nj_median_pos = 
262      (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty;
263    double nj_median_ratio;
264    if (nj_median_pos >= 0 && pt_over_areas_size > 1) {
265      int int_nj_median = int(nj_median_pos);
266 
267     // avoid potential overflow issues
268      if (int_nj_median+1 > pt_over_areas_size-1){
269        int_nj_median = pt_over_areas_size-2;
270        nj_median_pos = pt_over_areas_size-1;
271      }
272
273      nj_median_ratio = 
274        pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
275        + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
276    } else {
277      nj_median_ratio = 0.0;
278    }
279    res[i] = nj_median_ratio;
280  }
281  median = res[0];
282  double error  = res[0] - res[1];
283  mean_area = total_area / total_njets;
284  sigma  = error * sqrt(mean_area);
285}
286
287
288/// return a vector of all subtracted jets, using area_4vector, given rho.
289/// Only inclusive_jets above ptmin are subtracted and returned.
290/// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
291/// i.e. not necessarily ordered in pt once subtracted
292vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho,
293                                                           const double ptmin) 
294                                                           const {
295  vector<PseudoJet> sub_jets;
296  vector<PseudoJet> jets_local = sorted_by_pt(inclusive_jets(ptmin));
297  for (unsigned i=0; i<jets_local.size(); i++) {
298     PseudoJet sub_jet = subtracted_jet(jets_local[i],rho);
299     sub_jets.push_back(sub_jet);
300  }
301  return sub_jets;
302}
303
304/// return a vector of subtracted jets, using area_4vector.
305/// Only inclusive_jets above ptmin are subtracted and returned.
306/// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
307/// i.e. not necessarily ordered in pt once subtracted
308vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(
309                                                 const Selector & selector, 
310                                                 const double ptmin)
311                                                 const {
312  double rho = median_pt_per_unit_area_4vector(selector);
313  return subtracted_jets(rho,ptmin);
314}
315
316
317/// return a subtracted jet, using area_4vector, given rho
318PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
319                                                  const double rho) const {
320  PseudoJet area4vect = area_4vector(jet);
321  PseudoJet sub_jet;
322  // sanity check
323  if (rho*area4vect.perp() < jet.perp() ) { 
324    sub_jet = jet - rho*area4vect;
325  } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); }
326 
327  // make sure the subtracted jet has the same index (cluster, user, csw)
328  // (i.e. "looks like") the original jet
329  sub_jet.set_cluster_hist_index(jet.cluster_hist_index());
330  sub_jet.set_user_index(jet.user_index());
331  // do not use CS::_set_structure_shared_ptr here, which should
332  // only be called to maintain the tally during construction
333  sub_jet.set_structure_shared_ptr(jet.structure_shared_ptr());
334  return sub_jet;
335}
336
337
338/// return a subtracted jet, using area_4vector;  note that this is
339/// potentially inefficient if repeatedly used for many different
340/// jets, because rho will be recalculated each time around.
341PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
342                                       const Selector & selector) const {
343  double rho = median_pt_per_unit_area_4vector(selector);
344  PseudoJet sub_jet = subtracted_jet(jet, rho);
345  return sub_jet;
346}
347
348
349/// return the subtracted pt, given rho
350double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
351                                              const double rho,
352                                              bool use_area_4vector) const {
353  if ( use_area_4vector ) { 
354     PseudoJet sub_jet = subtracted_jet(jet,rho);
355     return sub_jet.perp();
356  } else {
357     return jet.perp() - rho*area(jet);
358  }
359} 
360
361
362/// return the subtracted pt; note that this is
363/// potentially inefficient if repeatedly used for many different
364/// jets, because rho will be recalculated each time around.
365double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
366                                              const Selector & selector,
367                                              bool use_area_4vector) const {
368  if ( use_area_4vector ) { 
369     PseudoJet sub_jet = subtracted_jet(jet,selector);
370     return sub_jet.perp();
371  } else {
372     double rho = median_pt_per_unit_area(selector);
373     return subtracted_pt(jet,rho,false);
374  }
375} 
376
377// check the selector is suited for the computations i.e. applies jet
378// by jet and has a finite area
379void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{
380  // make sure the selector has a finite area
381  if ((! has_explicit_ghosts()) &&  (! selector.has_finite_area())){
382    throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area");
383  }
384
385  // make sure the selector applies jet by jet
386  if (! selector.applies_jet_by_jet()){
387    throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet");
388  }
389}
390
391
392/// check the jet algorithm is suitable (and if not issue a warning)
393void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const {
394  if (jet_def().jet_algorithm() != kt_algorithm
395      && jet_def().jet_algorithm() != cambridge_algorithm
396      && jet_def().jet_algorithm() !=  cambridge_for_passive_algorithm) {
397    _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)");
398  }
399}
400
401
402
403FASTJET_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.