utils.hpp
1 // Copyright © 2020 Thomas Nagler
2 //
3 // This file is part of the wdm library and licensed under the terms of
4 // the MIT license. For a copy, see the LICENSE file in the root directory
5 // or https://github.com/tnagler/wdm/blob/master/LICENSE.
6 
7 #pragma once
8 
9 #include <algorithm>
10 #include <string>
11 #include <vector>
12 #include <numeric>
13 #include <cmath>
14 #include <stdexcept>
15 
16 namespace wdm {
17 
18 namespace utils {
19 
20 double normalCDF(double x)
21 {
22  return std::erfc(-x / std::sqrt(2)) / 2;
23 }
24 
25 inline double linear_interp(const double& x,
26  const std::vector<double>& grid,
27  const std::vector<double>& values)
28 {
29  // find upper end point of interval
30  size_t i = 1;
31  while(x > grid[i]) i++;
32 
33  // linear interpolation
34  double w = (x - grid[i - 1]) / (grid[i] - grid[i - 1]);
35  return w * values[i - 1] + (1 - w) * values[i];
36 }
37 
38 inline void check_sizes(const std::vector<double>& x,
39  const std::vector<double>& y,
40  const std::vector<double>& weights)
41 {
42  if (y.size() != x.size())
43  throw std::runtime_error("x and y must have the same size.");
44  if ((weights.size() > 0) && (weights.size() != y.size()))
45  throw std::runtime_error("x, y, and weights must have the same size.");
46 }
47 
48 
53 inline std::vector<double> pow(const std::vector<double>& x, size_t n)
54 {
55  std::vector<double> res(x.size(), 1.0);
56  if (n > 0) {
57  for (size_t i = 0; i < x.size(); i++) {
58  for (size_t j = 0; j < n; j++) {
59  res[i] *= x[i];
60  }
61  }
62  }
63 
64  return res;
65 }
66 
69 inline double sum(const std::vector<double>& x)
70 {
71  double res = 0.0;
72  for (size_t i = 0; i < x.size(); i++)
73  res += x[i];
74  return res;
75 }
76 
77 
82 inline double perm_sum(const std::vector<double>& x, size_t k)
83 {
84  if (k == 0)
85  return 1.0;
86  double s = 0;
87  for (size_t i = 1; i <= k; i++)
88  s += std::pow(-1.0, i - 1) * perm_sum(x, k - i) * sum(pow(x, i));
89  return s / k;
90 }
91 
95 inline double effective_sample_size(size_t n, const std::vector<double>& weights)
96 {
97  double n_eff;
98  if (weights.size() == 0) {
99  n_eff = static_cast<double>(n);
100  } else {
101  n_eff = std::pow(sum(weights), 2);
102  n_eff /= sum(pow(weights, 2));
103  }
104 
105  return n_eff;
106 }
107 
111 inline std::vector<size_t> invert_permutation(const std::vector<size_t>& perm)
112 {
113  std::vector<size_t> inv_perm(perm.size());
114  for (size_t i = 0; i < perm.size(); i++)
115  inv_perm[perm[i]] = i;
116  return inv_perm;
117 }
118 
122 inline std::vector<size_t> get_order(const std::vector<double>& x,
123  bool ascending = true)
124 {
125  size_t n = x.size();
126  std::vector<size_t> perm(n);
127  for (size_t i = 0; i < n; i++)
128  perm[i] = i;
129  auto sorter = [&] (size_t i, size_t j) {
130  if (ascending)
131  return (x[i] < x[j]);
132  else
133  return (x[i] > x[j]);
134  };
135  std::sort(perm.begin(), perm.end(), sorter);
136 
137  return perm;
138 }
139 
142 inline void sort_all(std::vector<double>& x,
143  std::vector<double>& y,
144  std::vector<double>& weights)
145 {
146  size_t n = x.size();
147  std::vector<size_t> order(n);
148  for (size_t i = 0; i < n; i++)
149  order[i] = i;
150  auto sorter_with_tie_break = [&] (size_t i, size_t j) {
151  return (x[i] < x[j]) | ((x[i] == x[j]) && (y[i] < y[j]));
152  };
153  std::sort(order.begin(), order.end(), sorter_with_tie_break);
154 
155  std::vector<double> xx(n), yy(n);
156  for (size_t i = 0; i < n; i++) {
157  xx[i] = x[order[i]];
158  yy[i] = y[order[i]];
159  }
160 
161  // sort weights accordingly
162  std::vector<double> w = weights;
163  if (weights.size() > 0) {
164  for (size_t i = 0; i < n; i++) {
165  w[i] = weights[order[i]];
166  }
167  }
168 
169  x = xx;
170  y = yy;
171  weights = w;
172 }
173 
179 inline double count_ties_v(const std::vector<double>& x,
180  const std::vector<double>& weights)
181 {
182  bool weighted = (weights.size() > 0);
183  double count = 0.0, w1 = 0.0, w2 = 0.0;
184  size_t reps = 1;
185  for (size_t i = 1; i < x.size(); i++) {
186  if ((x[i] == x[i - 1])) {
187  if (weighted) {
188  if (reps == 1) {
189  w1 = weights[i - 1];
190  w2 = w1 * w1;
191  }
192  w1 += weights[i];
193  w2 += std::pow(weights[i], 2);
194  }
195  reps++;
196  } else if (reps > 1) {
197  if (weighted) {
198  count += (w1 * w1 - w2) * (2 * w1 + 5);
199  } else {
200  count += reps * (reps - 1)* (2 * reps + 5);
201  }
202  reps = 1;
203  }
204  }
205 
206  if (reps > 1) {
207  if (weighted) {
208  count += (w1 * w1 - w2) * (2 * w1 + 5);
209  } else {
210  count += reps * (reps - 1) * (2 * reps + 5);
211  }
212  }
213 
214  return count;
215 }
216 
217 
222 inline double count_tied_pairs(const std::vector<double>& x,
223  const std::vector<double>& weights)
224 {
225  bool weighted = (weights.size() > 0);
226  double count = 0.0, w1 = 0.0, w2 = 0.0;
227  size_t reps = 1;
228  for (size_t i = 1; i < x.size(); i++) {
229  if ((x[i] == x[i - 1])) {
230  if (weighted) {
231  if (reps == 1) {
232  w1 = weights[i - 1];
233  w2 = w1 * w1;
234  }
235  w1 += weights[i];
236  w2 += std::pow(weights[i], 2);
237  }
238  reps++;
239  } else if (reps > 1) {
240  if (weighted) {
241  count += (w1 * w1 - w2) / 2.0;
242  } else {
243  count += reps * (reps - 1) / 2.0;
244  }
245  reps = 1;
246  }
247  }
248 
249  if (reps > 1) {
250  if (weighted) {
251  count += (w1 * w1 - w2) / 2.0;
252  } else {
253  count += reps * (reps - 1) / 2.0;
254  }
255  }
256 
257  return count;
258 }
259 
265 inline double count_tied_triplets(const std::vector<double>& x,
266  const std::vector<double>& weights)
267 {
268  bool weighted = (weights.size() > 0);
269  double count = 0.0, w1 = 0.0, w2 = 0.0, w3 = 0.0;
270  size_t reps = 2;
271  for (size_t i = 2; i < x.size(); i++) {
272  if ((x[i] == x[i - 1]) & (x[i] == x[i - 2])) {
273  if (weighted) {
274  if (reps == 1) {
275  w1 = weights[i - 1];
276  w2 = std::pow(weights[i - 1], 2);
277  w3 = std::pow(weights[i - 1], 3);
278  }
279  w1 += weights[i];
280  w2 += std::pow(weights[i], 2);
281  w3 += std::pow(weights[i], 3);
282  }
283  reps++;
284  } else if (reps > 2) {
285  if (weighted) {
286  count += (std::pow(w1, 3) - 3 * w2 * w1 + 2 * w3) / 6.0;
287  } else {
288  count += reps * (reps - 1) * (reps - 2) / 6.0;
289  }
290  reps = 1;
291  }
292  }
293 
294  if (reps > 2) {
295  if (weighted) {
296  count += (std::pow(w1, 3) - 3 * w2 * w1 + 2 * w3) / 6.0;
297  } else {
298  count += reps * (reps - 1) * (reps - 2) / 6.0;
299  }
300  }
301 
302  return count;
303 }
304 
310 inline double count_joint_ties(const std::vector<double>& x,
311  const std::vector<double>& y,
312  const std::vector<double>& weights)
313 {
314  bool weighted = (weights.size() > 0);
315  double count = 0.0, w1 = 0.0, w2 = 0.0;
316  size_t reps = 1;
317  for (size_t i = 1; i < x.size(); i++) {
318  if ((x[i] == x[i - 1]) && (y[i] == y[i - 1])) {
319  if (weighted) {
320  if (reps == 1) {
321  w1 = weights[i - 1];
322  w2 = weights[i - 1] * weights[i - 1];
323  }
324  w1 += weights[i];
325  w2 += weights[i] * weights[i];
326  }
327  reps++;
328  } else if (reps > 1) {
329  if (weighted) {
330  count += (w1 * w1 - w2) / 2.0;
331  } else {
332  count += reps * (reps - 1) / 2.0;
333  }
334  reps = 1;
335  }
336  }
337 
338  if (reps > 1) {
339  if (weighted) {
340  count += (w1 * w1 - w2) / 2.0;
341  } else {
342  count += reps * (reps - 1) / 2.0;
343  }
344  }
345 
346  return count;
347 }
348 
357 inline void merge(std::vector<double>& vec,
358  const std::vector<double>& vec1,
359  const std::vector<double>& vec2,
360  std::vector<double>& weights,
361  const std::vector<double>& weights1,
362  const std::vector<double>& weights2,
363  double& count)
364 {
365  double w_acc = 0.0, w1_sum = 0.0;
366  bool weighted = (weights.size() > 0);
367  if (weighted) {
368  for (size_t i = 0; i < weights1.size(); i++)
369  w1_sum += weights1[i];
370  }
371  size_t i, j, k;
372  for (i = 0, j = 0, k = 0; i < vec1.size() && j < vec2.size(); k++) {
373  if (vec1[i] <= vec2[j]) {
374  vec[k] = vec1[i];
375  if (weighted) {
376  weights[k] = weights1[i];
377  w_acc += weights1[i];
378  }
379  i++;
380  } else {
381  vec[k] = vec2[j];
382  if (weighted) {
383  weights[k] = weights2[j];
384  count += weights2[j] * (w1_sum - w_acc);
385  } else {
386  count += vec1.size() - i;
387  }
388  j++;
389  }
390  }
391 
392  while (i < vec1.size()) {
393  vec[k] = vec1[i];
394  if (weighted)
395  weights[k] = weights1[i];
396  k++;
397  i++;
398  }
399 
400  while (j < vec2.size()) {
401  vec[k] = vec2[j];
402  if (weighted)
403  weights[k] = weights2[j];
404  k++;
405  j++;
406  }
407 }
408 
414 inline void merge_sort(std::vector<double>& vec,
415  std::vector<double>& weights,
416  double& count)
417 {
418  if (vec.size() > 1) {
419  size_t n = vec.size();
420  std::vector<double> vec1(vec.begin(), vec.begin() + n / 2);
421  std::vector<double> vec2(vec.begin() + n / 2, vec.end());
422 
423  n = weights.size();
424  std::vector<double> weights1(weights.begin(), weights.begin() + n / 2);
425  std::vector<double> weights2(weights.begin() + n / 2, weights.end());
426 
427  merge_sort(vec1, weights1, count);
428  merge_sort(vec2, weights2, count);
429  merge(vec, vec1, vec2, weights, weights1, weights2, count);
430  }
431 }
432 
444 inline void merge_count_per_element(std::vector<double>& vec,
445  const std::vector<double>& vec1,
446  const std::vector<double>& vec2,
447  std::vector<double>& weights,
448  const std::vector<double>& weights1,
449  const std::vector<double>& weights2,
450  std::vector<double>& counts,
451  const std::vector<double>& counts1,
452  const std::vector<double>& counts2)
453 {
454  double w_acc = 0.0;
455  bool weighted = (weights.size() > 0);
456  double w1_sum = 0.0;
457  if (weighted) {
458  for (size_t i = 0; i < weights1.size(); i++)
459  w1_sum += weights1[i];
460  }
461  size_t i, j, k;
462  for (i = 0, j = 0, k = 0; i < vec1.size() && j < vec2.size(); k++) {
463  if (vec1[i] > vec2[j]) {
464  vec[k] = vec1[i];
465  counts[k] = counts1[i];
466  if (weighted) {
467  weights[k] = weights1[i];
468  w_acc += weights1[i];
469  }
470  i++;
471  } else {
472  vec[k] = vec2[j];
473  if (weighted) {
474  counts[k] = counts2[j] + w1_sum - w_acc;
475  weights[k] = weights2[j];
476  } else {
477  counts[k] = counts2[j] + vec1.size() - i;
478  }
479  j++;
480  }
481  }
482 
483  while (i < vec1.size()) {
484  vec[k] = vec1[i];
485  if (weighted)
486  weights[k] = weights1[i];
487  counts[k] = counts1[i];
488  k++;
489  i++;
490  }
491 
492  while (j < vec2.size()) {
493  vec[k] = vec2[j];
494  if (weighted)
495  weights[k] = weights2[j];
496  counts[k] = counts2[j];
497  k++;
498  j++;
499  }
500 }
501 
502 
509 inline void merge_sort_count_per_element(std::vector<double>& vec,
510  std::vector<double>& weights,
511  std::vector<double>& counts)
512 {
513  if (vec.size() > 1) {
514  size_t n = vec.size();
515  std::vector<double> vec1(vec.begin(), vec.begin() + n / 2);
516  std::vector<double> vec2(vec.begin() + n / 2, vec.end());
517 
518  n = weights.size();
519  std::vector<double> weights1(weights.begin(), weights.begin() + n / 2);
520  std::vector<double> weights2(weights.begin() + n / 2, weights.end());
521 
522  n = counts.size();
523  std::vector<double> counts1(counts.begin(), counts.begin() + n / 2);
524  std::vector<double> counts2(counts.begin() + n / 2, counts.end());
525 
526  merge_sort_count_per_element(vec1, weights1, counts1);
527  merge_sort_count_per_element(vec2, weights2, counts2);
528  merge_count_per_element(vec, vec1, vec2,
529  weights, weights1, weights2,
530  counts, counts1, counts2);
531  }
532 }
533 
534 }
535 
536 } // end wdm
Weighted dependence measures.
Definition: bbeta.hpp:11