ranks.hpp
1 // Copyright © 2020 Thomas Nagler
2 //
3 // This file is part of the wdm library and licensed under the terms of
4 // the MIT license. For a copy, see the LICENSE file in the root directory
5 // or https://github.com/tnagler/wdm/blob/master/LICENSE.
6 
7 #pragma once
8 
9 #include "utils.hpp"
10 
11 namespace wdm {
12 
13 namespace impl {
14 
22 inline std::vector<double> rank(
23  std::vector<double> x,
24  std::vector<double> weights = std::vector<double>(),
25  std::string ties_method = "min")
26 {
27  size_t n = x.size();
28  if (weights.size() == 0)
29  weights = std::vector<double>(n, 1.0);
30 
31  std::vector<size_t> perm = utils::get_order(x);
32 
33  double w_acc = 0.0, w_batch;
34  if ((ties_method != "min") && (ties_method != "average"))
35  throw std::runtime_error("ties_method must be either 'min' or 'average.");
36  for (size_t i = 0, reps; i < n; i += reps) {
37  // find replications
38  reps = 1;
39  w_batch = 0.0;
40  while ((i + reps < n) && (x[perm[i]] == x[perm[i + reps]])) {
41  w_batch += weights[perm[i]];
42  ++reps;
43  }
44 
45  // assign average rank of the tied values
46  for (size_t k = 0; k < reps; ++k) {
47  if (ties_method == "min")
48  x[perm[i + k]] = w_acc;
49  else
50  x[perm[i + k]] = w_acc + w_batch / 2.0;
51  }
52 
53  // accumulate weights for current batch
54  for (size_t k = 0; k < reps; ++k)
55  w_acc += weights[perm[i + k]];
56  }
57 
58  return x;
59 }
60 
65 inline std::vector<double> bivariate_rank(
66  std::vector<double> x,
67  std::vector<double> y,
68  std::vector<double> weights = std::vector<double>())
69 {
70  utils::check_sizes(x, y, weights);
71 
72  // get inverse of permutation that brings x in ascending order
73  std::vector<size_t> perm_x = utils::get_order(x);
74  perm_x = utils::invert_permutation(perm_x);
75 
76  // sort x, y, and weights according to x, breaking ties with y
77  utils::sort_all(x, y, weights);
78 
79  // get inverse of permutation that brings y in descending order
80  std::vector<size_t> perm_y = utils::get_order(y, false);
81  perm_y = utils::invert_permutation(perm_y);
82 
83  // sort y in descending order counting inversions
84  std::vector<double> counts(y.size(), 0.0);
85  utils::merge_sort_count_per_element(y, weights, counts);
86 
87  // bring counts back in original order
88  std::vector<double> counts_tmp = counts;
89  for (size_t i = 0; i < counts.size(); i++)
90  counts[i] = counts_tmp[perm_y[perm_x[i]]];
91 
92  return counts;
93 }
94 
97 inline double median(const std::vector<double>& x,
98  std::vector<double> weights = std::vector<double>())
99 {
100  utils::check_sizes(x, x, weights);
101  size_t n = x.size();
102 
103  // sort x and weights in x order
104  auto perm = utils::get_order(x);
105  auto xx = x;
106  auto w = weights;
107  for (size_t i = 0; i < n; i++) {
108  xx[i] = x[perm[i]];
109  if (w.size() > 0)
110  w[i] = weights[perm[i]];
111  }
112 
113  // compute weighted ranks and the "average rank" (corresponds to the median)
114  auto ranks = rank(xx, w, "average");
115  if (weights.size() == 0)
116  weights = std::vector<double>(n, 1.0);
117  double rank_avrg = utils::perm_sum(weights, 2) / utils::sum(weights);
118 
119  // weighted median splits data below and above rank_avrg
120  size_t i = 0;
121  while (ranks[i] < rank_avrg) i++;
122  if (ranks[i] == rank_avrg)
123  return xx[i];
124  else
125  return 0.5 * (xx[i - 1] + xx[i]);
126 }
127 
128 }
129 
130 }
Weighted dependence measures.
Definition: bbeta.hpp:11