ktau.hpp
1 // Copyright © 2020 Thomas Nagler
2 //
3 // This file is part of the wdm library and licensed under the terms of
4 // the MIT license. For a copy, see the LICENSE file in the root directory
5 // or https://github.com/tnagler/wdm/blob/master/LICENSE.
6 
7 #pragma once
8 
9 #include "utils.hpp"
10 
11 namespace wdm {
12 
13 namespace impl {
14 
15 inline void normalize_weights(std::vector<double>& w)
16 {
17  if (w.size() > 0) {
18  double s = utils::sum(w);
19  for (size_t i = 0; i < w.size(); i++)
20  w[i] /= s;
21  }
22 }
23 
27 inline double ktau(std::vector<double> x,
28  std::vector<double> y,
29  std::vector<double> weights = std::vector<double>())
30 {
31  utils::check_sizes(x, y, weights);
32 
33  // 1.1 Sort x, y, and weights in x order; break ties in according to y.
34  utils::sort_all(x, y, weights);
35 
36  // 1.2 Count pairs of tied x and simultaneous ties in x and y.
37  double ties_x = utils::count_tied_pairs(x, weights);
38  double ties_both = utils::count_joint_ties(x, y, weights);
39 
40  // 2.1 Sort y again and count exchanges (= number of discordant pairs).
41  double num_d = 0.0;
42  utils::merge_sort(y, weights, num_d);
43 
44  // 2.2 Count pairs of tied y.
45  double ties_y = utils::count_tied_pairs(y, weights);
46 
47  // 3. Calculate Kendall's tau.
48  if (weights.size() == 0)
49  weights = std::vector<double>(x.size(), 1.0);
50  double num_pairs = utils::perm_sum(weights, 2);
51  double num_c = num_pairs - (num_d + ties_x + ties_y - ties_both);
52  double tau = num_c - num_d;
53  tau /= std::sqrt((num_pairs - ties_x) * (num_pairs - ties_y));
54 
55  return tau;
56 }
57 
59 inline double ktau_stat_adjust(
60  std::vector<double> x,
61  std::vector<double> y,
62  std::vector<double> weights)
63 {
64  utils::check_sizes(x, y, weights);
65 
66  // 1.1 Sort x, y, and weights in x order; break ties in according to y.
67  utils::sort_all(x, y, weights);
68 
69  // 1.2 Count pairs and triplets of tied x and simultaneous ties in x and y.
70  double pair_x = utils::count_tied_pairs(x, weights);
71  double trip_x = utils::count_tied_triplets(x, weights);
72  double v_x = utils::count_ties_v(x, weights);
73 
74  // 2.1 Sort y and weights in y order; break ties according to x.
75  utils::sort_all(y, x, weights);
76 
77  // 2.2 Count pairs and triplets of tied y.
78  double pair_y = utils::count_tied_pairs(y, weights);
79  double trip_y = utils::count_tied_triplets(y, weights);
80  double v_y = utils::count_ties_v(y, weights);
81 
82  // 3. Calculate adjustment factor.
83  if (weights.size() == 0)
84  weights = std::vector<double>(x.size(), 1.0);
85  double s = utils::sum(weights);
86  double s2 = utils::perm_sum(weights, 2);
87  double s3 = utils::perm_sum(weights, 3);
88  double r = s / utils::sum(utils::pow(weights, 2));
89  double v_0 = 2 * s2 * (2 * s) * std::pow(r, 3);
90  double v_1 = 2 * pair_x * 2 * pair_y / (2 * 2 * s2) * std::pow(r, 2);
91  double v_2 = 6 * trip_x * 6 * trip_y / (9 * 6 * s3) * std::pow(r, 3);
92  double v = (v_0 - std::pow(r, 3) * (v_x - v_y)) / 18 + (v_1 + v_2);
93  return std::pow(r, 2) * std::sqrt((s2 - pair_x) * (s2 - pair_y) / v);
94 }
95 
96 }
97 
98 }
Weighted dependence measures.
Definition: bbeta.hpp:11