hoeffd.hpp
1 // Copyright © 2020 Thomas Nagler
2 //
3 // This file is part of the wdm library and licensed under the terms of
4 // the MIT license. For a copy, see the LICENSE file in the root directory
5 // or https://github.com/tnagler/wdm/blob/master/LICENSE.
6 
7 #pragma once
8 
9 #include "utils.hpp"
10 #include "ranks.hpp"
11 #include <cmath>
12 
13 namespace wdm {
14 
15 namespace impl {
16 
17 const double pi = std::acos(-1);
18 
22 inline double hoeffd(std::vector<double> x,
23  std::vector<double> y,
24  std::vector<double> weights = std::vector<double>())
25 {
26  utils::check_sizes(x, y, weights);
27 
28  // 1. Compute (weighted) ranks
29  std::vector<double> R_X = rank(x, weights);
30  std::vector<double> R_Y = rank(y, weights);
31  std::vector<double> S_X, S_Y;
32  if (weights.size() > 0) {
33  S_X = rank(x, utils::pow(weights, 2));
34  S_Y = rank(y, utils::pow(weights, 2));
35  } else {
36  S_X = R_X;
37  S_Y = R_Y;
38  }
39 
40  // 2. Compute (weighted) bivariate ranks (number of points w/ both columns
41  // less than the ith row).
42  std::vector<double> R_XY, S_XY, T_XY, U_XY;
43  R_XY = bivariate_rank(x, y, weights);
44  if (weights.size() > 0) {
45  S_XY = bivariate_rank(x, y, utils::pow(weights, 2));
46  T_XY = bivariate_rank(x, y, utils::pow(weights, 3));
47  U_XY = bivariate_rank(x, y, utils::pow(weights, 4));
48  } else {
49  S_XY = R_XY;
50  T_XY = R_XY;
51  U_XY = R_XY;
52  }
53 
54 
55  // 3. Compute (weighted) Hoeffdings' D
56  if (weights.size() == 0)
57  weights = std::vector<double>(x.size(), 1.0);
58  double A_1 = 0.0, A_2 = 0.0, A_3 = 0.0;
59  for (size_t i = 0; i < x.size(); i++) {
60  A_1 += (R_XY[i] * R_XY[i] - S_XY[i]) * weights[i];
61  A_2 += (
62  (R_X[i] * R_Y[i] - S_XY[i]) * R_XY[i] -
63  S_XY[i] * (R_X[i] + R_Y[i]) + 2 * T_XY[i]
64  ) * weights[i];
65  A_3 += (
66  (R_X[i] * R_X[i] - S_X[i]) * (R_Y[i] * R_Y[i] - S_Y[i]) -
67  4 * ((R_X[i] * R_Y[i] - S_XY[i]) * S_XY[i] -
68  T_XY[i] * (R_X[i] + R_Y[i]) + 2 * U_XY[i]) -
69  2 * (S_XY[i] * S_XY[i] - U_XY[i])
70  ) * weights[i];
71  }
72  double D = 0.0;
73  D += A_1 / (utils::perm_sum(weights, 3) * 6);
74  D -= 2 * A_2 / (utils::perm_sum(weights, 4) * 24);
75  D += A_3 / (utils::perm_sum(weights, 5) * 120);
76 
77  return 30.0 * D;
78 }
79 
85 inline double phoeffb(double B, double n) {
86  B *= 0.5 * std::pow(pi, 4) * (n - 1);
87 
88  // obtain approximate p values by interpolation of tabulated values
89  double p;
90  if ((B <= 1.1) | (B >= 8.5)) {
91  p = std::min(1.0, std::exp(0.3885037 - 1.164879 * B));
92  p = std::max(1e-12, p);
93  } else {
94  std::vector<double> grid{
95  1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55, 1.6,
96  1.65, 1.7, 1.75, 1.8, 1.85, 1.9, 1.95, 2, 2.05, 2.1, 2.15, 2.2,
97  2.25, 2.3, 2.35, 2.4, 2.45, 2.5, 2.55, 2.6, 2.65, 2.7, 2.75,
98  2.8, 2.85, 2.9, 2.95, 3, 3.05, 3.1, 3.15, 3.2, 3.25, 3.3, 3.35,
99  3.4, 3.45, 3.5, 3.55, 3.6, 3.65, 3.7, 3.75, 3.8, 3.85, 3.9, 3.95,
100  4, 4.05, 4.1, 4.15, 4.2, 4.25, 4.3, 4.35, 4.4, 4.45, 4.5, 4.55,
101  4.6, 4.65, 4.7, 4.75, 4.8, 4.85, 4.9, 4.95, 5, 5.5, 6, 6.5, 7,
102  7.5, 8, 8.5
103  };
104  std::vector<double> vals{
105  0.5297, 0.4918, 0.4565, 0.4236, 0.3930, 0.3648, 0.3387, 0.3146,
106  0.2924, 0.2719, 0.2530, 0.2355, 0.2194, 0.2045, 0.1908, 0.1781,
107  0.1663, 0.1554, 0.1453, 0.1359, 0.1273, 0.1192, 0.1117, 0.1047,
108  0.0982, 0.0921, 0.0864, 0.0812, 0.0762, 0.0716, 0.0673, 0.0633,
109  0.0595, 0.0560, 0.0527, 0.0496, 0.0467, 0.0440, 0.0414, 0.0390,
110  0.0368, 0.0347, 0.0327, 0.0308, 0.0291, 0.0274, 0.0259, 0.0244,
111  0.0230, 0.0217, 0.0205, 0.0194, 0.0183, 0.0173, 0.0163, 0.0154,
112  0.0145, 0.0137, 0.0130, 0.0123, 0.0116, 0.0110, 0.0104, 0.0098,
113  0.0093, 0.0087, 0.0083, 0.0078, 0.0074, 0.0070, 0.0066, 0.0063,
114  0.0059, 0.0056, 0.0053, 0.0050, 0.0047, 0.0045, 0.0042, 0.00025,
115  0.00014, 0.0008, 0.0005, 0.0003, 0.0002, 0.0001
116  };
117  p = utils::linear_interp(B, grid, vals);
118  }
119 
120  return p;
121 }
122 
123 }
124 
125 }
Weighted dependence measures.
Definition: bbeta.hpp:11