wdm.hpp
1 // Copyright © 2020 Thomas Nagler
2 //
3 // This file is part of the wdm library and licensed under the terms of
4 // the MIT license. For a copy, see the LICENSE file in the root directory
5 // or https://github.com/tnagler/wdm/blob/master/LICENSE.
6 
7 #pragma once
8 
9 #include "wdm/ktau.hpp"
10 #include "wdm/hoeffd.hpp"
11 #include "wdm/prho.hpp"
12 #include "wdm/srho.hpp"
13 #include "wdm/bbeta.hpp"
14 #include "wdm/methods.hpp"
15 #include "wdm/nan_handling.hpp"
16 
18 namespace wdm {
19 
36 inline double wdm(std::vector<double> x,
37  std::vector<double> y,
38  std::string method,
39  std::vector<double> weights = std::vector<double>(),
40  bool remove_missing = true)
41 {
42  utils::check_sizes(x, y, weights);
43  // na handling
44  if (utils::preproc(x, y, weights, method, remove_missing) == "return_nan")
45  return std::numeric_limits<double>::quiet_NaN();
46 
47  if (methods::is_hoeffding(method))
48  return impl::hoeffd(x, y, weights);
49  if (methods::is_kendall(method))
50  return impl::ktau(x, y, weights);
51  if (methods::is_pearson(method))
52  return impl::prho(x, y, weights);
53  if (methods::is_spearman(method))
54  return impl::srho(x, y, weights);
55  if (methods::is_blomqvist(method))
56  return impl::bbeta(x, y, weights);
57  throw std::runtime_error("method not implemented.");
58 }
59 
60 
74 class Indep_test {
75 public:
76  Indep_test() = delete;
77 
87  Indep_test(std::vector<double> x,
88  std::vector<double> y,
89  std::string method,
90  std::vector<double> weights = std::vector<double>(),
91  bool remove_missing = true,
92  std::string alternative = "two-sided") :
93  method_(method),
94  alternative_(alternative)
95  {
96  utils::check_sizes(x, y, weights);
97  if (utils::preproc(x, y, weights, method, remove_missing) == "return_nan") {
98  n_eff_ = utils::effective_sample_size(x.size(), weights);
99  estimate_ = std::numeric_limits<double>::quiet_NaN();
100  statistic_ = std::numeric_limits<double>::quiet_NaN();
101  p_value_ = std::numeric_limits<double>::quiet_NaN();
102  } else {
103  n_eff_ = utils::effective_sample_size(x.size(), weights);
104  estimate_ = wdm(x, y, method, weights, false);
105  statistic_ = compute_test_stat(estimate_, method, n_eff_, x, y, weights);
106  p_value_ = compute_p_value(statistic_, method, alternative, n_eff_);
107  }
108  }
109 
111  std::string method() const {return method_;}
112 
114  std::string alternative() const {return alternative_;}
115 
117  double n_eff() const {return n_eff_;}
118 
120  double estimate() const {return estimate_;}
121 
123  double statistic() const {return statistic_;}
124 
126  double p_value() const {return p_value_;}
127 
128 private:
129 
130  inline double compute_test_stat(double estimate,
131  std::string method,
132  double n_eff,
133  const std::vector<double>& x,
134  const std::vector<double>& y,
135  const std::vector<double>& weights)
136  {
137  // prevent overflow in atanh
138  if (estimate == 1.0)
139  estimate = 1 - 1e-12;
140  if (estimate == -1.0)
141  estimate = 1e-12;
142 
143  double stat;
144  if (methods::is_hoeffding(method)) {
145  stat = estimate / 30.0 + 1.0 / (36.0 * n_eff);
146  } else if (methods::is_kendall(method)) {
147  stat = estimate * impl::ktau_stat_adjust(x, y, weights);
148  } else if (methods::is_pearson(method)) {
149  stat = std::atanh(estimate) * std::sqrt(n_eff - 3);
150  } else if (methods::is_spearman(method)) {
151  stat = std::atanh(estimate) * std::sqrt((n_eff - 3) / 1.06);
152  } else if (methods::is_blomqvist(method)) {
153  stat = std::atanh(estimate) * std::sqrt(n_eff);
154  } else {
155  throw std::runtime_error("method not implemented.");
156  }
157 
158  return stat;
159  }
160 
161  inline double compute_p_value(double statistic,
162  std::string method,
163  std::string alternative,
164  double n_eff = 0.0)
165  {
166  double p_value;
167  if (methods::is_hoeffding(method)) {
168  if (n_eff == 0.0)
169  throw std::runtime_error("must provide n_eff for method 'hoeffd'.");
170  if (alternative != "two-sided")
171  throw std::runtime_error("only two-sided test available for Hoeffding's D.");
172  p_value = impl::phoeffb(statistic, n_eff);
173  } else {
174  if (alternative == "two-sided") {
175  p_value = 2 * utils::normalCDF(-std::abs(statistic));
176  } else if (alternative == "less") {
177  p_value = utils::normalCDF(statistic);
178  } else if (alternative == "greater") {
179  p_value = 1 - utils::normalCDF(statistic);
180  } else {
181  throw std::runtime_error("alternative not implemented.");
182  }
183  }
184 
185  return p_value;
186  }
187 
188  std::string method_;
189  std::string alternative_;
190  double n_eff_;
191  double estimate_;
192  double statistic_;
193  double p_value_;
194 };
195 
196 
197 }
double wdm(const Eigen::VectorXd &x, const Eigen::VectorXd &y, std::string method, Eigen::VectorXd weights=Eigen::VectorXd(), bool remove_missing=true)
Definition: eigen.hpp:42
double n_eff() const
the effective sample size in the test
Definition: wdm.hpp:117
Definition: wdm.hpp:74
std::string alternative() const
the alternative hypothesis used for the test
Definition: wdm.hpp:114
double statistic() const
the test statistic
Definition: wdm.hpp:123
double estimate() const
the estimated dependence measure
Definition: wdm.hpp:120
std::string method() const
the method used for the test
Definition: wdm.hpp:111
Weighted dependence measures.
Definition: bbeta.hpp:11
double p_value() const
the p-value
Definition: wdm.hpp:126
Indep_test(std::vector< double > x, std::vector< double > y, std::string method, std::vector< double > weights=std::vector< double >(), bool remove_missing=true, std::string alternative="two-sided")
Definition: wdm.hpp:87