# Source code for mgcpy.independence_tests.mdmr

```python
"""
**Main MDMR Independence Test Module**
"""

import numpy as np
import scipy.spatial as scp

from mgcpy.independence_tests.abstract_class import IndependenceTest
from mgcpy.independence_tests.utils.compute_distance_matrix import \
compute_distance
from mgcpy.independence_tests.utils.mdmr_functions import (calc_ftest,
check_rank,
fperms_to_pvals,
gen_H2_perms,
gen_IH_perms,
gower_center_many)

class MDMR(IndependenceTest):
    """
    MDMR independence test exposed through the ``IndependenceTest`` interface.
    """

    def __init__(self, compute_distance_matrix=None):
        '''
        :param compute_distance_matrix: a function to compute the pairwise distance matrix, given a data matrix
        :type compute_distance_matrix: ``FunctionType`` or ``callable()``
        '''
        IndependenceTest.__init__(self, compute_distance_matrix)
        self.which_test = "mdmr"

    def get_name(self):
        """
        :return: the identifier of this test, ``"mdmr"``
        :rtype: string
        """
        return self.which_test

    @staticmethod
    def _permuted_f_test(X, D, predictors, subjects, permutations):
        """
        Run one pseudo-F computation for the given predictor columns.

        :param X: design matrix (intercept column already prepended)
        :param D: flattened distance matrix of Y, as a single column
        :param predictors: 1D array of column indexes of ``X`` under test
        :param subjects: number of samples (rows of ``X``)
        :param permutations: number of random sample permutations to draw

        :return: ``(F_perms, p_vals)``; ``p_vals`` is ``None`` when ``permutations`` is 0
        """
        Gs = gower_center_many(D)

        # NOTE(review): degrees of freedom as passed on to calc_ftest; the
        # shape subscripts were reconstructed -- confirm against upstream.
        m2 = float(X.shape[1] - predictors.shape[0])
        nm = float(subjects - X.shape[1])

        # Row 0 keeps the observed ordering; every later row is one random
        # permutation of the sample indexes.
        # Builtin int replaces np.int, which was removed in NumPy 1.24.
        permutation_indexes = np.zeros((permutations + 1, subjects), dtype=int)
        permutation_indexes[0, :] = range(subjects)
        for i in range(1, permutations + 1):
            permutation_indexes[i, :] = np.random.permutation(subjects)

        H2perms = gen_H2_perms(X, predictors, permutation_indexes)
        IHperms = gen_IH_perms(X, predictors, permutation_indexes)

        F_perms = calc_ftest(H2perms, IHperms, Gs, m2, nm)
        p_vals = fperms_to_pvals(F_perms) if permutations > 0 else None
        return F_perms, p_vals

    def test_statistic(self, matrix_X, matrix_Y, permutations=0, individual=0, disttype='cityblock'):
        """
        Computes MDMR Pseudo-F statistic between two datasets.

        - It first takes the distance matrix of Y (via ``compute_distance`` and the
          ``compute_distance_matrix`` function given to the constructor) and Gower centers it
        - Next it regresses X into a portion due to Y and a portion due to residual
        - The p-value is for the null hypothesis that the variable of X is not correlated with Y's distance matrix

        The statistic of the full X matrix is also stored in ``self.test_statistic_``.

        :param matrix_X: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with n samples in d dimensions
        :type matrix_X: 2D `numpy.array`

        :param matrix_Y: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with n samples in d dimensions
        :type matrix_Y: 2D `numpy.array`

        :param permutations: number of random permutations of the samples; with ``0`` (default)
                             no permutation p-values are computed
        :type permutations: integer

        :param individual: integer, `0` or `1`

            - with value `0` tests the entire X matrix (default)
            - with value `1` tests the entire X matrix and then each predictor variable individually
        :type individual: integer

        :param disttype: not used by this method
        :type disttype: string

        :return: with individual = `0`, returns 1 value, with individual = `1` returns 2 values, containing:

            - the test statistic of the entire X matrix
            - for individual = 1, an array with the variable of X in the first column,
              the test statistic in the second, and the permutation p-value in the third
              (``NaN`` when ``permutations`` is 0)

            Any other value of ``individual`` yields ``None``.
        :rtype: list

        :raises ValueError: if X and the distance matrix of Y disagree on the number of subjects
        """
        X = matrix_X
        Y = matrix_Y

        # calculate distance matrix of Y and flatten it into a single column
        D, _ = compute_distance(Y, np.identity(1), self.compute_distance_matrix)
        D = D.reshape((D.shape[0] ** 2, 1))

        # number of predictor variables, counted before the intercept is added
        predsingle = X.shape[1]
        check_rank(X)

        # check number of subjects compatible
        subjects = X.shape[0]
        if subjects != np.sqrt(D.shape[0]):
            raise ValueError("# of subjects incompatible between X and D")

        # Prepend the intercept column; the predictors therefore live in
        # columns 1..predsingle of the design matrix.
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        predictors = np.arange(1, predsingle + 1)

        # Test statistic of the whole X matrix (p-values are not returned here)
        F_perms, _ = self._permuted_f_test(X, D, predictors, subjects, permutations)
        F_permtotal = F_perms[0, :]
        self.test_statistic_ = F_permtotal

        if individual == 0:
            # NOTE(review): the original statement on this branch is missing
            # from the available source; returning the statistic alone follows
            # the documented "returns 1 value" contract -- confirm that the
            # inherited p_value() does not expect a two-item return.
            return F_permtotal

        # code for individual test
        if individual == 1:
            results = np.zeros((predsingle, 3))
            for column in range(1, predsingle + 1):
                single = np.array([column])
                F_single, p_single = self._permuted_f_test(
                    X, D, single, subjects, permutations)

                row = single - 1
                results[row, 0] = single
                results[row, 1] = F_single[0, :]
                # Without permutations there is no p-value; store NaN
                # explicitly (what assigning None into a float array gives).
                results[row, 2] = np.nan if p_single is None else p_single

            return F_permtotal, results

    def p_value(self, matrix_X, matrix_Y, replication_factor=1000):
        """
        Tests independence between two datasets using MDMR and the permutation test
        implemented by ``IndependenceTest.p_value``.

        :param matrix_X: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
        :type matrix_X: 2D `numpy.array`

        :param matrix_Y: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
        :type matrix_Y: 2D `numpy.array`

        :param replication_factor: specifies the number of replications to use for
                                   the permutation test. Defaults to ``1000``.
        :type replication_factor: integer

        :return: whatever ``IndependenceTest.p_value`` returns for these inputs
        """
        # Forward replication_factor instead of silently dropping it.
        # NOTE(review): assumes the base-class p_value accepts it as its third
        # positional argument, mirroring this signature -- confirm.
        return super(MDMR, self).p_value(matrix_X, matrix_Y, replication_factor)

    def ind_p_value(self, matrix_X, matrix_Y, permutations=1000, individual=1, disttype='cityblock'):
        """
        Individual predictor variable p-values calculation

        Thin wrapper around :meth:`test_statistic` with defaults suited to the
        per-predictor test (1000 permutations, ``individual=1``).

        :param matrix_X: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
        :type matrix_X: 2D `numpy.array`

        :param matrix_Y: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
        :type matrix_Y: 2D `numpy.array`

        :param permutations: number of random permutations, forwarded to ``test_statistic``
        :type permutations: integer

        :param individual: forwarded to ``test_statistic``
        :type individual: integer

        :param disttype: forwarded to ``test_statistic``, which does not use it
        :type disttype: string

        :return: the return value of ``test_statistic`` for these arguments
        """
        return self.test_statistic(matrix_X, matrix_Y, permutations, individual, disttype)
```