summaryrefslogtreecommitdiffstats
path: root/data/normality.py
blob: eb22553a48ff48ce2ca8e4e537e4c9e5aff03f29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import sys
import numpy as np
from scipy.stats import lognorm
import math

def normtest(x):
    x = np.matrix(x)
    s = np.matrix(np.cov(x,bias=1,rowvar=0))
    n,p = x.shape
    dift = x - np.mean(x,axis=0)
    dj = np.diag(dift*s.I*dift.T)
    y = x*s.I*x.T
    djk = - 2*y.T + np.diag(y.T).T*np.ones((1,n)) + np.ones((n,1))*np.diag(y.T)
    b = 1/(math.sqrt(2))*((2*p + 1)/4)**(1/(p + 4))*(n**(1/(p + 4)))
    
    if (np.rank(s) == p):
        hz = n * (1/(n**2) * np.sum(np.sum(np.exp( - (b**2)/2 * djk))) - 2 *
        ((1 + (b**2))**( - p/2)) * (1/n) * (np.sum(np.exp( - ((b**2)/(2 *
        (1 + (b**2)))) * dj))) + ((1 + (2 * (b**2)))**( - p/2)))
    else:
        hz = n*4

    wb = (1 + b**2)*(1 + 3*b**2)
    a = 1 + 2*b**2
    mu = 1 - a**(- p/2)*(1 + p*b**2/a + (p*(p + 2)*(b**4))/(2*a**2))
    si2 = 2*(1 + 4*b**2)**(- p/2) + 2*a**( - p)*(1 + (2*p*b**4)/a**2 + (3*p*
    (p + 2)*b**8)/(4*a**4)) - 4*wb**( - p/2)*(1 + (3*p*b**4)/(2*wb) + (p*
    (p + 2)*b**8)/(2*wb**2))

    pmu = math.log(math.sqrt(mu**4/(si2 + mu**2)))
    psi = math.sqrt(math.log((si2 + mu**2)/mu**2))
    p = 1 - lognorm.cdf(hz,psi,scale=math.exp(pmu))
    return p

if __name__ == "__main__":
    filename = sys.argv[1]
    x = np.loadtxt(filename,delimiter=",")
    x = x[:10,7:]
    p = normtest(x)
    print p