Browse Source

Save correlation matrix

Petra Lamborn 5 years ago
parent
commit
aa49458bfa
1 changed file with 7 additions and 1 deletion
  1. 7
    1
      py/clustering.py

+ 7
- 1
py/clustering.py View File

8
 from scipy.spatial.distance import squareform
8
 from scipy.spatial.distance import squareform
9
 from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
9
 from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
10
 from tqdm import tqdm
10
 from tqdm import tqdm
11
+from itertools import combinations
11
 
12
 
12
 def tqcorr(df):
13
 def tqcorr(df):
13
     cols = df.columns
14
     cols = df.columns
14
     ncols = len(cols)
15
     ncols = len(cols)
15
-    cdf = p.DataFrame(index = cols, columns = cols)
16
+    cdf = p.DataFrame(index = cols, columns = cols, dtype = np.float16)
17
+    print(cdf.info())
16
     for c in tqdm(cols):
18
     for c in tqdm(cols):
17
         cdf.loc[c, c] = 0
19
         cdf.loc[c, c] = 0
20
+    print(cdf.info())
18
     comb = combinations(cols, 2)
21
     comb = combinations(cols, 2)
19
     for c1, c2 in tqdm(comb):
22
     for c1, c2 in tqdm(comb):
20
         cdf.loc[c1, c2] = 1 - df[c1].corr(df[c2])
23
         cdf.loc[c1, c2] = 1 - df[c1].corr(df[c2])
24
+    print(cdf.info())
21
     return cdf
25
     return cdf
22
 
26
 
23
 
27
 
47
 print(cmat)
51
 print(cmat)
48
 print(cmat.info())
52
 print(cmat.info())
49
 
53
 
54
+cmat.to_pickle('../data/fulldcorrmatrix.pkl')
55
+
50
 # lmat = squareform(1 - cmat)
56
 # lmat = squareform(1 - cmat)
51
 
57
 
52
 # lobj = linkage(lmat, method = 'ward')
58
 # lobj = linkage(lmat, method = 'ward')