Browse Source

Save correlation matrix

Petra Lamborn 5 years ago
parent
commit
aa49458bfa
1 changed file with 7 additions and 1 deletion
  1. py/clustering.py (7 additions, 1 deletion)

+ 7
- 1
py/clustering.py View File

@@ -8,16 +8,20 @@ import seaborn as sns
8 8
 from scipy.spatial.distance import squareform
9 9
 from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
10 10
 from tqdm import tqdm
11
+from itertools import combinations
11 12
 
12 13
 def tqcorr(df):
13 14
     cols = df.columns
14 15
     ncols = len(cols)
15
-    cdf = p.DataFrame(index = cols, columns = cols)
16
+    cdf = p.DataFrame(index = cols, columns = cols, dtype = np.float16)
17
+    print(cdf.info())
16 18
     for c in tqdm(cols):
17 19
         cdf.loc[c, c] = 0
20
+    print(cdf.info())
18 21
     comb = combinations(cols, 2)
19 22
     for c1, c2 in tqdm(comb):
20 23
         cdf.loc[c1, c2] = 1 - df[c1].corr(df[c2])
24
+    print(cdf.info())
21 25
     return cdf
22 26
 
23 27
 
@@ -47,6 +51,8 @@ cmat = tqcorr(df)
47 51
 print(cmat)
48 52
 print(cmat.info())
49 53
 
54
+cmat.to_pickle('../data/fulldcorrmatrix.pkl')
55
+
50 56
 # lmat = squareform(1 - cmat)
51 57
 
52 58
 # lobj = linkage(lmat, method = 'ward')