|
@@ -8,16 +8,20 @@ import seaborn as sns
|
8
|
8
|
from scipy.spatial.distance import squareform
|
9
|
9
|
from scipy.cluster.hierarchy import dendrogram, linkage, cophenet, fcluster
|
10
|
10
|
from tqdm import tqdm
|
|
11
|
+from itertools import combinations
|
11
|
12
|
|
12
|
13
|
def tqcorr(df):
    """Build a pairwise correlation-distance matrix with a progress bar.

    For every pair of columns in ``df`` the distance is ``1 - corr``, where
    ``corr`` is ``Series.corr`` called with its default settings. The
    diagonal is set to 0.

    Args:
        df: DataFrame whose columns are compared pairwise.

    Returns:
        A square DataFrame indexed and labelled by ``df.columns`` holding the
        distances as ``np.float16``. Both triangles are filled, so the
        matrix is symmetric. A pair whose correlation is NaN keeps a NaN
        distance.
    """
    cols = df.columns
    ncols = len(cols)
    npairs = ncols * (ncols - 1) // 2

    # Fill a plain NumPy buffer by position and wrap it in a DataFrame once
    # at the end: this avoids one label-based .loc write per cell and does
    # not depend on the column labels being unique.
    dist = np.full((ncols, ncols), np.nan, dtype=np.float16)
    np.fill_diagonal(dist, 0)

    # combinations() has no len(), so give tqdm the pair count explicitly;
    # otherwise the bar cannot show a total.
    for i, j in tqdm(combinations(range(ncols), 2), total=npairs):
        d = 1 - df.iloc[:, i].corr(df.iloc[:, j])
        # The distance is symmetric, so mirror it into the lower triangle
        # instead of leaving that half as NaN.
        dist[i, j] = d
        dist[j, i] = d

    cdf = p.DataFrame(dist, index=cols, columns=cols)
    # info() prints its summary itself and returns None, so it is called
    # directly rather than wrapped in print().
    cdf.info()
    return cdf
|
22
|
26
|
|
23
|
27
|
|
|
@@ -47,6 +51,8 @@ cmat = tqcorr(df)
|
47
|
51
|
print(cmat)
# DataFrame.info() writes its summary to stdout and returns None, so call it
# directly instead of printing its return value (which would emit "None").
cmat.info()

# Save the distance matrix to disk as a pickle.
cmat.to_pickle('../data/fulldcorrmatrix.pkl')
|
|
55
|
+
|
50
|
56
|
# lmat = squareform(1 - cmat)
|
51
|
57
|
|
52
|
58
|
# lobj = linkage(lmat, method = 'ward')
|