|
@@ -20,7 +20,9 @@ def tqcorr(df):
|
20
|
20
|
print(cdf.info())
|
21
|
21
|
comb = combinations(cols, 2)
|
22
|
22
|
for c1, c2 in tqdm(comb):
|
23
|
|
- cdf.loc[c1, c2] = 1 - df[c1].corr(df[c2])
|
|
23
|
+ dv = 1 - df[c1].corr(df[c2])
|
|
24
|
+ cdf.loc[c1, c2] = dv
|
|
25
|
+ cdf.loc[c2, c1] = dv
|
24
|
26
|
print(cdf.info())
|
25
|
27
|
return cdf
|
26
|
28
|
|
|
@@ -28,9 +30,9 @@ def tqcorr(df):
|
28
|
30
|
tqdm.pandas()
|
29
|
31
|
|
30
|
32
|
Sourcedata = '../data/2017-all-wide.pkl'
|
31
|
|
-lableddata = '../data/9-clusters.pkl'
|
32
|
|
-aggdata = '../data/9-clusters.agg.pkl'
|
33
|
|
-clustertable = '../data/9-clusters-sample-table.pkl'
|
|
33
|
+lableddata = '../data/9-clusters-all.pkl'
|
|
34
|
+aggdata = '../data/9-clusters-all-agg.pkl'
|
|
35
|
+clustertable = '../data/9-clusters-all-table.pkl'
|
34
|
36
|
|
35
|
37
|
numclusts = 9
|
36
|
38
|
df = p.read_pickle(Sourcedata)
|