Browse Source

Pickle to csv

Petra Lamborn 5 years ago
parent
commit
aa5b835bad
2 changed files with 46 additions and 0 deletions
  1. 17
    0
      README.md
  2. 29
    0
      py/pickletocsv.py

+ 17
- 0
README.md View File

@@ -137,3 +137,20 @@ python agg.py -i ../data/test1k.pkl -c ../data/test1kclustable.pkl -o ../data/te
137 137
 ```
138 138
 
139 139
 Aggregates data from `../data/test1k.pkl` by cluster information in `../data/test1kclustable.pkl`, save in `../data/test1kagg.pkl`.
140
+
141
+### `pickletocsv.py`
142
+
143
+Helper script to transform a pickle into a csv file, for easier importing into e.g. Excel.
144
+
145
+* `-i PATH`: The path for the python "pickle" file which contains the dataset.
146
+* `-o PATH`: The path for the csv file to store the dataset in. If omitted, the csv is printed to standard output.
147
+* `-r`: Include row names/index labels in csv. This may be essential for proper exporting of some datasets.
148
+* `-v`: Output extra information, including dimensions of dataset.
149
+
150
+Example:
151
+
152
+```bash
153
+python pickletocsv.py -i ../data/test1kagg.pkl | less
154
+```
155
+
156
+Reads the file at `../data/test1kagg.pkl`, converts it to csv, and views the result in the UNIX pager `less`.

+ 29
- 0
py/pickletocsv.py View File

@@ -0,0 +1,29 @@
1
+from argparse import ArgumentParser
2
+from sys import stdout
3
+import pandas as p
4
+
5
def main(argv=None):
    """Convert a pickled pandas object to a csv file or to standard output.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse falls back to ``sys.argv[1:]``,
            which is the behaviour of the plain ``main()`` call used by the
            script entry point.

    Command-line options:
        -i/--input PATH   pickle file to read (required).
        -o/--output PATH  csv file to write; standard output when omitted.
        -r/--row-names    include the index (row names) in the csv.
        -v/--verbose      write progress and dataset info to standard error.
    """
    # Function-local import keeps this block self-contained: the module only
    # imports the ``stdout`` name from ``sys``, not ``sys`` itself.
    import sys

    parser = ArgumentParser(description='Transform a "pickle" to a csv file')
    parser.add_argument(
        "-i", "--input", dest="input", metavar="PATH", required=True,
        help="input pickle path",
    )
    parser.add_argument(
        "-o", "--output", dest="output", metavar="PATH",
        help="output csv path; if omitted prints to stdout",
    )
    parser.add_argument(
        "-r", "--row-names", dest="rownames", action="store_true",
        help="include row names in csv",
    )
    parser.add_argument(
        "-v", "--verbose", dest="verbose", action="store_true",
        help="print progress and dataset information to stderr",
    )
    args = parser.parse_args(argv)

    # Resolve stdout at call time so redirection of sys.stdout is honoured,
    # and leave the parsed namespace unmodified.
    destination = sys.stdout if args.output is None else args.output

    # Diagnostics go to stderr: stdout may be carrying the csv itself, and
    # mixing the two would corrupt the output (e.g. when piping to a pager).
    if args.verbose:
        print("Reading pickle", file=sys.stderr)

    # NOTE(security): unpickling can execute arbitrary code; only run this
    # tool on pickle files from a trusted source.
    ptc = p.read_pickle(args.input)

    if args.verbose:
        # info() writes its report to the given buffer and returns None, so
        # it must not be wrapped in print().
        ptc.info(buf=sys.stderr)
        print("Saving as csv", file=sys.stderr)

    ptc.to_csv(destination, index=args.rownames)


if __name__ == "__main__":
    main()