aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/reto_parsing.py45
1 files changed, 45 insertions, 0 deletions
diff --git a/python/reto_parsing.py b/python/reto_parsing.py
new file mode 100644
index 00000000..a5ed60d9
--- /dev/null
+++ b/python/reto_parsing.py
@@ -0,0 +1,45 @@
+import pandas as pd
+from zipfile import ZipFile
+from pathlib import Path
+from dateutil.parser import parse
+import datetime
+import re
+
# Directory scanned for ``*.zip`` archives, and the CSV the results are written to.
reto_dir = Path("/home/serenitas/flint/retozip")
output_csv = "/home/serenitas/flint/test_returns.csv"

# Collected gross returns keyed by report date; populated by ``main``.
csv_data = {}

# Row labels tried, in order, when looking up the gross return.  The second
# spelling covers files in which the label lacks its leading "G".
RETURN_LABELS = ("Gross Return", "ross Return")

# First run of four digits in a file name, used as a fallback year.
YEAR_RE = re.compile(r"\d{4}")


def read_first_member(zip_path):
    """Return the first member of the archive at *zip_path* parsed as CSV.

    Both the archive and the member handle are closed before returning.

    Raises:
        ValueError: if the archive contains no members.
    """
    with ZipFile(zip_path) as archive:
        members = archive.namelist()
        if not members:
            raise ValueError("archive contains no files")
        with archive.open(members[0]) as handle:
            return pd.read_csv(handle)


def value_column(df):
    """Return the position of the column holding the date header and values.

    Three-column files carry an extra middle column that is ignored, so the
    data sits in the third column; otherwise it is the second column.

    Raises:
        ValueError: if *df* has fewer than two columns.
    """
    width = len(df.columns)
    if width < 2:
        raise ValueError(f"expected at least 2 columns, found {width}")
    return 2 if width == 3 else 1


def correct_year(date, file_name):
    """Return *date*, taking the year from *file_name* when it is before 2000.

    The replacement year is the first four-digit run found in *file_name*.

    Raises:
        ValueError: if a replacement year is needed but *file_name* has no
            four-digit run, or the resulting date does not exist.
    """
    if date.year >= 2000:
        return date
    found = YEAR_RE.search(file_name)
    if found is None:
        raise ValueError(
            f"parsed year {date.year} looks wrong and the file name has no 4-digit year"
        )
    return datetime.date(int(found[0]), date.month, date.day)


def report_date(df, column, file_name):
    """Parse the report date from the header of column number *column*.

    Raises:
        ValueError: if the header cannot be parsed as a date or the year
            cannot be corrected (see ``correct_year``).
    """
    header = df.columns[column]
    try:
        parsed = parse(header).date()
    except (ValueError, OverflowError, TypeError) as err:
        raise ValueError(f"cannot parse a date from column header {header!r}") from err
    return correct_year(parsed, file_name)


def extract_gross_return(df, column):
    """Return the value in column number *column* of the gross-return row.

    Rows are identified by the label in the first column; the labels in
    ``RETURN_LABELS`` are tried in order.  If a label occurs more than once
    the last occurrence wins.

    Raises:
        ValueError: if none of the labels is present.
    """
    by_label = dict(zip(df.iloc[:, 0], df.iloc[:, column]))
    for label in RETURN_LABELS:
        if label in by_label:
            return by_label[label]
    raise ValueError("no 'Gross Return' row found")


def collect_returns(directory):
    """Return ``{report date: gross return}`` for every ``*.zip`` in *directory*.

    Any ``ValueError`` raised while reading or interpreting a file is reported
    on stdout and that file is skipped, so one bad file can neither abort the
    run nor leak values from the previously processed file into the result.
    """
    results = {}
    # Sorted so that the row order of the output is reproducible.
    for zip_path in sorted(directory.iterdir()):
        if not zip_path.name.endswith(".zip"):
            continue
        try:
            df = read_first_member(zip_path)
            column = value_column(df)
            date = report_date(df, column, zip_path.name)
            results[date] = extract_gross_return(df, column)
        except ValueError as err:
            print(f"skipping {zip_path.name}: {err}")
    return results


def main(directory=reto_dir, destination=output_csv):
    """Collect the returns found in *directory* and write them to *destination*."""
    csv_data.update(collect_returns(directory))
    pd.DataFrame.from_dict(csv_data, orient="index").to_csv(destination)


if __name__ == "__main__":
    main()