"""Collect the "Gross Return" figure of every zipped CSV report into one CSV.

Each ``*.zip`` in the input directory is expected to hold a CSV whose first
column contains row labels and whose second (or third, when there are exactly
three columns) column header is the report date.  The value found on the
"Gross Return" row is stored under that date and the resulting table is
written out as a single CSV file.
"""

import datetime
import re
from pathlib import Path
from zipfile import ZipFile

import pandas as pd
from dateutil.parser import parse

RETO_DIR = Path("/home/serenitas/flint/retozip")
OUTPUT_CSV = "/home/serenitas/flint/test_returns.csv"

# Row labels tried in order.  The second spelling lacks the leading "G";
# presumably some reports are truncated that way -- confirm against the data.
_RETURN_LABELS = ("Gross Return", "ross Return")

# First run of four digits in the archive name, used as the report year.
_YEAR_RE = re.compile(r"\d{4}")


class _SkipReport(Exception):
    """Raised when one archive cannot be used; the message says why."""


def _read_first_member(zip_path):
    """Return the first member of *zip_path* parsed as a CSV DataFrame.

    Both the archive and the member handle are closed before returning.

    Raises:
        _SkipReport: the archive has no members or the CSV is empty.
    """
    with ZipFile(zip_path) as archive:
        members = archive.namelist()
        if not members:
            raise _SkipReport("archive is empty")
        with archive.open(members[0]) as handle:
            try:
                return pd.read_csv(handle)
            except pd.errors.EmptyDataError:
                raise _SkipReport("CSV is empty") from None


def _report_date(df, zip_name):
    """Return ``(report_date, df)`` for one report.

    The date is parsed from the header of the third column when the frame has
    exactly three columns (the middle column is then dropped), otherwise from
    the header of the second column.  A parsed year before 2000 is replaced
    by the first four-digit number found in *zip_name*.

    Raises:
        _SkipReport: the frame is too narrow, the header is not a date, or
            the year cannot be recovered from the file name.
    """
    try:
        if len(df.columns) == 3:
            report_date = parse(df.columns[2]).date()
            df = df.drop(columns=[df.columns[1]])
        else:
            report_date = parse(df.columns[1]).date()
    except IndexError:
        raise _SkipReport("fewer than two columns") from None
    except (ValueError, OverflowError, TypeError) as err:
        raise _SkipReport(f"column header is not a date ({err})") from None

    if report_date.year < 2000:
        found = _YEAR_RE.search(zip_name)
        if found is None:
            raise _SkipReport("no four-digit year in the file name")
        try:
            report_date = report_date.replace(year=int(found[0]))
        except ValueError as err:
            # e.g. 29 February moved into a non-leap year, or year "0000".
            raise _SkipReport(f"cannot apply year from file name ({err})") from None
    return report_date, df


def _gross_return(df):
    """Return the second-column value on the "Gross Return" row of *df*.

    Raises:
        _SkipReport: none of the accepted row labels is present.
    """
    by_label = df.set_index([df.columns[0]]).to_dict()[df.columns[1]]
    for label in _RETURN_LABELS:
        if label in by_label:
            return by_label[label]
    raise _SkipReport('no "Gross Return" row')


def collect_returns(reto_dir=RETO_DIR):
    """Return ``{report_date: gross_return}`` for every ``.zip`` in *reto_dir*.

    Archives are processed in sorted path order so that, when two reports
    resolve to the same date, the one that wins is deterministic.  An archive
    that cannot be used is reported on stdout and skipped; it never
    contributes a value, and never reuses one from a previous archive.
    """
    csv_data = {}
    for zip_f in sorted(Path(reto_dir).iterdir()):
        if not zip_f.name.endswith(".zip"):
            continue
        try:
            df = _read_first_member(zip_f)
            report_date, df = _report_date(df, zip_f.name)
            csv_data[report_date] = _gross_return(df)
        except _SkipReport as why:
            print(f"{zip_f.name}: skipped, {why}")
    return csv_data


def main(reto_dir=RETO_DIR, output_csv=OUTPUT_CSV):
    """Gather the returns found under *reto_dir* and write them to *output_csv*."""
    csv_data = collect_returns(reto_dir)
    pd.DataFrame.from_dict(csv_data, orient="index").to_csv(output_csv)
    # Completion marker kept from the original script.
    print("hi")


if __name__ == "__main__":
    main()