diff options
| -rw-r--r-- | python/dtcc_sdr.py | 83 |
1 files changed, 68 insertions, 15 deletions
diff --git a/python/dtcc_sdr.py b/python/dtcc_sdr.py index ab264e97..20d8e4c9 100644 --- a/python/dtcc_sdr.py +++ b/python/dtcc_sdr.py @@ -6,6 +6,7 @@ import zipfile from pathlib import Path + def download_credit_slices(d: datetime.date) -> None: for i in range(1, 400): url = f"https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/SLICE_CREDITS_{d:%Y_%m_%d}_{i}.zip" @@ -15,6 +16,7 @@ def download_credit_slices(d: datetime.date) -> None: with zipfile.ZipFile(io.BytesIO(r.content)) as z: z.extractall() + def download_cumulative_credit(d: datetime.date) -> None: url = f"https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/CUMULATIVE_CREDITS_{d:%Y_%m_%d}.zip" r = requests.get(url) @@ -23,34 +25,85 @@ def download_cumulative_credit(d: datetime.date) -> None: with zipfile.ZipFile(io.BytesIO(r.content)) as z: z.extractall(path="/home/serenitas/CorpCDOs/data/DTCC") + def load_option_data(): base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/") - df = pd.concat([ - pd.read_csv(f, - parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"]) - for f in base_dir.glob("*.csv")]) + df = pd.concat( + [ + pd.read_csv( + f, parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"] + ) + for f in base_dir.glob("*.csv") + ] + ) df = df[df.OPTION_FAMILY.notnull()] - df = df.dropna(axis=1, how='all') + df = df.dropna(axis=1, how="all") del df["ASSET_CLASS"] del df["OPTION_FAMILY"] - for col in ["INDICATION_OF_END_USER_EXCEPTION", - "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM", - "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS"]: + for col in [ + "INDICATION_OF_END_USER_EXCEPTION", + "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM", + "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS", + ]: df[col] = df[col].map({"N": False, "Y": True}) - for col in ["ACTION", "CLEARED", "PRICE_NOTATION_TYPE", "OPTION_TYPE", - "OPTION_CURRENCY", "INDICATION_OF_COLLATERALIZATION", "EXECUTION_VENUE", - "DAY_COUNT_CONVENTION", "NOTIONAL_CURRENCY_1", "SETTLEMENT_CURRENCY"]: + for col in [ + "ACTION", + "CLEARED", + "PRICE_NOTATION_TYPE", + "OPTION_TYPE", + "OPTION_CURRENCY", + "INDICATION_OF_COLLATERALIZATION", + "EXECUTION_VENUE", + "DAY_COUNT_CONVENTION", + "NOTIONAL_CURRENCY_1", + "SETTLEMENT_CURRENCY", + ]: df[col] = df[col].astype("category") for col in ["OPTION_PREMIUM", "PRICE_NOTATION", "OPTION_STRIKE_PRICE"]: df[col] = df[col].str.replace(",", "").astype("float") df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1] for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]: - df[col+"_parsed"] = pd.to_datetime(df[col], errors="coerce") + df[col + "_parsed"] = pd.to_datetime(df[col], errors="coerce") df.ORIGINAL_DISSEMINATION_ID = df.ORIGINAL_DISSEMINATION_ID.astype("Int64") + df = df[~df.DISSEMINATION_ID.isin(df.ORIGINAL_DISSEMINATION_ID)] + df = df[df.ACTION != "CANCEL"] + df.sort_values("EXECUTION_TIMESTAMP", inplace=True) return df + +def load_tranche_data(): + base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/") + df = pd.concat( + [ + pd.read_csv( + f, parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"] + ) + for f in base_dir.glob("*.csv") + ] + ) + for col in [ + "ACTION", + "CLEARED", + "PRICE_NOTATION_TYPE", + "INDICATION_OF_COLLATERALIZATION", + "EXECUTION_VENUE", + "DAY_COUNT_CONVENTION", + "NOTIONAL_CURRENCY_1", + "SETTLEMENT_CURRENCY", + ]: + df[col] = df[col].astype("category") + df = df[df.TAXONOMY.str.contains("Credit:IndexTranche")] + del df["ASSET_CLASS"] + df = df[[c for c in df.columns if "OPTION" not in c]] + df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1] + df = df[~df.DISSEMINATION_ID.isin(df.ORIGINAL_DISSEMINATION_ID)] + df = df[df.ACTION != "CANCEL"] + df.sort_values("EXECUTION_TIMESTAMP", inplace=True) + return df + + if __name__ == "__main__": pass - # dr = pd.bdate_range("2018-01-01", "2019-02-11") - # for d in dr: - # download_cumulative_credit(d) + dr = pd.bdate_range("2018-01-01", "2019-02-11") + for d in dr: + download_cumulative_credit(d) |
