diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/dtcc_sdr.py | 102 | ||||
| -rw-r--r-- | python/process_queue.py | 4 |
2 files changed, 63 insertions, 43 deletions
diff --git a/python/dtcc_sdr.py b/python/dtcc_sdr.py index 20d8e4c9..49e72493 100644 --- a/python/dtcc_sdr.py +++ b/python/dtcc_sdr.py @@ -5,6 +5,7 @@ import requests import zipfile from pathlib import Path +from utils.db import dbconn def download_credit_slices(d: datetime.date) -> None: @@ -26,26 +27,19 @@ def download_cumulative_credit(d: datetime.date) -> None: z.extractall(path="/home/serenitas/CorpCDOs/data/DTCC") -def load_option_data(): +def load_data(): base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/") df = pd.concat( [ pd.read_csv( - f, parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"] + f, + parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"], + thousands=",", ) for f in base_dir.glob("*.csv") ] ) - df = df[df.OPTION_FAMILY.notnull()] - df = df.dropna(axis=1, how="all") - del df["ASSET_CLASS"] - del df["OPTION_FAMILY"] - for col in [ - "INDICATION_OF_END_USER_EXCEPTION", - "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM", - "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS", - ]: - df[col] = df[col].map({"N": False, "Y": True}) + df.DISSEMINATION_ID = df.DISSEMINATION_ID.astype("int") for col in [ "ACTION", "CLEARED", @@ -59,45 +53,67 @@ def load_option_data(): "SETTLEMENT_CURRENCY", ]: df[col] = df[col].astype("category") - for col in ["OPTION_PREMIUM", "PRICE_NOTATION", "OPTION_STRIKE_PRICE"]: - df[col] = df[col].str.replace(",", "").astype("float") - df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1] - for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]: - df[col + "_parsed"] = pd.to_datetime(df[col], errors="coerce") df.ORIGINAL_DISSEMINATION_ID = df.ORIGINAL_DISSEMINATION_ID.astype("Int64") + df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1] df = df[~df.DISSEMINATION_ID.isin(df.ORIGINAL_DISSEMINATION_ID)] df = df[df.ACTION != "CANCEL"] - df.sort_values("EXECUTION_TIMESTAMP", inplace=True) + del df["ASSET_CLASS"] + del df["ACTION"] return df -def load_tranche_data(): - base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/") - df = pd.concat( - [ - pd.read_csv( - f, parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"] - ) - for f in base_dir.glob("*.csv") - ] - ) +def process_option_data(df): + df = df[df.OPTION_FAMILY.notnull()] + df = df.dropna(axis=1, how="all") + del df["OPTION_FAMILY"] for col in [ - "ACTION", - "CLEARED", - "PRICE_NOTATION_TYPE", - "INDICATION_OF_COLLATERALIZATION", - "EXECUTION_VENUE", - "DAY_COUNT_CONVENTION", - "NOTIONAL_CURRENCY_1", - "SETTLEMENT_CURRENCY", + "INDICATION_OF_END_USER_EXCEPTION", + "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM", + "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS", ]: - df[col] = df[col].astype("category") - df = df[df.TAXONOMY.str.contains("Credit:IndexTranche")] - del df["ASSET_CLASS"] - df = df[[c for c in df.columns if "OPTION" not in c]] - df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1] - df = df[~df.DISSEMINATION_ID.isin(df.ORIGINAL_DISSEMINATION_ID)] - df = df[df.ACTION != "CANCEL"] + df[col] = df[col].map({"N": False, "Y": True}) + df.at[df.DISSEMINATION_ID == 107282774, "OPTION_EXPIRATION_DATE"] = "2019-09-18" + for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]: + df[col] = pd.to_datetime(df[col], errors="raise") + df = df.rename( + columns={ + "OPTION_STRIKE_PRICE": "strike", + "OPTION_EXPIRATION_DATE": "expiration_date", + "UNDERLYING_ASSET_1": "redindexcode", + "ROUNDED_NOTIONAL_AMOUNT_1": "notional", + "OPTION_PREMIUM": "premium", + "OPTION_TYPE": "option_type", + "PRICE_NOTATION": "price", + "EXECUTION_TIMESTAMP": "trade_timestamp", + } + ) + df.strike = df.strike.where(df.strike < 1000, df.strike / 100).where( + df.strike > 10, df.strike * 100 + ) + df.price = (df.price * 1e2).where(df.PRICE_NOTATION_TYPE == "Percentage", df.price) + conn = dbconn("serenitasdb") + df_indic = pd.read_sql_query( + "SELECT redindexcode, index, series, version FROM index_version", conn + ) + conn.close() + df = df.merge(df_indic, on="redindexcode") + df = df.set_index(["index", "series", "version", "trade_timestamp"]).sort_index() + return df[ + [ + "DISSEMINATION_ID", + "expiration_date", + "notional", + "strike", + "option_type", + "premium", + "price", + ] + ] + + +def process_tranche_data(df): + df = df[df.TAXONOMY.str.startswith("Credit:IndexTranche")] + df = df.loc[:, ~df.columns.str.contains("OPTION")] df.sort_values("EXECUTION_TIMESTAMP", inplace=True) return df diff --git a/python/process_queue.py b/python/process_queue.py index d7ace658..16ad3af8 100644 --- a/python/process_queue.py +++ b/python/process_queue.py @@ -327,6 +327,7 @@ def build_termination( dawndb, dealid, fee, + *, termination_date=datetime.date.today(), termination_amount=None, termination_cp=None, @@ -339,6 +340,9 @@ def build_termination( table = "cds" elif deal_type == "SwaptionDeal": table = "swaptions" + else: + raise ValueError("Unkown deal_type: {deal_type}") + with dawndb.cursor() as c: c.execute( f"SELECT dealid, cp_code, notional FROM {table} where id=%s", (dealid,) |
