aboutsummaryrefslogtreecommitdiffstats
path: root/python/dtcc_sdr.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/dtcc_sdr.py')
-rw-r--r--python/dtcc_sdr.py102
1 files changed, 59 insertions, 43 deletions
diff --git a/python/dtcc_sdr.py b/python/dtcc_sdr.py
index 20d8e4c9..49e72493 100644
--- a/python/dtcc_sdr.py
+++ b/python/dtcc_sdr.py
@@ -5,6 +5,7 @@ import requests
import zipfile
from pathlib import Path
+from utils.db import dbconn
def download_credit_slices(d: datetime.date) -> None:
@@ -26,26 +27,19 @@ def download_cumulative_credit(d: datetime.date) -> None:
z.extractall(path="/home/serenitas/CorpCDOs/data/DTCC")
-def load_option_data():
+def load_data():
base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/")
df = pd.concat(
[
pd.read_csv(
- f, parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"]
+ f,
+ parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"],
+ thousands=",",
)
for f in base_dir.glob("*.csv")
]
)
- df = df[df.OPTION_FAMILY.notnull()]
- df = df.dropna(axis=1, how="all")
- del df["ASSET_CLASS"]
- del df["OPTION_FAMILY"]
- for col in [
- "INDICATION_OF_END_USER_EXCEPTION",
- "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM",
- "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS",
- ]:
- df[col] = df[col].map({"N": False, "Y": True})
+ df.DISSEMINATION_ID = df.DISSEMINATION_ID.astype("int")
for col in [
"ACTION",
"CLEARED",
@@ -59,45 +53,67 @@ def load_option_data():
"SETTLEMENT_CURRENCY",
]:
df[col] = df[col].astype("category")
- for col in ["OPTION_PREMIUM", "PRICE_NOTATION", "OPTION_STRIKE_PRICE"]:
- df[col] = df[col].str.replace(",", "").astype("float")
- df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1]
- for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]:
- df[col + "_parsed"] = pd.to_datetime(df[col], errors="coerce")
df.ORIGINAL_DISSEMINATION_ID = df.ORIGINAL_DISSEMINATION_ID.astype("Int64")
+ df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1]
df = df[~df.DISSEMINATION_ID.isin(df.ORIGINAL_DISSEMINATION_ID)]
df = df[df.ACTION != "CANCEL"]
- df.sort_values("EXECUTION_TIMESTAMP", inplace=True)
+ del df["ASSET_CLASS"]
+ del df["ACTION"]
return df
-def load_tranche_data():
- base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/")
- df = pd.concat(
- [
- pd.read_csv(
- f, parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"]
- )
- for f in base_dir.glob("*.csv")
- ]
- )
+def process_option_data(df):
+ df = df[df.OPTION_FAMILY.notnull()]
+ df = df.dropna(axis=1, how="all")
+ del df["OPTION_FAMILY"]
for col in [
- "ACTION",
- "CLEARED",
- "PRICE_NOTATION_TYPE",
- "INDICATION_OF_COLLATERALIZATION",
- "EXECUTION_VENUE",
- "DAY_COUNT_CONVENTION",
- "NOTIONAL_CURRENCY_1",
- "SETTLEMENT_CURRENCY",
+ "INDICATION_OF_END_USER_EXCEPTION",
+ "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM",
+ "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS",
]:
- df[col] = df[col].astype("category")
- df = df[df.TAXONOMY.str.contains("Credit:IndexTranche")]
- del df["ASSET_CLASS"]
- df = df[[c for c in df.columns if "OPTION" not in c]]
- df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1]
- df = df[~df.DISSEMINATION_ID.isin(df.ORIGINAL_DISSEMINATION_ID)]
- df = df[df.ACTION != "CANCEL"]
+ df[col] = df[col].map({"N": False, "Y": True})
+ df.at[df.DISSEMINATION_ID == 107282774, "OPTION_EXPIRATION_DATE"] = "2019-09-18"
+ for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]:
+ df[col] = pd.to_datetime(df[col], errors="raise")
+ df = df.rename(
+ columns={
+ "OPTION_STRIKE_PRICE": "strike",
+ "OPTION_EXPIRATION_DATE": "expiration_date",
+ "UNDERLYING_ASSET_1": "redindexcode",
+ "ROUNDED_NOTIONAL_AMOUNT_1": "notional",
+ "OPTION_PREMIUM": "premium",
+ "OPTION_TYPE": "option_type",
+ "PRICE_NOTATION": "price",
+ "EXECUTION_TIMESTAMP": "trade_timestamp",
+ }
+ )
+ df.strike = df.strike.where(df.strike < 1000, df.strike / 100).where(
+ df.strike > 10, df.strike * 100
+ )
+ df.price = (df.price * 1e2).where(df.PRICE_NOTATION_TYPE == "Percentage", df.price)
+ conn = dbconn("serenitasdb")
+ df_indic = pd.read_sql_query(
+ "SELECT redindexcode, index, series, version FROM index_version", conn
+ )
+ conn.close()
+ df = df.merge(df_indic, on="redindexcode")
+ df = df.set_index(["index", "series", "version", "trade_timestamp"]).sort_index()
+ return df[
+ [
+ "DISSEMINATION_ID",
+ "expiration_date",
+ "notional",
+ "strike",
+ "option_type",
+ "premium",
+ "price",
+ ]
+ ]
+
+
+def process_tranche_data(df):
+ df = df[df.TAXONOMY.str.startswith("Credit:IndexTranche")]
+ df = df.loc[:, ~df.columns.str.contains("OPTION")]
df.sort_values("EXECUTION_TIMESTAMP", inplace=True)
return df