aboutsummaryrefslogtreecommitdiffstats
path: root/python/dtcc_sdr.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/dtcc_sdr.py')
-rw-r--r--python/dtcc_sdr.py56
1 files changed, 56 insertions, 0 deletions
diff --git a/python/dtcc_sdr.py b/python/dtcc_sdr.py
new file mode 100644
index 00000000..ab264e97
--- /dev/null
+++ b/python/dtcc_sdr.py
@@ -0,0 +1,56 @@
+import datetime
+import io
+import pandas as pd
+import requests
+import zipfile
+
+from pathlib import Path
+
def download_credit_slices(
    d: datetime.date, *, path=None, timeout: float = 30.0
) -> None:
    """Download and unpack the credit slice archives published for one day.

    Requests ``SLICE_CREDITS_<YYYY_MM_DD>_<i>.zip`` for every ``i`` from 1 to
    399 and extracts each archive that is served with HTTP 200. Any index
    answered with another status code is skipped.

    Args:
        d: Date used to build the archive names.
        path: Directory to extract into. ``None`` (the default) extracts into
            the current working directory.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot block the whole loop indefinitely.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        zipfile.BadZipFile: If a 200 response body is not a valid zip archive.
    """
    # One session reuses the underlying connection across the requests, which
    # all target the same host.
    with requests.Session() as session:
        for i in range(1, 400):
            url = f"https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/SLICE_CREDITS_{d:%Y_%m_%d}_{i}.zip"
            r = session.get(url, timeout=timeout)
            if r.status_code != 200:
                continue
            with zipfile.ZipFile(io.BytesIO(r.content)) as z:
                z.extractall(path=path)
+
def download_cumulative_credit(
    d: datetime.date,
    *,
    path="/home/serenitas/CorpCDOs/data/DTCC",
    timeout: float = 30.0,
) -> None:
    """Download and unpack the cumulative credit archive for one day.

    Requests ``CUMULATIVE_CREDITS_<YYYY_MM_DD>.zip`` and extracts it into
    ``path``. Returns silently without extracting anything when the server
    answers with a status other than 200.

    Args:
        d: Date used to build the archive name.
        path: Directory to extract into; defaults to the DTCC data directory.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        zipfile.BadZipFile: If the 200 response body is not a valid zip archive.
    """
    url = f"https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/CUMULATIVE_CREDITS_{d:%Y_%m_%d}.zip"
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        return
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(path=path)
+
def _to_float(values):
    """Convert a column of numbers and/or comma-grouped numeric strings to float."""
    if pd.api.types.is_numeric_dtype(values):
        return values.astype("float")
    # Strip separators element-wise: the ``.str`` accessor would turn every
    # non-string entry of a mixed column into NaN.
    stripped = values.map(lambda v: v.replace(",", "") if isinstance(v, str) else v)
    return stripped.astype("float")


def load_option_data(
    base_dir: "str | Path" = "/home/serenitas/CorpCDOs/data/DTCC/",
) -> pd.DataFrame:
    """Load every CSV in ``base_dir`` and return the cleaned option rows.

    The files are concatenated in sorted filename order. Rows without an
    ``OPTION_FAMILY`` are discarded, columns that are entirely empty are
    dropped, and the remaining columns are converted: Y/N flags to booleans,
    code-like columns to categoricals, price columns to floats (thousands
    separators removed) and ``ORIGINAL_DISSEMINATION_ID`` to nullable integers.
    ``UNDERLYING_ASSET_1`` keeps only the text after its last ``:``, and a
    ``<col>_parsed`` datetime column is added for ``EFFECTIVE_DATE``,
    ``OPTION_EXPIRATION_DATE`` and ``OPTION_LOCK_PERIOD`` (unparseable values
    become ``NaT``).

    Args:
        base_dir: Directory containing the ``*.csv`` files.

    Returns:
        The cleaned DataFrame. Index labels are those of the individual files
        and may therefore repeat.

    Raises:
        ValueError: If ``base_dir`` contains no CSV files.
        KeyError: If an expected column is absent (for example because it was
            empty in every row and was dropped).
    """
    csv_files = sorted(Path(base_dir).glob("*.csv"))
    if not csv_files:
        raise ValueError(f"No CSV files found in {base_dir}")
    df = pd.concat(
        [
            pd.read_csv(
                f,
                parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"],
            )
            for f in csv_files
        ]
    )
    df = df[df["OPTION_FAMILY"].notnull()]
    df = df.dropna(axis=1, how="all")
    df = df.drop(columns=["ASSET_CLASS", "OPTION_FAMILY"])
    for col in [
        "INDICATION_OF_END_USER_EXCEPTION",
        "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM",
        "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS",
    ]:
        df[col] = df[col].map({"N": False, "Y": True})
    for col in [
        "ACTION",
        "CLEARED",
        "PRICE_NOTATION_TYPE",
        "OPTION_TYPE",
        "OPTION_CURRENCY",
        "INDICATION_OF_COLLATERALIZATION",
        "EXECUTION_VENUE",
        "DAY_COUNT_CONVENTION",
        "NOTIONAL_CURRENCY_1",
        "SETTLEMENT_CURRENCY",
    ]:
        df[col] = df[col].astype("category")
    for col in ["OPTION_PREMIUM", "PRICE_NOTATION", "OPTION_STRIKE_PRICE"]:
        # A file whose values contain no separators is parsed as numeric by
        # read_csv, so the column can hold numbers, strings, or both.
        df[col] = _to_float(df[col])
    df["UNDERLYING_ASSET_1"] = df["UNDERLYING_ASSET_1"].str.rsplit(
        ":", n=1, expand=True
    )[1]
    for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]:
        df[col + "_parsed"] = pd.to_datetime(df[col], errors="coerce")
    df["ORIGINAL_DISSEMINATION_ID"] = df["ORIGINAL_DISSEMINATION_ID"].astype("Int64")
    return df
+
if __name__ == "__main__":
    # Running this module as a script currently does nothing.
    pass
    # Disabled example: iterate over the business days between 2018-01-01 and
    # 2019-02-11 and call download_cumulative_credit for each one.
    # dr = pd.bdate_range("2018-01-01", "2019-02-11")
    # for d in dr:
    #     download_cumulative_credit(d)