import datetime
import io
import pandas as pd
import requests
import zipfile
from pathlib import Path
from utils.db import dbconn
def download_credit_slices(d: datetime.date, dest: str = ".") -> None:
    """Download and extract all daily DTCC credit slice archives for *d*.

    Slice archives are numbered 1..399; a slice that does not exist
    (non-200 response) is skipped rather than treated as the end of the
    series, since the numbering may be sparse.

    Parameters
    ----------
    d : datetime.date
        Trade date of the slices to fetch (formatted as YYYY_MM_DD in the URL).
    dest : str, optional
        Directory the archives are extracted into. Defaults to the current
        working directory, preserving the original behavior.
    """
    for i in range(1, 400):
        url = (
            "https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/"
            f"SLICE_CREDITS_{d:%Y_%m_%d}_{i}.zip"
        )
        # Timeout so one stalled connection cannot hang the whole loop;
        # requests.get has no timeout by default.
        r = requests.get(url, timeout=60)
        if r.status_code != 200:
            continue
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            z.extractall(path=dest)
def download_cumulative_credit(
    d: datetime.date,
    dest: str = "/home/serenitas/CorpCDOs/data/DTCC",
) -> None:
    """Download and extract the cumulative DTCC credit archive for *d*.

    Parameters
    ----------
    d : datetime.date
        Trade date of the archive (formatted as YYYY_MM_DD in the URL).
    dest : str, optional
        Directory the archive is extracted into. Defaults to the original
        hard-coded DTCC data directory.
    """
    url = (
        "https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/"
        f"CUMULATIVE_CREDITS_{d:%Y_%m_%d}.zip"
    )
    # Timeout added: requests.get blocks indefinitely by default.
    r = requests.get(url, timeout=60)
    if r.status_code != 200:
        # No archive published for this date (e.g. weekend/holiday) —
        # deliberately best-effort, matching the original behavior.
        return
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(path=dest)
def load_data():
    """Read every DTCC CSV in the data directory into one cleaned DataFrame.

    Concatenates all ``*.csv`` files, normalizes dtypes, strips the prefix
    from ``UNDERLYING_ASSET_1``, and removes superseded and cancelled records.
    """
    data_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/")
    frames = [
        pd.read_csv(
            csv_path,
            parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"],
            thousands=",",
        )
        for csv_path in data_dir.glob("*.csv")
    ]
    df = pd.concat(frames)

    df["DISSEMINATION_ID"] = df["DISSEMINATION_ID"].astype("int")
    categorical_cols = (
        "ACTION",
        "CLEARED",
        "PRICE_NOTATION_TYPE",
        "OPTION_TYPE",
        "OPTION_CURRENCY",
        "INDICATION_OF_COLLATERALIZATION",
        "EXECUTION_VENUE",
        "DAY_COUNT_CONVENTION",
        "NOTIONAL_CURRENCY_1",
        "SETTLEMENT_CURRENCY",
    )
    for name in categorical_cols:
        df[name] = df[name].astype("category")
    # Nullable Int64: ORIGINAL_DISSEMINATION_ID is empty for original trades.
    df["ORIGINAL_DISSEMINATION_ID"] = df["ORIGINAL_DISSEMINATION_ID"].astype("Int64")
    # Keep only the token after the last ':' in the asset identifier.
    df["UNDERLYING_ASSET_1"] = df["UNDERLYING_ASSET_1"].str.rsplit(
        ":", n=1, expand=True
    )[1]
    # Drop records that were later superseded (their ID appears as some other
    # record's ORIGINAL_DISSEMINATION_ID), then drop cancellations.
    superseded = df["DISSEMINATION_ID"].isin(df["ORIGINAL_DISSEMINATION_ID"])
    df = df[~superseded]
    df = df[df["ACTION"] != "CANCEL"]
    del df["ASSET_CLASS"]
    del df["ACTION"]
    return df
def process_option_data(df):
    """Extract and normalize swaption (option) trades from a raw DTCC frame.

    Keeps rows with a non-null ``OPTION_FAMILY``, maps Y/N flags to booleans,
    parses date columns, renames to lowercase analytic names, normalizes
    strike/price units, joins index metadata from the ``index_version`` table,
    and returns a frame indexed by (index, series, version, trade_timestamp).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw trade data as produced by ``load_data``.

    Returns
    -------
    pandas.DataFrame
        Option trades with columns DISSEMINATION_ID, expiration_date,
        notional, strike, option_type, premium, price.
    """
    # .copy() so subsequent column assignments operate on a real frame,
    # not a view of the caller's data (avoids chained-assignment issues).
    df = df[df.OPTION_FAMILY.notnull()].copy()
    df = df.dropna(axis=1, how="all")
    del df["OPTION_FAMILY"]
    for col in [
        "INDICATION_OF_END_USER_EXCEPTION",
        "INDICATION_OF_OTHER_PRICE_AFFECTING_TERM",
        "BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS",
    ]:
        df[col] = df[col].map({"N": False, "Y": True})
    # Manual correction for one known-bad record. BUGFIX: this must be .loc —
    # .at only accepts a single scalar label, so the original call with a
    # boolean mask raised at runtime.
    df.loc[df.DISSEMINATION_ID == 107282774, "OPTION_EXPIRATION_DATE"] = "2019-09-18"
    for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]:
        df[col] = pd.to_datetime(df[col], errors="raise")
    df = df.rename(
        columns={
            "OPTION_STRIKE_PRICE": "strike",
            "OPTION_EXPIRATION_DATE": "expiration_date",
            "UNDERLYING_ASSET_1": "redindexcode",
            "ROUNDED_NOTIONAL_AMOUNT_1": "notional",
            "OPTION_PREMIUM": "premium",
            "OPTION_TYPE": "option_type",
            "PRICE_NOTATION": "price",
            "EXECUTION_TIMESTAMP": "trade_timestamp",
        }
    )
    # Strikes are quoted in inconsistent units: divide values >= 1000 by 100,
    # multiply values <= 10 by 100, so everything lands on a common scale.
    df.strike = df.strike.where(df.strike < 1000, df.strike / 100).where(
        df.strike > 10, df.strike * 100
    )
    # Percentage-quoted prices are rescaled by 100; others pass through.
    df.price = (df.price * 1e2).where(df.PRICE_NOTATION_TYPE == "Percentage", df.price)
    conn = dbconn("serenitasdb")
    df_indic = pd.read_sql_query(
        "SELECT redindexcode, index, series, version FROM index_version", conn
    )
    conn.close()
    df = df.merge(df_indic, on="redindexcode")
    df = df.set_index(["index", "series", "version", "trade_timestamp"]).sort_index()
    return df[
        [
            "DISSEMINATION_ID",
            "expiration_date",
            "notional",
            "strike",
            "option_type",
            "premium",
            "price",
        ]
    ]
def process_tranche_data(df):
    """Extract index-tranche trades from a raw DTCC frame.

    Keeps rows whose TAXONOMY starts with ``Credit:IndexTranche``, drops all
    option-related columns, and returns the result sorted by execution time.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw trade data as produced by ``load_data``.

    Returns
    -------
    pandas.DataFrame
        Tranche trades sorted by EXECUTION_TIMESTAMP.
    """
    df = df[df.TAXONOMY.str.startswith("Credit:IndexTranche")]
    df = df.loc[:, ~df.columns.str.contains("OPTION")]
    # BUGFIX: sort_values(inplace=True) on a sliced view raises
    # SettingWithCopyWarning and may not modify what the caller sees;
    # return a sorted copy instead (same result, no warning).
    return df.sort_values("EXECUTION_TIMESTAMP")
if __name__ == "__main__":
    # Backfill cumulative credit archives over the business-day range.
    # (Removed a dead `pass` statement that preceded the live code.)
    for d in pd.bdate_range("2018-01-01", "2019-02-11"):
        # bdate_range yields Timestamps; pass a date to match the callee's
        # datetime.date annotation — %Y_%m_%d formatting is identical.
        download_cumulative_credit(d.date())