# python/option_trades.py

import pandas as pd
from arch import arch_model
import math
from db import dbengine
import numpy as np
from scipy.interpolate import interp1d

# Module-level database handle obtained from db.dbengine; the query helpers
# below pass it as the connection argument to pd.read_sql_query.
serenitasdb = dbengine('serenitasdb')

def get_daily_pnl(index, series, tenor, coupon=1):
    """Load the quote history of one index/series/tenor and add daily pnl.

    Parameters
    ----------
    index : string
        index code; upper-cased before querying, like the other helpers
        in this module
    series : int
    tenor : string
    coupon : number
        running coupon accrued on a dt/360 basis

    Returns
    -------
    DataFrame indexed by date with the queried columns (close, spread,
    duration, theta) plus:
      dt  -- number of calendar days since the previous quote (NaN on the
             first row)
      pnl -- change in the forward-filled close plus dt/360*coupon
    """
    sql_str = "SELECT date, adjcloseprice AS close, closespread AS spread, duration, theta FROM index_quotes " \
              "WHERE index=%s and series=%s and tenor = %s"
    df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'],
                           index_col=['date'], params=(index.upper(), series, tenor))
    df.sort_index(inplace=True)
    # .dt.days yields a float day count on every pandas version, whereas
    # astype('timedelta64[D]') is rejected by pandas >= 2.0.
    df['dt'] = df.index.to_series().diff().dt.days
    # Use item access for the 'dt' column so it can never be confused with
    # an accessor/attribute of the frame.
    df['pnl'] = df['close'].ffill().diff() + df['dt'] / 360 * coupon
    return df

def daily_spreads(index, series, tenor):
    """Return the daily percentage changes of the closing spread.

    Parameters
    ----------
    index : string
        index code, upper-cased before it is sent to the database
    series : int
    tenor : string

    Returns
    -------
    Series of spread returns indexed by date, with missing values dropped.
    """
    query = ("SELECT date, closespread AS spread FROM index_quotes "
             "WHERE index=%s and series=%s and tenor = %s")
    query_args = (index.upper(), series, tenor)
    quotes = pd.read_sql_query(query, serenitasdb, params=query_args,
                               parse_dates=['date'], index_col=['date'])
    # returns are only meaningful on chronologically ordered quotes
    quotes = quotes.sort_index()
    spread_returns = quotes['spread'].pct_change()
    return spread_returns.dropna()

def index_returns(date=None, years=3, index="IG", tenor="5yr"):
    """Compute daily spread returns, one row per date.

    Parameters
    ----------
    date : date or None
        first quote date to load; when None it defaults to `years` years
        before now
    years : int
        lookback used only when `date` is None
    index : string
        index code, upper-cased before querying
    tenor : string

    Returns
    -------
    DataFrame indexed by date. Returns are computed within each series and,
    for every date, the last non-null value in (date, series) order is kept.
    """
    if date is None:
        lookback = pd.DateOffset(years=years)
        date = (pd.Timestamp.now() - lookback).date()
    query = ("SELECT date, series, closespread AS spread FROM index_quotes "
             "WHERE index=%s and date>=%s and tenor = %s")
    spreads = pd.read_sql_query(query, serenitasdb,
                                params=(index.upper(), date, tenor),
                                parse_dates=['date'],
                                index_col=['date', 'series'])
    spreads = spreads.sort_index()

    def _pct_change(group):
        # percentage change inside a single series
        return group.pct_change()

    per_series = spreads.groupby(level='series').transform(_pct_change)
    return per_series.groupby(level='date').last()

def realized_vol(index, series, tenor, date=None, years=None):
    """Compute the realized spread volatility from a fitted arch model.

    Parameters
    ----------
    index : string
    series : int
    tenor : string
    date : date-like or None
        when given, only returns on or after this date are used
    years : number or None
        when `date` is None and `years` is given, the start date defaults
        to `years` years before now; with both None the full history is used

    Returns
    -------
    (vol, res) : the conditional volatility of the fit scaled by sqrt(252),
    and the fit result object itself.
    """
    # NOTE(review): the original guard on `date` was truncated in the source;
    # it is reconstructed here as an optional start-date filter so that the
    # default call (no date, no years) keeps using the whole history.
    if date is None and years is not None:
        date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
    returns = daily_spreads(index, series, tenor)
    if date is not None:
        returns = returns[returns.index >= pd.Timestamp(date)]
    am = arch_model(returns)
    res = am.fit(update_freq=0, disp='off')
    return (res.conditional_volatility * math.sqrt(252), res)

def atm_vol_fun(v, ref_is_price=False, moneyness=0.2):
    """Interpolate one vol smile at the forward and at an OTM strike.

    Parameters
    ----------
    v : DataFrame
        quotes of a single smile with columns strike, vol and fwdspread
    ref_is_price : bool
        accepted for the callers' benefit but not used in the computation
    moneyness : float
        the OTM strike is the forward scaled by (1 + moneyness)

    Returns
    -------
    Series with entries 'atm_vol' and 'otm_vol'.
    """
    # linear interpolation of vol in strike, extrapolating past the quotes
    smile = interp1d(v.strike.values, v.vol.values, fill_value='extrapolate')
    # the forward is taken from the first row of the group
    atm_val = v['fwdspread'].iat[0]
    # a proportional shift of the reference doesn't make sense for HY
    otm_val = atm_val * (1 + moneyness)
    strikes = np.array([atm_val, otm_val])
    return pd.Series(smile(strikes), index=['atm_vol', 'otm_vol'])

def atm_vol(index, series, moneyness=0.2):
    """Compute ATM and OTM vols per quote date and expiry for one series.

    Parameters
    ----------
    index : string
        index code; matched case-insensitively
    series : int
    moneyness : float
        forwarded to atm_vol_fun to place the OTM strike

    Returns
    -------
    DataFrame indexed by quotedate with columns expiry, atm_vol, otm_vol.
    """
    # Normalise once so the SQL filters and the HY flag agree; comparing the
    # raw argument meant a lower-case "hy" never set the flag.
    index = index.upper()
    df = pd.read_sql_query('SELECT quotedate, expiry, strike, vol from swaption_quotes ' \
                           'WHERE index = %s and series = %s',
                           serenitasdb, index_col=['quotedate', 'expiry'],
                           params=(index, series))
    index_data = pd.read_sql_query(
        'SELECT quotedate, expiry, fwdspread from swaption_ref_quotes ' \
        'WHERE index= %s and series = %s',
        serenitasdb, index_col=['quotedate', 'expiry'],
        params=(index, series))

    df = df.join(index_data)
    # interpolation needs at least two strikes per (quotedate, expiry) smile
    df = df.groupby(level=['quotedate', 'expiry']).filter(lambda x: len(x) >= 2)
    df = df.groupby(level=['quotedate', 'expiry']).apply(atm_vol_fun, index == "HY", moneyness)
    df = df.reset_index(level=-1)  # move expiry back to the column
    return df

def atm_vol_date(index, date):
    """Compute ATM and OTM vols for every series quoted on or after `date`.

    Parameters
    ----------
    index : string
        index code, upper-cased before querying
    date : date-like
        earliest quotedate to load

    Returns
    -------
    DataFrame indexed by quotedate with columns expiry, series, atm_vol and
    otm_vol.
    """
    keys = ['quotedate', 'expiry', 'series']
    query_args = (index.upper(), date)
    quotes = pd.read_sql_query(
        'SELECT quotedate, series, expiry, strike, vol '
        'FROM swaption_quotes '
        'WHERE index = %s and quotedate >= %s',
        serenitasdb, index_col=keys, params=query_args)
    refs = pd.read_sql_query(
        'SELECT quotedate, expiry, series,  fwdspread FROM swaption_ref_quotes '
        'WHERE index= %s and quotedate >= %s',
        serenitasdb, index_col=keys, params=query_args)
    smiles = quotes.join(refs)

    def _enough_strikes(group):
        # a smile needs at least two quotes to be interpolated
        return len(group) >= 2

    smiles = smiles.groupby(smiles.index).filter(_enough_strikes)
    vols = smiles.groupby(level=keys).apply(atm_vol_fun)
    # move expiry and series back to the columns
    return vols.reset_index(level=['expiry', 'series'])

def rolling_vol(df, col='atm_vol', term=(3,)):
    """Compute constant-maturity vols for various terms.

    Parameters
    ----------
    df : DataFrame
        indexed by 'quotedate', with a datetime 'expiry' column and a `col`
        column
    col : string
        name of the column to interpolate
    term : sequence of int
        maturities in months, measured from each quote date

    Returns
    -------
    DataFrame indexed by quotedate with one column per term, named '<t>m'.
    Rows containing NA are dropped.
    """
    # keep only the quote dates that carry more than two rows
    df = df.groupby(df.index).filter(lambda x: len(x) > 2)

    def aux(s, col, term):
        k = s.index[0]
        # Pin both the knots and the targets to nanoseconds so that they are
        # expressed in the same units whatever resolution pandas stores.
        expiries = s.expiry.values.astype('datetime64[ns]').astype('float')
        f = interp1d(expiries, s[col].values, fill_value='extrapolate')
        x = np.array([(k + pd.DateOffset(months=t)).to_datetime64()
                      .astype('datetime64[ns]').astype('float')
                      for t in term])
        return pd.Series(f(x), index=[str(t) + 'm' for t in term])

    df = df.groupby(level='quotedate').apply(aux, col, term)
    # MS quotes don't have fwdspread so they end up as NA
    return df.dropna()

def vol_var(percentile=0.99, index='IG'):
    """Return a quantile of the daily percentage changes of rolling vols.

    Parameters
    ----------
    percentile : float
        quantile to report
    index : string
        index code passed to atm_vol_date

    Returns
    -------
    Series with one value per term ('1m', '2m', '3m').
    """
    # imported locally: the module's import block does not provide datetime
    import datetime

    # honour the `index` argument instead of a hard-coded "IG"
    df = atm_vol_date(index, datetime.date(2014, 6, 11))
    df = rolling_vol(df, term=[1, 2, 3])
    df = df.sort_index()
    # keep the last quote of each calendar day
    df = df.groupby(df.index.date).last()
    return df.pct_change().quantile(percentile)

def lr_var(res):
    """Annualized long-run volatility implied by a fitted garch process.

    The long-run variance is omega / (1 - alpha[1] - beta[1]); the value
    returned is its square root scaled by sqrt(252).
    """
    params = res.params
    long_run_variance = params['omega'] / (1 - params['alpha[1]'] - params['beta[1]'])
    annualization = math.sqrt(252)
    return math.sqrt(long_run_variance) * annualization

if __name__ == "__main__":
    # Compare the realized (garch) vol of one series with its implied vols.
    series = 23
    rv, res = realized_vol("ig", series, "5yr")
    rv = pd.DataFrame(rv)
    rv = rv.reset_index()
    df_vol = atm_vol("ig", series)
    df_vol = rolling_vol(df_vol, term=[1, 2, 3])
    # rolling_vol returns a frame indexed by 'quotedate'; expose that index as
    # a 'date' column so the merge below has a key on both sides.
    # NOTE(review): merge_asof needs both keys to share a dtype -- confirm the
    # quotedate column is timezone-compatible with index_quotes.date.
    df_vol = df_vol.rename_axis('date').reset_index()
    realized_vs_atm = pd.merge_asof(rv, df_vol, on='date')
    realized_vs_atm.set_index('date', inplace=True)
    fig = realized_vs_atm[['cond_vol', '1m', '2m', '3m']].plot()

    # ten largest daily absolute moves of the 3m vol, per series
    top10_frames = []
    for series in [23, 24, 25, 26, 27]:
        df_vol = atm_vol("ig", series)
        df_vol = rolling_vol(df_vol, term=[1, 2, 3])
        # the quotedate index is already the time axis; just rename it
        df_vol = df_vol.rename_axis('date')
        daily_vol = df_vol.resample('D').last()
        daily_vol = pd.DataFrame(daily_vol['3m'].diff().abs().nlargest(10))
        daily_vol['series'] = series
        top10_frames.append(daily_vol)
    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once
    top10 = pd.concat(top10_frames)