about | summary | refs | log | tree | commit | diff | stats
path: root/python/parse_emails.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_emails.py')
-rw-r--r-- python/parse_emails.py 48
1 files changed, 33 insertions, 15 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index d93c17b6..2e01c5b1 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -12,10 +12,12 @@ import sys
from quantlib.time.imm import next_date
from quantlib.time.api import Date, pydate_from_qldate
-logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), 'emails_parsing.log'),
+logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"),
+ 'emails_parsing.log'),
level=logging.WARNING,
format='%(asctime)s %(message)s')
+
def list_imm_dates(date):
d = Date.from_datetime(date)
r = []
@@ -24,6 +26,7 @@ def list_imm_dates(date):
r.append(pydate_from_qldate(d))
return r
+
def makedf(r, indextype, quote_source):
if indextype == 'IG':
cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
@@ -35,7 +38,7 @@ def makedf(r, indextype, quote_source):
cols.append('gamma')
if quote_source == "GS":
cols.append("tail")
- df = pd.DataFrame.from_records(r, columns = cols)
+ df = pd.DataFrame.from_records(r, columns=cols)
for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma', 'tail']:
if col in df:
df[col] = df[col].str.strip("%").astype('float') / 100
@@ -52,6 +55,7 @@ def makedf(r, indextype, quote_source):
df.set_index('strike', inplace=True)
return df
+
def parse_quotedate(fh, date_received):
for line in fh:
line = line.rstrip()
@@ -69,10 +73,11 @@ def parse_quotedate(fh, date_received):
raise RuntimeError("can't parse date")
return quotedate
+
def parse_refline(line):
- regex = "Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?" \
- "Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)" \
- "\s+Expiry:(?P<expiry>\S+)"
+ regex = r"Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?" \
+ r"Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)" \
+ r"\s+Expiry:(?P<expiry>\S+)"
m = re.match(regex, line)
try:
d = m.groupdict()
@@ -81,6 +86,7 @@ def parse_refline(line):
logging.error("something wrong with " + fh.name)
return d
+
def parse_baml(fh, indextype, series, quotedate, *args):
option_stack = {}
fwd_index = []
@@ -107,6 +113,7 @@ def parse_baml(fh, indextype, series, quotedate, *args):
else:
raise RuntimeError("empty email: " + fh.name)
+
def parse_baml_block(fh, indextype):
next(fh) ## skip header
r = []
@@ -123,6 +130,7 @@ def parse_baml_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "BAML"), line
+
def parse_ms_block(fh, indextype):
line = next(fh) ## skip header
if line.strip() == "": ## empty block
@@ -155,6 +163,7 @@ def parse_ms_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "MS")
+
def parse_nomura_block(fh, indextype):
next(fh) ## skip header
r = []
@@ -178,6 +187,7 @@ def parse_nomura_block(fh, indextype):
return None, makedf(r, indextype, "NOM")
return line, makedf(r, indextype, "NOM")
+
def parse_sg_block(fh, indextype, expiration_dates):
r = []
for line in fh:
@@ -206,6 +216,7 @@ def parse_sg_block(fh, indextype, expiration_dates):
r.append(vals)
return expiry, makedf(r, indextype, "SG")
+
def parse_gs_block(fh, indextype):
next(fh)
r = []
@@ -235,6 +246,7 @@ def parse_gs_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "GS")
+
def parse_ms(fh, indextype, *args):
option_stack = {}
for line in fh:
@@ -247,6 +259,7 @@ def parse_ms(fh, indextype, *args):
option_stack[expiry] = block
return option_stack
+
def parse_nomura(fh, indextype, *args):
option_stack = {}
def aux(line, fh, indextype, option_stack):
@@ -258,13 +271,14 @@ def parse_nomura(fh, indextype, *args):
if "EXPIRY" in next_line:
aux(next_line, fh, indextype, option_stack)
else:
- raise RuntimeError("Don't know what to do with {}:".format(line))
+ raise RuntimeError(f"Don't know what to do with {line}.")
for line in fh:
line = line.rstrip()
if "EXPIRY" in line:
aux(line, fh, indextype, option_stack)
return option_stack
+
def parse_sg(fh, indextype, expiration_dates):
option_stack = {}
fwd_index = []
@@ -275,6 +289,7 @@ def parse_sg(fh, indextype, expiration_dates):
option_stack[expiry] = df
return option_stack
+
def parse_gs(fh, indextype, series, quotedate, ref):
option_stack = {}
fwd_index = []
@@ -283,7 +298,7 @@ def parse_gs(fh, indextype, series, quotedate, ref):
for line in fh:
line = line.rstrip()
if line.startswith("Expiry"):
- m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
+ m = re.match(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
if m:
expiry, fwdprice, fwdspread = m.groups()
expiry = pd.to_datetime(expiry, format='%d%b%y')
@@ -298,11 +313,11 @@ def parse_gs(fh, indextype, series, quotedate, ref):
fwd_index['quote_source'] = 'GS'
return option_stack, fwd_index
-subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
-subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
-subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
-subject_gs = re.compile("GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)")
-subject_sg = re.compile("SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
+subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
+subject_ms = re.compile(r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
+subject_nomura = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
+subject_gs = re.compile(r"GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)")
+subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
def parse_email(email, date_received):
with open(email.path, "rt") as fh:
@@ -339,8 +354,8 @@ def parse_email(email, date_received):
fwd_index.set_index('quotedate', inplace=True)
return (quotedate, indextype, series), (option_stack, fwd_index)
else:
- raise RuntimeError("can't parse subject line: {0} for email {1}".format(
- subject, email.name))
+ raise RuntimeError("can't parse subject line: {0} for email {1}".
+ format(subject, email.name))
def write_todb(swaption_stack, index_data):
def gen_sql_str(query, table_name, columns):
@@ -367,6 +382,7 @@ def write_todb(swaption_stack, index_data):
df.itertuples(index=False))
conn.commit()
+
def get_email_list(date):
"""returns a list of email file names for a given date
@@ -379,7 +395,8 @@ def get_email_list(date):
df = pd.DataFrame.from_dict(already_uploaded, orient='index')
df.columns = ['quotedate']
df = df.reset_index().set_index('quotedate')
- return df.loc[date,'index'].tolist()
+ return df.loc[date, 'index'].tolist()
+
def pickle_drop_date(date):
with open(".pickle", "rb") as fh:
@@ -388,6 +405,7 @@ def pickle_drop_date(date):
with open(".pickle", "wb") as fh:
pickle.dump(newdict, fh)
+
if __name__=="__main__":
save_emails()
data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions")