diff options
| -rw-r--r-- | python/parse_emails.py | 23 |
1 file changed, 18 insertions, 5 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index 4ff32602..8e861f25 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -231,11 +231,18 @@ def parse_sg_block(fh, indextype, expiration_dates): def parse_gs_block(fh, indextype): - next(fh) + #skip header + while True: + line = next(fh) + if line.strip().startswith("Stk"): + break + r = [] for line in fh: line = line.rstrip() if line == "": + continue + if line.startswith("Expiry") or line.startswith("Assumes"): break vals = line.split() if indextype == 'HY': @@ -257,7 +264,7 @@ def parse_gs_block(fh, indextype): vals.append(None) vals.append(tail) r.append(vals) - return makedf(r, indextype, "GS") + return makedf(r, indextype, "GS"), line def parse_citi_block(fh, indextype): next(fh) #skip header @@ -341,8 +348,9 @@ def parse_gs(fh, indextype, series, quotedate, ref): d = {'quotedate': quotedate, 'index': indextype, 'series': series, 'ref': ref} pat = re.compile(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)") - for line in fh: - line = line.rstrip() + + line = next(fh).strip() + while True: if line.startswith("Expiry"): m = pat.match(line) if m: @@ -351,9 +359,14 @@ def parse_gs(fh, indextype, series, quotedate, ref): d.update({'fwdspread': fwdspread, 'fwdprice': fwdprice, 'expiry': expiry}) fwd_index.append(d.copy()) - option_stack[expiry] = parse_gs_block(fh, indextype) + option_stack[expiry], line = parse_gs_block(fh, indextype) else: logging.error("Can't parse expiry line:", line) + elif line.startswith("Assumes"): + break + else: + line = next(fh).strip() + fwd_index = pd.DataFrame.from_records(fwd_index, index='quotedate') fwd_index['quote_source'] = 'GS' |
