"""Parse swaption quote text files from the ``quotes`` directory.

Every file is scanned line by line for:

* a header line matching ``(IG|HY)24 5y SWAPTION UPDATE - Ref ...``, which
  gives the index type and the reference value(s);
* ``Expiry ...`` lines, which give the expiry date used as the key of the
  table that follows;
* a table that starts after a line beginning with ``Stk`` and ends at the
  first blank line (or at the end of the input).

Each table becomes a DataFrame indexed by ``Stk``; the tables of one file are
concatenated under their expiry date.
"""
import os
import pdb
import re
from datetime import datetime

import pandas as pd

# Raw-string patterns, compiled once instead of on every line.
_HEADER_RE = re.compile(r"(IG|HY)24 5y SWAPTION UPDATE - Ref\D+(.+)$")
_HY_REF_RE = re.compile(r"([\S]+)\s+\(([^)]+)\)")
_EXPIRY_RE = re.compile(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)")
_SPACES_RE = re.compile(" +")

# Column names of a parsed table row, per index type.
_COLUMNS = {
    "HY": ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol', 'VolChg', 'VolBpd',
           'Tail'],
    "IG": ['Stk', 'Pay', 'Delta', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'],
}

# Token positions removed from every row, per index type.  They are popped
# one after the other, so the second position refers to the list as it is
# *after* the first removal.
# NOTE(review): presumably these tokens are column separators -- confirm
# against a sample quote file.
_DROPPED_POSITIONS = {"HY": (2, 9), "IG": (1, 8)}


def _split_row(line, index_type):
    """Split one table row on runs of spaces and drop the unused tokens."""
    tokens = _SPACES_RE.sub(" ", line).split(" ")
    for position in _DROPPED_POSITIONS[index_type]:
        tokens.pop(position)
    return tokens


def _coerce_numeric(frame):
    """Convert every non-numeric column that holds numbers to a numeric dtype.

    Stand-in for ``DataFrame.convert_objects(convert_numeric=True)``, which
    was removed in pandas 1.0: unparseable entries become NaN, and a column
    in which nothing parses is left untouched.
    """
    for name in frame.columns:
        if pd.api.types.is_numeric_dtype(frame[name]):
            continue
        converted = pd.to_numeric(frame[name], errors="coerce")
        if not converted.isna().all():
            frame[name] = converted
    return frame


def _build_frame(rows, index_type, refspread, refprice):
    """Turn the collected rows of one table into a DataFrame indexed by Stk."""
    frame = pd.DataFrame.from_records(rows, columns=_COLUMNS[index_type])
    frame['refspread'] = refspread
    if index_type == 'HY':
        frame['refprice'] = refprice
    # "bid/offer" strings are split into two separate columns.
    frame[['PayBid', 'PayOffer']] = frame.Pay.str.split('/', expand=True)
    frame[['RecBid', 'RecOffer']] = frame.Rec.str.split('/', expand=True)
    frame = frame.drop(['Pay', 'Rec'], axis=1)
    frame = _coerce_numeric(frame)
    return frame.set_index('Stk')


def parse_quotes(lines):
    """Parse the text lines of one quote file.

    Parameters
    ----------
    lines : iterable of str
        Decoded lines of the file; trailing whitespace is stripped here.

    Returns
    -------
    pandas.DataFrame
        The tables of the file concatenated with ``pd.concat`` on a dict
        keyed by expiry date, so the outer index level is the expiry and the
        inner one is ``Stk``.  An empty DataFrame when no table was found.

    Raises
    ------
    ValueError
        If a table starts before any header line, if an ``HY`` header does
        not have the ``price (spread)`` form, or if a table ends while no
        expiry date is known.
    """
    tables = {}
    index_type = None
    refprice = None
    refspread = None
    expiry = None
    in_table = False
    rows = []

    def store_table():
        # Empty tables are skipped; they cannot be split into bid/offer.
        if not rows:
            return
        if expiry is None:
            raise ValueError("table found before any parseable Expiry line")
        tables[expiry] = _build_frame(rows, index_type, refspread, refprice)

    for raw in lines:
        line = raw.rstrip()

        header = _HEADER_RE.search(line)
        if header:
            index_type, reference = header.groups()
            if index_type == 'HY':
                hy_ref = _HY_REF_RE.match(reference)
                if hy_ref is None:
                    raise ValueError(
                        "unexpected HY reference: %r" % reference)
                refprice, refspread = map(float, hy_ref.groups())
            else:
                refspread = float(reference)
            continue

        if line.startswith("At"):
            # The original parsed a timestamp from this line but never used
            # it; the line is still skipped so it cannot end up as a row.
            continue

        if line.startswith("Expiry"):
            found = _EXPIRY_RE.match(line)
            if found:
                # Only the date is used; the two values inside the
                # parentheses were captured but never read.
                expiry = datetime.strptime(found.group(1), '%d%b%y')
            continue

        if line.startswith("Stk"):
            if index_type is None:
                raise ValueError("table found before any header line")
            in_table = True
            rows = []
            continue

        if not in_table:
            continue

        if line:
            rows.append(_split_row(line, index_type))
            continue

        # A blank line closes the current table.
        store_table()
        in_table = False
        rows = []

    # A table that runs up to the end of the input has no closing blank line.
    if in_table:
        store_table()

    if not tables:
        return pd.DataFrame()
    return pd.concat(tables)


def main():
    """Parse every file in ``quotes`` and stop in the debugger after each."""
    os.chdir("quotes")
    for name in os.listdir("."):
        path = os.path.abspath(name)
        if not os.path.isfile(path):
            continue
        with open(path, "rb") as fh:
            masterdf = parse_quotes(
                raw.decode('utf-8', 'ignore') for raw in fh)
        # NOTE(review): the original layout of this script was lost; the
        # debugger stop is assumed to happen once per file, with the parsed
        # result available as ``masterdf``.
        pdb.set_trace()


if __name__ == "__main__":
    main()