diff options
Diffstat (limited to 'python/quote_parsing/__main__.py')
| -rw-r--r-- | python/quote_parsing/__main__.py | 22 |
1 files changed, 13 insertions, 9 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py index a0e35510..3fd50f4e 100644 --- a/python/quote_parsing/__main__.py +++ b/python/quote_parsing/__main__.py @@ -42,13 +42,16 @@ except FileNotFoundError: conn = serenitas_pool.getconn() for f in emails: + print(f) date_composed, msg_id = f.name.split("_") date_composed = datetime.datetime.strptime(date_composed, "%Y-%m-%d %H-%M-%S") + if date_composed.date() < datetime.date.fromisoformat("2021-01-18"): + continue if msg_id == "16e4b563f6cff219": # GS message has IG quotes with a HY header continue - if msg_id in already_uploaded: - continue + # if msg_id in already_uploaded: + # continue else: try: key, (option_stack, fwd_index) = parse_email(f, date_composed, conn) @@ -58,12 +61,12 @@ for f in emails: if key[0] is None or len(option_stack) == 0: logger.error(f"Something wrong with email: {f.name}") continue - swaption_stack[key] = pd.concat(option_stack, names=["expiry", "strike"]) + swaption_stack[key] = pd.concat(option_stack, names=["expiry", "version"]) fwd_index["msg_id"] = int(msg_id, 16) index_data = index_data.append(fwd_index) - # already_uploaded[msg_id] = key[0] -# if index_data.empty: -# sys.exit() + already_uploaded[msg_id] = key[0] +if index_data.empty: + sys.exit() for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]: if col in index_data: index_data[col] = pd.to_numeric(index_data[col]) @@ -72,9 +75,10 @@ index_data["index"] = index_data["index"].astype("category") index_names = ["quotedate", "index", "series", "quote_source"] swaption_stack = pd.concat(swaption_stack, names=index_names, sort=False) dup = swaption_stack.index.duplicated() -# if dup.any(): -# logger.warning("duplicated data") -# swaption_stack = swaption_stack[~dup] +if dup.any(): + logger.warning("duplicated data") + swaption_stack = swaption_stack[~dup] + swaption_stack = swaption_stack.reset_index().set_index( ["quotedate", "index", "series", "expiry", "quote_source", "version"] ) |
