diff options
Diffstat (limited to 'python/quote_parsing/__main__.py')
| -rw-r--r-- | python/quote_parsing/__main__.py | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py index c97b21ef..95657dea 100644 --- a/python/quote_parsing/__main__.py +++ b/python/quote_parsing/__main__.py @@ -47,6 +47,7 @@ for f in emails: logger.error(f"Something wrong with email: {f.name}") continue swaption_stack[key] = pd.concat(option_stack, names=["expiry", "strike"]) + fwd_index["msg_id"] = int(msg_id, 16) index_data = index_data.append(fwd_index) already_uploaded[msg_id] = key[0] if index_data.empty: @@ -56,14 +57,20 @@ for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]: index_data[col] = index_data[col].astype("float") index_data["index"] = index_data["index"].astype("category") -swaption_stack = pd.concat(swaption_stack, names=["quotedate", "index", "series"]) -swaption_stack = swaption_stack.reset_index() -swaption_stack = swaption_stack.drop_duplicates( - ["quotedate", "index", "series", "expiry", "strike"] +index_names = ["quotedate", "index", "series", "quote_source"] +swaption_stack = pd.concat(swaption_stack, names=index_names) +dup = swaption_stack.index.duplicated() +if dup.any(): + logger.warning("duplicated data") + swaption_stack = swaption_stack[~dup] +swaption_stack = swaption_stack.reset_index().set_index( + ["quotedate", "index", "series", "expiry", "quote_source"] ) -swaption_stack = swaption_stack.set_index(["quotedate", "index", "series", "expiry"]) +swaption_stack = swaption_stack.sort_index() index_data = index_data.reset_index() -index_data = index_data.drop_duplicates(["quotedate", "index", "series", "expiry"]) +index_data = index_data.drop_duplicates( + ["quotedate", "index", "series", "expiry", "quote_source"] +) from utils.db import serenitas_pool conn = serenitas_pool.getconn() |
