diff options
Diffstat (limited to 'python/quote_parsing/__main__.py')
| -rw-r--r-- | python/quote_parsing/__main__.py | 26 |
1 files changed, 11 insertions, 15 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py index b151c019..4726d6a7 100644 --- a/python/quote_parsing/__main__.py +++ b/python/quote_parsing/__main__.py @@ -5,6 +5,7 @@ import pandas as pd import pickle import sys +from collections import defaultdict from serenitas.utils.env import DATA_DIR from serenitas.utils import SerenitasRotatingFileHandler from . import logger @@ -31,7 +32,7 @@ if args.download: save_emails(update=False) emails = [f for f in (DATA_DIR / "swaptions").glob("????-??/*") if f.is_file()] -swaption_stack = {} +swaption_stack = defaultdict(list) index_data = [] try: @@ -63,23 +64,16 @@ with serenitas_pool.connection() as conn: if key[0] is None or len(option_stack) == 0: logger.error(f"Something wrong with email: {f.name}") continue - if key in swaption_stack: - swaption_stack[key] = pd.concat( - [ - swaption_stack[key], - pd.concat( - option_stack, names=["expiry", "series", "version"] - ), - ] - ) - else: - swaption_stack[key] = pd.concat( - option_stack, names=["expiry", "series", "version"] + swaption_stack[key].append( + pd.concat( + option_stack, names=["expiry", "series", "version"], copy=False ) + ) fwd_index["msg_id"] = int(msg_id, 16) index_data.append(fwd_index) already_uploaded[msg_id] = key[0] - index_data = pd.concat(index_data) + index_data = pd.concat(index_data, copy=False) + swaption_stack = {k: pd.concat(v, copy=False) for k, v in swaption_stack.items()} if index_data.empty: sys.exit() for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]: @@ -88,7 +82,9 @@ with serenitas_pool.connection() as conn: index_data["index"] = index_data["index"].astype("category") index_names = ["quotedate", "index", "quote_source"] - swaption_stack = pd.concat(swaption_stack, names=index_names, sort=False) + swaption_stack = pd.concat( + swaption_stack, names=index_names, sort=False, copy=False + ) dup = swaption_stack.index.duplicated() if dup.any(): logger.warning("duplicated data") |
