about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/quote_parsing/__main__.py | 26
1 files changed, 11 insertions, 15 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py
index b151c019..4726d6a7 100644
--- a/python/quote_parsing/__main__.py
+++ b/python/quote_parsing/__main__.py
@@ -5,6 +5,7 @@ import pandas as pd
import pickle
import sys
+from collections import defaultdict
from serenitas.utils.env import DATA_DIR
from serenitas.utils import SerenitasRotatingFileHandler
from . import logger
@@ -31,7 +32,7 @@ if args.download:
save_emails(update=False)
emails = [f for f in (DATA_DIR / "swaptions").glob("????-??/*") if f.is_file()]
-swaption_stack = {}
+swaption_stack = defaultdict(list)
index_data = []
try:
@@ -63,23 +64,16 @@ with serenitas_pool.connection() as conn:
if key[0] is None or len(option_stack) == 0:
logger.error(f"Something wrong with email: {f.name}")
continue
- if key in swaption_stack:
- swaption_stack[key] = pd.concat(
- [
- swaption_stack[key],
- pd.concat(
- option_stack, names=["expiry", "series", "version"]
- ),
- ]
- )
- else:
- swaption_stack[key] = pd.concat(
- option_stack, names=["expiry", "series", "version"]
+ swaption_stack[key].append(
+ pd.concat(
+ option_stack, names=["expiry", "series", "version"], copy=False
)
+ )
fwd_index["msg_id"] = int(msg_id, 16)
index_data.append(fwd_index)
already_uploaded[msg_id] = key[0]
- index_data = pd.concat(index_data)
+ index_data = pd.concat(index_data, copy=False)
+ swaption_stack = {k: pd.concat(v, copy=False) for k, v in swaption_stack.items()}
if index_data.empty:
sys.exit()
for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]:
@@ -88,7 +82,9 @@ with serenitas_pool.connection() as conn:
index_data["index"] = index_data["index"].astype("category")
index_names = ["quotedate", "index", "quote_source"]
- swaption_stack = pd.concat(swaption_stack, names=index_names, sort=False)
+ swaption_stack = pd.concat(
+ swaption_stack, names=index_names, sort=False, copy=False
+ )
dup = swaption_stack.index.duplicated()
if dup.any():
logger.warning("duplicated data")