diff options
| -rw-r--r-- | sql/dawn.sql | 35 |
1 files changed, 21 insertions, 14 deletions
diff --git a/sql/dawn.sql b/sql/dawn.sql index da311088..0dc7a3fd 100644 --- a/sql/dawn.sql +++ b/sql/dawn.sql @@ -1049,20 +1049,27 @@ CREATE TYPE mark_list AS (date date, "REUTERS" float, "S&P" float); -CREATE OR REPLACE FUNCTION example() RETURNS SETOF mark_list +CREATE OR REPLACE FUNCTION get_mark_matrix(identifier varchar(9)) RETURNS SETOF mark_list AS $$ -import pandas as pd + import numpy as np -c = plpy.cursor("SELECT identifier, date, final AS source, mark FROM external_marks " \ - "LEFT JOIN mark_source_mapping " \ - "ON mark_source_mapping.globeop = external_marks.source " \ - "ORDER BY date, identifier") -df = pd.DataFrame.from_records(c) -for t in (df. - groupby(['date', 'identifier', 'source']). - mean(). - unstack(-1). - reset_index(). - itertuples(index=False)): +import pandas as pd + +source_list = ["BROKER", "BVAL", "IDC", "MANAGER", "MARKIT", "PB", "PRICESERVE", "PRICINGDIRECT", "REUTERS", "S&P"] +sql_string = "SELECT identifier, date, source, mark FROM external_marks_mapped WHERE identifier = $1 order by date, source" +plan = plpy.prepare(sql_string, ["varchar"]) +df = pd.DataFrame.from_records(plpy.cursor(plan, [identifier])) + +df = df.groupby(['date', 'identifier', 'source']).mean().unstack(-1) +df.columns = df.columns.droplevel(level=0) + +for s in source_list: + if s not in df.columns: + df[s] = np.nan + +df = df[source_list].reset_index() + +for t in (df.itertuples(index=False)): yield [t[0], t[1]] + [None if np.isnan(v) else v for v in t[2:]] -$$ LANGUAGE plpythonu; + +$$ LANGUAGE plpythonu;
\ No newline at end of file |
