[Beet-users] Patch PostgreSQL support for beet-utils
Status: Beta
Brought to you by:
tormp
From: Pierre Q. <pie...@ze...> - 2010-06-17 20:21:27
|
From b52e8ee9cc0ce74b99e8c439fe4fda438290bd0e Mon Sep 17 00:00:00 2001 From: Pierre Queinnec <pie...@ze...> Date: Thu, 17 Jun 2010 14:49:12 +0200 Subject: [PATCH 2/2] Add PostgreSQL support to the beet-utils. Use with the '-tool csv'. --- utils/src/main/sh/load-events-postgresql.sh | 58 ++++++++++++++++++++ utils/src/main/sql/etl/postgresql/create_etl.sql | 56 +++++++++++++++++++ .../main/sql/etl/postgresql/event_summarize.sql | 27 +++++++++ .../etl/postgresql/load_event_csv-client_copy.ctl | 1 + .../src/main/sql/etl/postgresql/load_event_csv.ctl | 16 ++++++ 5 files changed, 158 insertions(+), 0 deletions(-) create mode 100755 utils/src/main/sh/load-events-postgresql.sh create mode 100644 utils/src/main/sql/etl/postgresql/create_etl.sql create mode 100644 utils/src/main/sql/etl/postgresql/event_summarize.sql create mode 100644 utils/src/main/sql/etl/postgresql/load_event_csv-client_copy.ctl create mode 100644 utils/src/main/sql/etl/postgresql/load_event_csv.ctl diff --git a/utils/src/main/sh/load-events-postgresql.sh b/utils/src/main/sh/load-events-postgresql.sh new file mode 100755 index 0000000..c42ac7f --- /dev/null +++ b/utils/src/main/sh/load-events-postgresql.sh @@ -0,0 +1,58 @@ +#!/bin/bash -x + +## function to echo a usage message and exit. +function usage { + cat <<EOF + +Usage: $0 [psql client args] [event log] + + Example: + + > $0 '-U beet analysis' log.bxml.gz.20080101000000 + +EOF + exit 1 +} + +## check arg count. +if [[ $# < 2 ]]; then + usage; +fi + +## verify that second arg is an actual file. +if [ ! -f $2 ]; then + echo + echo "\"$2\" does not exist or is not a regular file." + usage; +fi + +if [ -z "$JAVA_HOME" ]; then + JAVA=java +else + JAVA=$JAVA_HOME/bin/java +fi + +echo "Loading $2 into behavior tracking database at $1." +echo "Start: " $(date) + +case "`uname`" in +# FIXME Cygwin support +Darwin) + gzcat "$2" | + "$JAVA" -jar beet-utils.jar -tool csv | + sed 's/,$//' | + psql -c "$(cat sql/etl/postgresql/load_event_csv.ctl)" $1 + ;; + +*) + zcat "$2" | + "$JAVA" -jar beet-utils.jar -tool csv | + sed 's/,$//' | + psql -c "$(cat sql/etl/postgresql/load_event_csv.ctl)" $1 + ;; +esac + +echo "Summarizing data..." +psql -f sql/etl/postgresql/event_summarize.sql $1 + +echo "End: " $(date) diff --git a/utils/src/main/sql/etl/postgresql/create_etl.sql b/utils/src/main/sql/etl/postgresql/create_etl.sql new file mode 100644 index 0000000..8abec99 --- /dev/null +++ b/utils/src/main/sql/etl/postgresql/create_etl.sql @@ -0,0 +1,56 @@ +-- event data table for basic analysis +create table BEHAVIOR_TRACKING_EVENT ( + EVENT_ID VARCHAR(100) NOT NULL, + PARENT_EVENT_ID VARCHAR(100), + EVENT_TYPE VARCHAR(20) NOT NULL, + EVENT_NAME VARCHAR(256) NOT NULL, + EVENT_START DATE NOT NULL, + USER_ID VARCHAR(64), + SESSION_ID VARCHAR(64), + APPLICATION VARCHAR(64) NOT NULL, + DURATION_NS BIGINT NOT NULL, + EVENT_DATA VARCHAR(4000), + ERROR VARCHAR(512), + SUMMARIZED CHAR(1) +); +create index EVENT_NAME_IDX on BEHAVIOR_TRACKING_EVENT ( + EVENT_NAME ASC +); + +-- event summary table for trending over long periods of time +create table BEHAVIOR_TRACKING_SUMMARY ( + SUMMARY_DATE VARCHAR(100) NOT NULL, + EVENT_TYPE VARCHAR(20) NOT NULL, + EVENT_NAME VARCHAR(256) NOT NULL, + PERIOD_START DATE NOT NULL, + PERIOD_END DATE NOT NULL, + COUNT BIGINT NOT NULL, + AVERAGE_NS BIGINT NOT NULL, + MINIMUM_NS BIGINT NOT NULL, + MAXIMUM_NS BIGINT NOT NULL, + MEDIAN_NS BIGINT NOT NULL, + STANDARD_DEVIATION BIGINT NOT NULL, + ERROR_COUNT BIGINT NOT NULL +); + +-- MEDIAN, taken from Scott Bailey 'Artacus' +CREATE OR REPLACE FUNCTION _final_median(numeric[]) + RETURNS numeric AS +$$ + SELECT AVG(val) + FROM ( + SELECT val + FROM unnest($1) val + ORDER BY 1 + LIMIT 2 - MOD(array_upper($1, 1), 2) + OFFSET CEIL(array_upper($1, 1) / 2.0) - 1 + ) sub; +$$ +LANGUAGE 'sql' IMMUTABLE; + +CREATE AGGREGATE median(numeric) ( + SFUNC=array_append, + STYPE=numeric[], + FINALFUNC=_final_median, + INITCOND='{}' +); diff --git a/utils/src/main/sql/etl/postgresql/event_summarize.sql b/utils/src/main/sql/etl/postgresql/event_summarize.sql new file mode 100644 index 0000000..87445c5 --- /dev/null +++ b/utils/src/main/sql/etl/postgresql/event_summarize.sql @@ -0,0 +1,27 @@ +BEGIN; +INSERT INTO BEHAVIOR_TRACKING_SUMMARY + (SUMMARY_DATE, EVENT_TYPE, EVENT_NAME, PERIOD_START, PERIOD_END, COUNT, + AVERAGE_NS, MINIMUM_NS, MAXIMUM_NS, median_ns, STANDARD_DEVIATION, ERROR_COUNT) +SELECT NOW(), evt.* + FROM ( + SELECT event.event_type, event.event_name, + MIN(event_start) period_start, + MAX(event_start) period_end, + COUNT(*) event_count, + AVG(duration_ns) average_ns, + MIN(duration_ns) min_ns, + MAX(duration_ns) max_ns, + MEDIAN(duration_ns) median_ns, + STDDEV(duration_ns) std_dev_ns, + (SELECT COUNT(*) + FROM BEHAVIOR_TRACKING_EVENT tmp + WHERE tmp.summarized is NULL + and event_name = event.event_name + and event_type = event.event_type + and error IS NOT NULL) error_cnt + FROM BEHAVIOR_TRACKING_EVENT event + WHERE event.summarized IS NULL + GROUP BY event_name, event_type + ) evt; + UPDATE BEHAVIOR_TRACKING_EVENT SET summarized='Y' WHERE summarized IS NULL; + COMMIT; diff --git a/utils/src/main/sql/etl/postgresql/load_event_csv-client_copy.ctl b/utils/src/main/sql/etl/postgresql/load_event_csv-client_copy.ctl new file mode 100644 index 0000000..fa299df --- /dev/null +++ b/utils/src/main/sql/etl/postgresql/load_event_csv-client_copy.ctl @@ -0,0 +1 @@ +\COPY BEHAVIOR_TRACKING_EVENT (EVENT_ID, PARENT_EVENT_ID, EVENT_TYPE, EVENT_NAME, APPLICATION, EVENT_START, DURATION_NS, USER_ID, SESSION_ID, ERROR, EVENT_DATA) FROM STDIN WITH CSV HEADER diff --git a/utils/src/main/sql/etl/postgresql/load_event_csv.ctl b/utils/src/main/sql/etl/postgresql/load_event_csv.ctl new file mode 100644 index 0000000..2b1784c --- /dev/null +++ b/utils/src/main/sql/etl/postgresql/load_event_csv.ctl @@ -0,0 +1,16 @@ +COPY BEHAVIOR_TRACKING_EVENT + ( + EVENT_ID, + PARENT_EVENT_ID, + EVENT_TYPE, + EVENT_NAME, + APPLICATION, + EVENT_START, + DURATION_NS, + USER_ID, + SESSION_ID, + ERROR, + EVENT_DATA + ) + FROM STDIN + WITH CSV HEADER; -- 1.7.1 |