Last active
May 8, 2018 02:48
-
-
Save gullevek/81b9d24039714af3726f4597e88d4223 to your computer and use it in GitHub Desktop.
PostgreSQL function to monitor progress for select -> insert flow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION copy_progress(from_date DATE, to_date DATE, p_precision INT DEFAULT 1) RETURNS "varchar" | |
AS $$ | |
DECLARE | |
status VARCHAR; -- return status | |
my_rec RECORD; -- transfer record | |
-- progress info | |
pos INT := 1; -- current position | |
row_count INT := 0; -- overall row count (max) | |
percent NUMERIC; -- output percent | |
old_percent NUMERIC := 0.0; -- previous percent | |
percent_precision INT := 1; -- default precision, use function parameter to override | |
-- to char formats | |
percent_precision_format VARCHAR := '990D9'; | |
-- count_format VARCHAR := '9G999G999G999'; -- fixed (no padding of empty space on the left) | |
count_format VARCHAR; | |
flex_number_format VARCHAR := 'FM9G999G999G999'; -- flex (will omit leading zeros) | |
time_format VARCHAR := 'FMHH24"h" FMMI"m" FMSS"s" US"ms"'; | |
-- below is for time | |
start_all TIMESTAMP := timeofday(); -- overall start time | |
start_ts TIMESTAMP; -- start time after query is run (select) | |
start_time TIMESTAMP; -- start time of one segment | |
end_time TIMESTAMP; -- any end time for segment, overall run, etc | |
full_time_needed INTERVAL := '1 seconds'; -- the full time for the insert | |
overall_time_needed INTERVAL := '1 seconds'; -- the overall run time including select | |
query_time_needed INTERVAL := '1 seconds'; -- the select query time | |
last_group INTERVAL; -- last group (percent set) time | |
lines_processed INT; -- last group (percent set) rows | |
current_pos INT := 1; -- current position | |
full_time_per_line INTERVAL; -- overall time per lines inserted | |
eta INTERVAL; -- estimated finish time | |
data_per_sec NUMERIC; -- transfer per second | |
last_run_time NUMERIC; -- last run time for segment | |
BEGIN | |
-- override percent precision, if too large, fallback to default 100.0% format | |
IF p_precision IS NOT NULL AND p_precision >= 0 AND p_precision <= 3 THEN | |
percent_precision := p_precision; | |
-- set new output precision based on the value | |
percent_precision_format := '990'; | |
IF p_precision > 0 THEN | |
percent_precision_format := percent_precision_format || 'D' || repeat('9', p_precision); | |
END IF; | |
END IF; | |
-- set output format for pos/row count (based on count format) | |
FOR my_rec IN | |
-- READ QUERY HERE | |
SELECT COUNT(*) OVER () AS ecount, ROW_NUMBER() OVER () AS epos, | |
[bar] -- HERE NEEDS TO BE THE ROWS YOU WANT TO READ | |
FROM [bar] -- TABLE YOU WANT TO READ | |
-- NEEDS DATE | |
WHERE [date] >= from_date AND [date] < to_date | |
LOOP | |
-- set start time if not set yet, so we get start time AFTER the query run | |
IF start_time IS NULL THEN | |
start_time := timeofday(); | |
start_ts := start_time; | |
-- store query run time | |
query_time_needed := start_ts - start_all; | |
END IF; | |
-- set current position and max row count | |
pos := my_rec.epos; | |
row_count := my_rec.ecount; | |
-- fixed pos/row_count string length based on row_count | |
IF count_format IS NULL THEN | |
-- strip FM from flex_number_format (1), get length of row count (2), add G number spacers (for every three numbers) (3) | |
-- for every three steps there is a thousand seperator | |
count_format := RIGHT(REPLACE(flex_number_format, 'FM', ''), LENGTH(row_count::VARCHAR) + ((LENGTH(row_count::VARCHAR) - 1) / 3)); | |
END IF; | |
-- INSERT QUERY HERE | |
INSERT INTO [foo] ([bar], [date]) VALUES (my_rec.[bar], ..., my_rec.[date]); | |
-- percent calculate | |
percent := ROUND((pos::NUMERIC / row_count::NUMERIC) * 100::NUMERIC, percent_precision); | |
-- if percent is different to previous one, run all the time calculations | |
IF percent <> old_percent THEN | |
-- get the end time for this segment | |
end_time := timeofday(); -- calc the run time since call | |
full_time_needed := end_time - start_ts; -- calc the run time for the last segment | |
last_group := end_time - start_time; | |
-- lines processed in the last run | |
lines_processed := pos - current_pos; | |
-- time needed in average for each row based on complete run time | |
full_time_per_line := full_time_needed / pos; | |
-- time to finish | |
eta := full_time_per_line * (row_count - pos); | |
-- fix possible unset eta | |
IF eta < '0 seconds' THEN | |
eta := '0 seconds'; | |
END IF; | |
-- for data per seconds output | |
EXECUTE 'SELECT EXTRACT(EPOCH FROM INTERVAL ''' || full_time_needed || ''')::numeric' INTO last_run_time; | |
IF last_run_time <> 0 THEN | |
data_per_sec := ROUND(pos::numeric / last_run_time, 2)::numeric; | |
ELSE | |
data_per_sec := 0::numeric; | |
END IF; | |
-- progress output | |
RAISE NOTICE '[% ~ %] Processed% /% rows |% %% | ETA: %s / TR: %s / % rows/s / LR: % rows in %s', | |
from_date, to_date, | |
to_char(pos, count_format), | |
to_char(row_count, count_format), | |
to_char(percent, percent_precision_format), | |
to_char(eta, time_format), | |
to_char(full_time_needed, time_format), | |
to_char(data_per_sec, flex_number_format), | |
to_char(lines_processed, flex_number_format), | |
to_char(last_group, time_format); | |
-- next step keep | |
old_percent := percent; | |
current_pos := pos; | |
-- new start time for next segment | |
start_time := timeofday(); | |
END IF; | |
END LOOP; | |
-- end time calculations | |
end_time := timeofday(); | |
overall_time_needed := end_time - start_all; | |
full_time_needed := end_time - start_ts; | |
RAISE NOTICE 'COUNT FORMAT: %', count_format; | |
-- finished status | |
status := '[' || from_date || ' ~ ' || to_date || '] Copied ' || to_char(pos, flex_number_format) || ' rows in ' || to_char(full_time_needed, time_format) || ' and overall time ' || to_char(overall_time_needed, time_format) || ', Query time: ' || to_char(query_time_needed, time_format); | |
RETURN status; | |
END; | |
$$ | |
LANGUAGE plpgsql; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Shows progress of a large data copy