Retrieving Redshift table column names for use in generated SQL
/*
 * Have you ever tried to generate SQL statements by first retrieving table column names
 * from the Redshift pg_* system tables?
 *
 * You probably gave up due to errors about invalid types.
 *
 * Here is my suggestion for accomplishing this.
 *
 * First, use the CREATE TABLE statement below to build a temp table with the column names for the table of interest.
 * We must create a table, otherwise LISTAGG raises an error saying it must be used on a user-created table.
 */
DROP TABLE IF EXISTS col_names;
CREATE TEMP TABLE col_names AS
SELECT attrelid::int AS tableid, attname::text AS column_name, attnum AS column_num
FROM pg_attribute
WHERE attrelid IN
    (SELECT pg_class.oid FROM pg_class INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
     WHERE relname = '<table_with_desired_columns>' AND nspname = '<table_schema>')
  AND attname NOT IN ('insertxid', 'deletexid', 'oid', 'tableoid', 'xmin', 'cmin', 'xmax', 'cmax', 'ctid')
;
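-- (Optional) Quick sanity check: list the captured column names in the table's column order
-- before aggregating anything.
SELECT column_name, column_num FROM col_names ORDER BY column_num;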
-- Then you can LISTAGG the columns to get a single string of the column names.
SELECT LISTAGG(column_name, ', ') FROM col_names;
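-- The result is one comma-separated list of column names (e.g. 'id, npi, first_name, ...'
-- for the provider table used further below), which can be pasted straight into generated SQL, e.g.:
--   SELECT id, npi, first_name, ... FROM <table_schema>.<table_with_desired_columns>;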
-- You can do interesting things like ordering the column names within the aggregate:
SELECT LISTAGG(column_name, ', ') WITHIN GROUP (ORDER BY column_name) FROM col_names;
-- You can also use this technique for fairly advanced SQL generation.
--
-- For example, I wanted to generate a JSON object for each row of a table,
-- and I did not want to hand-type the SQL statement for each of the 10+ tables that needed it.
-- The following SQL statement generated the expression that I then used in a SELECT statement.
SELECT REGEXP_REPLACE(
           LISTAGG('COALESCE(\\' || '''' || '"' || column_name || '"' || ':"\\'
               || '''' || ' || ' || column_name || ' || CHR(34), \\' || '''' || '"' || column_name || '"' || ':\\' || '''' || ') ' || ' || '','' || \n ' ) WITHIN GROUP (ORDER BY column_num)
           ,'\',\' \\|\\| \n $'
           ,''
       )
FROM col_names
;
-- It outputs:
/*
COALESCE(\'"id":"\' || id || CHR(34), \'"id":\') || ',' ||
COALESCE(\'"npi":"\' || npi || CHR(34), \'"npi":\') || ',' ||
COALESCE(\'"first_name":"\' || first_name || CHR(34), \'"first_name":\') || ',' ||
COALESCE(\'"last_name":"\' || last_name || CHR(34), \'"last_name":\') || ',' ||
COALESCE(\'"address_line_1":"\' || address_line_1 || CHR(34), \'"address_line_1":\') || ',' ||
COALESCE(\'"address_line_2":"\' || address_line_2 || CHR(34), \'"address_line_2":\') || ',' ||
COALESCE(\'"city":"\' || city || CHR(34), \'"city":\') || ',' ||
COALESCE(\'"state":"\' || state || CHR(34), \'"state":\') || ',' ||
COALESCE(\'"zip_code":"\' || zip_code || CHR(34), \'"zip_code":\') || ',' ||
COALESCE(\'"specialty_type":"\' || specialty_type || CHR(34), \'"specialty_type":\') || ',' ||
COALESCE(\'"_partition_id":"\' || _partition_id || CHR(34), \'"_partition_id":\') || ',' ||
COALESCE(\'"full_name":"\' || full_name || CHR(34), \'"full_name":\') ||
*/
-- I then use this expression in a SELECT statement to output each row as a JSON object:
SELECT
    id AS key,
    '{' ||
    COALESCE(\'"id":"\' || id || CHR(34), \'"id":\') || ',' ||
    COALESCE(\'"npi":"\' || npi || CHR(34), \'"npi":\') || ',' ||
    COALESCE(\'"first_name":"\' || first_name || CHR(34), \'"first_name":\') || ',' ||
    COALESCE(\'"last_name":"\' || last_name || CHR(34), \'"last_name":\') || ',' ||
    COALESCE(\'"address_line_1":"\' || address_line_1 || CHR(34), \'"address_line_1":\') || ',' ||
    COALESCE(\'"address_line_2":"\' || address_line_2 || CHR(34), \'"address_line_2":\') || ',' ||
    COALESCE(\'"city":"\' || city || CHR(34), \'"city":\') || ',' ||
    COALESCE(\'"state":"\' || state || CHR(34), \'"state":\') || ',' ||
    COALESCE(\'"zip_code":"\' || zip_code || CHR(34), \'"zip_code":\') || ',' ||
    COALESCE(\'"specialty_type":"\' || specialty_type || CHR(34), \'"specialty_type":\') || ',' ||
    COALESCE(\'"_partition_id":"\' || _partition_id || CHR(34), \'"_partition_id":\') || ',' ||
    COALESCE(\'"full_name":"\' || full_name || CHR(34), \'"full_name":\') ||
    '}'
FROM provider
;
-- Note: I am escaping the single quotes because this query is stored in a file that is read into a Python script.
-- Single quotes have special meaning in Python strings, so they need to be escaped.
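--
-- If you paste the generated expression straight into a SQL client instead of going through
-- the Python script, drop the backslash escapes; for example, the first term would then read:
--   COALESCE('"id":"' || id || CHR(34), '"id":') || ',' ||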