Redshift Tips
# List all tables:
select db_id, id, name, sum(rows) as mysum
from stv_tbl_perm where db_id = 100546
group by db_id, id, name order by mysum desc;

# list all running processes:
select pid, query from stv_recents where status = 'Running';

# describe table
select * from PG_TABLE_DEF where tablename='audit_trail';
select * from pg_tables where schemaname = 'public';

# Disk space used:
select sum(used-tossed) as used, sum(capacity) as capacity from stv_partitions;

# Query log
select query, starttime, substring from svl_qlog where substring like '%tbl_name%'
order by starttime desc limit 50;

# command history
select * from stl_ddltext where text like '%ox_data_summary_hourly_depot%' limit 10;

# last load errors
select starttime, filename, err_reason from stl_load_errors order by starttime desc limit 100;
select filename, count(*) as cnt from stl_load_errors group by filename;

# create table from another table
select * into newevent from event;
# Check how columns are compressed
analyze compression tbl_name;

# ANALYZE and VACUUM
If you insert, update, or delete a significant number of rows in a table, run the ANALYZE and VACUUM commands against the table.
The "analyze compression tbl_name" command produces a report with the suggested column encodings.
# To find and diagnose load errors for table 'event'
create view loadview as
(select distinct tbl, trim(name) as table_name, query, starttime,
trim(filename) as input, line_number, field, err_code,
trim(err_reason) as reason
from stl_load_errors sl, stv_tbl_perm sp
where sl.tbl = sp.id);
select * from loadview where table_name='event';

# Query to find blocks used
select stv_tbl_perm.name, count(*)
from stv_blocklist, stv_tbl_perm
where stv_blocklist.tbl = stv_tbl_perm.id
and stv_blocklist.slice = stv_tbl_perm.slice
group by stv_tbl_perm.name
order by stv_tbl_perm.name;
Load tips (a combined COPY sketch follows this list):
# While loading data you can specify "empty as null", "blanks as null", "max error 5", "ignore blank lines", "remove quotes", and gzip compression. Use the keywords: EMPTYASNULL BLANKSASNULL MAXERROR 5 IGNOREBLANKLINES REMOVEQUOTES GZIP
# use NULL AS '\000' to fix the import from specific files
# use BLANKSASNULL in the original COPY statement so that no empty strings are loaded into VARCHAR fields which might ultimately be converted to numeric fields.
# Use the NOLOAD keyword with a COPY command to validate the data in the input files before actually loading the data.
# use COMPUPDATE to enable automatic compression
# FILLRECORD to fill missing trailing columns in a record with blanks or NULLs
# TRIMBLANKS removes trailing whitespace characters from VARCHAR strings.
# ESCAPE: the backslash character (\) in input data is treated as an escape character (useful for delimiters and embedded newlines).
# ROUNDEC: a value of 20.259 loaded into a DECIMAL(8,2) column is rounded to 20.26; without it the value is truncated to 20.25.
# TRUNCATECOLUMNS truncates data in columns so it fits the column's defined width.
# IGNOREHEADER 1 to skip the first (header) row
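A sketch that combines several of these options in one COPY; the table name, S3 path, and credentials below are placeholders, not values from the original tips:
copy tbl_name
from 's3://my-bucket/path/data.gz'
credentials 'aws_access_key_id=<access-key-id>;aws_secret_access_key=<secret-access-key>'
gzip
ignoreheader 1
emptyasnull
blanksasnull
maxerror 5
ignoreblanklines
removequotes
trimblanks
truncatecolumns;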
_____
If you are using JDBC, try adding the keepalive option to your connection string, e.g.:
jdbc:postgresql://instance.amazonaws.com:8192/database?tcpkeepalive=true
You can have AUTOCOMMIT set in your Workbench client.
_____
To avoid timeout errors while using a workbench client on Windows, use the following registry settings:
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\KeepAliveTime 30000
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\KeepAliveInterval 1000
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\TcpMaxDataRetransmission 10
_____
# Consider using DISTKEY and SORTKEY - a table can have multiple sort key columns but only one distribution key (see the sketch below).
# wlm_query_slot_count - increasing this sets aside more memory for the query, which may avoid operations spilling to disk
# the isolation level for Redshift is SERIALIZABLE
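A minimal sketch of the two points above; the table and column names are hypothetical:
-- one DISTKEY column, a compound SORTKEY over two columns
create table events_by_user (
  user_id    bigint,
  event_ts   timestamp,
  event_type varchar(32)
)
distkey(user_id)
sortkey(event_ts, user_id);

-- temporarily claim more memory slots for a heavy query in this session
set wlm_query_slot_count to 3;
-- ... run the heavy query ...
set wlm_query_slot_count to 1;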
_____
// There is no equivalent of "show create table tbl_name".
// Select from the PG_TABLE_DEF table to gather the necessary schema information, for example:
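A sketch of such a query; tbl_name is a placeholder:
-- note: pg_table_def only lists tables in the current search_path
select "column", type, encoding, distkey, sortkey
from pg_table_def
where tablename = 'tbl_name';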
// convert to and from unixtime
select extract (epoch from timestamp '2011-08-08 11:11:58');
select timestamp 'epoch' + starttime * interval '1 second' as starting from tbl_name;

// Update a joined table:
update abcd
set ser_area_code=abcd_update.ser_area_code,
preferences=abcd_update.preferences, opstype=abcd_update.opstype,
phone_type=abcd_update.phone_type
from abcd_update
where abcd.phone_number = abcd_update.phone_number;
http://docs.aws.amazon.com/redshift/latest/dg/t_updating-inserting-using-staging-tables-.html#concept_upsert
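The linked page describes upserts via a staging table; a minimal sketch of that pattern, assuming a hypothetical staging table abcd_staging:
begin transaction;

-- delete target rows that will be replaced
delete from abcd
using abcd_staging
where abcd.phone_number = abcd_staging.phone_number;

-- insert the new and updated rows from the staging table
insert into abcd
select * from abcd_staging;

end transaction;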
_____
// install postgresql
yum install postgresql postgresql-server
chkconfig postgresql on

// Create a file where the Redshift password will be stored.
vi ~/.pgpass
c.us-east-1.redshift.amazonaws.com:5439:mydb:root:Passwd
chmod 0600 ~/.pgpass

// load data to redshift (query output is saved to to_save.csv)
cat to_psql.txt | psql -hc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb > to_save.csv

// send the file as an attachment
echo "report file attached. " | mutt -s "result data " -a to_save.csv -- [email protected]

// mysqldump command that will generate the required insert statements to be used in redshift
mysqldump db_name tbl_name --where='1=1 limit 10' --compact --no-create-info --skip-quote-names > to_psql.txt
_____
Amazon Redshift data types differ from MySQL's. For example, text literals can be stored only as VARCHAR, up to 65535 bytes.
http://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html
Here is a script that will do this conversion automatically:
https://gist.github.com/shantanuo/5115366
_____
If the postgresql client is installed, we can connect to redshift using something like this...
# PGPASSWORD=Fly8946392085 psql -U fsb_user_85_22719249 -h flydata-sandbox-cluster.clroanynhqjo.us-east-1.redshift.amazonaws.com -p 5439 -d flydatasandboxdb
Welcome to psql 8.1.23 (server 8.0.2), the PostgreSQL interactive terminal.
_____
## script that will display 10 rows from each table
#!/bin/sh
# -t (tuples only) keeps psql headers and row-count footers out of the table list
echo "select name from stv_tbl_perm where db_id = 100546 group by name ;" | psql -t -hkalc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb > /root/psql.txt 2>> /root/psql_err.txt
for tbl_name in `cat /root/psql.txt`
do
echo "$tbl_name" >> /root/psql_limit.txt 2>> /root/psql_limit_err.txt
echo "select * from $tbl_name limit 10 ; " | psql -hkalc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb >> /root/psql_limit.txt 2>> /root/psql_limit_err.txt
echo "====================================="
done
_____
The following statement queries the STV_LOCKS table to view all locks in effect for current transactions:
select table_id, last_update, lock_owner, lock_owner_pid, lock_status
from stv_locks;

 table_id |        last_update         | lock_owner | lock_owner_pid |     lock_status
----------+----------------------------+------------+----------------+---------------------
   100295 | 2014-01-06 23:50:56.290917 |      95402 |           7723 | Holding write lock
   100304 | 2014-01-06 23:50:57.408457 |      95402 |           7723 | Holding write lock
   100304 | 2014-01-06 23:50:57.409986 |      95402 |           7723 | Holding insert lock
(3 rows)

The following statement terminates the session holding the locks:
select pg_terminate_backend(7723);

-- Credit: http://oksoft.blogspot.com/2013/03/redshift-tips.html