Redshift Tips
# List all tables:
select db_id, id, name, sum(rows) as mysum
from stv_tbl_perm where db_id = 100546
group by db_id, id, name order by mysum desc;

# list all running processes:
select pid, query from stv_recents where status = 'Running';

# describe table
select * from PG_TABLE_DEF where tablename='audit_trail';
select * from pg_tables where schemaname = 'public';

# Disk space used:
select sum(used-tossed) as used, sum(capacity) as capacity from stv_partitions;

# Query log
select query, starttime, substring from svl_qlog where substring like '%tbl_name%'
order by starttime desc limit 50;

# command history
select * from stl_ddltext where text like '%ox_data_summary_hourly_depot%' limit 10;

# last load errors
select starttime, filename, err_reason from stl_load_errors order by starttime desc limit 100;
select filename, count(*) as cnt from stl_load_errors group by filename;

# create table from another table
select * into newevent from event;
# Check how columns are compressed
analyze compression tbl_name;

# ANALYZE and VACUUM
If you insert, update, or delete a significant number of rows in a table, run the ANALYZE and VACUUM commands against the table.
The "analyze compression tbl_name" command produces a report with the suggested column encodings.
# To find and diagnose load errors for table 'event'
create view loadview as
(select distinct tbl, trim(name) as table_name, query, starttime,
trim(filename) as input, line_number, field, err_code,
trim(err_reason) as reason
from stl_load_errors sl, stv_tbl_perm sp
where sl.tbl = sp.id);
select * from loadview where table_name='event';

# Query to find blocks used
select stv_tbl_perm.name, count(*)
from stv_blocklist, stv_tbl_perm
where stv_blocklist.tbl = stv_tbl_perm.id
and stv_blocklist.slice = stv_tbl_perm.slice
group by stv_tbl_perm.name
order by stv_tbl_perm.name;
Load tips (a combined COPY sketch follows this list):
# While loading data you can specify "empty as null", "blanks as null", "max error 5", "ignore blank lines", "remove quotes", and gzip compression. Use the keywords: EMPTYASNULL BLANKSASNULL MAXERROR 5 IGNOREBLANKLINES REMOVEQUOTES GZIP
# use NULL AS '\000' to fix the import from specific files
# use BLANKSASNULL in the original COPY statement so that no empty strings are loaded into VARCHAR fields which might ultimately be converted to numeric fields.
# Use the NOLOAD keyword with a COPY command to validate the data in the input files before actually loading the data.
# use COMPUPDATE to enable automatic compression
# FILLRECORD to fill missing trailing columns in a record with blanks or NULLs
# TRIMBLANKS removes trailing whitespace characters from VARCHAR strings.
# ESCAPE: the backslash character (\) in input data is treated as an escape character (useful for delimiters and embedded newlines).
# ROUNDEC: a value of 20.259 loaded into a DECIMAL(8,2) column is rounded to 20.26; without it the value is truncated to 20.25.
# TRUNCATECOLUMNS truncates data in columns so it fits the column's defined width.
# IGNOREHEADER 1 to skip the first (header) row
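A sketch that combines several of these options in one COPY; the table name, S3 path, and credentials below are placeholders, not values from the original tips:
copy tbl_name
from 's3://my-bucket/path/data.gz'
credentials 'aws_access_key_id=<access-key-id>;aws_secret_access_key=<secret-access-key>'
gzip
ignoreheader 1
emptyasnull
blanksasnull
maxerror 5
ignoreblanklines
removequotes
trimblanks
truncatecolumns;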
_____
If you are using JDBC, try adding the keepalive option to your connection string, e.g.:
jdbc:postgresql://instance.amazonaws.com:8192/database?tcpkeepalive=true
You can have AUTOCOMMIT set in your Workbench client.
_____
To avoid timeout errors while using a workbench client on Windows, use the following registry settings:
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\KeepAliveTime 30000
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\KeepAliveInterval 1000
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\TcpMaxDataRetransmission 10
_____
# Consider using DISTKEY and SORTKEY - a table can have multiple sort key columns but only one distribution key (see the sketch below).
# wlm_query_slot_count - increasing this sets aside more memory for the query, which may avoid operations spilling to disk
# the isolation level for Redshift is SERIALIZABLE
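A minimal sketch of the two points above; the table and column names are hypothetical:
-- one DISTKEY column, a compound SORTKEY over two columns
create table events_by_user (
  user_id    bigint,
  event_ts   timestamp,
  event_type varchar(32)
)
distkey(user_id)
sortkey(event_ts, user_id);

-- temporarily claim more memory slots for a heavy query in this session
set wlm_query_slot_count to 3;
-- ... run the heavy query ...
set wlm_query_slot_count to 1;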
_____
// There is no equivalent of "show create table tbl_name".
// Select from the PG_TABLE_DEF table to gather the necessary schema information, for example:
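A sketch of such a query; tbl_name is a placeholder:
-- note: pg_table_def only lists tables in the current search_path
select "column", type, encoding, distkey, sortkey
from pg_table_def
where tablename = 'tbl_name';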
// convert to and from unixtime
select extract (epoch from timestamp '2011-08-08 11:11:58');
select timestamp 'epoch' + starttime * interval '1 second' as starting from tbl_name;

// Update a joined table:
update abcd
set ser_area_code=abcd_update.ser_area_code,
preferences=abcd_update.preferences, opstype=abcd_update.opstype,
phone_type=abcd_update.phone_type
from abcd_update
where abcd.phone_number = abcd_update.phone_number;
http://docs.aws.amazon.com/redshift/latest/dg/t_updating-inserting-using-staging-tables-.html#concept_upsert
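The linked page describes upserts via a staging table; a minimal sketch of that pattern, assuming a hypothetical staging table abcd_staging:
begin transaction;

-- delete target rows that will be replaced
delete from abcd
using abcd_staging
where abcd.phone_number = abcd_staging.phone_number;

-- insert the new and updated rows from the staging table
insert into abcd
select * from abcd_staging;

end transaction;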
_____
// install postgresql
yum install postgresql postgresql-server
chkconfig postgresql on

// Create a file where the Redshift password will be stored.
vi ~/.pgpass
c.us-east-1.redshift.amazonaws.com:5439:mydb:root:Passwd
chmod 0600 ~/.pgpass

// load data to redshift (query output is saved to to_save.csv)
cat to_psql.txt | psql -hc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb > to_save.csv

// send the file as an attachment
echo "report file attached. " | mutt -s "result data " -a to_save.csv -- [email protected]

// mysqldump command that will generate the required insert statements to be used in redshift
mysqldump db_name tbl_name --where='1=1 limit 10' --compact --no-create-info --skip-quote-names > to_psql.txt
_____
Amazon Redshift data types differ from MySQL's. For example, text literals can be stored only as VARCHAR, up to 65535 bytes.
http://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html
Here is a script that will do this conversion automatically:
https://gist.github.com/shantanuo/5115366
_____
If the postgresql client is installed, we can connect to redshift using something like this...
# PGPASSWORD=Fly8946392085 psql -U fsb_user_85_22719249 -h flydata-sandbox-cluster.clroanynhqjo.us-east-1.redshift.amazonaws.com -p 5439 -d flydatasandboxdb
Welcome to psql 8.1.23 (server 8.0.2), the PostgreSQL interactive terminal.
_____
## script that will display 10 rows from each table
#!/bin/sh
# -t (tuples only) keeps psql headers and row-count footers out of the table list
echo "select name from stv_tbl_perm where db_id = 100546 group by name ;" | psql -t -hkalc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb > /root/psql.txt 2>> /root/psql_err.txt
for tbl_name in `cat /root/psql.txt`
do
echo "$tbl_name" >> /root/psql_limit.txt 2>> /root/psql_limit_err.txt
echo "select * from $tbl_name limit 10 ; " | psql -hkalc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb >> /root/psql_limit.txt 2>> /root/psql_limit_err.txt
echo "====================================="
done
_____
The following statement queries the STV_LOCKS table to view all locks in effect for current transactions:
select table_id, last_update, lock_owner, lock_owner_pid, lock_status
from stv_locks;

 table_id |        last_update         | lock_owner | lock_owner_pid |     lock_status
----------+----------------------------+------------+----------------+---------------------
   100295 | 2014-01-06 23:50:56.290917 |      95402 |           7723 | Holding write lock
   100304 | 2014-01-06 23:50:57.408457 |      95402 |           7723 | Holding write lock
   100304 | 2014-01-06 23:50:57.409986 |      95402 |           7723 | Holding insert lock
(3 rows)

The following statement terminates the session holding the locks:
select pg_terminate_backend(7723);

-- Credit: http://oksoft.blogspot.com/2013/03/redshift-tips.html