Created
August 13, 2013 00:03
-
-
Save felipedeboni/6216560 to your computer and use it in GitHub Desktop.
Import data from GPBE tables (format as .csv, with ; as delimiter and quotes with ")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* =============================================================================================================================== | |
This will get data from GBPE (previously) converted to CSV and: | |
* Create temporary tables | |
* Import data from: LOG_LOGRADOURO.csv, LOG_LOCALIDADE.csv, LOG_BAIRRO.csv into the temp tables | |
* Decrypt CEP column and create a new table with the following fields: cep, logradouro, bairro, cidade, uf, uf_sigla | |
* Dropping the temp shit | |
* Update PG Query Builder with an EXPLAIN ANALYZE | |
This script can be used to update an existing table (with same struct of course) | |
=============================================================================================================================== */ | |
CREATE OR REPLACE FUNCTION fnc_create_cep_table( pTblName VARCHAR, pFilesPath VARCHAR ) | |
RETURNS NUMERIC AS $body$ | |
DECLARE | |
vTmpTableName VARCHAR := 'tmp_' || pTblName; | |
vMaxLengthState VARCHAR := 255; | |
vMaxLengthCity VARCHAR := 255; | |
vMaxLengthStreet VARCHAR := 255; | |
vDecriptFunction TEXT; | |
vCopy TEXT; | |
vCreateTable TEXT; | |
BEGIN | |
/* ======================================================================================================================= | |
TABLES MATCHING GPBE SCHEMA (don't bother about everything is 255, we will drop it later) | |
======================================================================================================================= */ | |
-- creating tables | |
CREATE TEMPORARY TABLE tmp_log_logradouro ( | |
log_nu_sequencial INTEGER, | |
ufe_sg VARCHAR(255), | |
loc_nu_sequencial INTEGER, | |
log_no VARCHAR(255), | |
log_nome VARCHAR(255), | |
bai_nu_sequencial_ini INTEGER, | |
bai_nu_sec INTEGER, | |
cep VARCHAR(255), | |
log_complemento VARCHAR(255), | |
log_tipo_logradouro VARCHAR(255), | |
status VARCHAR(255), | |
log_no_sem_acento VARCHAR(255), | |
log_key_dne VARCHAR(255), | |
ind_uop VARCHAR(255), | |
ind_gru VARCHAR(255), | |
temp VARCHAR(255) | |
); | |
CREATE TEMPORARY TABLE tmp_log_localidade ( | |
loc_nu_sequencial INTEGER, | |
loc_nosub VARCHAR(255), | |
loc_no VARCHAR(255), | |
cep VARCHAR(255), | |
ufe_sg VARCHAR(255), | |
loc_in_situacao INTEGER, | |
loc_in_tipo_localidade VARCHAR(255), | |
loc_nu_sequencial_sub INTEGER, | |
loc_key_dne VARCHAR(255), | |
temp VARCHAR(255) | |
); | |
CREATE TEMPORARY TABLE tmp_log_bairro ( | |
bai_nu_sequencial INTEGER, | |
ufe_sg VARCHAR(255), | |
loc_nu_sequencial INTEGER, | |
bai_no VARCHAR(255), | |
bai_no_abrev VARCHAR(255) | |
); | |
-- creating indexes | |
CREATE INDEX log_loc_nu_seq_idx ON tmp_log_logradouro(loc_nu_sequencial); | |
CREATE INDEX bai_nu_seq_ini_idx ON tmp_log_logradouro(bai_nu_sequencial_ini); | |
CREATE INDEX loc_nu_seq_idx ON tmp_log_localidade(loc_nu_sequencial); | |
CREATE INDEX bai_nu_seq_idx ON tmp_log_bairro(bai_nu_sequencial); | |
/* ======================================================================================================================= | |
IMPORT CSV | |
======================================================================================================================= */ | |
vCopy := 'COPY tmp_log_logradouro FROM ''' || pFilesPath || 'LOG_LOGRADOURO.csv'' DELIMITER '';'' QUOTE ''"'' CSV'; | |
EXECUTE vCopy; | |
vCopy := 'COPY tmp_log_localidade FROM ''' || pFilesPath || 'LOG_LOCALIDADE.csv'' DELIMITER '';'' QUOTE ''"'' CSV'; | |
EXECUTE vCopy; | |
vCopy := 'COPY tmp_log_bairro FROM ''' || pFilesPath || 'LOG_BAIRRO.csv'' DELIMITER '';'' QUOTE ''"'' CSV'; | |
EXECUTE vCopy; | |
/* ======================================================================================================================= | |
CREATE A DECRYPT FUNCTION | |
======================================================================================================================= */ | |
vDecriptFunction := 'CREATE OR REPLACE FUNCTION fnc_decrypt_cep(key_dne VARCHAR) | |
RETURNS varchar AS $decrypt$ | |
DECLARE | |
cep VARCHAR := ''''; | |
BEGIN | |
FOR i IN 1..length($1) BY 2 LOOP | |
CASE substr( $1, i, 2) | |
WHEN ''X8'',''CN'',''8X'',''NC'' THEN | |
cep := cep || 1; | |
WHEN ''Z0'',''EP'',''0Z'',''PE'' THEN | |
cep := cep || 2; | |
WHEN ''1B'',''GR'',''B1'',''RG'' THEN | |
cep := cep || 3; | |
WHEN ''3D'',''ID'',''D3'',''DI'' THEN | |
cep := cep || 4; | |
WHEN ''4C'',''JS'',''C4'',''SJ'' THEN | |
cep := cep || 5; | |
WHEN ''2A'',''HQ'',''A2'',''QH'' THEN | |
cep := cep || 6; | |
WHEN ''09'',''FO'',''90'',''OF'' THEN | |
cep := cep || 7; | |
WHEN ''Y7'',''DM'',''7Y'',''MD'' THEN | |
cep := cep || 8; | |
WHEN ''5V'',''BK'',''V5'',''KB'' THEN | |
cep := cep || 9; | |
WHEN ''UG'',''AL'',''GU'',''LA'' THEN | |
cep := cep || 0; | |
ELSE | |
RETURN NULL; | |
END CASE; | |
END LOOP; | |
RETURN cep; | |
END; | |
$decrypt$ LANGUAGE plpgsql'; | |
EXECUTE vDecriptFunction; | |
/* ======================================================================================================================= | |
CREATE THE REAL TABLE ONLY WITH IMPORTANT DATA | |
======================================================================================================================= */ | |
-- maximum length for state/province column | |
SELECT MAX(LENGTH(bai_no)) INTO vMaxLengthState FROM tmp_log_bairro; | |
-- maximum length for city column | |
SELECT MAX(LENGTH(loc_no)) INTO vMaxLengthCity FROM tmp_log_localidade; | |
-- maximum length for street address column | |
SELECT MAX(LENGTH(log_nome)) INTO vMaxLengthStreet FROM tmp_log_logradouro; | |
-- create and insert data | |
vCreateTable := ' | |
CREATE TABLE ' || vTmpTableName || ' AS | |
(SELECT | |
fnc_decrypt_cep(log_logradouro.log_key_dne)::integer as "cep", | |
log_logradouro.log_nome::varchar(' || vMaxLengthStreet || ') as "logradouro", | |
log_bairro.bai_no::varchar(' || vMaxLengthState || ') as "bairro", | |
log_localidade.loc_no::varchar(' || vMaxLengthCity || ') as "cidade", | |
(CASE | |
WHEN log_logradouro.ufe_sg = ''AC'' THEN ''Acre'' | |
WHEN log_logradouro.ufe_sg = ''AL'' THEN ''Alagoas'' | |
WHEN log_logradouro.ufe_sg = ''AM'' THEN ''Amazonas'' | |
WHEN log_logradouro.ufe_sg = ''AP'' THEN ''Amapá'' | |
WHEN log_logradouro.ufe_sg = ''BA'' THEN ''Bahia'' | |
WHEN log_logradouro.ufe_sg = ''CE'' THEN ''Ceará'' | |
WHEN log_logradouro.ufe_sg = ''DF'' THEN ''Distrito Federal'' | |
WHEN log_logradouro.ufe_sg = ''ES'' THEN ''Espírito Santo'' | |
WHEN log_logradouro.ufe_sg = ''GO'' THEN ''Goiás'' | |
WHEN log_logradouro.ufe_sg = ''MA'' THEN ''Maranhão'' | |
WHEN log_logradouro.ufe_sg = ''MG'' THEN ''Minas Gerais'' | |
WHEN log_logradouro.ufe_sg = ''MS'' THEN ''Mato Grosso do Sul'' | |
WHEN log_logradouro.ufe_sg = ''MT'' THEN ''Mato Grosso'' | |
WHEN log_logradouro.ufe_sg = ''PA'' THEN ''Pará'' | |
WHEN log_logradouro.ufe_sg = ''PB'' THEN ''Paraíba'' | |
WHEN log_logradouro.ufe_sg = ''PE'' THEN ''Pernambuco'' | |
WHEN log_logradouro.ufe_sg = ''PI'' THEN ''Piauí'' | |
WHEN log_logradouro.ufe_sg = ''PR'' THEN ''Paraná'' | |
WHEN log_logradouro.ufe_sg = ''RJ'' THEN ''Rio de Janeiro'' | |
WHEN log_logradouro.ufe_sg = ''RN'' THEN ''Rio Grande do Norte'' | |
WHEN log_logradouro.ufe_sg = ''RO'' THEN ''Rondônia'' | |
WHEN log_logradouro.ufe_sg = ''RR'' THEN ''Roraima'' | |
WHEN log_logradouro.ufe_sg = ''RS'' THEN ''Rio Grande do Sul'' | |
WHEN log_logradouro.ufe_sg = ''SC'' THEN ''Santa Catarina'' | |
WHEN log_logradouro.ufe_sg = ''SE'' THEN ''Sergipe'' | |
WHEN log_logradouro.ufe_sg = ''SP'' THEN ''São Paulo'' | |
WHEN log_logradouro.ufe_sg = ''TO'' THEN ''Tocantins'' | |
END)::varchar(20) as "uf", | |
log_logradouro.ufe_sg::varchar(2) as "uf_sigla" | |
FROM | |
tmp_log_logradouro as log_logradouro, | |
tmp_log_bairro as log_bairro, | |
tmp_log_localidade as log_localidade | |
WHERE | |
log_logradouro.bai_nu_sequencial_ini = log_bairro.bai_nu_sequencial | |
AND log_logradouro.loc_nu_sequencial = log_localidade.loc_nu_sequencial | |
); | |
'; | |
EXECUTE vCreateTable; | |
/* ======================================================================================================================= | |
DROP SHITS (use if exists for safety) | |
======================================================================================================================= */ | |
DROP TABLE IF EXISTS tmp_log_logradouro; | |
DROP TABLE IF EXISTS tmp_log_localidade; | |
DROP TABLE IF EXISTS tmp_log_bairro; | |
DROP FUNCTION IF EXISTS fnc_decrypt_cep(key_dne VARCHAR); | |
/* ======================================================================================================================= | |
UPDATING OLD ONE IF EXISTS | |
======================================================================================================================= */ | |
EXECUTE 'DROP TABLE IF EXISTS ' || pTblName; | |
EXECUTE 'ALTER TABLE ' || vTmpTableName || ' RENAME TO ' || pTblName; | |
EXECUTE 'ALTER TABLE ' || pTblName || ' ADD PRIMARY KEY (cep);'; | |
/* ======================================================================================================================= | |
TELL PG QUERY BUILDER HOW DATA IS NOW | |
======================================================================================================================= */ | |
EXECUTE 'EXPLAIN ANALYZE SELECT * FROM ' || pTblName; | |
/* ======================================================================================================================= | |
RETURNS | |
======================================================================================================================= */ | |
RETURN 1; | |
END; | |
$body$ LANGUAGE 'plpgsql'; | |
-- example usage to import the data | |
SELECT fnc_create_cep_table( 'cep', 'D:\\' ); | |
-- to query | |
SELECT * FROM cep WHERE cep = {CEP as INT}; | |
-- note, do you need to add your indexes inside the function, because it drops the table to update. maybe you want to change columns name. I hate columns and tables in pt-br. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment