Created
October 28, 2022 12:26
-
-
Save mneedham/6701e88f2aa51c0836ee4883492d397a to your computer and use it in GitHub Desktop.
Querying ATP matches using DuckDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Fails because of weird date | |
CREATE TABLE players AS | |
select * | |
from 'atp_players.csv'; | |
-- all varchar | |
CREATE TABLE players1 AS | |
select * | |
from read_csv_auto('atp_players.csv', ALL_VARCHAR=TRUE); | |
DESCRIBE players1; | |
SELECT count(*) FROM players1; | |
-- sample all rows | |
CREATE TABLE players2 AS | |
select * | |
from read_csv_auto('atp_players.csv', SAMPLE_SIZE=-1); | |
DESCRIBE players2; | |
SELECT count(*) FROM players2; | |
-- manual schema | |
CREATE TABLE players3 AS | |
SELECT * FROM read_csv('atp_players.csv', | |
header=True, | |
columns={'player_id': 'INTEGER', 'name_first': 'VARCHAR', 'name_last': 'VARCHAR', 'hand': 'VARCHAR', 'dob': 'VARCHAR', 'ioc': 'VARCHAR', 'height': 'INTEGER', 'wikidata_id': 'VARCHAR' | |
}); | |
DESCRIBE players3; | |
SELECT count(*) FROM players3; | |
-- ignore errors | |
CREATE TABLE players4 AS | |
select * | |
from read_csv_auto('atp_players.csv', IGNORE_ERRORS=TRUE); | |
DESCRIBE players4; | |
SELECT count(*) FROM players4; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment