Created
February 8, 2020 16:37
-
-
Save CharlyWargnier/18fa6553a60365bed39e4a7772e2a24c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Form elements ##
table_name = 'log_sample' #@param {type:"string"}
SQL_1st_Filter = 'ch.loggy' #@param {type:"string"}
SQL_Useragent_Filter = "Googlebot/2.1" #@param ["Googlebot/2.1", "YandexBot", "BingBot", "DuckDuckBot", "Baiduspider"] {allow-input: true}

# Temporary BigQuery table name
table_id = table_name

## Full SQL query ##
# SQL template: `{}` is the table-name placeholder. The two filter values are
# NOT spliced into the text; they are passed as named query parameters
# (@first_filter / @useragent_filter) so that quotes or braces typed into the
# form fields cannot break or alter the query.
SQLFilters = (
    'SELECT * FROM `{}` '
    'WHERE header LIKE CONCAT("%", @first_filter, "%") '
    'AND header LIKE CONCAT("%", @useragent_filter, "%")'
)

# Only the table name is substituted into the template
sql = SQLFilters.format(table_id)

# Values bound to the named parameters used in the query above
query_parameters = [
    bigquery.ScalarQueryParameter("first_filter", "STRING", SQL_1st_Filter),
    bigquery.ScalarQueryParameter("useragent_filter", "STRING", SQL_Useragent_Filter),
]

## Other config lines for BigQuery ##
# Configure the external data source (CSV file(s) at GCS_Full_Path)
external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [GCS_Full_Path]

# 1st argument is where you put the name of the column, here it is called 'header'
external_config.schema = [bigquery.SchemaField("header", "STRING")]

# Should remain at 0 for default log file upload
external_config.options.skip_leading_rows = 0

# BigQuery job configuration: expose the external source under the name
# `table_id` and attach the filter values as query parameters
job_config = bigquery.QueryJobConfig(
    table_definitions={table_id: external_config},
    query_parameters=query_parameters,
)

# Auto-detect Schemas (hashed as not currently in use)
# job_config.autodetect = True

# Make an API request
query_job = client.query(sql, job_config=job_config)

# Iterating the job waits for it to complete and collects the result rows
log_sample = list(query_job)

# Print the SQL query actually sent to BigQuery, plus the bound filter values
print('The SQL query sent to BigQuery is "' + sql + '"')
print('Query parameters: first_filter={!r}, useragent_filter={!r}'.format(
    SQL_1st_Filter, SQL_Useragent_Filter))

#log_sample
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment