This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries | |
import pandas as pd | |
import numpy as np | |
import socket | |
import json | |
#Connect Python to your GCP project | |
from google.colab import auth | |
auth.authenticate_user() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GCS_Bucket_Name = 'apache_seo_logs' #@param {type:"string"} | |
GCS_File_Path = 'Loggy*' #@param {type:"string"} | |
GCS_Full_Path = "gs://" + GCS_Bucket_Name + "/" + GCS_File_Path | |
print("the path to your GCS folder is: " + GCS_Full_Path) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import BigQuery from google.cloud | |
from google.cloud import bigquery | |
## Inputs from forms ## | |
GCP_PROJECT_ID = 'your_project_ID_here' #@param {type:"string"} | |
GCS_Bucket_Location = 'europe-west2' #@param ["asia-east1", "asia-east2", "asia-northeast1", "asia-northeast2", "asia-northeast3", "asia-south1", "asia-southeast1", "australia-southeast1", "europe-north1", "europe-west1", "europe-west2", "europe-west3", "europe-west4", "europe-west6", "northamerica-northeast1", "southamerica-east1", "us-central1", "us-east1", "us-east4", "us-west1", "us-west2"] | |
# Create a client instance for your project |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Form elements ## | |
table_name = 'log_sample' #@param {type:"string"} | |
SQL_1st_Filter ='ch.loggy' #@param {type:"string"} | |
SQL_Useragent_Filter = "Googlebot/2.1" #@param ["Googlebot/2.1", "YandexBot", "BingBot", "DuckDuckBot", "Baiduspider"] {allow-input: true} | |
# Temporary Bigquery table name | |
table_id = table_name | |
## Full SQL query ## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note that we convert a list of lists, not just a list. | |
df = pd.DataFrame.from_records(log_sample) | |
df.info() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df.rename(columns={0:'header'}, inplace=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split IP addresses | |
df[['IP','header']] = df["header"].str.split(" - - \[", 1, expand=True) | |
# Remove slash | |
df['IP'] = df['IP'].str.replace("Row\(\('","",n=1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split Date & Time | |
df[['Date_Time', 'header']] = df['header'].str.split("\] ", 1, | |
expand=True) | |
df['Date_Time'] = df['Date_Time'].str.replace(':.*', '') | |
# Convert the Date_Time column from string to datetime format | |
df['Date'] = pd.to_datetime(df['Date_Time'], errors='coerce', | |
infer_datetime_format=True) | |
# Remove Time column, for clarity's sake |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df[['Domain','header']] = df["header"].str.split(" \"", 1, expand=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df[['Method','header']] = df["header"].str.split(" ", 1, expand=True) |
OlderNewer