🎈

Charly Wargnier CharlyWargnier

🎈

DevRel at 🎈@streamlit ❄️@snowflakedb | Open Sourcerer | Joining the dots between SEO & Data Science!

CharlyWargnier / seoLogs_import&Connect.py

Last active February 8, 2020 16:36

	# Import libraries
	import pandas as pd
	import numpy as np
	import socket
	import json

	#Connect Python to your GCP project
	from google.colab import auth
	auth.authenticate_user()

CharlyWargnier / seoLogs_bucketName&FilePath.py

Last active February 13, 2020 10:01

	GCS_Bucket_Name = 'apache_seo_logs' #@param {type:"string"}

	GCS_File_Path = 'Loggy*' #@param {type:"string"}

	GCS_Full_Path = "gs://" + GCS_Bucket_Name + "/" + GCS_File_Path

	print("the path to your GCS folder is: " + GCS_Full_Path)

CharlyWargnier / seoLogs_gcpProjectEnabl.py

Created February 8, 2020 16:34

	# Import BigQuery from google.cloud
	from google.cloud import bigquery

	## Inputs from forms ##

	GCP_PROJECT_ID = 'your_project_ID_here' #@param {type:"string"}

	GCS_Bucket_Location = 'europe-west2' #@param ["asia-east1", "asia-east2", "asia-northeast1", "asia-northeast2", "asia-northeast3", "asia-south1", "asia-southeast1", "australia-southeast1", "europe-north1", "europe-west1", "europe-west2", "europe-west3", "europe-west4", "europe-west6", "northamerica-northeast1", "southamerica-east1", "us-central1", "us-east1", "us-east4", "us-west1", "us-west2"]

	# Create a client instance for your project

CharlyWargnier / seoLogs_sqlQuerying.py

Created February 8, 2020 16:37

	## Form elements ##

	table_name = 'log_sample' #@param {type:"string"}
	SQL_1st_Filter ='ch.loggy' #@param {type:"string"}
	SQL_Useragent_Filter = "Googlebot/2.1" #@param ["Googlebot/2.1", "YandexBot", "BingBot", "DuckDuckBot", "Baiduspider"] {allow-input: true}

	# Temporary Bigquery table name
	table_id = table_name

	## Full SQL query ##

CharlyWargnier / seoLogs_convertToDF.py

Created February 8, 2020 16:39

	# Note that we convert a list of lists, not just a list.
	df = pd.DataFrame.from_records(log_sample)
	df.info()

CharlyWargnier / seoLogs_renameHeader.py

Created February 8, 2020 16:43

df.rename(columns={0:'header'}, inplace=True)

CharlyWargnier / seoLogs_splitIP.py

Created February 8, 2020 16:45

	# Split IP addresses
	df[['IP','header']] = df["header"].str.split(" - - \[", 1, expand=True)
	# Remove slash
	df['IP'] = df['IP'].str.replace("Row\(\('","",n=1)

CharlyWargnier / seoLogs_splitDate.py

Created February 8, 2020 17:07

	# Split Date & Time
	df[['Date_Time', 'header']] = df['header'].str.split("\] ", 1,
	expand=True)
	df['Date_Time'] = df['Date_Time'].str.replace(':.*', '')

	# Convert the Date_Time column from string to datetime format
	df['Date'] = pd.to_datetime(df['Date_Time'], errors='coerce',
	infer_datetime_format=True)

	# Remove Time column, for clarity's sake

CharlyWargnier / seoLogs_splitDomains.py

Last active February 8, 2020 17:09

df[['Domain','header']] = df["header"].str.split(" \"", 1, expand=True)

CharlyWargnier / seoLogs_splitMethods.py

Created February 8, 2020 17:10

df[['Method','header']] = df["header"].str.split(" ", 1, expand=True)