Skip to content

Instantly share code, notes, and snippets.

View DerekHawkins's full-sized avatar

DerekHawkins

View GitHub Profile
import socket

# Load the previously parsed server log and enrich it with reverse-DNS
# hostnames for each requesting IP (used to spot-check claimed crawlers).
log_file = pd.read_pickle('log.pkl')

def _reverse_dns(ip):
    """Return the primary hostname for *ip*, or 'n/a' when the lookup fails.

    socket.gethostbyaddr raises socket.herror/gaierror for unresolvable
    addresses; without this guard a single bad IP aborts the whole apply().
    """
    try:
        return socket.gethostbyaddr(ip)[0]
    except OSError:  # herror/gaierror are OSError subclasses
        return 'n/a'

# Store the hostname in a NEW column rather than rebinding `log_file`:
# the original assigned the resulting Series to `log_file` itself, which
# destroyed every other column that later code (e.g. log_file.user_agent)
# still needs.
log_file['hostname'] = log_file.ip_address.apply(_reverse_dns)
# Alternative: classify each logged user-agent string with the
# `crawlerdetect` package instead of reverse-DNS verification.
from crawlerdetect import CrawlerDetect
crawler_detect = CrawlerDetect()
# Accumulator for per-request crawler classifications.
validate = []
# NOTE(review): this fragment is truncated by the paste — the dict literal
# below is never closed and the loop body's indentation was lost; restore
# both (and presumably `validate.append(data)`) before running. TODO confirm
# against the original gist.
for crawl in log_file.user_agent:
data = {'valid':crawler_detect.isCrawler(crawl),
# Approach takes into consideration multiple log files stored locally with a .gz format
log_file_slugs = os.listdir(r'C:\Users\Derek.Hawkins\Log File Analysis Folder')
# Keep only the gzip-compressed archives from the directory listing.
log_file_slugs = [x for x in log_file_slugs if '.gz' in x]
# Accumulator for one parsed record per log line across all archives.
main = []
# NOTE(review): fragment truncated / indentation lost — the `with`, `for`
# and `try` bodies are flattened and the final `except` has no body.
# Also the listdir path ("...Analysis Folder") differs from the gzip.open
# path ("...Analysis") — confirm which directory is correct.
for i in log_file_slugs:
with gzip.open(r'C:\Users\Derek.Hawkins\Log File Analysis\{}'.format(i),'r') as fin:
for line in tqdm(fin):
try:
main.append(log_parse(line.decode()))
except AttributeError as e:
# Parse one raw access-log line into its component fields via regexes.
# NOTE(review): truncated fragment — indentation was lost in the paste and
# the function is cut off mid-body (the 'Server Response' try has no
# except clause and no return statement is visible here); recover the rest
# from the original gist before use.
def log_parse(data):
# Response Size
try:
# NOTE(review): r'[0-9] (\d{1,4})' captures at most a 4-digit size and
# keys off the digit preceding it (the status code's last digit) —
# responses >9999 bytes would be truncated; confirm against the log format.
size = re.search(r'[0-9] (\d{1,4})', data).group(1)
except AttributeError as e:
# re.search returned None (no match) — record the size as unavailable.
size = 'n/a'
# Server Response
try:
# Grab the protocol/status text up to the closing quote, quote stripped.
server_response = re.search(r'http.*?[\"]', data).group(0).replace('"', '')
# Prompt for a seed keyword, fetch the SEMrush response for it, and build
# the keyword layers: first-pass result frame -> secondary keyword list ->
# third layer merged with volume/CPC/competition metrics.
term = input('What keyword would you like to explore?')

request_url = build_seo_urls(phrase=term)
raw_payload = requests.get(request_url).content
df1 = pd.DataFrame(parse_response(raw_payload))

try:
    keyword_list = secondary_layer(crawl_urls=df1['Url'])
except KeyError as e:
    # No 'Url' column means SEMrush returned nothing usable for this term.
    raise Exception("The keyword you have inputted is either not in SEMrush's database or your input was incorrectly submitted. Please rerun and try again.")

third_layer = third_layer_setup(second_layer_kw=keyword_list)
metric_columns = ['Keyword', 'Search Volume', 'CPC', 'Competition']
# Left-join so every third-layer keyword is kept even without metrics.
third_layer = third_layer.merge(keyword_list[metric_columns], on="Keyword", how='left')
### Import Libraries ###
import urllib
import requests
import pandas as pd
from tqdm.notebook import tqdm
from urllib.parse import urlparse
### Load API Key
# NOTE(review): placeholder — fill in your API key before running; an empty
# key will make the downstream API requests fail. Presumably the SEMrush
# key used by the keyword-exploration snippet; confirm which service.
api_key = ''
@DerekHawkins
DerekHawkins / htz_search_comparison.py
Created June 10, 2020 18:19
Example of how to take search interest around multiple keywords and align it to stock activity
import pandas as pd
from time import sleep
from random import randint
from tqdm import notebook as tqdm
### Import Modules and Set Perimeters for Pytrends ###
from pytrends.request import TrendReq
### For Ticker Information
import yfinance as yf
@DerekHawkins
DerekHawkins / core_web_vitals_analysis_example.py
Created June 1, 2020 13:47
core_web_vitals_analysis_example.py
import pandas as pd
import requests
import urllib
import time
import re
# Data Visualization
from plotly import tools
import chart_studio
# for Plotly API Credentials, create a json file with credentials
import json

# Read the credentials file and decode its JSON payload into a dict.
with open('path_to_credentials.json') as cred_file:
    creds = json.loads(cred_file.read())
# Essentials
import math
# Sentinel NaN for missing numeric values (NaN compares unequal to itself).
x = math.nan
import pandas as pd
import numpy as np
# Collect the three headline Lighthouse timings for the mobile PageSpeed
# run — first contentful paint, time-to-interactive, and speed index —
# as a single tuple of display strings.
data_list_mobile = []
mobile_audits = response["lighthouseResult"]['audits']
firstContent_mobile = str(mobile_audits['first-contentful-paint']['displayValue'])
timetoInteractive_mobile = str(mobile_audits['interactive']['displayValue'])
speedData_mobile = str(mobile_audits['speed-index']['displayValue'])
data_list_mobile.append((firstContent_mobile, timetoInteractive_mobile, speedData_mobile))