-- Hive session settings, one `set key=value;` per line.
-- NOTE(review): the descriptions below follow the public Hive configuration
-- reference; confirm them against the Hive / EMR version actually in use.

-- Disabled: would cap the size of an input split at 128,000,000 bytes.
-- set mapred.max.split.size=128000000;
-- Use the combining input format, which can pack several small files into a
-- single split instead of one split per file.
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
-- Same input format for the Tez execution engine, which has its own property.
set hive.tez.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
-- Size threshold (in bytes, here 30,000,000) under which a table is treated as
-- "small" when Hive considers converting a common join into a map join.
set hive.mapjoin.smalltable.filesize=30000000;
-- Disabled. NOTE(review): this does not appear to be a stock Apache Hive
-- property; presumably a vendor-specific S3 optimization -- verify before
-- re-enabling.
-- set hive.optimize.s3.query=true;
-- Allow every partition column of an insert to be resolved dynamically
-- (strict mode requires at least one static partition).
set hive.exec.dynamic.partition.mode=nonstrict;
-- Turn off the optimization that sorts rows by the dynamic partition columns
-- before writing.
set hive.optimize.sort.dynamic.partition=false;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with control_campaigns as ( | |
select | |
'Control' as measurement_group | |
, campaign_name as base_campaign | |
, date | |
, cost | |
, conversions | |
, conversion_value | |
from | |
fivetran.adwords.google_campaign_performance |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def change_draft_target_cpa(original_campaign_tcpa, original_campaign_name, draft_campaign_id): | |
# Create Portfolio for new campaign | |
draft_target_cpa = round(original_campaign_tcpa * (1-0.30), 2) | |
draft_portfolio_name = '{0}-Test_XYZ_Draft_Portfolio'.format(original_campaign_name) | |
adwords_client = connect_to_adwords_api() | |
bidding_strategy_service = adwords_client.GetService('BiddingStrategyService', version='v201809') | |
portfolio_config = { | |
'name': draft_portfolio_name, | |
'biddingScheme': { | |
'xsi_type': 'TargetCpaBiddingScheme', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_draft(): | |
adwords_client = connect_to_adwords_api() | |
draft_service = adwords_client.GetService('DraftService', version='v201809') | |
draft = { | |
'baseCampaignId': original_campaign_id, | |
'draftName': {draft_name} | |
} | |
draft_operation = {'operator': 'ADD', 'operand': draft} | |
draft = draft_service.mutate([draft_operation])['value'][0] | |
draft_campaign_id = draft['draftCampaignId'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_and_assign_budget(): | |
adwords_client = connect_to_adwords_api() | |
# This bit here creates the budget | |
budget_service = adwords_client.GetService('BudgetService', version='v201809') | |
budget = { | |
'name': {an appropriate name}, # Doesn't need to be unique. Budget Id is what matters for uniqueness | |
'amount': { | |
'microAmount': int(round({budget_amount} * 1000000)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from googleads import adwords | |
from googleads import oauth2 | |
def connect_to_adwords_api(): | |
customer_id = 'xxx-xxx-xxxx' # This is client customer ID you see in the google ads UI | |
oauth2_client = oauth2.GoogleRefreshTokenClient( | |
client_id={GOOGLE_CLIENT_ID}, | |
client_secret={GOOGLE_CLIENT_SECRET}, | |
refresh_token={GOOGLE_REFRESH_TOKEN} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
raw_test_data = pd.read_csv('medium_ppc_inc_evaluate_dataset.csv') | |
# You can access the csv above in this link: https://drive.google.com/open?id=1IOuneJr-QFDYGsJPRjQ8ra-5C4VczeOW | |
raw_test_data['diff_conversions'] = raw_test_data['test_conversions'] - raw_test_data['ctl_conversions'] | |
N = len(raw_test_data) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
class Node: | |
''' | |
This class defines a node which creates a tree structure by recursively calling itself | |
whilst checking a number of ending parameters such as depth and min_leaf. It uses an exact greedy method | |
to exhaustively scan every possible split point. The algorithm is based on Friedman's 2001 Gradient Boosting Machines | |
Input |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from math import e | |
class Node: | |
''' | |
This class defines a node which creates a tree structure by recursively calling itself | |
whilst checking a number of ending parameters such as depth and min_leaf. It uses an exact greedy method | |
to exhaustively scan every possible split point. The gain metric of choice is conservation of variance. | |
This is a naive solution and does not compare to Friedman's 2001 Gradient Boosting Machines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from math import e | |
class Node: | |
''' | |
A node object that is recursively called within itself to construct a regression tree. Based on Tianqi Chen's XGBoost, | |
the internal gain used to find the optimal split value uses both the gradient and hessian. Also, a weighted quantile sketch | |
and optimal leaf values all follow Chen's description in "XGBoost: A Scalable Tree Boosting System" the only thing not |