Created
February 20, 2018 17:29
-
-
Save robhammond/0dcc27214acd5df273a64aa2055b1f5a to your computer and use it in GitHub Desktop.
Extract monthly data from Adobe Analytics to a Google BigQuery table. Once completed, use `cat *.json >> all.json` to combine files for import into BQ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use Data::Dumper; | |
use DateTime; | |
use FindBin qw($Bin); | |
use Getopt::Long; | |
use Modern::Perl; | |
use Mojo::JSON qw(encode_json); | |
use lib ("$Bin/../lib"); | |
use HiveAlpha::Common qw(parse_config); | |
use TMR::SiteCatalyst; | |
my $bin_dir = "$Bin/../bin"; | |
my $config = parse_config("$Bin/../news_check.conf"); | |
my ($first_date, $last_date); | |
my $date_granularity = 'month'; | |
GetOptions ( | |
"from=s" => \$first_date, | |
"to=s" => \$last_date, | |
"type=s" => \$date_granularity, | |
); | |
my $sites = { | |
mirror => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Mirror" | |
}, | |
mirrormobile => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Mirror" | |
}, | |
m4 => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Mirror" | |
}, | |
ampp3d => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Mirror" | |
}, | |
'manchester evening news' => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Manchester Evening News' | |
}, | |
'manchester evening newsmobile' => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Manchester Evening News' | |
}, | |
liverpoolecho => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Liverpool Echo' | |
}, | |
liverpoolechomobile => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Liverpool Echo' | |
}, | |
liverpoolechmobile => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Liverpool Echo' | |
}, | |
dailyrecord => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'Scotland', | |
name => 'Daily Record' | |
}, | |
dailyrecordmobile => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'Scotland', | |
name => 'Daily Record' | |
}, | |
dailyrecord3 => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'Scotland', | |
name => 'Daily Record' | |
}, | |
icwales2 => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Wales Online' | |
}, | |
icwales2mobile => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Wales Online' | |
}, | |
icWales => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Wales Online' | |
}, | |
walesonline => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Wales Online' | |
}, | |
walesonlinemobile => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Wales Online' | |
}, | |
birminghammail => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Birmingham Mail' | |
}, | |
irishmirror => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Irish Mirror" | |
}, | |
irishmirrormobile => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Irish Mirror" | |
}, | |
nechronicle => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Chronicle Live' | |
}, | |
nechroniclemobile => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Chronicle Live' | |
}, | |
hulldailymail => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Hull Daily Mail' | |
}, | |
lwhulldaily => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'LW Hull Daily Mail' | |
}, | |
'get surrey' => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Get Surrey' | |
}, | |
bristolpost => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Bristol Post' | |
}, | |
croydonadvertiser => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Community Sites', | |
name => "Croydon Advertiser" | |
}, | |
gazettelive2 => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Gazette Live' | |
}, | |
glasgowlive => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'Scotland', | |
name => 'Glasgow Live' | |
}, | |
devonlive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Devon Live' | |
}, | |
lwdevonlive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'LW Devon Live' | |
}, | |
belfastlive => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Belfast Live" | |
}, | |
coventrytelegraph => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Coventry Telegraph' | |
}, | |
dailypost => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Daily Post' | |
}, | |
nottinghampost => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Nottingham Post' | |
}, | |
leicestermercury => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Leicester Mercury' | |
}, | |
plymouthherald => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Regional Capital', | |
name => 'Plymouth Herald' | |
}, | |
derbytelegraph => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Derby Telegraph' | |
}, | |
dublinlive => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => "Dublin Live" | |
}, | |
kentlive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Kent Live' | |
}, | |
gloucestershirelive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Gloucestershire Live' | |
}, | |
stokesentinel => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Stoke Sentinel' | |
}, | |
cornwalllive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Cornwall Live' | |
}, | |
huddexaminer => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Huddersfield Examiner' | |
}, | |
somersetlive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Somerset Live' | |
}, | |
essexlive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Essex Live' | |
}, | |
'get west london' => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Get West London' | |
}, | |
cambridgenews => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Cambridge News' | |
}, | |
'get reading' => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Get Reading' | |
}, | |
footballlondon => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Other', | |
name => "Football London" | |
}, | |
grimsbytelegraph => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Grimsby Telegraph' | |
}, | |
chesterchronicle => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Other', | |
name => "Chester Chronicle" | |
}, | |
lincolnshirelive => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'County Sites', | |
name => 'Lincolnshire Live' | |
}, | |
lwherts => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Community Sites', | |
name => "Hertfordshire Mercury" | |
}, | |
bathchronicle => { | |
siteGroup => 'Regionals', | |
siteSubGroup => 'Urban Hub', | |
name => 'Bath Chronicle' | |
}, | |
rsvplive => { | |
siteGroup => 'UK Nats and Scotland', | |
siteSubGroup => 'UK Nats', | |
name => 'RSVP Live' | |
}, | |
}; | |
unless ($first_date) { | |
# $first_date = DateTime->now->subtract(days => 1)->ymd(''); | |
# $last_date = DateTime->now->subtract(days => 1)->ymd(''); | |
$first_date = DateTime->now(time_zone => 'Europe/London')->subtract(days => 1)->ymd(); | |
$last_date = DateTime->now(time_zone => 'Europe/London')->subtract(days => 1)->ymd(); | |
} | |
my $from = DateTime->new( | |
{ | |
year => substr($first_date,0,4), | |
month => substr($first_date,5,2), | |
day => substr($first_date,8,2), | |
time_zone => 'Europe/London' | |
}); | |
my $to = DateTime->new( | |
{ | |
year => substr($last_date,0,4), | |
month => substr($last_date,5,2), | |
day => substr($last_date,8,2), | |
time_zone => 'Europe/London' | |
}); | |
while ( $from <= $to ) { | |
say "running " . $from->ymd; | |
open my $fh, ">", "monthly-bq-" . $from->ymd . ".json" or die $!; | |
my $segments = []; | |
my $site_catalyst = TMR::SiteCatalyst->new( | |
username => $config->{'site_catalyst'}->{'creds'}->{'username'}, | |
shared_secret => $config->{'site_catalyst'}->{'creds'}->{'shared_secret'}, | |
api_subdomain => 'api3', # optional, api3 = london | |
); | |
my $report = $site_catalyst->instantiate_report( | |
report_suite_id => $config->{'site_catalyst'}->{'creds'}->{'report_suite_id'}, | |
); | |
$report->queue( | |
dateFrom => $from->ymd, | |
dateTo => $to->ymd, | |
metrics => [ | |
{ id => 'cm988_583593eb02f60c3f0654fbf0' }, # kpi: unique browsers | |
{ id => 'cm988_56b0dff3dd0ab12d4cbdac0c' }, # kpi: pageviews | |
{ id => 'cm988_585d18295fb23e5abcd8f6e6' }, # calc metric seo uv | |
{ id => 'cm988_5915579d97301a6927e0468c' }, # calc metric seo pv | |
{ id => 'cm988_585d187ea90b46318e4b79f4' }, # calc metric social uv | |
{ id => 'cm988_5915593c5fb23e52577c13de' }, # calc metric social pv | |
{ id => 'cm988_585d18b397301a0b131d73cc' }, # calc metric referral uv | |
{ id => 'cm988_5915597f1d8453766a6ff62d' }, # calc metric referral pv | |
{ id => 'cm988_585d18bb5fb23e5abcd8f6e7' }, # calc metric brand uv | |
{ id => 'cm988_591557fc01c1eb182edc379e' }, # calc metric brand pv | |
{ id => 'cm988_585d18bb1d845314b2d7f775' }, # calc metric dark uv | |
{ id => 'cm988_59155a191d8453766a6ff62e' }, # calc metric dark pv | |
{ id => 'cm988_589345930d38c2652a046d23' }, # calc metric desktop uv | |
{ id => 'cm988_5893453d366e5b10113cf399' }, # calc metric mobile uv | |
{ id => 'cm988_5893456397301a3142ffef81' }, # calc metric amp uv | |
{ id => 'cm988_589344e30d38c2652a046d22' }, # calc metric fbia uv | |
], | |
elements => [ | |
{ id => 'prop5', top => 200 }, # owner site | |
], | |
segments => $segments, | |
dateGranularity => $date_granularity, | |
); | |
my $report_id = $report->get_id(); | |
my $results; | |
for ( my $tries = 0; $tries < 1000; $tries++ ) { | |
if ($results = $report->retrieve( { report_id => $report_id } )) { | |
# $results = $report->retrieve(); | |
last; | |
} else { | |
say "sleeping ($tries)..."; | |
sleep 3; | |
} | |
} | |
my @json; | |
for my $r (@{$results->{'data'}->[0]->{'breakdown'}}) { | |
# next unless $sites->{$r->{'name'}}; | |
my $siteName = $sites->{$r->{'name'}}->{'name'} || $r->{'name'}; | |
push @json, { | |
siteName => $siteName, | |
eVar37 => $r->{'name'}, | |
date => $from->ymd, | |
totals => { | |
visitors => $r->{'counts'}->[0] +0, | |
pageviews => $r->{'counts'}->[1] +0 | |
}, | |
trafficSource => { | |
seo => { | |
visitors => $r->{'counts'}->[2] +0, | |
pageviews => $r->{'counts'}->[3] +0 | |
}, | |
social => { | |
visitors => $r->{'counts'}->[4] +0, | |
pageviews => $r->{'counts'}->[5] +0 | |
}, | |
referral => { | |
visitors => $r->{'counts'}->[6] +0, | |
pageviews => $r->{'counts'}->[7] +0 | |
}, | |
brand => { | |
visitors => $r->{'counts'}->[8] +0, | |
pageviews => $r->{'counts'}->[9] +0 | |
}, | |
dark => { | |
visitors => $r->{'counts'}->[10] +0, | |
pageviews => $r->{'counts'}->[11] +0 | |
}, | |
}, | |
platform => { | |
desktop => { | |
# pageviews => 0, | |
visitors => $r->{'counts'}->[12] +0 | |
}, | |
mobile => { | |
# pageviews => 0, | |
visitors => $r->{'counts'}->[13] +0 | |
}, | |
amp => { | |
# pageviews => 0, | |
visitors => $r->{'counts'}->[14] +0 | |
}, | |
fbia => { | |
# pageviews => 0, | |
visitors => $r->{'counts'}->[15] +0 | |
}, | |
} | |
}; | |
} | |
for my $j (@json) { | |
say encode_json $j; | |
say $fh encode_json($j); | |
} | |
$from->add(months => 1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment