Created
May 6, 2024 19:27
-
-
Save mshakhomirov/ed1a3f8c13de140b4ec3b3c5e8ff8904 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with opportunity as ( | |
select distinct | |
'dbo_opportunity' datasource | |
,'Opportunity created' event_name | |
, toLowCardinality(id) id | |
, parseDateTimeBestEffortOrNull(createdon) timestamp_utc | |
, toDate(timestamp_utc) date_utc | |
,nullif(lower(contactid ),'none') missing_contactid --? contactid in this table is missing | |
,upper(nullif(lower(customerid ),'none')) customerid | |
,nullif(lower(parentaccountid ),'none') parentaccountid | |
,nullif(lower(customeridname ),'none') customeridname | |
,nullif(lower(parentaccountidname),'none') companyname | |
,nullif(lower(ncbh_contactemail ),'none') ncbh_contactemail | |
,upper(nullif(lower(originatingleadid ),'none')) originatingleadid | |
,if(toFloat64(actualvalue) = 0 | |
, 999 | |
,toFloat64(actualvalue) | |
) actualvalue | |
from | |
im_9598_90e.prod_dataverse_dbo_opportunity_9598_azure_sql | |
where | |
date_utc > toDate('2024-04-01') | |
order by | |
actualvalue desc | |
) | |
-- select count(*) from opportunity limit 10 | |
-- select * from opportunity limit 10 | |
, contact as ( | |
select | |
'contact' data_source | |
,'Contact created' event_name | |
,coalesce( | |
nullif(emailaddress1,'') | |
,nullif(emailaddress2,'') | |
,nullif(emailaddress3,'') | |
,lower(hex(SHA256(coalesce( | |
)))) user_id | |
,parseDateTimeBestEffortOrNull(createdon) timestamp_utc | |
, toDate(timestamp_utc) date_utc | |
,'web_form' traffic_source_name | |
,'gtm_form_submit' traffic_source_medium | |
,'thenationalcouncil.hrmdirect.com' traffic_source_source -- get source from ga4 | |
,upper(nullif(lower(contactid ),'none')) contactid --E44685A7-7E62-E711-8103-5065F38B31A1 = id | |
,toLowCardinality(id) id -- same as contactid | |
,nullif(lower(company),'none') company -- ! Do not use | |
,upper(nullif(lower(accountid),'none')) accountid -- None | |
,upper(nullif(lower(accountidname),'none')) accountidname -- None | |
,altai_createnewaccountname -- None | |
,altai_councilidname -- None | |
,upper(nullif(lower(originatingleadid),'none')) originatingleadid -- we have approx. 571 out of 525000 matching leads. What is it? | |
,altai_webcompanyname -- None | |
from im_9598_90e.prod_dataverse_dbo_contact_9598_azure_sql | |
-- desc table im_9598_90e.prod_dataverse_dbo_contact_9598_azure_sql | |
where 1=1 | |
and date_utc > toDate('2024-04-01') | |
and | |
( | |
length(emailaddress1) > 4 or | |
length(emailaddress2) > 4 or | |
length(emailaddress3) > 4 | |
) | |
) | |
-- select * from contact limit 10 | |
-- select count(*) from contact limit 10 | |
, conversion as ( | |
select distinct | |
con.datasource | |
,con.event_name | |
,con.timestamp_utc | |
,con.missing_contactid --? contactid in this table is missing | |
,con.originatingleadid | |
,con.customerid | |
,coalesce(c.contactid,c2.contactid) contactid -- ? We try to identify contact from opps using both customerid and originatingleadid from contact table | |
,con.parentaccountid | |
,con.customeridname | |
,con.companyname companyname --join c on contactid to add company name to ga4 case_id | |
,con.ncbh_contactemail | |
,con.actualvalue | |
from | |
opportunity con | |
left join contact c | |
on | |
con.customerid = c.contactid | |
left join contact c2 | |
on | |
con.originatingleadid = c2.originatingleadid | |
) | |
-- select * from conversion limit 10 | |
-- select count(*) from conversion limit 10 | |
-- Identify CRM cases using company id, if not present then use user_id hash for attribution case | |
, crm_opportunity_contacts as ( | |
select | |
c.data_source | |
,c.event_name | |
,co.companyname | |
,c.email | |
,c.user_id | |
,coalesce(co.companyname, c.user_id) case_id | |
,c.timestamp_utc | |
,c.traffic_source_name | |
,c.traffic_source_medium | |
,c.traffic_source_source | |
,con.actualvalue | |
from | |
contact c | |
-- inner join conversion co | |
left join conversion co | |
on | |
co.contactid = c.contactid | |
) | |
-- select * from crm_opportunity_contacts order by email limit 100 | |
-- select count(*) from crm_opportunity_contacts limit 10 | |
-- users who submitted meails via web form | |
, ga4_users as ( | |
select | |
user_pseudo_id | |
FROM im_9598_90e.ga_4_events_a9598_9598_gbq_all_data | |
where 1=1 | |
and user_id is not null | |
and user_id <> '' | |
and length(user_id) > 4 | |
) | |
, ga4_user_events as ( | |
SELECT | |
datasource | |
, event_name | |
, lower(user_id) user_id | |
, user_pseudo_id | |
,parseDateTimeBestEffortOrNull(timestamp_utc) timestamp_utc | |
, toDate(timestamp_utc) date_utc | |
, assumeNotNull(event_params_json_string) as event_params | |
, JSONExtract(event_params, | |
'Array(Tuple( | |
key String | |
,value Tuple( | |
string_value String | |
,int_value Int32 | |
,float_value Float32 | |
,double_value Float32 | |
) | |
) | |
)' | |
) as event_params_extracted | |
, CAST(event_params_extracted, 'Map(String, Tuple(String,Int32,Float32,Float32))') as event_params_map | |
, event_params_map['page_refferer'].1 as page_refferer | |
, event_params_map['medium'].1 as medium | |
, event_params_map['campaign'].1 as campaign | |
, event_params_map['source'].1 as source | |
,traffic_source_name | |
,traffic_source_medium | |
,traffic_source_source | |
FROM im_9598_90e.ga_4_events_a9598_9598_gbq_all_data | |
where 1=1 | |
and user_pseudo_id in (select user_pseudo_id from ga4_users) | |
) | |
-- select count(distinct user_id), count(*) from ga4_user_events limit 10 | |
-- select * from ga4_user_events order by user_pseudo_id, timestamp_utc desc limit 10 | |
-- select distinct | |
-- traffic_source_name | |
-- ,traffic_source_medium | |
-- ,traffic_source_source | |
-- from ga4_user_events order by user_pseudo_id, timestamp_utc desc limit 1000 | |
-- activity events with channels | |
, events as ( | |
select | |
datasource data_source | |
,g.event_name | |
,g.user_id -- email hash. join on contacts to get a company from crm | |
,g.timestamp_utc | |
,g.traffic_source_name | |
,g.traffic_source_medium | |
,g.traffic_source_source | |
,coalesce(c.case_id , g.user_id) case_id | |
,c.companyname | |
,c.email | |
from | |
ga4_user_events g | |
-- left join | |
inner join | |
crm_opportunity_contacts c on | |
g.user_id = c.user_id | |
) | |
-- select * from events limit 100 | |
-- select count(*) from events limit 100 | |
-- select * from contact where user_id in (select user_id from ga4_user_events) | |
---- ads | |
-- select count(*) from dev_attribution.attribution__ads limit 10 | |
-- select distinct advertiser_name from dev_attribution.attribution__ads limit 100 | |
desc table im_9598_90e.mrt_paid_ads_fields | |
select * FROM im_9598_90e.mrt_paid_ads_fields limit 10 | |
with | |
ads_data as ( | |
SELECT | |
date | |
, channel | |
, datasource | |
, ad_name | |
,campaign_name | |
, adset_name | |
, campaign_id | |
, spend | |
, creative_name | |
, advertiser_name | |
, advertiser_id | |
, adset_id | |
, campaign_type | |
, creative_id | |
FROM `improvado-9598-90e.improvado.paid_ads__basic_performance` | |
WHERE date >= "2024-04-01" | |
) | |
select distinct datasource, campaign_name, ad_name from ads_data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment