@enric1994
Created May 2, 2019 22:24
Scrapes Facebook political and issue ads for Ireland from the Ad Library API (Graph API v3.3 ads_archive endpoint) into a CSV.
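For reference, each ads_archive response is a JSON object with a data list of ads and a paging object whose next URL, when present, points to the following page; the loop in the script below simply walks that chain. A rough sketch of the shape (placeholder values, not real API output):

# Illustrative ads_archive response shape (values are placeholders):
# {
#   "data": [
#     {"page_name": "...",
#      "impressions": {"lower_bound": "1000", "upper_bound": "5000"},
#      "spend": {"lower_bound": "100", "upper_bound": "499"},
#      ...}
#   ],
#   "paging": {
#     "cursors": {"before": "...", "after": "..."},
#     "next": "https://graph.facebook.com/v3.3/ads_archive?..."
#   }
# }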
# Scrape political and issue ads for Ireland from the Facebook Ad Library API
# (Graph API v3.3 ads_archive endpoint) into a semicolon-delimited CSV.
import csv
import time

import requests

access_token = ''  # a valid Ad Library API access token is required

url = ('https://graph.facebook.com/v3.3/ads_archive'
       '?fields=impressions,ad_creation_time,ad_creative_body,'
       'ad_creative_link_caption,ad_creative_link_description,'
       'ad_creative_link_title,ad_delivery_start_time,'
       'ad_delivery_stop_time,ad_snapshot_url,currency,'
       'demographic_distribution,funding_entity,page_id,page_name,'
       'region_distribution,spend')

params = {
    'search_terms': 'ireland',
    'ad_active_status': 'ALL',
    'ad_type': 'POLITICAL_AND_ISSUE_ADS',
    'ad_reached_countries': 'IE',
    'access_token': access_token,
}

res = requests.get(url, params=params)
jres = res.json()

with open('fb_inactive_and_active_ie.csv', 'w',
          newline='', encoding='utf-8') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=';',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerow([
        'impressions_low',
        'impressions_up',
        'ad_creation_time',
        'ad_creative_body',
        'currency',
        'ad_delivery_start_time',
        'ad_delivery_stop_time',
        'ad_snapshot_url',
        'demographic_distribution',
        'funding_entity',
        'page_id',
        'page_name',
        'region_distribution',
        'spend_low',
        'spend_up',
    ])
    # Follow the API's cursor pagination; the page cap is only a safety limit.
    for page in range(100000):
        time.sleep(2)  # throttle requests to avoid hitting the rate limit
        print('------------------------------')
        print(page)
        for i in jres['data']:
            # Some fields are missing for certain ads; fill in placeholders.
            if 'funding_entity' not in i:
                i['funding_entity'] = '?'
                print('no funding_entity')
            if 'ad_creative_body' not in i:
                i['ad_creative_body'] = '?'
                print('no ad_creative_body')
            if 'ad_delivery_stop_time' not in i:
                i['ad_delivery_stop_time'] = '?'
                print('no ad_delivery_stop_time')
            spamwriter.writerow([
                i['impressions']['lower_bound'],
                i['impressions']['upper_bound'],
                i['ad_creation_time'],
                i['ad_creative_body'],
                i['currency'],
                i['ad_delivery_start_time'],
                i['ad_delivery_stop_time'],
                i['ad_snapshot_url'],
                i['demographic_distribution'],
                i['funding_entity'],
                i['page_id'],
                i['page_name'],
                i['region_distribution'],
                i['spend']['lower_bound'],
                i['spend']['upper_bound'],
            ])
        # Stop once the API returns no further page of results.
        if 'paging' not in jres or 'next' not in jres['paging']:
            break
        next_page = jres['paging']['next']
        res = requests.get(next_page)
        jres = res.json()
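A minimal sketch for inspecting the resulting CSV afterwards, assuming pandas is installed (the filename and delimiter come from the script above):

import pandas as pd

# The script writes a semicolon-delimited CSV.
ads = pd.read_csv('fb_inactive_and_active_ie.csv', delimiter=';')
print(ads[['page_name', 'impressions_low', 'impressions_up']].head())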