Created
May 19, 2022 12:55
-
-
Save jpigla/eb08b51ed0b2e8dcf2ea9457f1dee876 to your computer and use it in GitHub Desktop.
Get CrUX Data from Google with Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Fetch Chrome UX Report (CrUX) data for a list of URLs ------------------------------------------------------------
# Reads URLs from 'urls.txt', queries the CrUX API for each URL on each form
# factor, and writes the collected metrics to semicolon-separated CSV files
# (a '_wip' backup every 10 requests, plus a '_final' file at the end).
# Relies on file-level imports/definitions: API_KEY, requests, pandas as pd,
# numpy as np, date (datetime), urlparse (urllib.parse), time.

CRUX_ENDPOINT = f'https://chromeuxreport.googleapis.com/v1/records:queryRecord?key={API_KEY}'
# Core Web Vitals metrics returned by the API that this script records.
CRUX_METRICS = (
    'cumulative_layout_shift',
    'first_contentful_paint',
    'first_input_delay',
    'largest_contentful_paint',
)
# Histogram bucket index -> column suffix (good / needs-improvement / bad).
HISTOGRAM_SUFFIXES = ('good', 'ni', 'bad')


def _extract_row(result, url, url_netloc, form_factor, level):
    """Flatten one CrUX API JSON response into a single result row (dict).

    Missing keys (metric unavailable, request rejected) become np.nan or a
    sensible fallback instead of raising, so one bad response never aborts
    the whole crawl. A fresh dict is returned per call — the original reused
    one dict, which leaked stale metric values into error rows.
    """
    row = {'date': date.today(), 'level': level}
    try:
        key = result.get('record', {}).get('key', {})
        row['url'] = key.get('url', url)
        row['url_netloc'] = url_netloc
        row['device'] = key.get('formFactor', form_factor)
        # 'error' is present only when the API rejected the request.
        row['status'] = result.get('error', {}).get('status', 'Success')
        metrics = result.get('record', {}).get('metrics', {})
        for metric in CRUX_METRICS:
            data = metrics.get(metric, {})
            # p75 is the headline Core Web Vitals value for each metric.
            row[metric] = data.get('percentiles', {}).get('p75', np.nan)
            histogram = data.get('histogram', [])
            for idx, suffix in enumerate(HISTOGRAM_SUFFIXES):
                try:
                    density = histogram[idx]['density']
                except (IndexError, KeyError):
                    density = np.nan
                row[f'{metric}_{suffix}'] = density
    except Exception as e:  # unexpected response shape — record the error, keep going
        row['url'] = url
        row['url_netloc'] = url_netloc
        row['device'] = form_factor
        row['status'] = 'ERROR - ' + str(e)
    return row


# --- Open the file containing the list of URLs ------------------------------------------------------------------------
# 'with' guarantees the file handle is closed (the original leaked it).
with open('urls.txt', 'r') as urls_file:
    url_list_file = urls_file.readlines()
# url_list_file = pd.read_csv('urls.csv', header=None).drop_duplicates()[0].to_list()

# --- Set parameters and variables --------------------------------------------------------------------------------------
# Date of today, used in the output file names.
date_today = date.today().strftime("%d.%m.%Y")
# Request parameters.
lst_formFactor = ['PHONE', 'DESKTOP']  # 'PHONE' or 'TABLET' or 'DESKTOP'
level = 'url'                          # 'url' or 'origin'
counter = 0
# Collect rows in a list and build the DataFrame from it: DataFrame.append()
# was removed in pandas 2.0, and concatenating once is O(n) instead of O(n^2).
rows = []

# --- Loop over the URLs ------------------------------------------------------------------------------------------------
for url in url_list_file:
    url = url.strip()
    if not url:
        # Skip blank lines in urls.txt instead of querying an empty URL.
        continue
    # Domain name of the current URL.
    url_netloc = urlparse(url).netloc
    for formFactor in lst_formFactor:
        data_dic = {'formFactor': formFactor, level: url}
        header_dic = {'Content-Type': 'application/json'}
        result = requests.post(CRUX_ENDPOINT, json=data_dic, headers=header_dic)
        rows.append(_extract_row(result.json(), url, url_netloc, formFactor, level))
        counter += 1
        # Backup: save a work-in-progress CSV every 10th step of the loop.
        if counter % 10 == 0:
            pd.DataFrame(rows).to_csv(f'crux_results_{date_today}_wip.csv', sep=';', index=True)
        # Sleep between requests to stay friendly to the API quota.
        time.sleep(0.3)

df_crux = pd.DataFrame(rows)
df_crux.to_csv(f'crux_results_{date_today}_final.csv', sep=';', index=True)
df_crux
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment