Skip to content

Instantly share code, notes, and snippets.

@rmhrisk
Last active April 4, 2024 23:18
Show Gist options
  • Save rmhrisk/2334a56d4a7034cd23562e1ada253585 to your computer and use it in GitHub Desktop.
Save rmhrisk/2334a56d4a7034cd23562e1ada253585 to your computer and use it in GitHub Desktop.
Look at CA distribution by region and country
import pandas as pd
import requests
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from io import StringIO
from datetime import datetime, timezone
import time
import matplotlib.pyplot as plt
# ISO 3166-1 alpha-2 codes grouped by region; used to compute each region's
# share of the observed certificate countries.
european_countries = {
    'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES',
    'FI', 'FR', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU',
    'LV', 'MT', 'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK',
}
north_america_countries = {'US', 'CA', 'MX'}
apac_countries = {
    'AU', 'CN', 'JP', 'IN', 'NZ', 'SG',
    'KR', 'TH', 'ID', 'MY', 'PH', 'VN',
}
latin_america_countries = {
    'AR', 'BR', 'CL', 'CO', 'PE', 'VE', 'PR', 'UY', 'EC',
    'GT', 'CU', 'BO', 'HN', 'PY', 'SV', 'NI', 'CR', 'PA',
}
africa_countries = {
    'ZA', 'NG', 'EG', 'DZ', 'MA', 'KE', 'ET', 'GH', 'TZ', 'CI',
    'UG', 'TN', 'ML', 'ZW', 'SD', 'AO', 'LY', 'CM', 'NA', 'BF',
}
def download_csv(url, timeout=60):
    """Download a CSV document and return it as an in-memory text buffer.

    Args:
        url: Address of the CSV resource to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``StringIO`` wrapping the response text, or ``None`` when the
        request fails (network error or non-success HTTP status).
    """
    print(f"Downloading CSV from {url}...")
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as HTTP
        # failures so callers only have to check for None.
        print(f"Failed to download CSV: {exc}")
        return None

    if not response.ok:
        print(f"Failed to download CSV. Status code: {response.status_code}")
        return None

    print("Download complete.")
    return StringIO(response.text)
def compute_fingerprint(pem_data):
    """Return the SHA-256 fingerprint of a PEM certificate as uppercase hex.

    Args:
        pem_data: PEM-encoded certificate text.

    Returns:
        The fingerprint as an uppercase hexadecimal string, or ``None`` when
        the certificate cannot be loaded (the error is printed).
    """
    try:
        pem_bytes = pem_data.encode()
        certificate = x509.load_pem_x509_certificate(pem_bytes, default_backend())
        digest = certificate.fingerprint(hashes.SHA256())
    except Exception as e:
        print(f"Error computing fingerprint: {e}")
        return None
    return digest.hex().upper()
def extract_country_from_certificate(pem_data):
    """Read the country (C) attribute from a PEM certificate's subject.

    Args:
        pem_data: PEM-encoded certificate text.

    Returns:
        The first country value found in the subject, ``"Not Available"``
        when the subject has no country attribute, or ``"Error"`` when the
        certificate cannot be processed (the error is printed).
    """
    try:
        certificate = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
        country_attributes = certificate.subject.get_attributes_for_oid(
            x509.NameOID.COUNTRY_NAME
        )
        if not country_attributes:
            return "Not Available"
        # Only the first country attribute is reported, even if several exist.
        return country_attributes[0].value
    except Exception as e:
        print(f"Error extracting country from certificate: {e}")
        return "Error"
def search_gleif(ca_owner_name, country_code, timeout=30):
    """Look up a CA owner in the GLEIF LEI registry by entity name.

    Only the first record returned by the API is used.
    NOTE(review): the first hit is taken without checking that its legal
    name actually matches ``ca_owner_name``, so unrelated entities can be
    returned; confirm results before relying on them.

    Args:
        ca_owner_name: Name to search for (sent as ``filter[entity.names]``).
        country_code: Country used to narrow the search by legal address, or
            the sentinel ``"Unknown"`` to search without a country filter.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A 4-tuple ``(status, lei, legal_entity_name, complete_address)``.
        ``status`` is ``"Found"``, ``"Not Found"`` or ``"Failed"``; the other
        three items are ``None`` unless the status is ``"Found"``.
    """
    if country_code == "Unknown":
        print(f"Searching GLEIF for '{ca_owner_name}' without specifying country...")
    else:
        print(f"Searching GLEIF for '{ca_owner_name}' in country '{country_code}'...")

    url = "https://api.gleif.org/api/v1/lei-records"
    params = {
        "page[size]": 1,
        "page[number]": 1,
        "filter[entity.names]": ca_owner_name,
    }
    if country_code != "Unknown":
        params["filter[entity.legalAddress.country]"] = country_code
    headers = {"Accept": "application/vnd.api+json"}

    # A network failure for one owner must not abort the whole run, so it is
    # reported with the same "Failed" status as a non-200 response.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to search GLEIF: {exc}")
        return "Failed", None, None, None

    if response.status_code != 200:
        print(f"Failed to search GLEIF. Status code: {response.status_code}")
        return "Failed", None, None, None

    try:
        payload = response.json()
    except ValueError as exc:
        print(f"Failed to search GLEIF: invalid JSON response ({exc})")
        return "Failed", None, None, None

    records = payload.get('data')
    if not records:
        return "Not Found", None, None, None

    record = records[0]
    entity = record['attributes']['entity']
    legal_address = entity['legalAddress']
    address_lines = legal_address.get('addressLines') or []
    address_parts = [
        ", ".join(line for line in address_lines if line),
        legal_address.get('city', ''),
        legal_address.get('region', ''),
        legal_address.get('country', ''),
        legal_address.get('postalCode', ''),
    ]
    # Empty components are dropped so the address has no dangling commas.
    complete_address = ", ".join(filter(None, address_parts))
    return "Found", record['id'], entity['legalName']['name'], complete_address
def is_certificate_valid(pem_data):
    """Report whether a PEM certificate has not yet expired.

    Only the expiry date is checked: the current UTC time is compared with
    the certificate's ``not_valid_after_utc`` value.

    Args:
        pem_data: PEM-encoded certificate text.

    Returns:
        ``True`` when the current time is before the certificate's expiry;
        ``False`` when it has expired or cannot be loaded (the error is
        printed).
    """
    try:
        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
    except Exception as e:
        # Matches the sibling helpers: a single unreadable row is reported
        # and treated as not valid instead of aborting the whole run.
        print(f"Error checking certificate validity: {e}")
        return False
    # not_valid_after_utc is timezone-aware, so compare against an aware "now".
    return datetime.now(timezone.utc) < cert.not_valid_after_utc
def calculate_region_percentage(country_data, region_countries):
    """Return the percentage of entries in ``country_data`` that fall in a region.

    Args:
        country_data: Sequence of country codes, one per observation
            (duplicates count separately).
        region_countries: Collection of country codes making up the region.

    Returns:
        The share of entries belonging to the region, on a 0-100 scale, or
        ``0`` when ``country_data`` is empty.
    """
    total_entries = len(country_data)
    if total_entries <= 0:
        return 0

    matching_entries = 0
    for code in country_data:
        if code in region_countries:
            matching_entries += 1
    return (matching_entries / total_entries) * 100
def generate_pie_chart(data, title, labels, explode=None):
    """Draw and show a pie chart with a percentage legend beside it.

    Args:
        data: Numeric wedge sizes.
        title: Chart title.
        labels: One label per entry in ``data``.
        explode: Optional per-wedge offsets, passed through to ``ax.pie``.

    Returns:
        ``None``. When ``data`` is empty or sums to zero nothing is drawn,
        because wedge shares cannot be computed.
    """
    total = sum(data)
    if total <= 0:
        # Avoids a ZeroDivisionError below and a meaningless empty chart.
        print(f"Skipping pie chart '{title}': no data to plot.")
        return

    fig, ax = plt.subplots(figsize=(12, 7))
    wedges, _texts, _autotexts = ax.pie(
        data, explode=explode, labels=labels, startangle=90, autopct='%1.1f%%'
    )
    ax.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.
    ax.set_title(title)

    # Legend entries pair each label with its whole-number share of the total.
    legend_labels = [
        f"{label} - {int(value / total * 100)}%" for label, value in zip(labels, data)
    ]
    # Anchor the legend just outside the right edge of the axes.
    ax.legend(
        wedges,
        legend_labels,
        title="Certificate Authorities",
        loc="center left",
        bbox_to_anchor=(1, 0.5),
    )
    # Reserve the right 15% of the figure so the legend is not clipped.
    fig.tight_layout(rect=[0, 0, 0.85, 1])
    plt.show()
def generate_ca_address_table_from_url(ca_url, roots_url):
    """Correlate root certificates with CA owners and GLEIF records, then report.

    Downloads the two CSV reports, keeps the unexpired certificates from the
    roots report, maps each one to its CA owner via the SHA-256 fingerprint,
    and queries GLEIF once per unique (owner, certificate country) pair. It
    then prints a markdown table of the results, shows pie charts by region
    and by country, and prints regional, country-coverage and LEI summaries.

    Args:
        ca_url: URL of the CSV holding the ``SHA-256 Fingerprint`` and
            ``CA Owner`` columns.
        roots_url: URL of the CSV holding the ``PEM`` column.

    Returns:
        ``None``. Returns early, without output, if either download fails.
    """
    ca_csv_data = download_csv(ca_url)
    if ca_csv_data is None:
        return
    ca_data = pd.read_csv(ca_csv_data)

    roots_csv_data = download_csv(roots_url)
    if roots_csv_data is None:
        return
    roots_data = pd.read_csv(roots_csv_data)

    roots_data.columns = roots_data.columns.str.strip()
    roots_data['Computed Fingerprint'] = roots_data['PEM'].apply(compute_fingerprint)
    roots_data['CERT:COUNTRY'] = roots_data['PEM'].apply(extract_country_from_certificate)
    roots_data['IsValid'] = roots_data['PEM'].apply(is_certificate_valid)

    ca_owner_search_results = []
    searched_combinations = set()
    country_data = []
    # Tracked per owner (not per owner/country pair) so the LEI percentage
    # below uses the same unit in numerator and denominator.
    owners_with_lei = set()

    for _, row in roots_data.iterrows():
        if not row['IsValid']:
            continue

        fingerprint = row['Computed Fingerprint']
        owner_matches = ca_data.loc[
            ca_data['SHA-256 Fingerprint'] == fingerprint, 'CA Owner'
        ]
        if owner_matches.empty:
            # A certificate without a matching record cannot be attributed.
            print(f"No CA owner found for fingerprint {fingerprint}; skipping.")
            continue
        ca_owner = owner_matches.iloc[0]

        country_code = row['CERT:COUNTRY']
        # Both sentinels from extract_country_from_certificate mean "no usable
        # country"; neither may be sent to GLEIF or counted as a country.
        if country_code in ("Not Available", "Error"):
            country_code = "Unknown"

        search_key = (ca_owner, country_code)
        if search_key in searched_combinations:
            continue
        searched_combinations.add(search_key)

        status, lei, legal_entity_name, complete_address = search_gleif(ca_owner, country_code)
        ca_owner_search_results.append({
            "CCADB:OWNER": ca_owner,
            "CERT:COUNTRY": country_code if country_code != "Unknown" else None,
            "GLEIF:STATUS": status,
            "GLEIF:legalEntityName": legal_entity_name,
            "GLEIF:LEI": lei,
            "GLEIF:legalAddress": complete_address,
        })
        if country_code != "Unknown":
            country_data.append(country_code)
        if lei:
            owners_with_lei.add(ca_owner)
        time.sleep(1)  # Respectful delay between API calls

    print(pd.DataFrame(ca_owner_search_results).to_markdown(index=False))

    # Region shares are computed once and reused for the chart and the text
    # summary so both always show the same labels and numbers.
    regions = {
        "European Union": european_countries,
        "North America": north_america_countries,
        "APAC": apac_countries,
        "Latin America": latin_america_countries,
        "Africa": africa_countries,
    }
    region_percentages = {
        region: calculate_region_percentage(country_data, countries)
        for region, countries in regions.items()
    }

    # Charts are skipped when there is nothing to plot.
    if any(region_percentages.values()):
        generate_pie_chart(
            list(region_percentages.values()), 'CA Owners by Region', list(region_percentages)
        )
    if country_data:
        country_counts = pd.Series(country_data).value_counts()
        generate_pie_chart(
            country_counts.tolist(), 'CA Owner By Country', country_counts.index.tolist()
        )

    print("\nRegional Distribution of CAs:")
    for region, percentage in region_percentages.items():
        print(f"{region}: {percentage:.2f}%")

    # Only countries that belong to one of the tracked regions are counted,
    # so the numerator is always a subset of the denominator.
    tracked_countries = set().union(*regions.values())
    countries_with_ca = set(country_data) & tracked_countries
    percentage_sovereign_nations = (len(countries_with_ca) / len(tracked_countries)) * 100
    print(f"\nPercentage of Sovereign Nations with a CA Owner: {percentage_sovereign_nations:.2f}%")

    # LEI summary, per unique CA owner.
    total_cas = len({result['CCADB:OWNER'] for result in ca_owner_search_results})
    lei_count = len(owners_with_lei)
    lei_percentage = (lei_count / total_cas) * 100 if total_cas > 0 else 0
    print(f"\nNumber of CAs with an LEI: {lei_count}")
    print(f"Percentage of CAs with an LEI: {lei_percentage:.2f}%")
# CCADB CSV report endpoints: the certificate-records report supplies the
# fingerprint-to-owner mapping, and the Mozilla included-roots report
# (filtered by TrustBitsInclude=Websites) supplies the PEM certificates.
ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'

# Run the analysis only when executed as a script, so importing this module
# does not trigger downloads, API calls, or plots.
if __name__ == "__main__":
    generate_ca_address_table_from_url(ca_url, roots_url)
@rmhrisk
Copy link
Author

rmhrisk commented Apr 4, 2024

I wanted to examine the distribution of CAs by country and region. However, the CCADB does not contain the necessary information, so I decided to see if I could correlate the data from CCADB with LEI entries, since they offer an open API.

The method I am using is flawed because I have not devised a reliable way to match the CA owner to a legal entity and, ultimately, to its physical address in GLEIF. Despite this, there is still some intriguing data here. For instance, I am using the 'C' field in the intermediate certificates to determine both the country and the region. This also provides a straightforward view into the countries associated with the intermediate certificates linked to a CA owner.

The layout of the pie charts is also not great, but they are still valuable.

Next, I think I will try to extract the address from each CA's CP/CPS using an LLM; I believe CCADB has the necessary links to make this analysis possible.

Here is an example output from this script:

CCADB:OWNER CERT:COUNTRY GLEIF:STATUS GLEIF:legalEntityName GLEIF:LEI GLEIF:legalAddress
SSL.com US Not Found
OISTE CH Not Found
certSIGN RO Not Found
Entrust US Found ENTRUST CORPORATION 549300RRHR61YGCQ8575 C/O CORPORATION SERVICE COMPANY, 251 LITTLE FALLS DRIVE, WILMINGTON, US-DE, US, 19808
eMudhra Technologies Limited IN Not Found
eMudhra Technologies Limited US Not Found
Government of Hong Kong (SAR), Hongkong Post, Certizen HK Found 翹晉科技有限公司 836800VC81GMPMG59W77 香港灣仔軒尼詩道245-251號守時商業大廈16樓B室, 香港, HK, 999077
Microsec Ltd. HU Not Found
Autoridad de Certificacion Firmaprofesional ES Not Found
NAVER Cloud Trust Services KR Not Found
Viking Cloud, Inc. US Found Mavenir, Inc. 5493007DSIFXCC6J0N24 C/O The Corporation Trust Company, Corporation Trust Center, 1209 Orange Street, Wilmington, US-DE, US, 19801
Government of Spain, Fábrica Nacional de Moneda y Timbre (FNMT) ES Not Found
iTrusChina Co., Ltd. CN Found HONEYWELL (TIANJIN) CO., LTD. 5493001KNG41ZWLL0D66 FACTORY 21, 156 NANHAI ROAD JIN BIN DEVELOPMENT ZONE, TIANJIN, CN-TJ, CN, 300000
Chunghwa Telecom TW Not Found
GlobalSign nv-sa BE Not Found
Autoridad de Certificación (ANF AC) ES Found ANF AUTORIDAD DE CERTIFICACIÓN ASOCIACIÓN ANF AC 984500AFEBFDD980CB60 PASEO DE LA CASTELLANA, Nº 79, MADRID, ES-M, ES, 28046
Agence Nationale de Certification Electronique TN Not Found
Asseco Data Systems S.A. (previously Unizeto Certum) PL Not Found
e-commerce monitoring GmbH AT Found e-commerce monitoring GmbH 529900UR5S86P5UH5L35 Redtenbachergasse 20, Wien, AT-9, AT, 1160
Microsoft Corporation US Found MICROSOFT CORPORATION INR2EJN1ERAN0W5ZP974 300 DESCHUTES WAY SW STE 208 MC-CSC1, TUMWATER, US-WA, US, 98501
BEIJING CERTIFICATE AUTHORITY Co., Ltd. CN Found HONEYWELL (TIANJIN) CO., LTD. 5493001KNG41ZWLL0D66 FACTORY 21, 156 NANHAI ROAD JIN BIN DEVELOPMENT ZONE, TIANJIN, CN-TJ, CN, 300000
Google Trust Services LLC US Not Found
Google Trust Services LLC Not Found
Telia Company FI Not Found
D-TRUST DE Not Found
HARICA GR Not Found
Internet Security Research Group US Not Found
DigiCert US Found DIGICERT, INC. 984500C4BDA7A9584A97 1108 E SOUTH UNION AVE, Midvale, US-UT, US, 84047
Certainly LLC US Not Found
TrustAsia Technologies, Inc. CN Found 亚数信息科技(上海)有限公司 3003006UVF25XTIMJ336 桂平路391号3号楼32层3201A室, 徐汇区, CN-SH, CN, 200233
CommScope US Found COMMSCOPE, INC. 549300Z6K4JXMFE8QY54 C/O UNITED AGENT GROUP INC., 1521 CONCORD PIKE SUITE 201, WILMINGTON, US-DE, US, 19803
Eviden DE Found Eviden Germany GmbH 5493000GKWL23MABZD98 Otto-Hahn-Ring 6, München, DE-BY, DE, 81739
Sectigo GB Not Found
Deutsche Telekom Security GmbH DE Found Deutsche Telekom Security GmbH 5299001OA46XN1EFPU92 Bonner Talweg 100, Bonn, DE-NW, DE, 53113
Government of Spain, Autoritat de Certificació de la Comunitat Valenciana (ACCV) ES Not Found
Actalis IT Found ACTALIS SPA 815600BA34A5A6794979 VIA SAN CLEMENTE, 53, PONTE SAN PIETRO, IT-BG, IT, 24036
DigiCert IE Not Found
Buypass NO Not Found
Disig, a.s. SK Found ČAS, a.s. 097900CAKA0000176224 Jantárová 48, Bratislava, SK, 85010
Certigna FR Not Found
Entrust Found ENTRUST CORPORATION 549300RRHR61YGCQ8575 C/O CORPORATION SERVICE COMPANY, 251 LITTLE FALLS DRIVE, WILMINGTON, US-DE, US, 19808
GlobalSign nv-sa Not Found
GoDaddy US Found GODADDY INC. 549300J8H1TB0I2CB447 C/O CORPORATION SERVICE COMPANY, 251 LITTLE FALLS DRIVE, WILMINGTON, US-DE, US, 19808
Izenpe S.A. ES Not Found
Netlock HU Not Found
QuoVadis BM Not Found
Cybertrust Japan / JCSI JP Not Found
SECOM Trust Systems CO., LTD. JP Found ALTRAN JAPAN LTD 549300CY7JUSEA5Q1691 TRANOMON HILLS TOWER 22F, 1-23-1 TORANOMON MINATO-KU, TOKYO, JP-13, JP, 105-6325
Amazon Trust Services US Not Found
SwissSign AG CH Not Found
Telia Company Found TELIA COMPANY AB 213800FSR9RNDUOTXO25 STJÄRNTORGET 1, SOLNA, SE-AB, SE, 169 94
Taiwan-CA Inc. (TWCA) TW Not Found
Sectigo US Not Found
IdenTrust Services, LLC US Found COVALTO LLC 54930097CJ45QIXK1148 C/O THE CORPORATION TRUST COMPANY, CORPORATION TRUST CENTER 1209 ORANGE ST, WILMINGTON, US-DE, US, 19801
China Financial Certification Authority (CFCA) CN Not Found
Krajowa Izba Rozliczeniowa S.A. (KIR) PL Not Found
Global Digital Cybersecurity Authority Co., Ltd. (Formerly Guang Dong Certificate Authority (GDCA)) CN Found 数安时代科技股份有限公司 836800QG9BP3KQ2DUG37 南海区狮山镇南海软件科技园科教路(广东省数字证书认证中心), 佛山市, CN-GD, CN, 528225
Government of Turkey, Kamu Sertifikasyon Merkezi (Kamu SM) TR Not Found
Shanghai Electronic Certification Authority Co., Ltd. CN Found HONEYWELL (TIANJIN) CO., LTD. 5493001KNG41ZWLL0D66 FACTORY 21, 156 NANHAI ROAD JIN BIN DEVELOPMENT ZONE, TIANJIN, CN-TJ, CN, 300000

Regional Distribution of CAs:
European Union: 38.18%
North America: 25.45%
APAC: 18.18%
Latin America: 0.00%
Africa: 1.82%

Percentage of Sovereign Nations with a CA Owner: 32.50%

image image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment