Created
March 18, 2024 22:45
-
-
Save rmhrisk/d0ecc39eab846c5c2d14fa11b1c20811 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import requests | |
from cryptography import x509 | |
from cryptography.hazmat.backends import default_backend | |
from io import StringIO | |
from cryptography.hazmat.primitives import hashes | |
import matplotlib.pyplot as plt | |
def download_csv(url, timeout=60):
    """Download a CSV document and return it as an in-memory text buffer.

    Args:
        url: Address of the CSV resource to fetch.
        timeout: Seconds to wait for the server to connect/send data before
            giving up. Without a timeout, ``requests.get`` can block
            indefinitely on a stalled connection.

    Returns:
        A ``StringIO`` wrapping the decoded response body, suitable for
        passing straight to ``pandas.read_csv``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors rather than parsing an error page as CSV.
    response.raise_for_status()
    return StringIO(response.text)
def compute_fingerprint(pem_data):
    """Return the uppercase hex SHA-256 fingerprint of a PEM certificate.

    Args:
        pem_data: PEM-encoded certificate text. Non-string values (for
            example the NaN pandas uses for an empty CSV cell) are treated
            as "no certificate".

    Returns:
        The fingerprint as an uppercase hexadecimal string, or ``None`` when
        the input is missing or cannot be parsed.
    """
    # Handle missing cells explicitly instead of relying on ``.encode()``
    # blowing up inside the try block.
    if not isinstance(pem_data, str):
        print(f"Error computing fingerprint: expected PEM text, got {type(pem_data).__name__}")
        return None
    try:
        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
    except Exception as e:
        # Best effort: one malformed certificate must not abort the whole
        # report, so log it and let the caller see None.
        print(f"Error computing fingerprint: {e}")
        return None
    return cert.fingerprint(hashes.SHA256()).hex().upper()
def extract_country_from_certificate(pem_data):
    """Return the issuer country code(s) of a PEM certificate.

    Args:
        pem_data: PEM-encoded certificate text. Non-string values (such as
            the NaN pandas uses for an empty CSV cell) yield an empty result.

    Returns:
        The distinct values of the issuer's country-name attributes joined
        with commas in sorted order, or ``""`` when the certificate is
        missing, unparsable, or carries no country attribute.
    """
    if not isinstance(pem_data, str):
        print(f"Error extracting country: expected PEM text, got {type(pem_data).__name__}")
        return ""
    try:
        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
        country_attrs = cert.issuer.get_attributes_for_oid(x509.NameOID.COUNTRY_NAME)
    except Exception as e:
        # Best effort: a single bad certificate should not stop the report.
        print(f"Error extracting country: {e}")
        return ""
    # Sort the de-duplicated values so the result is stable across runs;
    # plain set iteration order varies with string hash randomization.
    return ",".join(sorted({attr.value for attr in country_attrs}))
def generate_pie_chart_with_legend(ca_countries):
    """Show a donut chart of how often each country value occurs.

    Args:
        ca_countries: Iterable of country labels, one entry per certificate
            authority. Repeated labels are counted.

    Returns:
        None. The chart is displayed with ``plt.show()``; when there is
        nothing to count, a message is printed and no figure is created.
    """
    # Tally occurrences into a two-column frame: Country, Counts.
    country_counts = (
        pd.Series(ca_countries)
        .value_counts()
        .rename_axis('Country')
        .reset_index(name='Counts')
    )
    if country_counts.empty:
        print("No country data to plot.")
        return

    # Wide figure so the legend fits to the right of the chart.
    _, ax = plt.subplots(figsize=(15, 7))

    wedges, _texts, _autotexts = ax.pie(
        country_counts['Counts'],
        startangle=140,
        autopct='%1.1f%%',
        textprops=dict(color="w"),
    )

    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0, 0), 0.70, color='white'))

    # Legend entries show each country's share of the total. The raw count
    # must be converted to a percentage before being labelled with "%".
    total = country_counts['Counts'].sum()
    legend_labels = [
        f"{country}: {count / total * 100:.2f}%"
        for country, count in zip(country_counts['Country'], country_counts['Counts'])
    ]
    ax.legend(wedges, legend_labels, title="Country", loc="center left", bbox_to_anchor=(1.1, 0.5))

    # Leave room on the right so the legend is not clipped.
    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.75)

    ax.set_title('Country Distribution of Certificate Authorities')
    plt.show()
def generate_trusted_ca_markdown_table_from_url(ca_url, roots_url):
    """Print a markdown trust-store table per CA owner and chart their countries.

    Downloads two CSV reports, keeps the rows whose record type is
    'Root Certificate', and records for every CA owner which of the four
    root programs (Apple, Google Chrome, Microsoft, Mozilla) include at least
    one of its roots. Each owner is tagged with a country looked up by the
    root's SHA-256 fingerprint in the second report; owners with no match
    are labelled "Unknown".

    Args:
        ca_url: URL of a CSV with the columns 'Certificate Record Type',
            'CA Owner', 'SHA-256 Fingerprint' and 'Status of Root Cert'.
        roots_url: URL of a CSV with a 'PEM' column holding root
            certificates.

    Returns:
        None. The markdown table is printed and the country chart is shown
        via ``generate_pie_chart_with_legend``.
    """
    programs = ("Apple", "Google Chrome", "Microsoft", "Mozilla")

    ca_data = pd.read_csv(download_csv(ca_url))
    ca_data = ca_data[ca_data['Certificate Record Type'] == 'Root Certificate']

    roots_data = pd.read_csv(download_csv(roots_url))
    roots_data['Computed SHA-256 Fingerprint'] = roots_data['PEM'].apply(compute_fingerprint)
    fingerprint_to_country = dict(zip(
        roots_data['Computed SHA-256 Fingerprint'],
        roots_data['PEM'].apply(extract_country_from_certificate),
    ))

    trusted_roots = {}   # CA owner -> set of programs that include it
    ca_countries = {}    # CA owner -> country label used in table and chart
    for _, row in ca_data.iterrows():
        status = row['Status of Root Cert']
        # An empty CSV cell arrives as NaN (a float); substring tests on it
        # would raise TypeError, and it cannot name any program anyway.
        if not isinstance(status, str):
            continue

        # Only include CAs that are trusted by at least one program.
        included = {program for program in programs if f"{program}: Included" in status}
        if not included:
            continue

        ca_owner = row['CA Owner']
        fingerprint = row.get('SHA-256 Fingerprint', '')
        # Missing fingerprints and empty country strings both map to "Unknown".
        country = fingerprint_to_country.get(fingerprint) or "Unknown"

        trusted_roots.setdefault(ca_owner, set()).update(included)
        # Keep the first *known* country for an owner: an "Unknown" from an
        # earlier root must not mask a country resolved by a later one.
        if ca_countries.get(ca_owner, "Unknown") == "Unknown":
            ca_countries[ca_owner] = country

    # Generating markdown table
    lines = [
        "CA Owner | Countries | Apple | Google Chrome | Microsoft | Mozilla",
        "--- | --- | --- | --- | --- | ---",
    ]
    for ca_owner, stores in trusted_roots.items():
        marks = ["✓" if program in stores else "" for program in programs]
        lines.append(" | ".join([str(ca_owner), ca_countries.get(ca_owner, "Unknown"), *marks]))
    markdown_table = "\n".join(lines) + "\n" + f"\nTotal CAs: {len(trusted_roots)}\n"
    print(markdown_table)

    # One country label per CA owner feeds the distribution chart.
    generate_pie_chart_with_legend(list(ca_countries.values()))
# Source reports: the first URL is read for certificate records, the second
# for root certificates in PEM form.
ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'

# Print the trust-store table and display the country distribution chart.
generate_trusted_ca_markdown_table_from_url(ca_url=ca_url, roots_url=roots_url)
With that said, I would argue that a CA's failure to eventually be included in all root programs is merely a signal, not an absolute indicator, that it isn't providing enough value to the web to justify the exposure it represents. A much better indicator would be its actual issuance volume over a fixed period of time. For example, if a CA meets all the requirements and successfully passes audits for 5 years, yet fails to achieve any material issuance volume, should it still be trusted?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If we filter the 92 CAs down to those that are in all root stores (Apple, Google Chrome, Microsoft, Mozilla), this is what we see:
Total CAs in all root stores: 41
This point becomes particularly pertinent when considering that a WebPKI TLS certificate loses much of its utility if the root it chains to isn't included in every major browser root store. Given the diversity of the browser market share, as illustrated by StatCounter, this could explain why Certificate Authorities (CAs) not included in the set of well-trusted roots issue few, if any, WebPKI TLS certificates. Because no single browser dominates, a certificate must be trusted by all of the major root stores to be broadly usable, which highlights the challenges faced by CAs operating outside this trusted circle.
By removing long-standing members of the root programs that have not issued a significant number of certificates, or no longer do, you could reduce the trusted set of CAs down to 51 certificates. This could result in an attack surface reduction of up to 52%.