Rank | Certificate Authority | Unexpired Precertificates | Percentage | Cumulative % |
---|---|---|---|---|
1 | Internet Security Research Group | 338,978,164 | 47.928% | 47.928% |
2 | DigiCert | 81,481,742 | 11.521% | 59.449% |
3 | Sectigo | 71,963,799 | 10.175% | 69.624% |
4 | GoDaddy | 69,771,281 | 9.865% | 79.489% |
5 | Amazon Trust Services | 56,893,730 | 8.044% | 87.533% |
6 | Google Trust Services LLC | 56,021,094 | 7.921% | 95.454% |
7 | Microsoft Corporation | 23,484,401 | 3.320% | 98.774% |
8 | IdenTrust Services, LLC | 4,385,571 | 0.620% | 99.394% |
9 | GlobalSign nv-sa | 1,460,274 | 0.206% | 99.601% |
10 | Actalis | 802,313 | 0.113% | 99.714% |
11 | Entrust | 536,517 | 0.076% | 99.790% |
12 | Asseco Data Systems S.A. (previously Unizeto Certum) | 469,826 | 0.066% | 99.856% |
13 | Buypass | 228,402 | 0.032% | 99.889% |
14 | SECOM Trust Systems CO., LTD. | 209,764 | 0.030% | 99.918% |
15 | SSL.com | 177,890 | 0.025% | 99.944% |
16 | Certainly LLC | 118,103 | 0.017% | 99.960% |
17 | Taiwan-CA Inc. (TWCA) | 50,049 | 0.007% | 99.967% |
18 | QuoVadis | 48,498 | 0.007% | 99.974% |
19 | SwissSign AG | 27,290 | 0.004% | 99.978% |
20 | ? | 22,291 | 0.003% | 99.981% |
21 | Deutsche Telekom Security GmbH | 18,025 | 0.003% | 99.984% |
22 | Chunghwa Telecom | 16,632 | 0.002% | 99.986% |
23 | Telia Company | 13,353 | 0.002% | 99.988% |
24 | D-TRUST | 11,745 | 0.002% | 99.990% |
25 | Viking Cloud, Inc. | 9,355 | 0.001% | 99.991% |
26 | Certigna | 7,715 | 0.001% | 99.992% |
27 | eMudhra Technologies Limited | 5,722 | 0.001% | 99.993% |
28 | Shanghai Electronic Certification Authority Co., Ltd. | 5,532 | 0.001% | 99.994% |
29 | China Financial Certification Authority (CFCA) | 4,223 | 0.001% | 99.994% |
30 | HARICA | 3,901 | 0.001% | 99.995% |
- | Total for all considered CAs | - | 99.995% | 99.995% |
Created
March 18, 2024 15:15
-
-
Save rmhrisk/39f266d7b0fc3a7ec62047828b98964f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
def fetch_data(url): | |
response = requests.get(url) | |
soup = BeautifulSoup(response.content, 'html.parser') | |
data = [] | |
table = soup.find("table") | |
rows = table.find_all("tr")[2:] # Skip header rows | |
for row in rows: | |
cols = row.find_all("td") | |
if cols: | |
owner = cols[0].text.strip() | |
unexpired_precerts = int(cols[4].text.strip().replace(',', '')) | |
data.append([owner, unexpired_precerts]) | |
return pd.DataFrame(data, columns=['Owner', 'Unexpired Precertificates']) | |
root_owner_url = 'https://crt.sh/cert-populations?group=RootOwner' | |
ca_owner_url = 'https://crt.sh/cert-populations?group=CAOwner' | |
root_owner_data = fetch_data(root_owner_url) | |
ca_owner_data = fetch_data(ca_owner_url) | |
# Adjust Certainly LLC and GoDaddy data | |
certainly_data = ca_owner_data[ca_owner_data['Owner'].str.contains('Certainly LLC', case=False)]['Unexpired Precertificates'].sum() | |
godaddy_indices = root_owner_data[root_owner_data['Owner'].str.contains('GoDaddy', case=False)].index | |
if not godaddy_indices.empty: | |
for idx in godaddy_indices: | |
root_owner_data.at[idx, 'Unexpired Precertificates'] -= certainly_data | |
# Now add Certainly LLC as a separate row | |
new_row = pd.DataFrame({'Owner': 'Certainly LLC', 'Unexpired Precertificates': certainly_data}, index=[0]) | |
root_owner_data = pd.concat([root_owner_data, new_row], ignore_index=True) | |
# Combine Quovadis data with DigiCert | |
quovadis_total = root_owner_data[root_owner_data['Owner'].str.contains('Quovadis')]['Unexpired Precertificates'].sum() | |
root_owner_data.loc[root_owner_data['Owner'].str.contains('DigiCert'), 'Unexpired Precertificates'] += quovadis_total | |
root_owner_data = root_owner_data[~root_owner_data['Owner'].str.contains('Quovadis')] | |
# Combine Microsoft Corporation Core Services Engineering & Operations data with Microsoft Corporation | |
microsoft_cseo_total = root_owner_data[root_owner_data['Owner'].str.contains('Microsoft Corporation Core Services Engineering & Operations')]['Unexpired Precertificates'].sum() | |
root_owner_data.loc[root_owner_data['Owner'] == 'Microsoft Corporation', 'Unexpired Precertificates'] += microsoft_cseo_total | |
root_owner_data = root_owner_data[~root_owner_data['Owner'].str.contains('Microsoft Corporation Core Services Engineering & Operations')] | |
# Sort and calculate percentages | |
root_owner_data.sort_values('Unexpired Precertificates', ascending=False, inplace=True) | |
root_owner_data['Percentage'] = (root_owner_data['Unexpired Precertificates'] / root_owner_data['Unexpired Precertificates'].sum()) * 100 | |
root_owner_data['Cumulative Percentage'] = root_owner_data['Percentage'].cumsum() | |
# Determine how many top CAs are needed to reach a cumulative percentage close to 100% | |
threshold = 99.995 # Define the threshold to reach close to 100% | |
top_cas_needed = root_owner_data[root_owner_data['Cumulative Percentage'] <= threshold] | |
# Plotting the pie chart with the needed number of CAs | |
fig, ax = plt.subplots(figsize=(12, 7)) | |
wedges, texts, autotexts = ax.pie( | |
top_cas_needed['Unexpired Precertificates'], | |
autopct=lambda pct: f'{pct:.3f}%' if pct > 1 else '', | |
startangle=90 | |
) | |
ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. | |
plt.title('CAs by Unexpired Precertificates Issuance') | |
legend_labels = [f'{owner} - {pct:.3f}%' for owner, pct in zip(top_cas_needed['Owner'], top_cas_needed['Percentage'])] | |
ax.legend(wedges, legend_labels, title="Certificate Authorities", loc="center left", bbox_to_anchor=(1, 0.5)) | |
plt.tight_layout() | |
plt.show() | |
# Generate the markdown table with the needed number of CAs | |
print("| Rank | Certificate Authority | Unexpired Precertificates | Percentage | Cumulative % |") | |
print("|------|----------------------|---------------------------|------------|--------------|") | |
rank = 1 # Start ranking at 1 | |
for _, row in top_cas_needed.iterrows(): | |
print(f"| {rank} | {row['Owner']} | {row['Unexpired Precertificates']:,} | {row['Percentage']:.3f}% | {row['Cumulative Percentage']:.3f}% |") | |
rank += 1 # Increment rank by 1 for each row | |
# Optionally, if you want to include the total for all CAs considered (not just the top 20) | |
total_percentage = top_cas_needed['Percentage'].sum() | |
print("| - | **Total for all considered CAs** | - | **{:.3f}%** | **{:.3f}%** |".format(total_percentage, total_percentage)) |
It turns out that ?
is how crt.sh
captures untrusted certificates, as such it can be safely filtered. This adds a block to do just that.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
def fetch_data(url):
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
data = []
table = soup.find("table")
rows = table.find_all("tr")[2:] # Skip header rows
for row in rows:
cols = row.find_all("td")
if cols:
owner = cols[0].text.strip()
unexpired_precerts = int(cols[4].text.strip().replace(',', ''))
data.append([owner, unexpired_precerts])
return pd.DataFrame(data, columns=['Owner', 'Unexpired Precertificates'])
root_owner_url = 'https://crt.sh/cert-populations?group=RootOwner'
ca_owner_url = 'https://crt.sh/cert-populations?group=CAOwner'
root_owner_data = fetch_data(root_owner_url)
ca_owner_data = fetch_data(ca_owner_url)
# Adjust Certainly LLC and GoDaddy data
certainly_data = ca_owner_data[ca_owner_data['Owner'].str.contains('Certainly LLC', case=False)]['Unexpired Precertificates'].sum()
godaddy_indices = root_owner_data[root_owner_data['Owner'].str.contains('GoDaddy', case=False)].index
if not godaddy_indices.empty:
for idx in godaddy_indices:
root_owner_data.at[idx, 'Unexpired Precertificates'] -= certainly_data
# Now add Certainly LLC as a separate row
new_row = pd.DataFrame({'Owner': 'Certainly LLC', 'Unexpired Precertificates': certainly_data}, index=[0])
root_owner_data = pd.concat([root_owner_data, new_row], ignore_index=True)
# Combine Quovadis data with DigiCert
quovadis_total = root_owner_data[root_owner_data['Owner'].str.contains('Quovadis', case=False)]['Unexpired Precertificates'].sum()
root_owner_data.loc[root_owner_data['Owner'].str.contains('DigiCert'), 'Unexpired Precertificates'] += quovadis_total
root_owner_data = root_owner_data[~root_owner_data['Owner'].str.contains('Quovadis', case=False)]
# Combine Microsoft Corporation Core Services Engineering & Operations data with Microsoft Corporation
microsoft_cseo_total = root_owner_data[root_owner_data['Owner'].str.contains('Microsoft Corporation Core Services Engineering & Operations')]['Unexpired Precertificates'].sum()
root_owner_data.loc[root_owner_data['Owner'] == 'Microsoft Corporation', 'Unexpired Precertificates'] += microsoft_cseo_total
root_owner_data = root_owner_data[~root_owner_data['Owner'].str.contains('Microsoft Corporation Core Services Engineering & Operations')]
# Filter out rows where 'Owner' is marked as "?" because they are not in CCADB which they should be but, in some cases, untrusted. It just dirties the report given its purpose.
root_owner_data = root_owner_data[root_owner_data['Owner'] != "?"]
ca_owner_data = ca_owner_data[ca_owner_data['Owner'] != "?"]
# Sort and calculate percentages
root_owner_data.sort_values('Unexpired Precertificates', ascending=False, inplace=True)
root_owner_data['Percentage'] = (root_owner_data['Unexpired Precertificates'] / root_owner_data['Unexpired Precertificates'].sum()) * 100
root_owner_data['Cumulative Percentage'] = root_owner_data['Percentage'].cumsum()
# Determine how many top CAs are needed to reach a cumulative percentage close to 100%
threshold = 99.995 # Define the threshold to reach close to 100%
top_cas_needed = root_owner_data[root_owner_data['Cumulative Percentage'] <= threshold]
# Plotting the pie chart with the needed number of CAs
fig, ax = plt.subplots(figsize=(12, 7))
wedges, texts, autotexts = ax.pie(
top_cas_needed['Unexpired Precertificates'],
autopct=lambda pct: f'{pct:.3f}%' if pct > 1 else '',
startangle=90
)
ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title('CAs by Unexpired Precertificates Issuance')
# Generate legend labels after removing QuoVadis
legend_labels = [f'{owner} - {pct:.3f}%' for owner, pct in zip(top_cas_needed['Owner'], top_cas_needed['Percentage']) if 'QuoVadis' not in owner]
ax.legend(wedges, legend_labels, title="Certificate Authorities", loc="center left", bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()
# Generate the markdown table with the needed number of CAs
print("| Rank | Certificate Authority | Unexpired Precertificates | Percentage | Cumulative % |")
print("|------|----------------------|---------------------------|------------|--------------|")
rank = 1 # Start ranking at 1
for _, row in top_cas_needed.iterrows():
print(f"| {rank} | {row['Owner']} | {row['Unexpired Precertificates']:,} | {row['Percentage']:.3f}% | {row['Cumulative Percentage']:.3f}% |")
rank += 1 # Increment rank by 1 for each row
total_percentage = top_cas_needed['Percentage'].sum()
print("| - | **Total for all considered CAs** | - | **{:.3f}%** | **{:.3f}%** |".format(total_percentage, total_percentage))
The updated output looks like this:

Rank | Certificate Authority | Unexpired Precertificates | Percentage | Cumulative % |
---|---|---|---|---|
1 | Internet Security Research Group | 339,096,117 | 47.911% | 47.911% |
2 | DigiCert | 81,567,944 | 11.525% | 59.435% |
3 | Sectigo | 72,068,468 | 10.183% | 69.618% |
4 | GoDaddy | 69,828,902 | 9.866% | 79.484% |
5 | Amazon Trust Services | 57,114,851 | 8.070% | 87.554% |
6 | Google Trust Services LLC | 55,991,420 | 7.911% | 95.465% |
7 | Microsoft Corporation | 23,496,018 | 3.320% | 98.784% |
8 | IdenTrust Services, LLC | 4,386,466 | 0.620% | 99.404% |
9 | GlobalSign nv-sa | 1,463,850 | 0.207% | 99.611% |
10 | Actalis | 802,443 | 0.113% | 99.724% |
11 | Entrust | 537,208 | 0.076% | 99.800% |
12 | Asseco Data Systems S.A. (previously Unizeto Certum) | 470,097 | 0.066% | 99.867% |
13 | Buypass | 228,125 | 0.032% | 99.899% |
14 | SECOM Trust Systems CO., LTD. | 209,771 | 0.030% | 99.929% |
15 | SSL.com | 177,784 | 0.025% | 99.954% |
16 | Certainly LLC | 117,480 | 0.017% | 99.970% |
17 | Taiwan-CA Inc. (TWCA) | 49,943 | 0.007% | 99.977% |
18 | SwissSign AG | 27,309 | 0.004% | 99.981% |
19 | Deutsche Telekom Security GmbH | 18,031 | 0.003% | 99.984% |
20 | Chunghwa Telecom | 16,631 | 0.002% | 99.986% |
21 | Telia Company | 13,356 | 0.002% | 99.988% |
22 | D-TRUST | 11,746 | 0.002% | 99.990% |
23 | Viking Cloud, Inc. | 9,428 | 0.001% | 99.991% |
24 | Certigna | 7,684 | 0.001% | 99.992% |
25 | eMudhra Technologies Limited | 5,724 | 0.001% | 99.993% |
26 | Shanghai Electronic Certification Authority Co., Ltd. | 5,530 | 0.001% | 99.994% |
27 | China Financial Certification Authority (CFCA) | 4,223 | 0.001% | 99.994% |
28 | HARICA | 3,901 | 0.001% | 99.995% |
- | Total for all considered CAs | - | 99.995% | 99.995% |
Rank | Certificate Authority | Unexpired Precertificates | Percentage | Cumulative % |
---|---|---|---|---|
1 | Internet Security Research Group | 363,820,787 | 50.506% | 50.506% |
2 | GoDaddy | 78,213,543 | 10.858% | 61.363% |
3 | DigiCert | 72,780,038 | 10.103% | 71.467% |
4 | Sectigo | 60,414,175 | 8.387% | 79.853% |
5 | Amazon Trust Services | 58,081,483 | 8.063% | 87.916% |
6 | Google Trust Services LLC | 53,055,699 | 7.365% | 95.281% |
7 | Microsoft Corporation | 25,462,525 | 3.535% | 98.816% |
8 | IdenTrust Services, LLC | 4,332,393 | 0.601% | 99.418% |
9 | GlobalSign nv-sa | 1,485,722 | 0.206% | 99.624% |
10 | Actalis | 811,133 | 0.113% | 99.736% |
11 | Entrust | 562,764 | 0.078% | 99.815% |
12 | Asseco Data Systems S.A. (previously Unizeto Certum) | 469,804 | 0.065% | 99.880% |
13 | Buypass | 222,349 | 0.031% | 99.911% |
14 | SECOM Trust Systems CO., LTD. | 207,286 | 0.029% | 99.939% |
15 | SSL.com | 128,007 | 0.018% | 99.957% |
16 | Certainly LLC | 76,243 | 0.011% | 99.968% |
17 | Taiwan-CA Inc. (TWCA) | 49,473 | 0.007% | 99.975% |
18 | SwissSign AG | 27,760 | 0.004% | 99.978% |
19 | Chunghwa Telecom | 23,353 | 0.003% | 99.982% |
20 | Deutsche Telekom Security GmbH | 18,157 | 0.003% | 99.984% |
21 | Telia Company | 13,753 | 0.002% | 99.986% |
22 | D-TRUST | 12,299 | 0.002% | 99.988% |
23 | Certigna | 11,452 | 0.002% | 99.989% |
24 | Viking Cloud, Inc. | 10,589 | 0.001% | 99.991% |
25 | China Financial Certification Authority (CFCA) | 6,578 | 0.001% | 99.992% |
26 | Shanghai Electronic Certification Authority Co., Ltd. | 6,103 | 0.001% | 99.993% |
27 | eMudhra Technologies Limited | 5,837 | 0.001% | 99.993% |
28 | certSIGN | 4,465 | 0.001% | 99.994% |
29 | HARICA | 3,997 | 0.001% | 99.995% |
- | Total for all considered CAs | - | 99.995% | 99.995% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is the pie chart created by this script
