Ad-hoc analysis of which organizations do the most work in web standards.
Data from day-to-day was used to get a list of active authors in the last 8 weeks. Conclusions up front:
- Independent contractors or otherwise unaffiliated people are large contributors.
- Google and Mozilla are the top contributors among browser vendors, roughly equally split.
- W3C staff do a lot of spec work.
- Intel is the top non-browser vendor.
Script used to count activity and map individuals to organizations
'use strict';
const fetch = require('node-fetch');
// URL of the JSON data set fetched by main(); main() reads `specs[].speclog[]`
// entries (each with an `author` and a `date`) from the parsed response.
const dataURL = 'https://foolip.github.io/day-to-day/data.json';
// Ordered list of [pattern, organization] pairs used to map an author email
// to an organization. A pattern is either an exact email string or a RegExp
// tested against the email. Order matters: the first matching pair wins, so
// exact-match exceptions must come before the broader domain patterns.
//
// NOTE(review): every exact-match key below reads '[email protected]', which
// looks like an email-obfuscation placeholder rather than real addresses --
// confirm and restore the original addresses before relying on these entries.
const ORG_PATTERNS = [
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Intel'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Microsoft'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Intel'],
  ['[email protected]', 'Adobe'],
  ['[email protected]', 'Igalia'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Igalia'],
  ['[email protected]', 'Igalia'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Google'],
  ['[email protected]', 'Mozilla'],
  ['[email protected]', 'Bocoup'],
  // Domain suffix patterns. The dot is escaped so that it only matches a
  // literal '.', not any character (e.g. '@appleXcom' must not match).
  [/@apple\.com$/, 'Apple'],
  [/@chromium\.org$/, 'Google'], // with exception above
  [/@google\.com$/, 'Google'],
  [/@intel\.com$/, 'Intel'],
  [/@mozilla\.com$/, 'Mozilla'],
  [/@w3\.org$/, 'W3C'],
];
/**
 * Looks up the organization for an author email in ORG_PATTERNS.
 *
 * Pairs are checked in list order and the first hit wins. A RegExp pattern
 * hits when it matches the email; any pattern hits when it is strictly equal
 * to the email.
 *
 * @param {string} email - Author email to classify.
 * @returns {?string} The organization of the first matching pair, or null
 *     when no pair matches.
 */
function orgFromEmail(email) {
  const hit = ORG_PATTERNS.find(([matcher]) => {
    const regexMatches = matcher instanceof RegExp && matcher.exec(email) !== null;
    return regexMatches || matcher === email;
  });
  if (hit === undefined) {
    return null;
  }
  return hit[1];
}
/**
 * Fetches the data set at dataURL, counts the distinct active dates of each
 * author across all spec logs, sums those counts per organization (as
 * resolved by orgFromEmail) and prints one `count<TAB>org` line per
 * organization to stdout. Authors without a known organization are reported
 * on stderr and counted under 'Unaffiliated'.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the data request does not complete with a successful
 *     HTTP status.
 */
async function main() {
  const response = await fetch(dataURL);
  if (!response.ok) {
    // Surface the HTTP failure directly instead of letting an error page
    // fail later as an opaque JSON parse error.
    throw new Error(
      `Failed to fetch ${dataURL}: ${response.status} ${response.statusText}`);
  }
  const data = await response.json();

  // Active days are counted as commit counts (or PR counts) are skewed a lot
  // by working mode, i.e. how changes are split and whether branches are
  // squashed. Doing work on multiple days is a decent indicator of activity.
  const authorActivity = new Map();
  for (const spec of data.specs) {
    for (const { author, date } of spec.speclog) {
      if (!authorActivity.has(author)) {
        authorActivity.set(author, new Set());
      }
      // A Set de-duplicates dates, so several entries on one day count once.
      authorActivity.get(author).add(date);
    }
  }

  // When aggregating to org activity it doesn't make as much sense to treat
  // each day as its own bucket. Instead add up "org member active days".
  // 'Unaffiliated' is seeded so that it is always printed, and printed first.
  const orgActivity = new Map([['Unaffiliated', 0]]);
  for (const [author, dates] of authorActivity.entries()) {
    if (dates.size < 3) {
      // Skip less active contributors. To include authors going far into
      // the tail will lead to a recognition bias unless all are
      // accounted for.
      continue;
    }
    let org = orgFromEmail(author);
    if (!org) {
      console.warn(`Unaffiliated author: ${author}`);
      org = 'Unaffiliated';
    }
    const count = orgActivity.get(org) || 0;
    orgActivity.set(org, count + dates.size);
  }

  // Printed in Map insertion order, i.e. the order orgs were first seen.
  for (const [org, count] of orgActivity.entries()) {
    console.log(`${count}\t${org}`);
  }
}
// Run the script. A rejection from main() is reported and turned into a
// non-zero exit code instead of being left as an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Output from this script on 2019-01-28:
57 Unaffiliated
56 Google
51 Mozilla
34 W3C
18 Intel
12 Igalia
8 Microsoft
8 Adobe