Skip to content

Instantly share code, notes, and snippets.

@skylarmt
Created January 1, 2017 12:19
Show Gist options
  • Select an option

  • Save skylarmt/c0b5e0140f6eb4f3170bc87fb4d0c295 to your computer and use it in GitHub Desktop.

Select an option

Save skylarmt/c0b5e0140f6eb4f3170bc87fb4d0c295 to your computer and use it in GitHub Desktop.
PHP script to fetch license data from GitHub for your project's dependencies. It outputs to a simple HTML document.
<?php
/*
Set $gits to an array of repository author/name combinations.
Run it with `php collectlicenses.php`.
It saves the generated HTML to licenses.html in the current working directory.
*/
/* Repositories to scan, each written as "owner/name". */
$gits = [
    "twbs/bootstrap",
    "npm/npm",
    "FortAwesome/Font-Awesome",
    "thomaspark/bootswatch",
    "composer/composer",
    "catfan/Medoo",
    "nwjs/nw.js"
];

/* Base of every raw-file URL; the repo, a branch segment and a file name are appended to it. */
$url1 = "https://raw.githubusercontent.com/";

/* Branch path segments, tried in this order. */
$url2s = [
    "/master/",
    "/gh-pages/"
];

/* Candidate file names, tried in this order on each branch. */
$endings = [
    "LICENSE",
    "LICENSE.md",
    "LICENSE.txt",
    "license",
    "license.txt",
    "package.json",
    "LICENCE"
];

/* Divider placed between license texts when a project carries more than one. */
define("LICENSE_SEP", "\n\n=================================\n\n");
/**
 * Reports whether a URL answers with HTTP status 200.
 *
 * The request is made with CURLOPT_NOBODY, so no response body is downloaded.
 * Adapted from http://stackoverflow.com/a/7684862/2534036
 *
 * @param string $url Address to probe.
 * @return bool True when the reported HTTP status code equals 200, false otherwise.
 */
function is_url_exist($url) {
    $request = curl_init($url);
    curl_setopt($request, CURLOPT_NOBODY, true);
    curl_exec($request);
    $httpStatus = curl_getinfo($request, CURLINFO_HTTP_CODE);
    curl_close($request);

    return $httpStatus == 200;
}
/**
 * Downloads the text of each SPDX license identifier from spdx.org and joins
 * the results with LICENSE_SEP (between entries only, no trailing separator).
 *
 * @param array $ids SPDX license identifiers, e.g. ["MIT", "Apache-2.0"].
 * @return string|false The joined license texts, or false when $ids is empty
 *                      or any single download fails.
 */
function fetch_spdx_texts(array $ids) {
    $texts = [];
    foreach ($ids as $id) {
        $text = file_get_contents("https://spdx.org/licenses/$id.txt");
        if ($text === false) {
            return false;
        }
        $texts[] = $text;
    }
    return count($texts) > 0 ? implode(LICENSE_SEP, $texts) : false;
}

/*
Main scraping loop.

For every repository, each branch in $url2s is combined with each file name in
$endings until one URL yields usable license text. Plain license files are
stored verbatim. A package.json is parsed for its "license" field and the
matching text is fetched from spdx.org (or from the file named by a
"SEE LICENSE IN <file>" value). A repository is added to $licenses only once a
complete license text has been obtained; otherwise an error is printed after
every candidate has been tried.
*/
echo "\nScraping licenses from GitHub...\n";
$licenses = [];
$maxtries = count($endings) * count($url2s);
foreach ($gits as $repo) {
    $tries = 0;
    $found = false;
    foreach ($url2s as $url2) {
        foreach ($endings as $end) {
            $url = $url1 . $repo . $url2 . $end;
            $handle = curl_init($url);
            curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
            /* Get the HTML or whatever is linked in $url. */
            $response = curl_exec($handle);
            /* Check for 404 (file not found). */
            $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
            /* Close immediately so none of the continue/break paths below can leak the handle. */
            curl_close($handle);

            if ($httpCode != 200) {
                $tries++;
                echo "Warning: got http code $httpCode for repo $repo (attempt $tries/$maxtries)\n";
                continue;
            }

            if ($end != "package.json") {
                /* A plain license file: keep its text as-is. */
                $licenses[$repo] = $response;
                $found = true;
                break;
            }

            $json = json_decode($response, true);
            if (!is_array($json) || !isset($json['license'])) {
                $tries++;
                echo "Warning: no license data found in package.json for repo $repo (attempt $tries/$maxtries)\n";
                continue;
            }

            $lic = $json['license'];
            $label = "";
            $text = false;
            if (is_array($lic)) {
                /* Legacy forms: a single {type, url} object or a list of such objects. */
                $entries = isset($lic['type']) ? [$lic] : $lic;
                $ids = [];
                foreach ($entries as $entry) {
                    if (is_array($entry) && isset($entry['type'])) {
                        $ids[] = $entry['type'];
                    }
                }
                $label = implode(" AND ", $ids);
                $text = fetch_spdx_texts($ids);
            } elseif (is_string($lic)) {
                $label = $lic;
                if (preg_match("/\(.+ AND .+\)/", $lic)) {
                    /* SPDX expression such as "(MIT AND CC-BY-3.0)": fetch every part. */
                    $label = str_replace(["(", ")"], "", $lic);
                    $text = fetch_spdx_texts(explode(" AND ", $label));
                } elseif (preg_match("/SEE LICENSE IN .+/", $lic)) {
                    /* The license lives in another file on the same branch of the repository. */
                    $text = file_get_contents($url1 . $repo . $url2 . str_replace("SEE LICENSE IN ", "", $lic));
                } elseif (is_url_exist("https://spdx.org/licenses/$lic.txt")) {
                    $text = fetch_spdx_texts([$lic]);
                }
            }

            if ($text === false) {
                $tries++;
                echo "Warning: problem parsing package.json for repo $repo (attempt $tries/$maxtries)\n";
                continue;
            }

            /* Fill in the SPDX template placeholders from the package metadata. */
            $holder = null;
            if (isset($json['author'])) {
                $author = $json['author'];
                if (is_array($author) && isset($author['name'])) {
                    $holder = $author['name'];
                } elseif (is_string($author)) {
                    /* "Name <email> (url)" form: keep only the name part. */
                    $holder = explode(" <", $author)[0];
                }
            }
            if ($holder !== null) {
                $text = str_replace("<copyright holders>", $holder, $text);
            }
            $text = str_replace("<year>", date('Y'), $text);

            echo "Warning: extracted license ($label) for $repo from a metadata file. Please hand-edit and correct any mistakes.\n";
            $licenses[$repo] = $text;
            $found = true;
            break;
        }
        if ($found) {
            break;
        }
    }
    if (!$found) {
        echo "Error: no license file found for repo $repo.\n";
    }
}
/*
Render the collected licenses as a standalone HTML page and write it to
licenses.html. Repository names and license texts come from remote servers,
so both are HTML-escaped before being placed in the page; without that,
placeholders such as <year> or e-mail addresses in angle brackets would be
parsed as tags and vanish from the rendered output.
*/
$out = "<!DOCTYPE html>
<html>
<head>
<meta charset=\"UTF-8\">
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">
<title>Licenses</title>
<style>
pre, p {
white-space: pre-wrap;
word-wrap: break-word;
max-width: 600px;
}
</style>
</head>
<body>
<h1>Licenses</h1>
<p>This application contains open-source code from some or all of these projects:</p>";
foreach ($licenses as $repo => $license) {
    /* ENT_SUBSTITUTE keeps the text when it contains invalid UTF-8 instead of returning an empty string. */
    $safeRepo = htmlspecialchars((string) $repo, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $safeLicense = htmlspecialchars((string) $license, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $out .= "
<div>
<h2>$safeRepo</h2>
<pre>$safeLicense</pre>
</div>";
}
$out .= " </body>
</html>";
/* Report a failed write instead of claiming success. */
if (file_put_contents('licenses.html', $out) === false) {
    echo "\nError: could not write licenses.html\n";
    exit(1);
}
echo "\nDone!\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment