Last active
October 7, 2015 16:03
-
-
Save subhojit777/5df702f24d4154c6da64 to your computer and use it in GitHub Desktop.
Parse YAML files from https://github.com/hechoendrupal/DrupalConsole/tree/master/config/translations/en and export them to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Downloads every translation YAML file listed in the DrupalConsole GitHub
// directory page, saves each one in the current working directory and appends
// the output of ./parse_yaml.sh for that file to export.csv.
//
// BEWARE this script is going to create lots of junk files.
// Thanks to http://htmlparsing.com/php.html
$url = "https://github.com/hechoendrupal/DrupalConsole/tree/master/config/translations/en";
$raw_base_url = "https://raw.githubusercontent.com/hechoendrupal/DrupalConsole/master/config/translations/en";
$timeout = 5;

// Fetch the HTML of the directory listing.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$html = curl_exec($ch);
$curl_error = curl_error($ch);
curl_close($ch);

// Stop early when the listing could not be fetched; there is nothing to parse.
if ($html === false || $html === '') {
  file_put_contents('php://stderr', "Failed to fetch $url: $curl_error\n");
  exit(1);
}

// Real-world HTML is rarely valid for libxml; collect parser errors internally
// instead of silencing every error with the @ operator.
$previous_libxml_setting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

$xpath = new DOMXpath($dom);

// Fetch all YAML file rows from the table.
// Note that this XPath query may not work in future.
// XPath query strictly depends on Github's markup.
$rows = $xpath->query('//table[contains(@class, "files")]/tbody/tr[@class="js-navigation-item"]');

foreach ($rows as $node) {
  // Fetch the contents of YAML files from every row.
  // Parse them and push them in CSV file.
  // Note - CSV file type is not mandatory, you can even use txt.
  // The query is evaluated relative to the current row (context node).
  foreach ($xpath->query('td[@class="content"]/span/a', $node) as $link) {
    // The link text comes from a remote page, so treat it as untrusted:
    // keep only the final path component so it cannot escape the current
    // directory when used as a local file name.
    $file_name = basename(trim($link->nodeValue));
    if ($file_name === '' || $file_name === '.' || $file_name === '..') {
      continue;
    }

    $yml = file_get_contents($raw_base_url . '/' . rawurlencode($file_name));
    if ($yml === false) {
      file_put_contents('php://stderr', "Skipping $file_name: download failed\n");
      continue;
    }

    if (file_put_contents($file_name, $yml) === false) {
      file_put_contents('php://stderr', "Skipping $file_name: could not write local copy\n");
      continue;
    }

    // Make sure you got this https://gist.github.com/subhojit777/2df7dae5df9bce9ace95
    // escapeshellarg() prevents the scraped name from injecting shell syntax.
    shell_exec('./parse_yaml.sh ' . escapeshellarg($file_name) . ' >> export.csv');
  }
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment