Created
June 30, 2017 16:01
-
-
Save aylarov/dc8017f9935bfad75900e6681ffd7150 to your computer and use it in GitHub Desktop.
X-Wiki parser for VoxEngine
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * X-Wiki parser: fetches a Wikipedia article and returns its infobox as JSON.
 */
/**
 * Builds a random string of upper-case base-36 characters (0-9, A-Z).
 *
 * @param {number} [length=10] Desired number of characters. Negative values
 *   are treated as their absolute value; `undefined`, `NaN`, non-finite and
 *   other unusable inputs fall back to 10.
 * @returns {string} Random string holding `length` characters.
 * @throws {Error} When `length` is exactly 0.
 */
var generate = function(length) {
  // Validate before doing any work. The message text is kept verbatim in case
  // callers match on it.
  if (length === 0) {
    throw new Error('Lenght need to be an integer different than 0.');
  }

  var size = Math.abs(length) || 10;
  if (!isFinite(size)) {
    // An unbounded request can never be satisfied; use the default instead.
    size = 10;
  }

  // A single Math.random() chunk may contribute only a few characters
  // (0.5 becomes "i" in base 36), so keep appending until enough are collected.
  var output = '';
  while (output.length < size) {
    output += Math.random().toString(36).slice(2).toUpperCase();
  }
  return output.slice(0, size);
};
// Random placeholder token, created once when the script loads. The infobox
// parser substitutes it for '|' inside [[...]] and {{...}} spans so that those
// pipes survive the split of the infobox into fields, and swaps it back after.
var separator = generate();
/**
 * Fetches a Wikipedia article and converts its first infobox into a plain
 * object.
 *
 * The wikitext is requested from the MediaWiki `action=query` API through
 * `Net.httpRequest`, and every requested URL is logged with `Logger.write`.
 * Pages whose wikitext starts with `#REDIRECT [[Target]]` are followed, up to
 * a fixed number of hops.
 *
 * Each named infobox parameter becomes a key of the result. Its value is:
 *   - `{type: 'text', value: string}` for plain text,
 *   - `{type: 'link' | 'image', text: string, url: string}` for a single
 *     `[[...]]` link, or
 *   - an array mixing both shapes when the value has several parts.
 *
 * @param {string} page Title of the article to load.
 * @param {string} language Sub-domain of wikipedia.org to query, e.g. 'en'.
 * @param {function} cb Invoked as `cb(null, infobox)` on success, as
 *   `cb(httpCode)` when the response code is not 200, and as `cb(error)` for
 *   every other failure (unparsable response, no infobox, too many redirects).
 */
var WikiInfobox = function(page, language, cb) {
  var MAX_REDIRECTS = 5;
  var wikiURL = 'http://' + language + '.wikipedia.org/wiki/';

  // Returns the positions of `target` that are not nested inside a
  // [[...]] link or a {{...}} template.
  var topLevelIndexes = function(text, target) {
    var indexes = [];
    var depth = 0;
    for (var i = 0; i < text.length; i++) {
      var pair = text.slice(i, i + 2);
      if (pair === '[[' || pair === '{{') {
        depth++;
        i++;
      } else if (pair === ']]' || pair === '}}') {
        // Clamp so a stray closer cannot hide the rest of the text.
        if (depth > 0) {
          depth--;
        }
        i++;
      } else if (depth === 0 && text.charAt(i) === target) {
        indexes.push(i);
      }
    }
    return indexes;
  };

  // Splits `text` on every top-level occurrence of `delimiter`.
  var splitTopLevel = function(text, delimiter) {
    var pieces = [];
    var from = 0;
    topLevelIndexes(text, delimiter).forEach(function(at) {
      pieces.push(text.slice(from, at));
      from = at + 1;
    });
    pieces.push(text.slice(from));
    return pieces;
  };

  // Returns the index of the brace that balances the leading "{{" of `text`,
  // or -1 when the braces never balance.
  var findClosingBrace = function(text) {
    var depth = 0;
    for (var i = 0; i < text.length; i++) {
      var ch = text.charAt(i);
      if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
      }
      if (depth === 0 && i > 0) {
        return i;
      }
    }
    return -1;
  };

  // True when a text fragment between links is worth keeping: it has visible
  // characters and is not just separator punctuation.
  // NOTE(review): the pattern requires exactly one trailing whitespace
  // character, so "," without a following space is still kept as text.
  // `\s*$` may have been intended; left unchanged to preserve output.
  var isMeaningfulText = function(text) {
    return /\S/.test(text) && !/^\s*[\.\,\:]*\s$/.test(text);
  };

  // Converts the inside of a [[...]] link into a link/image descriptor.
  var toLink = function(target) {
    var isImage = target.indexOf('File:') > -1 ||
      target.indexOf('Image:') > -1;
    var pieces = target.split('|');
    return {
      type: isImage ? 'image' : 'link',
      text: pieces.length > 1 ? pieces[1] : pieces[0],
      url: wikiURL + pieces[0]
    };
  };

  // Turns one raw parameter value into a text/link descriptor or an array of
  // descriptors.
  var parseValue = function(value) {
    var linkPattern = /\[\[(.*?)\]\]/g;
    var parts = [];
    var cursor = 0;
    var match = linkPattern.exec(value);
    // Walk the matches by position so a link that occurs twice is handled
    // like any other.
    while (match !== null) {
      var before = value.slice(cursor, match.index);
      if (isMeaningfulText(before)) {
        parts.push({type: 'text', value: before});
      }
      parts.push(toLink(match[1]));
      cursor = match.index + match[0].length;
      match = linkPattern.exec(value);
    }
    var rest = value.slice(cursor);
    if (isMeaningfulText(rest)) {
      parts.push({type: 'text', value: rest});
    }
    if (parts.length === 0) {
      return {type: 'text', value: value};
    }
    return parts.length === 1 ? parts[0] : parts;
  };

  // Extracts the first infobox of `wikitext` as an object, or returns null
  // when the text contains no infobox.
  var parseInfobox = function(wikitext) {
    var startMatch = wikitext.match(/\{\{\s*[Ii]nfobox/);
    if (!startMatch) {
      return null;
    }
    var body = wikitext.slice(startMatch.index);
    var closeAt = findClosingBrace(body);
    // Drop the opening "{{" and, when the template is balanced, the closing
    // "}}" so the braces do not leak into the last field.
    var inner = closeAt === -1 ? body.slice(2) : body.slice(2, closeAt - 1);
    inner = inner.replace(/\n/g, ' ');

    var fields = splitTopLevel(inner, '|');
    fields.shift(); // the first piece is the template name, not a parameter

    var output = {};
    fields.forEach(function(field) {
      // Split on the first top-level "=" only, so values that contain "="
      // (URLs, nested templates) stay intact.
      var equalsAt = topLevelIndexes(field, '=')[0];
      if (equalsAt === undefined) {
        return; // positional or malformed parameter: nothing to name it by
      }
      var key = field.slice(0, equalsAt).trim();
      var value = field.slice(equalsAt + 1).trim();
      output[key] = parseValue(value);
    });
    return output;
  };

  // Pulls the wikitext of the first returned page out of the API response.
  var extractWikitext = function(responseText) {
    var pages = JSON.parse(responseText).query.pages;
    var pageId = Object.keys(pages)[0];
    return pages[pageId].revisions[0]['*'];
  };

  // Requests `title` and reports the outcome through `cb`, following at most
  // `redirectsLeft` redirects.
  var fetchPage = function(title, redirectsLeft) {
    var apiURL = 'http://' + language + '.wikipedia.org/w/api.php?format' +
      '=json&action=query&prop=revisions&rvprop=content&titles=' +
      encodeURIComponent(title);
    Logger.write("Requested URL: " + apiURL);

    Net.httpRequest(apiURL, function(response) {
      if (Number(response.code) !== 200) {
        cb(response.code);
        return;
      }

      var redirectTarget = null;
      var infobox = null;
      try {
        var wikitext = extractWikitext(response.text);
        var redirect = wikitext.match(/^\s*#REDIRECT\s*:?\s*\[\[(.+?)\]\]/i);
        if (redirect) {
          redirectTarget = redirect[1];
        } else {
          infobox = parseInfobox(wikitext);
        }
      } catch (err) {
        cb(err);
        return;
      }

      // `cb` is only invoked outside the try block so that an exception
      // thrown by the callback itself is never reported back to it.
      if (redirectTarget !== null) {
        if (redirectsLeft <= 0) {
          cb(new Error('Too many redirects!'));
          return;
        }
        fetchPage(redirectTarget, redirectsLeft - 1);
        return;
      }
      if (infobox === null) {
        cb(new Error('No infobox found!'));
        return;
      }
      cb(null, infobox);
    });
  };

  // Every helper is defined above this point, so the request callback may run
  // at any time, including synchronously.
  fetchPage(page, MAX_REDIRECTS);
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment