Skip to content

Instantly share code, notes, and snippets.

@donpdonp
Last active May 12, 2024 09:56
Show Gist options
  • Save donpdonp/4ad3bb1343d17d3db9ce110b6ea78330 to your computer and use it in GitHub Desktop.
Save donpdonp/4ad3bb1343d17d3db9ce110b6ea78330 to your computer and use it in GitHub Desktop.
gluon wikipedia
// Script entry expression: an uninvoked function that, when called,
// runs setup() and then hands back this script's descriptor.
// NOTE(review): presumably the embedding host evaluates the file and calls
// this function -- confirm against the host's loader.
(function () {
  // one-time initialisation first
  setup();
  // the descriptor only carries the script's name
  var descriptor = {name: 'wikipedia'};
  return descriptor;
})
/**
 * Initialisation hook called by the script's entry function.
 * Intentionally empty: this script has nothing to initialise.
 */
function setup () {
  // nothing to do
}
/**
 * Message handler: answers `!wikipedia <term>` channel messages.
 *
 * For an `irc.privmsg` message whose text starts with `!wikipedia` followed by
 * a search term, logs the request to the admin channel, looks the term up with
 * wikipedia_lookup() and, when a description comes back, says it in the
 * channel the request came from.
 *
 * A command without a usable term (`!wikipedia`, `!wikipedia   `,
 * `!wikipediafoo`) is ignored; previously an undefined term was passed on and
 * crashed the lookup.
 *
 * @param {Object} msg  Message with `method` and, for privmsg,
 *                      `params.message`, `params.nick` and `params.channel`.
 */
function go (msg) {
  if (msg.method !== 'irc.privmsg') {
    return
  }
  var cmd_match = /^!wikipedia(\s+(.+)\s*$)?/.exec(msg.params.message)
  if (!cmd_match) {
    // trynlp(msg)
    return
  }
  // The greedy (.+) also swallows trailing whitespace, so trim it off here.
  var term = cmd_match[2] ? cmd_match[2].trim() : ''
  if (!term) {
    return
  }
  bot.say(bot.admin_channel, msg.params.nick + ' wikipedia lookup: ' + term)
  var plain = wikipedia_lookup(term)
  if (plain) {
    bot.say(msg.params.channel, plain)
  }
}
/**
 * Experimental: pick a noun out of a chat message and look it up.
 *
 * Only messages of at least three space-separated words are considered. The
 * noun returned by nlp() must be longer than two characters; it and any
 * resulting description are reported to the '#pdxbots' channel.
 *
 * nlp() returns undefined when it finds no noun, so that case is skipped
 * instead of failing on `sub.length`.
 *
 * @param {Object} msg  Message with `params.message` and `params.channel`.
 */
function trynlp (msg) {
  if (msg.params.message.split(' ').length < 3) {
    return
  }
  var sub = nlp(msg.params.message)
  if (!sub || sub.length <= 2) {
    return
  }
  bot.say('#pdxbots', 'I heard a noun from ' + msg.params.channel + ': ' + sub)
  var plain = wikipedia_lookup(sub)
  if (plain) {
    bot.say('#pdxbots', plain)
  }
}
/**
 * Look a term up on Wikipedia and return its short description.
 *
 * Steps:
 *  1. Fetch the page for the titleized term via wikipage().
 *  2. If the API reports `missingtitle`, run wikisearch() and fetch the first
 *     hit instead.
 *  3. If the rendered page is a redirect stub (contains a `redirectText`
 *     list), fetch the redirect target.
 *  4. Decode HTML entities and extract the description with simpleparse().
 *
 * Progress and failures are reported to the admin channel. Every fetch is
 * checked before use: the original dereferenced missing responses and
 * referenced an undeclared `html` variable on the "no data" path.
 *
 * @param {string} word  Term to look up.
 * @returns {string|undefined} The description, or undefined when nothing
 *                             usable was found.
 */
function wikipedia_lookup (word) {
  // Rendered HTML of a parse response flattened onto one line, or undefined
  // when the response carries no parse payload.
  function page_text (page) {
    if (page && page.parse && page.parse.text && typeof page.parse.text['*'] === 'string') {
      return page.parse.text['*'].replace(/[\r\n]/g, '')
    }
  }

  var data = wikipage(titleize(word))
  if (!data) {
    bot.say(bot.admin_channel, 'wikipedia: no response for ' + word)
    return
  }

  if (data.error && data.error.code == 'missingtitle') {
    bot.say(bot.admin_channel, 'wikipedia: title match failed for ' + word)
    var srch = wikisearch(word)
    var info = srch && srch.query && srch.query.searchinfo
    if (info) {
      bot.say(bot.admin_channel, 'wikipedia: search hits ' + info.totalhits)
      var hits = srch.query.search
      if (info.totalhits > 0 && hits && hits.length > 0) {
        word = hits[0].title
        bot.say(bot.admin_channel, 'wikipedia: search first ' + word)
        data = wikipage(word)
        if (!data) {
          bot.say(bot.admin_channel, 'wikipedia: no response for ' + word)
          return
        }
        if (!data.parse && data.error) {
          bot.say(bot.admin_channel, 'wikipedia: wikipage() ' + data.error.code)
        }
      }
    }
  }

  var text = page_text(data)
  if (text) {
    var recapital = /ul class="redirectText"><li><a href="\/wiki\/([^"]+)"/.exec(text)
    if (recapital) {
      bot.say(bot.admin_channel, 'wikipedia: recapital ' + word + ' to ' + recapital[1])
      // The href is percent-encoded and wikipage() encodes its argument again,
      // so decode first; keep the raw value if it is not valid encoding.
      var target = recapital[1]
      try {
        target = decodeURIComponent(target)
      } catch (err) {
        target = recapital[1]
      }
      text = page_text(wikipage(target))
    }
  }

  if (text) {
    return simpleparse(decodeEntities(text))// + ' [' + data.parse.title + ']'
  }
}
/**
 * Pull the short description out of rendered article HTML.
 *
 * Finds the first `shortdescription` div that follows the `mw-parser-output`
 * div, strips any tags inside it and returns the leading sentence. When no
 * sentence boundary is found the whole stripped description is returned.
 * If the description div is missing, a note with the first 80 characters of
 * the input goes to the admin channel and nothing is returned.
 *
 * @param {string} text  Single-line HTML.
 * @returns {string|undefined}
 */
function simpleparse (text) {
  var found = /div class=".*?mw-parser-output".*?<div class=".*?shortdescription.*?>(.*?)<\/div>/.exec(text)
  if (!found) {
    bot.say(bot.admin_channel, 'wikipedia: no match in ' + text.substr(0, 80))
    return
  }
  var stripped = found[1].replace(/<[^>]*>/g, '')
  // A sentence ends at the first period that does not directly follow a
  // capital letter, so initials such as "R." do not cut it short.
  var lead = /(^(\([^)]+\)|[^()]+?)[^A-Z]\.)/.exec(stripped)
  return lead ? lead[1] : stripped
}
/**
 * Fetch a page through the Wikipedia `action=parse` JSON API.
 *
 * `http` and `bot` are globals that are not defined in this file.
 *
 * @param {string} word  Page title; it is URL-encoded here.
 * @returns {Object|undefined} The decoded JSON response, or undefined when the
 *          request returned nothing or the body was not valid JSON.
 */
function wikipage (word) {
  var url = 'https://en.wikipedia.org/w/api.php?action=parse&page=' + encodeURIComponent(word) + '&format=json'
  var html = http.get(url)
  if (!html) {
    return
  }
  try {
    return JSON.parse(html)
  } catch (err) {
    // A non-JSON body (e.g. an HTML error page) is reported and treated like
    // an empty response rather than thrown at the caller.
    bot.say(bot.admin_channel, 'wikipedia: wikipage() bad json for ' + word + ': ' + String(html).substr(0, 80))
  }
}
/**
 * Run a full-text search through the Wikipedia `action=query&list=search`
 * JSON API.
 *
 * `http` and `bot` are globals that are not defined in this file.
 *
 * @param {string} word  Search text; it is URL-encoded here.
 * @returns {Object|undefined} The decoded JSON response, or undefined when the
 *          request returned nothing or the body was not valid JSON.
 */
function wikisearch (word) {
  var url = 'https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=' + encodeURIComponent(word) + '&format=json'
  var html = http.get(url)
  if (!html) {
    return
  }
  try {
    return JSON.parse(html)
  } catch (err) {
    // A non-JSON body (e.g. an HTML error page) is reported and treated like
    // an empty response rather than thrown at the caller.
    bot.say(bot.admin_channel, 'wikipedia: wikisearch() bad json for ' + word + ': ' + String(html).substr(0, 80))
  }
}
/**
 * Turn a phrase into a page-title style string: the first letter of each
 * space-separated word is upper-cased and the words are joined with '_'.
 *
 * Empty words (from leading, trailing or repeated spaces, or an empty input)
 * are dropped; previously they raised a TypeError.
 *
 * @param {string} word  Phrase such as 'new york'.
 * @returns {string} e.g. 'New_York'; '' when the input has no words.
 */
function titleize (word) {
  var parts = word.split(' ').filter(function (part) {
    return part.length > 0
  })
  return parts.map(function (part) {
    return part.charAt(0).toUpperCase() + part.substring(1)
  }).join('_')
}
/**
 * Ask the Stanford parser web page to parse a sentence and return the first
 * singular noun (an `(NN word)` node) from its parse tree.
 *
 * `http` is a global that is not defined in this file. An empty response is
 * treated as "no noun", matching how wikipage()/wikisearch() guard the same
 * call, instead of failing on `.replace`.
 *
 * @param {string} words  Sentence to analyse; it is URL-encoded here.
 * @returns {string|undefined} The noun, or undefined when the request returned
 *          nothing, the page has no parse block, or the tree has no NN node.
 */
function nlp (words) {
  var url = 'http://nlp.stanford.edu:8080/parser/index.jsp?query=' + encodeURIComponent(words)
  var html = http.get(url)
  if (!html) {
    return
  }
  // Flatten to one line so the patterns below can span the whole parse block.
  html = html.replace(/[\r\n]/g, '')
  var subj = /<pre id="parse" class="spacingFree">([^<]+)</.exec(html)
  if (!subj) {
    return
  }
  var nn = /\(NN ([^)]+)\)/.exec(subj[1])
  if (nn) {
    return nn[1]
  }
}
/**
 * Debug helper: runs `text` through parser.parse() and reports every start
 * tag to the admin channel. The other parser events are ignored.
 *
 * @param {string} text  Markup to walk.
 * @returns {undefined} `sentance` is never assigned, so the result is always
 *          undefined.
 */
function wikiparse (text) {
  var handlers = {
    startElement: function (tagName, attrs) {
      bot.say(bot.admin_channel, 'parse start: ' + tagName)
    },
    endElement: function (tagName) {},
    characters: function (chars) {},
    comment: function (body) {}
  }
  var sentance
  parser.parse(text, handlers)
  return sentance
}
/**
 * Minimal event-driven HTML tokenizer.
 *
 * parse(s, oHandler) walks the string and calls, on the handler:
 *   startElement(name, attrs)  attrs is an array of {name, value}
 *   endElement(name)
 *   characters(text)
 *   comment(text)
 *
 * Changes from the earlier version of this object:
 *  - a '<' that starts no valid tag or comment is emitted as text, so the
 *    loop always advances (it used to spin forever);
 *  - single-quoted attribute values are returned (the capture group holding
 *    them was never passed on);
 *  - tags are only recognised at the current position; the /m flag on the
 *    patterns could previously match at a later line and drop the text
 *    before it;
 *  - the deprecated RegExp.lastMatch / RegExp.rightContext statics are no
 *    longer used.
 */
var parser = {
  // Unused by the methods below; parse() keeps its handler in `contentHandler`.
  handler: null,
  // regexps
  startTagRe: /^<([^>\s\/]+)((\s+[^=>\s]+(\s*=\s*((\"[^"]*\")|(\'[^']*\')|[^>\s]+))?)*)\s*\/?\s*>/m,
  endTagRe: /^<\/([^>\s]+)[^>]*>/m,
  attrRe: /([^=\s]+)(\s*=\s*((\"([^"]*)\")|(\'([^']*)\')|[^>\s]+))?/gm,

  /**
   * Tokenize `s`, reporting events to the handler.
   * @param {string} s          Markup to tokenize.
   * @param {Object} [oHandler] Event handler; when omitted, the handler from
   *                            a previous call is reused.
   */
  parse: function (s, oHandler) {
    if (oHandler) {
      this.contentHandler = oHandler
    }
    var match, index, consumed
    while (s.length > 0) {
      consumed = false
      if (s.substring(0, 4) == '<!--') {
        // Comment
        index = s.indexOf('-->')
        if (index != -1) {
          this.contentHandler.comment(s.substring(4, index))
          s = s.substring(index + 3)
          consumed = true
        }
      } else if (s.substring(0, 2) == '</') {
        // end tag
        match = this.endTagRe.exec(s)
        if (match && match.index === 0) {
          this.parseEndTag(match[0], match[1])
          s = s.substring(match[0].length)
          consumed = true
        }
      } else if (s.charAt(0) == '<') {
        // start tag
        match = this.startTagRe.exec(s)
        if (match && match.index === 0) {
          this.parseStartTag(match[0], match[1], match[2])
          s = s.substring(match[0].length)
          consumed = true
        }
      }
      if (!consumed) {
        // Plain text up to the next '<'. Searching from index 1 steps over a
        // leading '<' that failed to start any markup, so progress is certain.
        index = s.indexOf('<', 1)
        if (index == -1) {
          this.contentHandler.characters(s)
          s = ''
        } else {
          this.contentHandler.characters(s.substring(0, index))
          s = s.substring(index)
        }
      }
    }
  },

  /** Report a start tag; `sRest` is the raw attribute text after the name. */
  parseStartTag: function (sTag, sTagName, sRest) {
    var attrs = this.parseAttributes(sTagName, sRest)
    this.contentHandler.startElement(sTagName, attrs)
  },

  /** Report an end tag. */
  parseEndTag: function (sTag, sTagName) {
    this.contentHandler.endElement(sTagName)
  },

  /**
   * Split raw attribute text into an array of {name, value} objects.
   * The callback parameters a1..a7 are attrRe's seven capture groups.
   */
  parseAttributes: function (sTagName, s) {
    var oThis = this
    var attrs = []
    s.replace(this.attrRe, function (a0, a1, a2, a3, a4, a5, a6, a7) {
      attrs.push(oThis.parseAttribute(sTagName, a0, a1, a2, a3, a4, a5, a6, a7))
    })
    return attrs
  },

  /**
   * Build one {name, value} pair from attrRe's capture groups.
   * @param {string} sTagName    Owning tag name (not used here).
   * @param {string} sAttribute  Whole attribute text.
   * @param {string} sName       Attribute name.
   * @param {string} [sAssign]   The `=value` part, absent for bare attributes.
   * @param {string} [sRaw]      Value as written (quotes included if any).
   * @param {string} [sDq]       Double-quoted value with its quotes.
   * @param {string} [sDqInner]  Double-quoted value without quotes.
   * @param {string} [sSq]       Single-quoted value with its quotes.
   * @param {string} [sSqInner]  Single-quoted value without quotes.
   * @returns {{name: string, value: (string|null)}} value is null for a bare
   *          attribute and '' for an explicitly empty one.
   */
  parseAttribute: function (sTagName, sAttribute, sName, sAssign, sRaw, sDq, sDqInner, sSq, sSqInner) {
    var value = ''
    if (sSq) {
      value = sSqInner
    } else if (sDq) {
      value = sDqInner
    } else if (sAssign) {
      value = sRaw
    }
    var empty = !value && !sAssign
    return {name: sName, value: empty ? null : value}
  }
}
/**
 * Decode a small set of HTML entities in a string.
 *
 * Handles the named entities nbsp, amp, quot, lt and gt plus decimal
 * (`&#65;`) and hexadecimal (`&#x41;`) character references. Other entities
 * are left as they are.
 *
 * Everything is decoded in a single pass, so text produced by one replacement
 * is never decoded again (`&amp;#39;` becomes `&#39;`, not `'`). Code points
 * above U+FFFF are emitted as a surrogate pair; references beyond U+10FFFF
 * are left untouched.
 *
 * @param {string} encodedString  Text containing entities.
 * @returns {string} The decoded text.
 */
function decodeEntities(encodedString) {
    var translate_re = /&(?:(nbsp|amp|quot|lt|gt)|#(\d+)|#[xX]([0-9a-fA-F]+));/g;
    var translate = {
        "nbsp":" ",
        "amp" : "&",
        "quot": "\"",
        "lt"  : "<",
        "gt"  : ">"
    };
    return encodedString.replace(translate_re, function(match, entity, decStr, hexStr) {
        if (entity) {
            return translate[entity];
        }
        var num = decStr ? parseInt(decStr, 10) : parseInt(hexStr, 16);
        if (num > 0x10FFFF) {
            // not a Unicode code point; keep the reference as written
            return match;
        }
        if (num <= 0xFFFF) {
            return String.fromCharCode(num);
        }
        // Astral code point: String.fromCharCode would truncate it to 16 bits,
        // so build the UTF-16 surrogate pair by hand.
        num -= 0x10000;
        return String.fromCharCode(0xD800 + (num >> 10), 0xDC00 + (num & 0x3FF));
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment