Last active
June 13, 2022 12:20
-
-
Save loretoparisi/c221a9c55fb71a23ff4e7bba3b794425 to your computer and use it in GitHub Desktop.
List of English contractions from Wikipedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Inject jQuery 1.6.3 from the Google CDN into the current page's <head>.
// `var` (instead of an undeclared assignment) keeps `script` a global as before,
// works in strict mode, and tolerates the snippet being pasted more than once.
// NOTE(review): a dynamically inserted script loads asynchronously, so `$` is
// not guaranteed to be this jQuery build until the load has finished.
var script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/1.6.3/jquery.min.js";
document.head.appendChild(script);
/**
 * Serialize an object as pretty-printed JSON and trigger a browser download.
 *
 * The JSON is embedded in a `data:` URI on a temporary <a> element whose
 * `download` attribute supplies the file name; the element is then clicked
 * programmatically.
 *
 * @param {string} name - File name without extension; ".json" is appended.
 * @param {*} jsonObject - Value passed to `JSON.stringify` (2-space indent).
 */
function download(name, jsonObject) {
  const fileContents = JSON.stringify(jsonObject, null, 2);
  const link = document.createElement('a');
  // encodeURIComponent keeps quotes, newlines and non-ASCII text intact inside the URI.
  link.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(fileContents));
  link.setAttribute('download', name + '.json');
  link.click();
}
// Scrape the first ".wikitable" on the current page into a
// { contraction: expansion } map and download it as JSON.
// Requires jQuery as `$` and the `download(name, jsonObject)` helper.
// `var` keeps `df` a global (inspectable from the console) without relying on
// an undeclared assignment, which throws in strict mode.
var df = {};
$($('.wikitable')[0]).find('tr').each((index, item) => {
  // Row 0 is skipped (presumably the table's header row).
  if (index === 0) return;

  // Drop newlines, [bracketed] and (parenthesised) spans, then collapse and
  // trim whitespace. The lazy `.+?` stops at the nearest closing bracket, so
  // text sitting between two separate annotations on one line is kept.
  const normalize = (text) =>
    text.replace(/\n|\[.+?\]|\(.+?\)/g, '').replace(/\s+/g, ' ').trim();

  const cells = $(item).find('td');
  const cntrc = normalize($(cells[0]).text());
  // Rows without a usable first cell would otherwise add an empty-string key.
  if (cntrc === '') return;

  // remove links in expanded words (note: this edits the live page DOM)
  $(cells[1]).find('a').remove();
  // Keep only the first "/"-separated alternative, then the first
  // comma-separated value of it (e.g. "'s" -> "is" rather than "is, has, does, or us").
  const expnd = normalize($(cells[1]).text()).split('/')[0].trim().split(',')[0].trim();
  df[cntrc] = expnd;
});
download("english_contractions_ds", df);
(Also, shouldn't that last one be "noun are"?)
Thanks, fixed that!
@loretoparisi can you modify it per below? (Remove the <a> tags in the second <td>):
// Inject jQuery 1.6.3 from the Google CDN into the current page's <head>.
// `var` (instead of an undeclared assignment) keeps `script` a global as before,
// works in strict mode, and tolerates the snippet being pasted more than once.
// NOTE(review): a dynamically inserted script loads asynchronously, so `$` is
// not guaranteed to be this jQuery build until the load has finished.
var script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/1.6.3/jquery.min.js";
document.head.appendChild(script);
/**
 * Offer `jsonObject` to the user as a downloadable "<name>.json" file.
 *
 * The object is pretty-printed with a 2-space indent, URI-encoded into a
 * `data:` URL, and attached to a throwaway <a> element that is clicked
 * programmatically.
 *
 * @param {string} name - Base file name; ".json" is appended.
 * @param {*} jsonObject - Value handed to `JSON.stringify`.
 */
function download(name, jsonObject) {
  const serialized = JSON.stringify(jsonObject, null, 2);
  const anchor = document.createElement('a');
  const attributes = [
    ['href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(serialized)],
    ['download', name + '.json'],
  ];
  for (const [attribute, value] of attributes) {
    anchor.setAttribute(attribute, value);
  }
  anchor.click();
}
// Scrape the first ".wikitable" on the current page into a
// { contraction: expansion } map and download it as JSON.
// Requires jQuery as `$` and the `download(name, jsonObject)` helper.
// `var` keeps `df` a global (inspectable from the console) without relying on
// an undeclared assignment, which throws in strict mode.
var df = {};
$($('.wikitable')[0]).find('tr').each((index, item) => {
  // Row 0 is skipped (presumably the table's header row).
  if (index === 0) return;

  // Drop newlines, [bracketed] and (parenthesised) spans, then collapse and
  // trim whitespace. The lazy `.+?` stops at the nearest closing bracket, so
  // text sitting between two separate annotations on one line is kept.
  const normalize = (text) =>
    text.replace(/\n|\[.+?\]|\(.+?\)/g, '').replace(/\s+/g, ' ').trim();

  const cells = $(item).find('td');
  const cntrc = normalize($(cells[0]).text());
  // Rows without a usable first cell would otherwise add an empty-string key.
  if (cntrc === '') return;

  // remove links in expanded words (note: this edits the live page DOM)
  $(cells[1]).find('a').remove();
  // Keep only the first "/"-separated alternative, then the first
  // comma-separated value of it (e.g. "'s" -> "is" rather than "is, has, does, or us").
  const expnd = normalize($(cells[1]).text()).split('/')[0].trim().split(',')[0].trim();
  df[cntrc] = expnd;
});
download("english_contractions_ds", df);
@titanism fixed to your version, thank you!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
What about handling