Last active
June 17, 2023 01:49
-
-
Save loretoparisi/db70e9b91c7f2363a8dc9ecd80d58ce6 to your computer and use it in GitHub Desktop.
List of English contractions from Wikipedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"ain't": "am not / is not / are not / has not / have not / did not", | |
"amn't": "am not", | |
"aren't": "are not", | |
"can't": "cannot", | |
"'cause": "because", | |
"could've": "could have", | |
"couldn't": "could not", | |
"couldn't've": "could not have", | |
"daren't": "dare not / dared not", | |
"daresn't": "dare not", | |
"dasn't": "dare not", | |
"didn't": "did not", | |
"doesn't": "does not", | |
"don't": "do not / does not", | |
"d'ye": "do you / did you", | |
"e'er": "ever", | |
"everybody's": "everybody is", | |
"everyone's": "everyone is", | |
"finna": "fixing to / going to", | |
"g'day": "good day", | |
"gimme": "give me", | |
"giv'n": "given", | |
"gonna": "going to", | |
"gon't": "go not", | |
"gotta": "got to", | |
"hadn't": "had not", | |
"hasn't": "has not", | |
"haven't": "have not", | |
"he'd": "he had / he would", | |
"he'll": "he shall / he will", | |
"he's": "he has / he is", | |
"he've": "he have", | |
"how'd": "how did / how would", | |
"howdy": "how do you do / how do you fare", | |
"how'll": "how will", | |
"how're": "how are", | |
"how's": "how has / how is / how does", | |
"I'd": "I had / I would", | |
"I'd've": "I would have", | |
"I'll": "I shall / I will", | |
"I'm": "I am", | |
"I'm'a": "I am about to", | |
"I'm'o": "I am going to", | |
"innit": "is it not", | |
"I've": "I have", | |
"isn't": "is not", | |
"it'd": "it would", | |
"it'll": "it shall / it will", | |
"it's": "it has / it is", | |
"let's": "let us", | |
"ma'am": "madam", | |
"mayn't": "may not", | |
"may've": "may have", | |
"methinks": "me thinks", | |
"mightn't": "might not", | |
"might've": "might have", | |
"mustn't": "must not", | |
"mustn't've": "must not have", | |
"must've": "must have", | |
"needn't": "need not", | |
"ne'er": "never", | |
"o'clock": "of the clock", | |
"o'er": "over", | |
"ol'": "old", | |
"oughtn't": "ought not", | |
"'s": "is, has, does, or us", | |
"shalln't": "shall not", | |
"shan't": "shall not", | |
"she'd": "she had / she would", | |
"she'll": "she shall / she will", | |
"she's": "she has / she is", | |
"should've": "should have", | |
"shouldn't": "should not", | |
"shouldn't've": "should not have", | |
"somebody's": "somebody has / somebody is", | |
"someone's": "someone has / someone is", | |
"something's": "something has / something is", | |
"so're": "so are", | |
"that'll": "that shall / that will", | |
"that're": "that are", | |
"that's": "that has / that is", | |
"that'd": "that would / that had", | |
"there'd": "there had / there would", | |
"there'll": "there shall / there will", | |
"there're": "there are", | |
"there's": "there has / there is", | |
"these're": "these are", | |
"they'd": "they had / they would", | |
"they'll": "they shall / they will", | |
"they're": "they are / they were", | |
"they've": "they have", | |
"this's": "this has /", | |
"those're": "those are", | |
"'tis": "it is", | |
"to've": "to have", | |
"'twas": "it was", | |
"wanna": "want to", | |
"wasn't": "was not", | |
"we'd": "we had / we would/ we did", | |
"we'd've": "we would have", | |
"we'll": "we will", | |
"we're": "we are", | |
"we've": "we have", | |
"weren't": "were not", | |
"what'd": "what did", | |
"what'll": "what shall / what will/ what all", | |
"what're": "what are/what were", | |
"what's": "what has / what is / what does", | |
"what've": "what have", | |
"when's": "when has / when is", | |
"where'd": "where did", | |
"where're": "where are", | |
"where's": "where has / where is / where does", | |
"where've": "where have", | |
"which'll": "which shall / which will", | |
"which's": "which has / which is", | |
"whilst": "while still", | |
"who'd": "who would / who had / who did", | |
"who'd've": "who would have", | |
"who'll": "who shall / who will", | |
"who're": "who are", | |
"who's": "who has / who is / who does", | |
"who've": "who have", | |
"why'd": "why did", | |
"why're": "why are", | |
"why's": "why has / why is / why does", | |
"won't": "will not", | |
"would've": "would have", | |
"wouldn't": "would not", | |
"y'all": "you all", | |
"y'all'd've": "you all would have", | |
"y'all're": "you all are", | |
"you'd": "you had / you would", | |
"you'll": "you shall / you will", | |
"you're": "you are/ you are", | |
"you've": "you have", | |
"noun's": "noun is", | |
"noun're": "noun are" | |
} |
See https://gist.github.com/loretoparisi/c221a9c55fb71a23ff4e7bba3b794425?permalink_comment_id=4198425#gistcomment-4198425 where I made two improvements: the first returns only the first match from the expansions (split on "/"), and the second removes hyperlink text that was showing up in the expansions, e.g. Scottish.
I also made one more improvement: the expansion of 's is now just "is" instead of "is, has, does, or us". See the same link, which has been updated.
Here's the updated dataset:
{
"a'ight": "alright",
"ain't": "am not",
"amn't": "am not",
"'n'": "and",
"arencha": "aren’t you",
"aren't": "are not",
"‘bout": "about",
"can't": "cannot",
"cap’n": "captain",
"'cause": "because",
"’cept": "except",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"cuppa": "cup of",
"dammit": "damn it",
"daren't": "dare not",
"daresn't": "dare not",
"dasn't": "dare not",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"dunno": "don't know",
"d'ye": "do you",
"d'ya": "do you",
"e'en": "even",
"e'er": "ever",
"'em": "them",
"everybody's": "everybody is",
"everyone's": "everyone is",
"finna": "fixing to",
"fo’c’sle": "forecastle",
"’gainst": "against",
"g'day": "good day",
"gimme": "give me",
"giv'n": "given",
"gi'z": "give us",
"gonna": "going to",
"gon't": "go not",
"gotta": "got to",
"hadn't": "had not",
"had've": "had have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he had",
"he'll": "he shall",
"helluva": "hell of a",
"he's": "he has",
"here's": "here is",
"how'd": "how did",
"howdy": "how do you do",
"how'll": "how will",
"how're": "how are",
"how's": "how has",
"I'd": "I had",
"I'd've": "I would have",
"I'd'nt": "I would not",
"I'd'nt've": "I would not have",
"I'll": "I shall",
"I'm": "I am",
"Imma": "I am about to",
"I'm'o": "I am going to",
"innit": "isn't it",
"Ion": "I don't",
"I've": "I have",
"isn't": "is not",
"it'd": "it would",
"it'll": "it shall",
"it's": "it has",
"Idunno": "I don't know",
"kinda": "kind of",
"let's": "let us",
"loven't": "love not",
"ma'am": "madam",
"mayn't": "may not",
"may've": "may have",
"methinks": "I think",
"mightn't": "might not",
"might've": "might have",
"mustn't": "must not",
"mustn't've": "must not have",
"must've": "must have",
"‘neath": "beneath",
"needn't": "need not",
"nal": "and all",
"ne'er": "never",
"o'clock": "of the clock",
"o'er": "over",
"ol'": "old",
"ought've": "ought have",
"oughtn't": "ought not",
"oughtn't've": "ought not have",
"‘round": "around",
"'s": "is",
"shalln't": "shall not",
"shan't": "shall not",
"she'd": "she had",
"she'll": "she shall",
"she's": "she has",
"should've": "should have",
"shouldn't": "should not",
"shouldn't've": "should not have",
"somebody's": "somebody has",
"someone's": "someone has",
"something's": "something has",
"so're": "so are",
"so’s": "so is",
"so’ve": "so have",
"that'll": "that shall",
"that're": "that are",
"that's": "that has",
"that'd": "that would",
"there'd": "there had",
"there'll": "there shall",
"there're": "there are",
"there's": "there has",
"these're": "these are",
"these've": "these have",
"they'd": "they had",
"they'll": "they shall",
"they're": "they are",
"they've": "they have",
"this's": "this has",
"those're": "those are",
"those've": "those have",
"'thout": "without",
"’til": "until",
"'tis": "it is",
"to've": "to have",
"'twas": "it was",
"'tween": "between",
"'twere": "it were",
"w'all": "we all",
"w'at": "we at",
"wanna": "want to",
"wasn't": "was not",
"we'd": "we had",
"we'd've": "we would have",
"we'll": "we shall",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"whatcha": "what are you what about you",
"what'd": "what did",
"what'll": "what shall",
"what're": "what are",
"what's": "what has",
"what've": "what have",
"when's": "when has",
"where'd": "where did",
"where'll": "where shall",
"where're": "where are",
"where's": "where has",
"where've": "where have",
"which'd": "which had",
"which'll": "which shall",
"which're": "which are",
"which's": "which has",
"which've": "which have",
"who'd": "who would",
"who'd've": "who would have",
"who'll": "who shall",
"who're": "who are",
"who's": "who has",
"who've": "who have",
"why'd": "why did",
"why're": "why are",
"why's": "why has",
"willn't": "will not",
"won't": "will not",
"wonnot": "will not",
"would've": "would have",
"wouldn't": "would not",
"wouldn't've": "would not have",
"y'ain't": "you are not",
"y'all": "you all",
"y'all'd've": "you all would have",
"y'all'd'n't've": "you all would not have",
"y'all're": "you all are",
"y'all'ren't": "you all are not",
"y'at": "you at",
"yes’m": "yes ma’am",
"y'know": "you know",
"yessir": "yes sir",
"you'd": "you had",
"you'll": "you shall",
"you're": "you are",
"you've": "you have",
"when'd": "when did"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Run the code here in a JavaScript console of the Wikipedia page: https://en.wikipedia.org/wiki/Wikipedia:List_of_English_contractions