Created
July 7, 2017 10:50
-
-
Save forresty/3e64da0528ea7fc008ed03f35e3cd8db to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Armanni TODO required file DONE
# grabbed text from file DONE
# filter the text
# split it with empty space DONE
# downcase text DONE
# create empty hash DONE
# looking through the text if not in the hash then set number to 1 DONE
# if in the hash use hash index += to increase the number DONE
# after sorting it becomes an array
# sort by value
# make it word => count
# then fetch top item
# goal is to find the top 100 most frequently used words
# Words excluded from the frequency count. The first row holds tokens specific
# to this data set; the rest are common English function words, grouped by
# initial letter. Every entry is lowercase because tokens are downcased before
# they are compared against this list. Order and contents match the original
# list exactly (179 entries).
stop_words = %w{
  trump donald - @realdonaldtrump &
  a about above after again against all am an and any are aren't as at
  be because been before being below between both but by
  can't cannot could couldn't
  did didn't do does doesn't doing don't down during
  each
  few for from further
  had hadn't has hasn't have haven't having
  he he'd he'll he's her here here's hers herself him himself his how how's
  i i'd i'll i'm i've if in into is isn't it it's its itself
  let's
  me more most mustn't my myself
  no nor not
  of off on once only or other ought our ours ourselves out over own
  same shan't she she'd she'll she's should shouldn't so some such
  than that that's the their theirs them themselves then there there's these
  they they'd they'll they're they've this those through to too
  under until up
  very
  was wasn't we we'd we'll we're we've were weren't
  what what's when when's where where's which while who who's whom why why's
  with won't would wouldn't
  you you'd you'll you're you've your yours yourself yourselves
}
require "csv"
require "set"

# Counts how often each non-stop word occurs in the first column of data.csv
# and prints the 100 most frequent [word, count] pairs, highest count first.

# A Set gives constant-time membership checks instead of scanning the whole
# stop-word list once per token.
stop_word_lookup = stop_words.to_set

# Missing keys default to 0, so every word can be incremented unconditionally.
word_count = Hash.new(0)

CSV.foreach("data.csv") do |row|
  text = row[0]
  next if text.nil? # blank rows / empty first cells carry no text

  # Tokens are split on whitespace only, so punctuation stays attached to
  # the word it touches (e.g. "great!" is counted separately from "great").
  text.downcase.split(" ").each do |word|
    word_count[word] += 1 unless stop_word_lookup.include?(word)
  end
end

# sort_by turns the hash into an array of [word, count] pairs in ascending
# count order; reverse puts the most frequent words first.
result = word_count.sort_by { |_word, count| count }.reverse
p result[0..99]
# remove stop words
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment