Last active
November 15, 2015 12:24
-
-
Save soldev-42/c5af53f7fa63d700a5d9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fixed-size, single-slot hash table of lower-cased words.
//
// Each slot keeps the most recent word that hashed to it plus a counter of
// how many set() calls landed on it. A colliding word overwrites the previous
// one; the collision is reported on stdout.
class SimpleHash {
private:
    // One stored word per slot (empty string while the slot is unused).
    string * m_pData;
    // Number of set() calls that landed on each slot.
    int * counter;
    // Highest slot index used so far.
    int maxIndex;
    // Total number of set() calls.
    int numberOfWords;
    // Number of slots that have been used at least once.
    int arraySize;
    // Hash table size (number of slots, always >= 1).
    unsigned int m_size;

    // The class owns raw arrays, so copying would double-delete them.
    // Declared but not defined to forbid copies.
    SimpleHash(const SimpleHash &);
    SimpleHash & operator=(const SimpleHash &);

    // Multiplicative string hash truncated to 16 bits.
    unsigned short hash (const char *s) {
        unsigned int h = 0;
        while (*s) {
            h = h*23131 + (unsigned char)*s++;
        }
        return (h & 0xffff);
    }

    // Maps a key to a valid slot. The raw hash spans 0..65535, so it must be
    // reduced modulo the table size to stay inside the arrays.
    unsigned int slotFor(const string & key) {
        return hash(key.c_str()) % m_size;
    }

    // Returns a lower-cased copy of str without printing anything.
    static string lowered(string str) {
        for (string::size_type i = 0; i < str.size(); i++) {
            // tolower() requires a value representable as unsigned char;
            // plain char may be negative for non-ASCII bytes.
            str[i] = static_cast<char>(tolower(static_cast<unsigned char>(str[i])));
        }
        return str;
    }

public:
    // Creates a table with `size` slots; a size of 0 is treated as 1 so the
    // slot computation never divides by zero.
    SimpleHash(unsigned int size): m_size(size == 0 ? 1 : size) {
        m_pData = new string[m_size];
        counter = new int[m_size]();   // value-initialised: all zeros
        maxIndex = 0;
        arraySize = 0;
        numberOfWords = 0;
    }

    ~SimpleHash() {
        delete[] m_pData;
        delete[] counter;
    }

    // Lower-cases str, echoes the result on stdout and returns it.
    string toLowerCase(string str) {
        str = lowered(str);
        cout << str << endl;
        return str;
    }

    // Stores the lower-cased value in its slot, updating the statistics and
    // reporting a collision when the slot already held a different word.
    void set(string value) {
        value = toLowerCase(value);
        int index = slotFor(value);
        if (counter[index] == 0) arraySize++;   // first use of this slot
        if (index > maxIndex) maxIndex = index;
        counter[index]++;
        numberOfWords++;
        if (counter[index] > 1 && m_pData[index] != value) {
            cout << index <<" - KOLIZJA => " << m_pData[index] << " : "<< value << endl;
        }
        m_pData[index] = value;
    }

    // Returns the word stored in the slot of `key` (empty string if unused).
    // The key is lower-cased first so lookups match what set() stored.
    string get(const char* key) {
        return m_pData[slotFor(lowered(key))];
    }

    // Number of slots that hold a word.
    int size() {
        return arraySize;
    }

    // Total number of words passed to set().
    int getNumberOfWords() {
        return numberOfWords;
    }

    // Highest slot index that has been used.
    int getMaxIndex() {
        return maxIndex;
    }

    // Raw access to the slot array (owned by this object).
    string * getData() {
        return m_pData;
    }

    // Raw access to the per-slot counters (owned by this object).
    int * getCounter() {
        return counter;
    }
};
| int main() { | |
| SimpleHash hash(100000); | |
| string line; | |
| const char * delimiter = " ,.-!?()[]{}\"'`;:1234567890"; | |
| ifstream inputFile("potop.txt"); | |
| try { | |
| if (!inputFile.is_open()) throw "Nieudane otwarcie pliku"; | |
| string word; | |
| while (inputFile >> word) { | |
| char *cstr = new char[word.length() + 1]; | |
| strcpy(cstr, word.c_str()); | |
| char * pch = strtok(cstr, delimiter); | |
| if (pch == NULL) continue; | |
| hash.set(pch); | |
| delete[] cstr; | |
| } | |
| cout << hash.getNumberOfWords() << endl; | |
| cout << hash.size() << endl; | |
| int * counter = hash.getCounter(); | |
| for (int i = 0; i < hash.getMaxIndex(); i++) { | |
| cout << counter[i] << endl; | |
| } | |
| inputFile.close(); | |
| } catch (string exception) { | |
| cout << exception << endl; | |
| } | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Counts the words in a text file and prints two lines to standard output:
 * the total number of words and the number of distinct (lower-cased) words.
 *
 * <p>The file is read as UTF-8. A word is a maximal run of ASCII or Polish
 * letters; every other character is a separator.
 */
public class WordCount {

    /** Matches a single character that is not an ASCII or Polish letter. */
    private static final Pattern NON_LETTER =
            Pattern.compile("[^a-zA-ZąęóśłżźćńĄĘÓŚŁŻŹĆŃ]");

    private final String filename;

    /**
     * Command-line entry point.
     *
     * @param args a single element: the path of the file to analyse
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("Usage: java -jar WordCount.jar filename");
            System.exit(1);
        }
        new WordCount(args[0]);
    }

    /**
     * Counts the words of {@code filename} and prints the result immediately.
     *
     * @param filename path of the text file to analyse
     */
    public WordCount(String filename) {
        this.filename = filename;
        this.count();
    }

    /**
     * Reads the file line by line, tallies the words and prints the totals.
     * On an I/O failure a message is written to standard error and nothing is
     * printed to standard output.
     */
    private void count() {
        Set<String> distinctWords = new HashSet<>();
        int numberOfWords = 0;

        // try-with-resources closes the reader even when reading fails;
        // the charset is explicit so the result does not depend on the platform.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Locale.ROOT keeps lower-casing independent of the default locale.
                for (String word : NON_LETTER.split(line.toLowerCase(Locale.ROOT))) {
                    if (word.isEmpty()) {
                        continue; // consecutive separators produce empty tokens
                    }
                    numberOfWords++;
                    distinctWords.add(word);
                }
            }
        } catch (IOException ex) {
            System.err.println("Cannot read " + filename + ": " + ex.getMessage());
            return;
        }

        System.out.println(numberOfWords);
        System.out.println(distinctWords.size());
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Counts the words of potop.txt (read as UTF-8), writes every lower-cased
// word to words.txt (one per line) and prints two numbers: the total word
// count and the number of distinct words.
mb_internal_encoding('UTF-8');

$list = array();
$number_of_words = 0;

$fh = fopen('potop.txt', 'r');
if ($fh === false) {
    echo "Cannot open potop.txt for reading\n";
    exit(1);
}
$fw = fopen('words.txt', 'w');
if ($fw === false) {
    fclose($fh);
    echo "Cannot open words.txt for writing\n";
    exit(1);
}

// fgets() without a length reads whole lines, so a long line is never cut in
// the middle of a word; testing its result avoids a spurious pass at EOF.
while (($line = fgets($fh)) !== false) {
    // The /u modifier makes the character class operate on UTF-8 characters
    // instead of single bytes; empty tokens are dropped by the flag.
    $words = preg_split('/[^a-zA-ZąęóśłżźćńĄĘÓŁŚŻŹĆŃ]+/u', $line, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        continue; // line is not valid UTF-8; skip it rather than abort
    }
    foreach ($words as $word) {
        // mb_strtolower() also lower-cases the Polish capitals.
        $word = mb_strtolower($word);
        $list[$word] = $word;
        $number_of_words++;
        fwrite($fw, $word.PHP_EOL);
    }
}

fclose($fw);
fclose($fh);

echo $number_of_words . "\n";
echo count($list) . "\n";
?>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#python3
import fileinput
import re
from collections import Counter

filePath = "potop.txt"


def count_words(lines):
    """Return a Counter of lower-cased words found in an iterable of lines.

    A word is a maximal run of ``\\w`` characters.
    """
    counts = Counter()
    for a_line in lines:
        counts.update(re.findall(r"\w+", a_line.lower()))
    return counts


def main():
    """Print the 20 most frequent words of ``filePath`` with their counts."""
    try:
        with open(filePath, encoding='utf-8') as a_file:
            word_list = count_words(a_file)
    except OSError:
        # Only genuine file-access failures are reported this way; any other
        # error propagates with its real traceback.
        print("Problem z otwarciem pliku")
        return

    # most_common(20) orders by descending count, ties in first-seen order.
    for word, occurrences in word_list.most_common(20):
        print(word, " => ", occurrences)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Counts the words of potop.txt (read as UTF-8), writes every lower-cased word
# to words.txt (one per line) and prints the total number of words followed by
# the number of distinct words.
number_of_words = 0
words = {}

# words.txt is opened once so earlier lines are not overwritten by later ones.
File.open('words.txt', 'w') do |out|
  # File.foreach reads line by line and closes the input file when done.
  File.foreach('potop.txt', encoding: 'UTF-8') do |line|
    # Drop everything that is neither a word character nor whitespace,
    # then split on whitespace.
    line_words = line.downcase.gsub(/[^[[:word:]]\s]/, '').split
    next if line_words.empty?

    line_words.each { |s| words[s] = s }
    number_of_words += line_words.length
    out.puts line_words
  end
end

puts number_of_words
puts words.length
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment