Skip to content

Instantly share code, notes, and snippets.

@soldev-42
Last active November 15, 2015 12:24
Show Gist options
  • Select an option

  • Save soldev-42/c5af53f7fa63d700a5d9 to your computer and use it in GitHub Desktop.

Select an option

Save soldev-42/c5af53f7fa63d700a5d9 to your computer and use it in GitHub Desktop.
/**
 * Fixed-size, direct-addressed table of lower-cased words.
 *
 * Each word is hashed to a single slot; a later word that lands on the same
 * slot overwrites the earlier one and the event is reported on stdout as a
 * collision ("KOLIZJA"). Alongside the words the table keeps a per-slot hit
 * counter, the total number of insertions, the number of occupied slots and
 * the highest slot index used so far.
 */
class SimpleHash {
private:
    // One stored word per slot; the most recent word hashed to a slot wins.
    string * m_pData;
    // Number of set() calls that landed on each slot.
    int * counter;
    // Highest slot index written so far (0 while the table is empty).
    int maxIndex;
    // Total number of set() calls.
    int numberOfWords;
    // Number of slots that have received at least one word.
    int arraySize;
    // hash table size
    unsigned int m_size;

    // The class owns two raw arrays; copying would lead to a double delete,
    // so the copy operations are declared here and intentionally not defined.
    SimpleHash(const SimpleHash &);
    SimpleHash & operator=(const SimpleHash &);

    // Multiplicative string hash, truncated to its low 16 bits.
    unsigned short hash (const char *s) {
        unsigned int h = 0;
        while (*s) {
            h = h*23131 + (unsigned char)*s++;
        }
        return (h & 0xffff);
    }

    // Maps a key to a slot index that always lies inside the arrays, even
    // when the table was created with fewer than 65536 slots.
    unsigned int slotFor(const char *s) {
        return hash(s) % m_size;
    }

public:
    /**
     * Creates a table with `size` slots. A size of 0 is treated as 1 so that
     * slot computation never divides by zero.
     */
    SimpleHash(unsigned int size): m_size(size > 0 ? size : 1) {
        // init hash table data given table size
        m_pData = new string[m_size];
        counter = new int[m_size]();   // "()" zero-initialises every counter
        maxIndex = 0;
        arraySize = 0;
        numberOfWords = 0;
    }

    ~SimpleHash() {
        delete[] m_pData;
        delete[] counter;
    }

    /**
     * Returns `str` converted to lower case with tolower(), byte by byte.
     * The converted word is also echoed to stdout followed by a newline.
     */
    string toLowerCase(string str) {
        for (string::size_type i = 0; i < str.size(); i++) {
            // tolower() requires a value representable as unsigned char;
            // passing a negative char (e.g. a UTF-8 byte) is undefined.
            str[i] = (char)tolower((unsigned char)str[i]);
        }
        cout << str << endl;
        return str;
    }

    /**
     * Stores the lower-cased `value` in its slot, updates the statistics and
     * reports a collision when the slot already held a different word.
     */
    void set(string value) {
        value = toLowerCase(value);
        int index = (int)slotFor(value.c_str());
        if (index > maxIndex) maxIndex = index;
        if (counter[index] == 0) arraySize++;   // first word ever in this slot
        counter[index]++;
        numberOfWords++;
        if (counter[index] > 1 && m_pData[index] != value) {
            cout << index <<" - KOLIZJA => " << m_pData[index] << " : "<< value << endl;
        }
        m_pData[index] = value;
    }

    /**
     * Returns the word currently stored in the slot `key` hashes to, or an
     * empty string when the slot is unused. The key is hashed as given; it
     * is not lower-cased here.
     */
    string get(const char* key) {
        return m_pData[slotFor(key)];
    }

    // Number of occupied slots.
    int size() {
        return arraySize;
    }

    // Total number of words passed to set().
    int getNumberOfWords() {
        return numberOfWords;
    }

    // Highest slot index used so far.
    int getMaxIndex() {
        return maxIndex;
    }

    // Direct access to the slot array; still owned by this object.
    string * getData() {
        return m_pData;
    }

    // Direct access to the per-slot hit counters; still owned by this object.
    int * getCounter() {
        return counter;
    }
};
/**
 * Reads "potop.txt" word by word, strips punctuation and digits, feeds every
 * remaining fragment into a SimpleHash and prints the statistics:
 * total words, occupied slots, then the hit counter of every slot up to the
 * highest one used.
 *
 * Returns 1 when the input file cannot be opened, 0 otherwise.
 */
int main() {
    SimpleHash hash(100000);
    const char * delimiter = " ,.-!?()[]{}\"'`;:1234567890";

    ifstream inputFile("potop.txt");
    if (!inputFile.is_open()) {
        cerr << "Nieudane otwarcie pliku" << endl;
        return 1;
    }

    string word;
    while (inputFile >> word) {
        // Walk over every delimiter-separated fragment of the token, so that
        // e.g. "foo-bar" contributes both "foo" and "bar".
        string::size_type start = word.find_first_not_of(delimiter);
        while (start != string::npos) {
            string::size_type end = word.find_first_of(delimiter, start);
            if (end == string::npos) end = word.size();
            hash.set(word.substr(start, end - start));
            start = word.find_first_not_of(delimiter, end);
        }
    }

    cout << hash.getNumberOfWords() << endl;
    cout << hash.size() << endl;

    // Dump the counters including the highest used slot; skip the dump when
    // nothing was stored, because the max index is then just its initial 0.
    if (hash.getNumberOfWords() > 0) {
        int * counter = hash.getCounter();
        for (int i = 0; i <= hash.getMaxIndex(); i++) {
            cout << counter[i] << endl;
        }
    }

    // inputFile is closed by its destructor.
    return 0;
}
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Counts the words of a UTF-8 text file.
 *
 * <p>A word is a maximal run of ASCII or Polish letters; everything else is a
 * separator. Words are compared case-insensitively. Constructing an instance
 * immediately reads the file and prints two lines to standard output: the
 * total number of words and the number of distinct words.
 */
public class WordCount {

    /** Matches a single character that is not an ASCII or Polish letter. */
    private static final Pattern NON_LETTER =
            Pattern.compile("[^a-zA-ZąęóśłżźćńĄĘÓŚŁŻŹĆŃ]");

    private final String filename;

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the file to analyse
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("Usage: java -jar WordCount.jar filename");
            System.exit(1);
        }
        new WordCount(args[0]);
    }

    /**
     * Reads {@code filename} and prints its word statistics.
     *
     * @param filename path of the text file to analyse
     * @throws NullPointerException if {@code filename} is {@code null}
     */
    public WordCount(String filename) {
        this.filename = Objects.requireNonNull(filename, "filename");
        this.count();
    }

    /**
     * Reads the file line by line and prints the total and distinct word
     * counts. I/O failures are reported on standard error; nothing is printed
     * to standard output in that case.
     */
    private void count() {
        Set<String> distinctWords = new HashSet<>();
        int numberOfWords = 0;

        // Decode explicitly as UTF-8 instead of relying on the platform
        // charset; try-with-resources closes the reader on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Locale.ROOT keeps case mapping independent of the user's locale.
                for (String word : NON_LETTER.split(line.toLowerCase(Locale.ROOT))) {
                    // Adjacent separators produce empty fragments; skip them.
                    if (word.isEmpty()) {
                        continue;
                    }
                    numberOfWords++;
                    distinctWords.add(word);
                }
            }
            System.out.println(numberOfWords);
            System.out.println(distinctWords.size());
        } catch (IOException ex) {
            System.err.println("Cannot read " + filename + ": " + ex.getMessage());
        }
    }
}
<?php
/**
 * Counts the words of potop.txt (UTF-8).
 *
 * Every run of ASCII or Polish letters is a word. Each word is lower-cased,
 * appended to words.txt (one per line) and recorded in $list; at the end the
 * total number of words and the number of distinct words are printed.
 */
mb_internal_encoding('UTF-8');

$list = array();
$number_of_words = 0;

$fh = fopen('potop.txt', 'r');
if ($fh === false) {
    file_put_contents('php://stderr', "Cannot open potop.txt for reading\n");
    exit(1);
}
$fw = fopen('words.txt', 'w');
if ($fw === false) {
    fclose($fh);
    file_put_contents('php://stderr', "Cannot open words.txt for writing\n");
    exit(1);
}

// Read whole lines (no length cap) so a long line is never cut mid-word or
// mid-character; the loop ends when fgets() reports EOF or an error.
while (($line = fgets($fh)) !== false) {
    // The "u" modifier makes PCRE treat the pattern and subject as UTF-8, so
    // the Polish letters in the class are matched as characters, not bytes.
    $words = preg_split('/[^a-zA-ZąęóśłżźćńĄĘÓŁŚŻŹĆŃ]+/u', $line, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        // preg_split() fails on malformed UTF-8; skip that line.
        continue;
    }
    foreach ($words as $word) {
        // mb_strtolower() also lower-cases the non-ASCII letters.
        $word = mb_strtolower($word);
        $list[$word] = $word;
        $number_of_words++;
        fwrite($fw, $word.PHP_EOL);
    }
}

fclose($fh);
fclose($fw);

echo $number_of_words . "\n";
echo count($list) . "\n";
?>
#python3
# Prints the 20 most frequent words of potop.txt (UTF-8), most frequent first,
# as "<word>  =>  <count>" lines. Words are runs of \w characters compared
# case-insensitively.
import fileinput
import re

filePath = "potop.txt"
try:
    with open(filePath, encoding='utf-8') as a_file:
        word_list = {}
        for a_line in a_file:
            # \w+ never yields empty or whitespace-padded matches, so every
            # match can be counted directly.
            for word in re.findall(r"\w+", a_line.lower()):
                word_list[word] = word_list.get(word, 0) + 1
except OSError:
    # Only genuine file-access failures are reported as an open problem;
    # other errors (e.g. undecodable bytes) are left to surface normally.
    print("Problem z otwarciem pliku")
else:
    # sorted() is stable, so words with equal counts keep first-seen order.
    res = sorted(word_list, key=word_list.__getitem__, reverse=True)
    for k in res[:20]:
        print(k, " => ", word_list[k])
# Counts the words of potop.txt (UTF-8).
#
# Each line is lower-cased, every character that is neither a word character
# nor whitespace is removed, and the remainder is split on whitespace. Every
# word is written to words.txt (one per line); at the end the total number of
# words and the number of distinct words are printed.
number_of_words = 0
words = {}

File.open('potop.txt', 'r:UTF-8') do |input|
  # Open the output once, so earlier words are not overwritten by later ones.
  File.open('words.txt', 'w') do |output|
    input.each_line do |line|
      # String#split without arguments never returns empty strings.
      line.downcase.gsub(/[^[[:word:]]\s]/, '').split.each do |word|
        words[word] = word
        number_of_words += 1
        output.puts word
      end
    end
  end
end

puts number_of_words
puts words.length
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment