Created
December 4, 2013 01:03
-
-
Save wonderb0lt/7780602 to your computer and use it in GitHub Desktop.
A Go exercise for uni (word occurrence counting)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strings" | |
"sort" | |
) | |
// CountingResult is the tally for a single word: the word itself, how often it
// was found, and where.
type CountingResult struct {
	word       string // the word that was counted
	count      uint   // total number of hits for word
	occurences []uint // zero-based line index of every hit, one entry per hit
}
// explode splits s into lines and each line into normalized words.
//
// The result holds one entry per "\n"-separated line of s, in order. Words are
// separated by any run of whitespace, so tabs, repeated spaces and the "\r" of
// Windows line endings never end up inside (or as) a word. Each word has its
// leading and trailing punctuation trimmed and is lower-cased; tokens that
// consist solely of punctuation are dropped.
func explode(s string) [][]string {
	lines := strings.Split(s, "\n")
	results := make([][]string, 0, len(lines))
	for _, line := range lines {
		fields := strings.Fields(line)
		words := make([]string, 0, len(fields))
		for _, field := range fields {
			word := strings.ToLower(strings.Trim(field, ".,;-!?|{}\"'"))
			if word != "" {
				words = append(words, word)
			}
		}
		results = append(results, words)
	}
	return results
}
// unique returns every distinct, non-empty word found in words, in order of
// first appearance. words is indexed by line, then by position in the line.
func unique(words [][]string) []string {
	uniqueWords := make([]string, 0)
	seen := make(map[string]struct{})
	for _, line := range words {
		for _, word := range line {
			if word == "" {
				continue
			}
			if _, ok := seen[word]; ok {
				continue
			}
			seen[word] = struct{}{}
			uniqueWords = append(uniqueWords, word)
		}
	}
	return uniqueWords
}
func count(words [][]string, toCount []string , results chan CountingResult) { | |
for w := 0; w < len(toCount); w++ { | |
word := toCount[w] | |
hits := make([]uint, 0) | |
for i := 0; i < len(words); i++ { | |
for j := 0; j < len(words[i]); j++ { | |
if words[i][j] == word { | |
hits = append(hits, uint(i)) | |
} | |
} | |
} | |
results <- CountingResult{word, uint(len(hits)), hits} | |
} | |
close(results) | |
} | |
func print(result_channels []chan CountingResult) { | |
words := make([]string, 0) | |
sorted_results := make(map[string]CountingResult) | |
for i := 0; i < len(result_channels); i++ { | |
for result := range result_channels[i] { | |
// TODO: Actually sort by the struct not by some weird list we keep on the side | |
sorted_results[result.word] = result | |
words = append(words, result.word) | |
sort.Strings(words) | |
} | |
} | |
fmt.Println("# WORD, COUNT, LINEOCCURANCES") | |
for i := 0; i < len(words); i++ { | |
result := sorted_results[words[i]] | |
fmt.Printf("%s, %d, %d\n", result.word, result.count, result.occurences) | |
} | |
} | |
func main() { | |
// For now we take our data from the source file | |
goroutines := 4 | |
s := explode(`Algorithms | |
The notion of an algorithm is basic to all of computer programming, so we should | |
begin with a careful analysis of this concept. | |
The word "algorithm" itself is quite interesting; at first glance, it may look | |
as though someone intended to write "logarithm" but jumbled up the first four ...`) | |
unique_words := unique(s) | |
counting_results := make([]chan CountingResult, goroutines) | |
for i := range counting_results { | |
counting_results[i] = make(chan CountingResult) | |
} | |
for i := 0; i < goroutines; i++ { | |
go count(s, unique_words[i*len(unique_words)/goroutines:(i+1)*len(unique_words)/goroutines], counting_results[i]) | |
} | |
print(counting_results) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment