Skip to content

Instantly share code, notes, and snippets.

@markuskont
Last active June 12, 2017 12:18
Show Gist options
  • Select an option

  • Save markuskont/56f0858a6da6eda2e1ab5e0d72ad7af8 to your computer and use it in GitHub Desktop.

Select an option

Save markuskont/56f0858a6da6eda2e1ab5e0d72ad7af8 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"log"
"strings"
"time"
"sync"
"sync/atomic"
"os"
"encoding/json"
"regexp"
"github.com/hpcloud/tail"
)
// Package-level state shared between main and the tailing goroutines.
var (
	// WordCount maps each word to the number of times it has been seen.
	// It is allocated in main and accessed under a sync.Mutex.
	WordCount map[string]int

	// LineCount is the number of lines read so far; updated via sync/atomic.
	LineCount uint64

	// TotalWordCount is the number of non-empty words seen so far; updated
	// via sync/atomic.
	TotalWordCount uint64

	// DistinctWordCount is the number of different words seen so far;
	// updated via sync/atomic.
	DistinctWordCount uint64

	// confFile is the path of the configuration file selected in main.
	confFile string
)
// Configuration mirrors the JSON configuration file read by
// LoadConfiguration.
type Configuration struct {
	// Files lists the paths of the files to tail.
	Files []string `json:"Files"`

	// WordDump is the path the word-count map is written to as JSON.
	WordDump string `json:"WordDump"`

	// Poll is the number of seconds between two dumps.
	Poll int `json:"Poll"`

	// Split is the regular expression used to cut a line into words.
	Split string `json:"Split"`

	// Replace describes a regex substitution applied to a line before it
	// is split: matches of Regex are replaced with Result.
	Replace struct {
		Regex  string `json:"Regex"`
		Result string `json:"Result"`
	}
}
// LoadConfiguration reads the JSON document stored at file and returns it
// decoded into a Configuration.
//
// The process is terminated through the log package when the file cannot be
// opened or when its content cannot be decoded; a malformed configuration is
// therefore never returned as a silently zero-valued struct.
func LoadConfiguration(file string) Configuration {
	var configuration Configuration

	configFile, err := os.Open(file)
	if err != nil {
		log.Fatal(err)
	}
	// Register Close only once the open is known to have succeeded.
	defer configFile.Close()

	if err := json.NewDecoder(configFile).Decode(&configuration); err != nil {
		log.Fatalf("decoding %s: %v", file, err)
	}
	return configuration
}
// SaveJsonFile encodes v as JSON and writes it to path, creating the file or
// truncating an existing one.
//
// Failing to create, encode into, or close the file terminates the process
// through log.Fatal.
func SaveJsonFile(v interface{}, path string) {
	fo, err := os.Create(path)
	if err != nil {
		log.Fatal(err)
	}

	if err := json.NewEncoder(fo).Encode(v); err != nil {
		// log.Fatal exits without running deferred calls, so release the
		// file explicitly; the encode error is the one worth reporting.
		fo.Close()
		log.Fatal(err)
	}

	// Close may report a write error that only surfaces when the file is
	// released, so it must not be ignored for a file that was just written.
	if err := fo.Close(); err != nil {
		log.Fatal(err)
	}
}
// countWordsFromTail follows file and tallies the words of every line that
// shows up on it.
//
// For each line it increments LineCount, then:
//   - when replaceRegex is non-empty, replaces every match of it with
//     replaceResult;
//   - splits the result on splitRegex when that is non-empty, otherwise on a
//     single space;
//   - for every non-empty piece, increments TotalWordCount and the piece's
//     entry in WordCount, and increments DistinctWordCount the first time the
//     piece is seen.
//
// mu guards WordCount; the counters are updated with sync/atomic. The function
// keeps running for as long as the tail's Lines channel stays open, so callers
// start it in its own goroutine. Failing to open the tail terminates the
// process through log.Fatal; an invalid pattern panics in regexp.MustCompile.
func countWordsFromTail(file string, mu *sync.Mutex, replaceRegex string, replaceResult string, splitRegex string) {
	t, err := tail.TailFile(file, tail.Config{Follow: true})
	if err != nil {
		log.Fatal(err)
	}

	// An empty pattern disables the corresponding step. Any non-empty pattern
	// is honoured, including one-character ones such as "," or ";".
	var reReplace, reSplit *regexp.Regexp
	if replaceRegex != "" {
		reReplace = regexp.MustCompile(replaceRegex)
	}
	if splitRegex != "" {
		reSplit = regexp.MustCompile(splitRegex)
	}

	for line := range t.Lines {
		atomic.AddUint64(&LineCount, 1)

		text := line.Text
		if reReplace != nil {
			text = reReplace.ReplaceAllString(text, replaceResult)
		}

		var words []string
		if reSplit != nil {
			words = reSplit.Split(text, -1)
		} else {
			words = strings.Split(text, " ")
		}

		for _, word := range words {
			// Consecutive separators produce empty pieces; skip them.
			if word == "" {
				continue
			}
			atomic.AddUint64(&TotalWordCount, 1)

			mu.Lock()
			if _, seen := WordCount[word]; !seen {
				atomic.AddUint64(&DistinctWordCount, 1)
			}
			// Incrementing a missing key starts from the zero value.
			WordCount[word]++
			mu.Unlock()
		}
	}
}
// main loads the configuration (from the path given as first command-line
// argument, or /etc/kirka.conf when none is given), starts one tailing
// goroutine per configured file and then loops forever: it prints the current
// counters, dumps WordCount as JSON to the configured WordDump path and sleeps
// for the configured poll interval.
func main() {
	if len(os.Args) < 2 {
		confFile = "/etc/kirka.conf"
	} else {
		confFile = os.Args[1]
	}
	configuration := LoadConfiguration(confFile)

	// A missing or non-positive Poll would make time.Sleep return
	// immediately, turning the loop below into a busy loop that rewrites the
	// dump file non-stop while holding the mutex the tailers need.
	poll := time.Duration(configuration.Poll) * time.Second
	if poll <= 0 {
		poll = time.Second
		log.Printf("Poll must be a positive number of seconds, got %d; using 1", configuration.Poll)
	}

	// The package-level counters already start at zero; only the map needs
	// to be allocated before the tailers write to it.
	WordCount = make(map[string]int)

	mu := new(sync.Mutex)
	for _, path := range configuration.Files {
		go countWordsFromTail(
			path,
			mu,
			configuration.Replace.Regex,
			configuration.Replace.Result,
			configuration.Split)
		fmt.Println("Opened", path, "for tailing")
	}

	for {
		fmt.Println("Lines:", atomic.LoadUint64(&LineCount), "WordDistinct:", atomic.LoadUint64(&DistinctWordCount), "WordTotal:", atomic.LoadUint64(&TotalWordCount))

		// Hold the lock while encoding so the map is not mutated mid-dump.
		mu.Lock()
		SaveJsonFile(WordCount, configuration.WordDump)
		mu.Unlock()

		time.Sleep(poll)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment