Created
December 17, 2019 05:13
-
-
Save david415/8523f189ed6c49d08c112d5ba236bc9b to your computer and use it in GitHub Desktop.
analytics.go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"flag" | |
"fmt" | |
"io" | |
"os" | |
"sort" | |
"strconv" | |
"strings" | |
"time" | |
) | |
// parseFile reads the file named by inputFile and returns one entry per
// non-blank line, each entry being the line's whitespace-separated fields.
//
// Records are expected to carry, in order:
//
//	date, timestamp, userid, countryid, siteid
//
// The number of fields is not validated here. Blank (or whitespace-only)
// lines are skipped so that they never turn into empty records. The returned
// slice is non-nil even for an empty file. Any error from opening or reading
// the file is returned as-is.
func parseFile(inputFile string) ([][]string, error) {
	file, err := os.Open(inputFile)
	if err != nil {
		return nil, err
	}
	// The file is only read, so a failing Close cannot lose data.
	defer file.Close()

	reader := bufio.NewReader(file)
	output := make([][]string, 0)
	for {
		// ReadString always returns a complete line regardless of its length;
		// ReadLine would hand back buffer-sized fragments of long lines.
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}
		// On io.EOF, line may still hold a final record that lacks a
		// trailing newline, so it is processed before leaving the loop.
		if fields := strings.Fields(line); len(fields) > 0 {
			output = append(output, fields)
		}
		if err == io.EOF {
			break
		}
	}
	return output, nil
}
// BDVPopularity returns the site that was visited by the largest number of
// distinct users among the records whose country field is "BDV".
//
// Each record must hold at least five fields laid out as
//
//	date, timestamp, userid, countryid, siteid
//
// and an error is returned for any shorter record. When several sites share
// the highest unique-user count, the lexicographically smallest site is
// returned so the result does not depend on map iteration order. If no record
// is from "BDV" the result is the empty string.
func BDVPopularity(input [][]string) (string, error) {
	const (
		userIdx    = 2
		countryIdx = 3
		siteIdx    = 4
	)

	// site -> set of users seen on that site
	visitors := make(map[string]map[string]struct{})
	for i, fields := range input {
		if len(fields) <= siteIdx {
			return "", fmt.Errorf("record %d: expected at least %d fields, got %d", i, siteIdx+1, len(fields))
		}
		if fields[countryIdx] != "BDV" {
			continue
		}
		site, user := fields[siteIdx], fields[userIdx]
		users, ok := visitors[site]
		if !ok {
			users = make(map[string]struct{})
			visitors[site] = users
		}
		users[user] = struct{}{}
	}

	winner, best := "", 0
	for site, users := range visitors {
		n := len(users)
		// Every site in the map has at least one user, so the first site
		// visited always beats the initial best of zero.
		if n > best || (n == best && site < winner) {
			winner, best = site, n
		}
	}
	return winner, nil
}
// SiteCount pairs a site identifier with a count recorded for that site.
type SiteCount struct {
	site  string
	count int
}

// SiteCounts is a list of SiteCount values that satisfies sort.Interface,
// ordering its elements by ascending count.
type SiteCounts []SiteCount

// Len returns the number of elements in the list.
func (sc SiteCounts) Len() int {
	return len(sc)
}

// Swap exchanges the elements at positions a and b.
func (sc SiteCounts) Swap(a, b int) {
	sc[b], sc[a] = sc[a], sc[b]
}

// Less reports whether the element at position a has a strictly smaller
// count than the element at position b.
func (sc SiteCounts) Less(a, b int) bool {
	return sc[b].count > sc[a].count
}
// hit describes a single visit: which site was requested and when.
type hit struct {
	// site is the identifier of the visited site.
	site string
	// time is the instant at which the visit took place.
	time time.Time
}
// ParseDateTime combines a date written as "year-month-day" and a time of
// day written as "hour:minute:second" into a single instant in UTC.
//
// Every component must be a decimal integer; zero padding is optional.
// Components beyond the third are ignored. Out-of-range components (such as
// month 13) are not rejected: time.Date normalises them into the following
// unit. An error is returned, and the zero time.Time with it, when either
// string has fewer than three components or a component is not an integer.
func ParseDateTime(dateStr, timeStr string) (time.Time, error) {
	ymd, err := splitInts(dateStr, "-")
	if err != nil {
		return time.Time{}, fmt.Errorf("parsing date %q: %w", dateStr, err)
	}
	hms, err := splitInts(timeStr, ":")
	if err != nil {
		return time.Time{}, fmt.Errorf("parsing time %q: %w", timeStr, err)
	}
	t := time.Date(ymd[0], time.Month(ymd[1]), ymd[2], hms[0], hms[1], hms[2], 0, time.UTC)
	return t, nil
}

// splitInts splits s on sep and converts the first three pieces to integers.
// It fails when fewer than three pieces are present or when one of the first
// three is not a valid integer.
func splitInts(s, sep string) ([3]int, error) {
	parts := strings.Split(s, sep)
	if len(parts) < 3 {
		return [3]int{}, fmt.Errorf("expected 3 fields separated by %q, got %d", sep, len(parts))
	}
	var out [3]int
	for i := range out {
		n, err := strconv.Atoi(parts[i])
		if err != nil {
			return [3]int{}, err
		}
		out[i] = n
	}
	return out, nil
}
func UsersFirstLastCount(input [][]string) (int, error) { | |
// user -> hit | |
firstMap := make(map[string]hit) | |
lastMap := make(map[string]hit) | |
// site_id -> user_unique_num | |
siteLast := make(map[string]int) | |
siteCountList := make([]SiteCount, 0) | |
// user -> site -> count | |
windowUsers := make(map[string]map[string]int) | |
windowStart := time.Date(2019, 02, 03, 0, 0, 0, 0, time.UTC) | |
windowEnd := time.Date(2019, 02, 04, 23, 59, 59, 0, time.UTC) | |
for _, fields := range input { | |
// date, timestamp, userid, countryid, siteid | |
t, err := ParseDateTime(fields[0], fields[1]) | |
if err != nil { | |
return 0, err | |
} | |
if t.After(windowStart) && windowEnd.After(t) { | |
// if we within the window for the specified problem/question? | |
_, ok := windowUsers[fields[2]] | |
if !ok { | |
windowUsers[fields[2]] = make(map[string]int) | |
windowUsers[fields[2]][fields[4]] = 1 | |
} else { | |
_, ok := windowUsers[fields[2]][fields[4]] | |
if !ok { | |
windowUsers[fields[2]][fields[4]] = 1 | |
} else { | |
windowUsers[fields[2]][fields[4]] = +1 | |
} | |
} | |
} | |
if _, ok := firstMap[fields[2]]; !ok { | |
firstMap[fields[2]] = hit{ | |
site: fields[4], | |
time: t, | |
} | |
} else { | |
if firstMap[fields[2]].time.After(t) { | |
h := firstMap[fields[2]] | |
h.site = fields[4] | |
h.time = t | |
} | |
} | |
if _, ok := lastMap[fields[2]]; !ok { | |
lastMap[fields[2]] = hit{ | |
site: fields[4], | |
time: t, | |
} | |
} else { | |
if lastMap[fields[2]].time.After(t) { | |
h := lastMap[fields[2]] | |
h.site = fields[4] | |
h.time = t | |
} | |
} | |
} // for | |
fmt.Println("time window users") | |
for user, counts := range windowUsers { | |
for site, count := range counts { | |
if count > 10 { | |
fmt.Printf("user %s site %s count %d\n", user, site, count) | |
} | |
} | |
} | |
for _, lastHit := range lastMap { | |
if _, ok := siteLast[lastHit.site]; ok { | |
siteLast[lastHit.site] += 1 | |
} else { | |
siteLast[lastHit.site] = 1 | |
} | |
} | |
for site, count := range siteLast { | |
siteCountList = append(siteCountList, SiteCount{ | |
site: site, | |
count: count, | |
}) | |
} | |
sort.Sort(sort.Reverse(SiteCounts(siteCountList))) | |
fmt.Println("sorted site counts:") | |
for _, siteCount := range siteCountList { | |
fmt.Printf("site %s count %d\n", siteCount.site, siteCount.count) | |
} | |
fmt.Println("") | |
count := 0 | |
for user, firstHit := range firstMap { | |
if lastHit, ok := lastMap[user]; ok { | |
if firstHit.site == lastHit.site { | |
count += 1 | |
} | |
} | |
} | |
return count, nil | |
} | |
func main() { | |
var inputFile string | |
flag.StringVar(&inputFile, "i", "", "input file") | |
flag.Parse() | |
if inputFile == "" { | |
panic("must specify -i input_file") | |
} | |
input, err := parseFile(inputFile) | |
if err != nil { | |
panic(err) | |
} | |
siteID, err := BDVPopularity(input) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Printf("most popular site per unique users in country BDV: %s\n", siteID) | |
numUsers, err := UsersFirstLastCount(input) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Printf("number of users whose first/last visits are to the same site: %d\n", numUsers) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment