move: log parsers
This commit is contained in:
5
logparser/oop/Makefile
Normal file
5
logparser/oop/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
r:
|
||||
go run . < ../../logs/log.txt
|
||||
|
||||
t:
|
||||
time go run . < ../../logs/log.txt
|
||||
38
logparser/oop/chartreport.go
Normal file
38
logparser/oop/chartreport.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package main
|
||||
|
||||
// You need to run:
|
||||
// go get -u github.com/wcharczuk/go-chart
|
||||
|
||||
// type chartReport struct {
|
||||
// title string
|
||||
// width, height int
|
||||
// }
|
||||
|
||||
// func (s *chartReport) digest(records iterator) error {
|
||||
// w := os.Stdout
|
||||
|
||||
// donut := chart.DonutChart{
|
||||
// Title: s.title,
|
||||
// TitleStyle: chart.Style{
|
||||
// FontSize: 35,
|
||||
// Show: true,
|
||||
// FontColor: chart.ColorAlternateGreen,
|
||||
// },
|
||||
// Width: s.width,
|
||||
// Height: s.height,
|
||||
// }
|
||||
|
||||
// records.each(func(r record) {
|
||||
// v := chart.Value{
|
||||
// Label: r.domain + r.page + ": " + strconv.Itoa(r.visits),
|
||||
// Value: float64(r.visits),
|
||||
// Style: chart.Style{
|
||||
// FontSize: 14,
|
||||
// },
|
||||
// }
|
||||
|
||||
// donut.Values = append(donut.Values, v)
|
||||
// })
|
||||
|
||||
// return donut.Render(chart.SVG, w)
|
||||
// }
|
||||
43
logparser/oop/filter.go
Normal file
43
logparser/oop/filter.go
Normal file
@@ -0,0 +1,43 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
type filter struct {
|
||||
src iterator
|
||||
filters []filterFunc
|
||||
}
|
||||
|
||||
func filterBy(fn ...filterFunc) *filter {
|
||||
return &filter{filters: fn}
|
||||
}
|
||||
|
||||
// transform the record
|
||||
func (f *filter) digest(records iterator) error {
|
||||
f.src = records
|
||||
return nil
|
||||
}
|
||||
|
||||
// each yields only the filtered records
|
||||
func (f *filter) each(yield recordFn) error {
|
||||
return f.src.each(func(r record) {
|
||||
if !f.check(r) {
|
||||
return
|
||||
}
|
||||
yield(r)
|
||||
})
|
||||
}
|
||||
|
||||
// check all the filters against the record
|
||||
func (f *filter) check(r record) bool {
|
||||
for _, fi := range f.filters {
|
||||
if !fi(r) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
36
logparser/oop/filters.go
Normal file
36
logparser/oop/filters.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package main
|
||||
|
||||
import "strings"
|
||||
|
||||
// filterFunc is a predicate over a record: it reports whether the record
// passes the filter.
type filterFunc func(record) bool
|
||||
|
||||
func noopFilter(r record) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func notUsing(filter filterFunc) filterFunc {
|
||||
return func(r record) bool {
|
||||
return !filter(r)
|
||||
}
|
||||
}
|
||||
|
||||
func domainExtFilter(domains ...string) filterFunc {
|
||||
return func(r record) bool {
|
||||
for _, domain := range domains {
|
||||
if strings.HasSuffix(r.domain, "."+domain) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func domainFilter(domain string) filterFunc {
|
||||
return func(r record) bool {
|
||||
return strings.Contains(r.domain, domain)
|
||||
}
|
||||
}
|
||||
|
||||
func orgDomainsFilter(r record) bool {
|
||||
return strings.HasSuffix(r.domain, ".org")
|
||||
}
|
||||
49
logparser/oop/group.go
Normal file
49
logparser/oop/group.go
Normal file
@@ -0,0 +1,49 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"sort"
|
||||
)
|
||||
|
||||
type group struct {
|
||||
sum map[string]record // metrics per group key
|
||||
keys []string // unique group keys
|
||||
key groupFunc
|
||||
}
|
||||
|
||||
func groupBy(key groupFunc) *group {
|
||||
return &group{
|
||||
sum: make(map[string]record),
|
||||
key: key,
|
||||
}
|
||||
}
|
||||
|
||||
// digest the records
|
||||
func (g *group) digest(records iterator) error {
|
||||
return records.each(func(r record) {
|
||||
k := g.key(r)
|
||||
|
||||
if _, ok := g.sum[k]; !ok {
|
||||
g.keys = append(g.keys, k)
|
||||
}
|
||||
|
||||
g.sum[k] = r.sum(g.sum[k])
|
||||
})
|
||||
}
|
||||
|
||||
// each yields the grouped records
|
||||
func (g *group) each(yield recordFn) error {
|
||||
sort.Strings(g.keys)
|
||||
|
||||
for _, k := range g.keys {
|
||||
yield(g.sum[k])
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
15
logparser/oop/groupers.go
Normal file
15
logparser/oop/groupers.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package main
|
||||
|
||||
// groupFunc computes the group key of a record.
type groupFunc func(record) string
|
||||
|
||||
// domainGrouper groups by domain.
|
||||
// but it keeps the other fields.
|
||||
// for example: it returns pages as well, but you shouldn't use them.
|
||||
// exercise: write a function that erases the unnecessary data.
|
||||
func domainGrouper(r record) string {
|
||||
return r.domain
|
||||
}
|
||||
|
||||
func pageGrouper(r record) string {
|
||||
return r.domain + r.page
|
||||
}
|
||||
43
logparser/oop/jsonlog.go
Normal file
43
logparser/oop/jsonlog.go
Normal file
@@ -0,0 +1,43 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"io"
|
||||
)
|
||||
|
||||
// jsonLog is a record source that decodes records from a stream of JSON
// values.
type jsonLog struct {
	// reader is the input stream; each closes it if it is an io.Closer.
	reader io.Reader
}
|
||||
|
||||
func newJSONLog(r io.Reader) *jsonLog {
|
||||
return &jsonLog{reader: r}
|
||||
}
|
||||
|
||||
func (j *jsonLog) each(yield recordFn) error {
|
||||
defer readClose(j.reader)
|
||||
|
||||
dec := json.NewDecoder(bufio.NewReader(j.reader))
|
||||
|
||||
for {
|
||||
var r record
|
||||
|
||||
err := dec.Decode(&r)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
yield(r)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
33
logparser/oop/logcount.go
Normal file
33
logparser/oop/logcount.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import "fmt"
|
||||
|
||||
// logCount counts the yielded records
|
||||
type logCount struct {
|
||||
iterator
|
||||
n int
|
||||
}
|
||||
|
||||
func (lc *logCount) each(yield recordFn) error {
|
||||
err := lc.iterator.each(func(r record) {
|
||||
lc.n++
|
||||
yield(r)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// lc.n+1: iterator.each won't call yield on err
|
||||
return fmt.Errorf("record %d: %v", lc.n+1, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (lc *logCount) count() int {
|
||||
return lc.n
|
||||
}
|
||||
44
logparser/oop/main.go
Normal file
44
logparser/oop/main.go
Normal file
@@ -0,0 +1,44 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// newGrouper(domainGrouper)
|
||||
|
||||
// s := &chartReport{
|
||||
// title: "visits per domain",
|
||||
// width: 1920,
|
||||
// height: 800,
|
||||
// }
|
||||
|
||||
// pipe, err := fromFile("../logs/log.jsonl")
|
||||
// if err != nil {
|
||||
// log.Fatalln(err)
|
||||
// }
|
||||
|
||||
pipe := newPipeline(
|
||||
newTextLog(os.Stdin),
|
||||
// newJSONLog(os.Stdin),
|
||||
newTextReport(),
|
||||
filterBy(notUsing(domainExtFilter("com", "io"))),
|
||||
groupBy(domainGrouper),
|
||||
)
|
||||
|
||||
if err := pipe.run(); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
// if err := reportFromFile(os.Args[1]); err != nil {
|
||||
// log.Fatalln(err)
|
||||
// }
|
||||
}
|
||||
78
logparser/oop/pipeline.go
Normal file
78
logparser/oop/pipeline.go
Normal file
@@ -0,0 +1,78 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type recordFn func(record)
|
||||
|
||||
type iterator interface{ each(recordFn) error }
|
||||
type digester interface{ digest(iterator) error }
|
||||
|
||||
type transform interface {
|
||||
digester
|
||||
iterator
|
||||
}
|
||||
|
||||
type pipeline struct {
|
||||
src iterator
|
||||
trans []transform
|
||||
dst digester
|
||||
}
|
||||
|
||||
func (p *pipeline) run() error {
|
||||
defer func() {
|
||||
n := p.src.(*logCount).count()
|
||||
fmt.Printf("%d records processed.\n", n)
|
||||
}()
|
||||
|
||||
last := p.src
|
||||
|
||||
for _, t := range p.trans {
|
||||
if err := t.digest(last); err != nil {
|
||||
return err
|
||||
}
|
||||
last = t
|
||||
}
|
||||
|
||||
return p.dst.digest(last)
|
||||
}
|
||||
|
||||
func newPipeline(src iterator, dst digester, t ...transform) *pipeline {
|
||||
return &pipeline{
|
||||
src: &logCount{iterator: src},
|
||||
dst: dst,
|
||||
trans: t,
|
||||
}
|
||||
}
|
||||
|
||||
// fromFile generates a default report
|
||||
func fromFile(path string) (*pipeline, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var src iterator
|
||||
switch {
|
||||
case strings.HasSuffix(path, ".txt"):
|
||||
src = newTextLog(f)
|
||||
case strings.HasSuffix(path, ".jsonl"):
|
||||
src = newJSONLog(f)
|
||||
}
|
||||
|
||||
return newPipeline(
|
||||
src,
|
||||
newTextReport(),
|
||||
groupBy(domainGrouper),
|
||||
), nil
|
||||
}
|
||||
18
logparser/oop/readclose.go
Normal file
18
logparser/oop/readclose.go
Normal file
@@ -0,0 +1,18 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
// readClose closes r when it also implements io.Closer and does nothing
// otherwise. The error returned by Close is discarded.
func readClose(r io.Reader) {
	closer, ok := r.(io.Closer)
	if !ok {
		return
	}
	closer.Close()
}
|
||||
82
logparser/oop/record.go
Normal file
82
logparser/oop/record.go
Normal file
@@ -0,0 +1,82 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// fieldsLength is the number of whitespace-separated fields that a text log
// line must have: domain, page, visits and uniques.
const fieldsLength = 4
|
||||
|
||||
// record is a single log entry: the visit counters of one page of a domain.
// The field order matters: UnmarshalJSON builds a record positionally.
type record struct {
	domain, page    string
	visits, uniques int
}
|
||||
|
||||
func (r record) sum(other record) record {
|
||||
r.visits += other.visits
|
||||
r.uniques += other.uniques
|
||||
return r
|
||||
}
|
||||
|
||||
// UnmarshalText to a *record
|
||||
func (r *record) UnmarshalText(p []byte) (err error) {
|
||||
fields := strings.Fields(string(p))
|
||||
if len(fields) != fieldsLength {
|
||||
return fmt.Errorf("wrong number of fields %q", fields)
|
||||
}
|
||||
|
||||
r.domain, r.page = fields[0], fields[1]
|
||||
|
||||
if r.visits, err = parseStr("visits", fields[2]); err != nil {
|
||||
return err
|
||||
}
|
||||
if r.uniques, err = parseStr("uniques", fields[3]); err != nil {
|
||||
return err
|
||||
}
|
||||
return validate(*r)
|
||||
}
|
||||
|
||||
// UnmarshalJSON to a *record
|
||||
func (r *record) UnmarshalJSON(data []byte) error {
|
||||
var re struct {
|
||||
Domain string
|
||||
Page string
|
||||
Visits int
|
||||
Uniques int
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(data, &re); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*r = record{re.Domain, re.Page, re.Visits, re.Uniques}
|
||||
return validate(*r)
|
||||
}
|
||||
|
||||
// parseStr helps UnmarshalText convert the text of the field called name to
// an int. On failure it returns 0 and an error that mentions the field name.
// It does not check the sign of the number; validate rejects negatives.
func parseStr(name, v string) (int, error) {
	n, err := strconv.Atoi(v)
	if err == nil {
		return n, nil
	}
	return 0, fmt.Errorf("record.UnmarshalText %q: %v", name, err)
}
|
||||
|
||||
func validate(r record) (err error) {
|
||||
switch {
|
||||
case r.domain == "":
|
||||
err = errors.New("record.domain cannot be empty")
|
||||
case r.page == "":
|
||||
err = errors.New("record.page cannot be empty")
|
||||
case r.visits < 0:
|
||||
err = errors.New("record.visits cannot be negative")
|
||||
case r.uniques < 0:
|
||||
err = errors.New("record.uniques cannot be negative")
|
||||
}
|
||||
return
|
||||
}
|
||||
39
logparser/oop/textlog.go
Normal file
39
logparser/oop/textlog.go
Normal file
@@ -0,0 +1,39 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
)
|
||||
|
||||
// textLog is a record source that parses records from lines of text.
type textLog struct {
	// reader is the input stream; each closes it if it is an io.Closer.
	reader io.Reader
}
|
||||
|
||||
func newTextLog(r io.Reader) *textLog {
|
||||
return &textLog{reader: r}
|
||||
}
|
||||
|
||||
func (p *textLog) each(yield recordFn) error {
|
||||
defer readClose(p.reader)
|
||||
|
||||
in := bufio.NewScanner(p.reader)
|
||||
|
||||
for in.Scan() {
|
||||
r := new(record)
|
||||
|
||||
if err := r.UnmarshalText(in.Bytes()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
yield(*r)
|
||||
}
|
||||
|
||||
return in.Err()
|
||||
}
|
||||
49
logparser/oop/textreport.go
Normal file
49
logparser/oop/textreport.go
Normal file
@@ -0,0 +1,49 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"text/tabwriter"
|
||||
)
|
||||
|
||||
// Arguments passed to tabwriter.NewWriter for the text report.
// TODO: make these configurable? Or leave it as an exercise?
const (
	minWidth = 0 // minimal cell width
	tabWidth = 4 // width of tab characters
	padding  = 4 // padding added to a cell
	flags    = 0 // no tabwriter formatting flags
)
|
||||
|
||||
// textReport is a pipeline destination that prints the records as a
// tab-aligned text table. It carries no state.
type textReport struct{}
|
||||
|
||||
func newTextReport() *textReport {
|
||||
return new(textReport)
|
||||
}
|
||||
|
||||
func (s *textReport) digest(records iterator) error {
|
||||
w := tabwriter.NewWriter(os.Stdout, minWidth, tabWidth, padding, ' ', flags)
|
||||
|
||||
write := fmt.Fprintf
|
||||
|
||||
write(w, "DOMAINS\tPAGES\tVISITS\tUNIQUES\n")
|
||||
write(w, "-------\t-----\t------\t-------\n")
|
||||
|
||||
var total record
|
||||
records.each(func(r record) {
|
||||
total = total.sum(r)
|
||||
|
||||
write(w, "%s\t%s\t%d\t%d\n", r.domain, r.page, r.visits, r.uniques)
|
||||
})
|
||||
|
||||
write(w, "\t\t\t\n")
|
||||
write(w, "%s\t%s\t%d\t%d\n", "TOTAL", "", total.visits, total.uniques)
|
||||
|
||||
return w.Flush()
|
||||
}
|
||||
Reference in New Issue
Block a user