move: log parsers

This commit is contained in:
Inanc Gumus
2019-08-28 20:23:38 +03:00
parent 0a121cd911
commit 9afbe8f350
123 changed files with 1018 additions and 1515 deletions

5
logparser/oop/Makefile Normal file
View File

@@ -0,0 +1,5 @@
# r runs the log parser, feeding it the sample log on standard input.
r:
	go run . < ../../logs/log.txt
# t does the same as r, wrapped in `time` to report how long the run takes.
t:
	time go run . < ../../logs/log.txt

View File

@@ -0,0 +1,38 @@
package main
// You need to run:
// go get -u github.com/wcharczuk/go-chart
// type chartReport struct {
// title string
// width, height int
// }
// func (s *chartReport) digest(records iterator) error {
// w := os.Stdout
// donut := chart.DonutChart{
// Title: s.title,
// TitleStyle: chart.Style{
// FontSize: 35,
// Show: true,
// FontColor: chart.ColorAlternateGreen,
// },
// Width: s.width,
// Height: s.height,
// }
// records.each(func(r record) {
// v := chart.Value{
// Label: r.domain + r.page + ": " + strconv.Itoa(r.visits),
// Value: float64(r.visits),
// Style: chart.Style{
// FontSize: 14,
// },
// }
// donut.Values = append(donut.Values, v)
// })
// return donut.Render(chart.SVG, w)
// }

43
logparser/oop/filter.go Normal file
View File

@@ -0,0 +1,43 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
// filter is a pipeline stage that passes along only the records accepted
// by every one of its filter functions.
type filter struct {
	src     iterator     // upstream records; stored by digest, read by each
	filters []filterFunc // predicates that a record must all satisfy
}
// filterBy creates a filter stage from the given filter functions.
// The source of the records is attached later, by digest.
func filterBy(fn ...filterFunc) *filter {
	f := new(filter)
	f.filters = fn
	return f
}
// digest remembers the upstream iterator without reading from it.
// The actual filtering happens lazily, when each is called.
// It always returns nil.
func (f *filter) digest(records iterator) error {
	f.src = records // consumed later by each

	return nil
}
// each walks the upstream records and yields only those that pass check.
// It returns whatever error the upstream iterator reports.
func (f *filter) each(yield recordFn) error {
	forward := func(r record) {
		if f.check(r) {
			yield(r)
		}
	}
	return f.src.each(forward)
}
// check reports whether the record is accepted by all of the filters.
// It stops at the first filter that rejects the record, and it accepts
// every record when there are no filters.
func (f *filter) check(r record) bool {
	for i := 0; i < len(f.filters); i++ {
		if accepted := f.filters[i](r); !accepted {
			return false
		}
	}
	return true
}

36
logparser/oop/filters.go Normal file
View File

@@ -0,0 +1,36 @@
package main
import "strings"
// filterFunc reports whether a record should be kept.
type filterFunc func(record) bool
// noopFilter accepts every record.
func noopFilter(r record) bool {
	const accept = true
	return accept
}
// notUsing inverts a filter: the returned filter accepts exactly the
// records that the given filter rejects.
func notUsing(filter filterFunc) filterFunc {
	inverted := func(r record) bool {
		if filter(r) {
			return false
		}
		return true
	}
	return inverted
}
// domainExtFilter returns a filter that accepts the records whose domain
// ends with one of the given extensions. Pass the extensions without the
// leading dot, for example: domainExtFilter("com", "io").
//
// With no extensions the returned filter rejects every record.
func domainExtFilter(domains ...string) filterFunc {
	// Build the ".ext" suffixes once instead of concatenating them for every
	// record. The copy also keeps the filter stable if the caller changes
	// the slice it passed in.
	suffixes := make([]string, len(domains))
	for i, domain := range domains {
		suffixes[i] = "." + domain
	}

	return func(r record) bool {
		for _, suffix := range suffixes {
			if strings.HasSuffix(r.domain, suffix) {
				return true
			}
		}
		return false
	}
}
// domainFilter returns a filter that accepts the records whose domain
// contains the given text anywhere in it.
func domainFilter(domain string) filterFunc {
	matches := func(r record) bool {
		return strings.Index(r.domain, domain) >= 0
	}
	return matches
}
// orgDomainsFilter accepts the records whose domain ends with ".org".
func orgDomainsFilter(r record) bool {
	const ext = ".org"
	return strings.HasSuffix(r.domain, ext)
}

49
logparser/oop/group.go Normal file
View File

@@ -0,0 +1,49 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"sort"
)
// group is a pipeline stage that sums up the records that share a group key.
type group struct {
	sum  map[string]record // summed record per group key
	keys []string          // unique group keys; first-seen order until each sorts them
	key  groupFunc         // derives the group key from a record
}
// groupBy creates a group stage that groups the records using
// the given key function.
func groupBy(key groupFunc) *group {
	g := new(group)
	g.key = key
	g.sum = map[string]record{}
	return g
}
// digest reads all the upstream records and accumulates them per group key.
// It returns whatever error the upstream iterator reports.
func (g *group) digest(records iterator) error {
	accumulate := func(r record) {
		name := g.key(r)

		// a missing key yields the zero record, which is a neutral start
		previous, seen := g.sum[name]
		if !seen {
			g.keys = append(g.keys, name)
		}
		g.sum[name] = r.sum(previous)
	}
	return records.each(accumulate)
}
// each yields one summed record per group, ordered by group key.
// It sorts the stored keys in place and always returns nil.
func (g *group) each(yield recordFn) error {
	ordered := g.keys
	sort.Strings(ordered)

	for i := 0; i < len(ordered); i++ {
		yield(g.sum[ordered[i]])
	}
	return nil
}

15
logparser/oop/groupers.go Normal file
View File

@@ -0,0 +1,15 @@
package main
// groupFunc returns the key under which a record is grouped.
type groupFunc func(record) string
// domainGrouper uses the domain of the record as the group key.
// The grouped records still carry their other fields (the page, for
// example), but those are not meaningful per domain: don't use them.
// exercise: write a function that erases the unnecessary data.
func domainGrouper(r record) string {
	key := r.domain
	return key
}
// pageGrouper uses the domain immediately followed by the page
// as the group key.
func pageGrouper(r record) string {
	key := r.domain
	key += r.page
	return key
}

43
logparser/oop/jsonlog.go Normal file
View File

@@ -0,0 +1,43 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"bufio"
"encoding/json"
"io"
)
// jsonLog is an iterator that decodes records from a stream of JSON values.
type jsonLog struct {
	reader io.Reader // the JSON input; closed by each if it is also an io.Closer
}
// newJSONLog creates a jsonLog that reads the records from r.
func newJSONLog(r io.Reader) *jsonLog {
	jl := new(jsonLog)
	jl.reader = r
	return jl
}
// each decodes the JSON values from the reader one by one and yields
// each of them as a record. It stops with nil at the end of the input,
// or with the decoding error of the first value that cannot be decoded.
// The reader is closed on return when it is also an io.Closer.
func (j *jsonLog) each(yield recordFn) error {
	defer readClose(j.reader)

	decoder := json.NewDecoder(bufio.NewReader(j.reader))
	for {
		var rec record

		switch err := decoder.Decode(&rec); {
		case err == io.EOF:
			// end of the input: not an error
			return nil
		case err != nil:
			return err
		}
		yield(rec)
	}
}

33
logparser/oop/logcount.go Normal file
View File

@@ -0,0 +1,33 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import "fmt"
// logCount wraps an iterator and counts the records that it yields.
type logCount struct {
	iterator     // the wrapped source of records
	n        int // number of records yielded so far
}
// each forwards the records of the wrapped iterator to yield while
// counting them. An error from the wrapped iterator is returned with
// the number of the offending record prepended.
func (lc *logCount) each(yield recordFn) error {
	counting := func(r record) {
		lc.n++
		yield(r)
	}

	err := lc.iterator.each(counting)
	if err == nil {
		return nil
	}

	// lc.n+1: the failing record was never yielded, so it wasn't counted
	return fmt.Errorf("record %d: %v", lc.n+1, err)
}
// count returns the number of records yielded so far.
func (lc *logCount) count() int {
	yielded := lc.n
	return yielded
}

44
logparser/oop/main.go Normal file
View File

@@ -0,0 +1,44 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"log"
"os"
)
func main() {
// newGrouper(domainGrouper)
// s := &chartReport{
// title: "visits per domain",
// width: 1920,
// height: 800,
// }
// pipe, err := fromFile("../logs/log.jsonl")
// if err != nil {
// log.Fatalln(err)
// }
pipe := newPipeline(
newTextLog(os.Stdin),
// newJSONLog(os.Stdin),
newTextReport(),
filterBy(notUsing(domainExtFilter("com", "io"))),
groupBy(domainGrouper),
)
if err := pipe.run(); err != nil {
log.Fatalln(err)
}
// if err := reportFromFile(os.Args[1]); err != nil {
// log.Fatalln(err)
// }
}

78
logparser/oop/pipeline.go Normal file
View File

@@ -0,0 +1,78 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"fmt"
"os"
"strings"
)
// recordFn is a callback that receives a single record.
type recordFn func(record)

// iterator is a source of records: each hands its records to the given
// callback and reports an error if it fails.
type iterator interface{ each(recordFn) error }

// digester is a consumer of records: digest takes the records from the
// given iterator.
type digester interface{ digest(iterator) error }

// transform is a middle stage of a pipeline: it digests the records of the
// previous stage and serves as the iterator of the next one.
type transform interface {
	digester
	iterator
}
// pipeline connects a source of records to a destination through
// zero or more transforms.
type pipeline struct {
	src   iterator    // where the records come from
	trans []transform // stages applied in order between src and dst
	dst   digester    // final consumer of the records
}
// run chains the stages and feeds the records to the destination:
// each transform digests the previous stage, and the destination digests
// the last one. It stops at, and returns, the first error.
//
// On return it prints the number of records read from the source,
// whether or not an error occurred.
func (p *pipeline) run() error {
	defer func() {
		// newPipeline always wraps the source in a *logCount, but a pipeline
		// that is built by hand may not: skip the summary rather than panic.
		if lc, ok := p.src.(*logCount); ok {
			fmt.Printf("%d records processed.\n", lc.count())
		}
	}()

	last := p.src

	for _, t := range p.trans {
		if err := t.digest(last); err != nil {
			return err
		}
		// the transform becomes the source of the next stage
		last = t
	}

	return p.dst.digest(last)
}
// newPipeline creates a pipeline that reads from src, applies the
// transforms in the given order, and delivers the result to dst.
// The source is wrapped in a logCount so that run can report how many
// records were read.
func newPipeline(src iterator, dst digester, t ...transform) *pipeline {
	counted := &logCount{iterator: src}

	p := new(pipeline)
	p.src, p.dst, p.trans = counted, dst, t
	return p
}
// fromFile creates a pipeline that generates the default report (a text
// report grouped by domain) from the log file at path.
//
// The parser is chosen by the file extension: ".txt" for text logs and
// ".jsonl" for JSON logs. It returns an error if the file cannot be opened
// or if the extension is not one of those. The opened file is closed by
// the log parser when the pipeline runs.
func fromFile(path string) (*pipeline, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	var src iterator
	switch {
	case strings.HasSuffix(path, ".txt"):
		src = newTextLog(f)
	case strings.HasSuffix(path, ".jsonl"):
		src = newJSONLog(f)
	default:
		// Without a parser the pipeline would run on a nil source and panic.
		// Nothing will read the file either: release it right away. The file
		// is open read-only, so a failed Close cannot lose data.
		f.Close()
		return nil, fmt.Errorf("unsupported log file format: %q", path)
	}

	return newPipeline(
		src,
		newTextReport(),
		groupBy(domainGrouper),
	), nil
}

View File

@@ -0,0 +1,18 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"io"
)
// readClose closes the reader if it also happens to be an io.Closer.
// Otherwise it does nothing. The closing is best effort: the error
// returned by Close is deliberately ignored.
func readClose(r io.Reader) {
	closer, closable := r.(io.Closer)
	if !closable {
		return
	}
	closer.Close()
}

82
logparser/oop/record.go Normal file
View File

@@ -0,0 +1,82 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
)
// fieldsLength is the number of space-separated fields in a text log line.
const fieldsLength = 4

// record is a single log entry: the metrics of a page of a domain.
type record struct {
	domain  string
	page    string
	visits  int
	uniques int
}

// sum returns a copy of the record with the visits and the uniques of
// the other record added to it. The domain and the page stay the same.
func (r record) sum(other record) record {
	r.visits += other.visits
	r.uniques += other.uniques
	return r
}

// UnmarshalText to a *record
//
// The text should consist of four space-separated fields:
// domain, page, visits, and uniques. For example:
//
//	learngoprogramming.com /courses 10 4
//
// The record is updated only if the text is parsed and validated
// successfully. On error, the record is left untouched.
func (r *record) UnmarshalText(p []byte) error {
	fields := strings.Fields(string(p))
	if len(fields) != fieldsLength {
		return fmt.Errorf("wrong number of fields %q", fields)
	}

	visits, err := parseStr("visits", fields[2])
	if err != nil {
		return err
	}

	uniques, err := parseStr("uniques", fields[3])
	if err != nil {
		return err
	}

	// Work on a separate value so that a failure cannot leave the
	// receiver half-updated.
	parsed := record{
		domain:  fields[0],
		page:    fields[1],
		visits:  visits,
		uniques: uniques,
	}
	if err := validate(parsed); err != nil {
		return err
	}

	*r = parsed
	return nil
}

// UnmarshalJSON to a *record
//
// The data should be a JSON object with the domain, page, visits,
// and uniques keys. The record is updated only if the data is decoded
// and validated successfully. On error, the record is left untouched.
func (r *record) UnmarshalJSON(data []byte) error {
	// The fields of the record are unexported and the json package
	// cannot see them: decode into an exported twin first.
	var re struct {
		Domain  string
		Page    string
		Visits  int
		Uniques int
	}

	if err := json.Unmarshal(data, &re); err != nil {
		return err
	}

	decoded := record{
		domain:  re.Domain,
		page:    re.Page,
		visits:  re.Visits,
		uniques: re.Uniques,
	}
	if err := validate(decoded); err != nil {
		return err
	}

	*r = decoded
	return nil
}

// parseStr helps UnmarshalText to parse a string field to an int.
// The name is the name of the field and it is only used in the error.
// It doesn't check the sign of the number: validate rejects the negatives.
func parseStr(name, v string) (int, error) {
	n, err := strconv.Atoi(v)
	if err != nil {
		return 0, fmt.Errorf("record.UnmarshalText %q: %v", name, err)
	}
	return n, nil
}

// validate returns an error that describes the first invalid field of
// the record, or nil if the record is valid: the domain and the page
// should not be empty, and the visits and the uniques should not be negative.
func validate(r record) error {
	switch {
	case r.domain == "":
		return errors.New("record.domain cannot be empty")
	case r.page == "":
		return errors.New("record.page cannot be empty")
	case r.visits < 0:
		return errors.New("record.visits cannot be negative")
	case r.uniques < 0:
		return errors.New("record.uniques cannot be negative")
	}
	return nil
}

39
logparser/oop/textlog.go Normal file
View File

@@ -0,0 +1,39 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"bufio"
"io"
)
// textLog is an iterator that parses records from the lines of a text.
type textLog struct {
	reader io.Reader // the text input; closed by each if it is also an io.Closer
}
// newTextLog creates a textLog that reads the records from r.
func newTextLog(r io.Reader) *textLog {
	tl := new(textLog)
	tl.reader = r
	return tl
}
// each scans the reader line by line and yields each line as a record.
// It stops at the first line that cannot be parsed and returns its error.
// Otherwise it returns the error of the scanner, if any.
// The reader is closed on return when it is also an io.Closer.
func (p *textLog) each(yield recordFn) error {
	defer readClose(p.reader)

	lines := bufio.NewScanner(p.reader)
	for lines.Scan() {
		var rec record

		err := rec.UnmarshalText(lines.Bytes())
		if err != nil {
			return err
		}
		yield(rec)
	}
	return lines.Err()
}

View File

@@ -0,0 +1,49 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"fmt"
"os"
"text/tabwriter"
)
// TODO: make this configurable? or exercise?
//
// Layout settings that textReport.digest passes to tabwriter.NewWriter.
const (
	minWidth = 0 // minimal cell width
	tabWidth = 4 // width of the tab characters
	padding  = 4 // padding added to a cell
	flags    = 0 // no formatting flags
)
// textReport is a digester that prints the records as a table
// to the standard output.
type textReport struct{}
// newTextReport creates a textReport.
func newTextReport() *textReport {
	return &textReport{}
}
// digest prints the records to the standard output as an aligned table:
// a header, one row per record, and a final row with the total visits
// and uniques.
//
// It returns the error of the records iterator, if any. In that case the
// report is not printed: the rows are still buffered in the tabwriter,
// and a partial table with misleading totals would be worse than none.
// Otherwise it returns the error of flushing the table, if any.
func (s *textReport) digest(records iterator) error {
	w := tabwriter.NewWriter(os.Stdout, minWidth, tabWidth, padding, ' ', flags)

	// The tabwriter buffers the cells until Flush.
	// So the errors of the individual writes can be ignored here.
	write := fmt.Fprintf

	write(w, "DOMAINS\tPAGES\tVISITS\tUNIQUES\n")
	write(w, "-------\t-----\t------\t-------\n")

	var total record

	err := records.each(func(r record) {
		total = total.sum(r)

		write(w, "%s\t%s\t%d\t%d\n", r.domain, r.page, r.visits, r.uniques)
	})
	if err != nil {
		// don't swallow the error: the caller should know the input was bad
		return err
	}

	write(w, "\t\t\t\n")
	write(w, "%s\t%s\t%d\t%d\n", "TOTAL", "", total.visits, total.uniques)

	return w.Flush()
}