refactor: oop log parser

This commit is contained in:
Inanc Gumus
2019-08-26 21:52:47 +03:00
parent dcfc7748fe
commit fce56d299e
13 changed files with 256 additions and 181 deletions

View File

@ -1,18 +0,0 @@
[
{"domain": "learngoprogramming.com", "page": "/", "visits": 10, "uniques": 5},
{"domain": "learngoprogramming.com", "page": "/courses", "visits": 15, "uniques": 10},
{"domain": "learngoprogramming.com", "page": "/courses", "visits": 10, "uniques": 5},
{"domain": "learngoprogramming.com", "page": "/articles", "visits": 20, "uniques": 15},
{"domain": "learngoprogramming.com", "page": "/articles", "visits": 5, "uniques": 2},
{"domain": "golang.org", "page": "/", "visits": 40, "uniques": 20},
{"domain": "golang.org", "page": "/", "visits": 20, "uniques": 10},
{"domain": "golang.org", "page": "/blog", "visits": 45, "uniques": 25},
{"domain": "golang.org", "page": "/blog", "visits": 15, "uniques": 5},
{"domain": "blog.golang.org", "page": "/courses", "visits": 60, "uniques": 30},
{"domain": "blog.golang.org", "page": "/courses", "visits": 30, "uniques": 20},
{"domain": "blog.golang.org", "page": "/updates", "visits": 20, "uniques": 10},
{"domain": "blog.golang.org", "page": "/reference", "visits": 65, "uniques": 35},
{"domain": "blog.golang.org", "page": "/reference", "visits": 15, "uniques": 5},
{"domain": "inanc.io", "page": "/about", "visits": 30, "uniques": 15},
{"domain": "inanc.io", "page": "/about","visits": 70, "uniques": 35}
]

View File

@ -0,0 +1,16 @@
{"domain": "learngoprogramming.com", "page": "/", "visits": 10, "uniques": 5}
{"domain": "learngoprogramming.com", "page": "/courses", "visits": 15, "uniques": 10}
{"domain": "learngoprogramming.com", "page": "/courses", "visits": 10, "uniques": 5}
{"domain": "learngoprogramming.com", "page": "/articles", "visits": 20, "uniques": 15}
{"domain": "learngoprogramming.com", "page": "/articles", "visits": 5, "uniques": 2}
{"domain": "golang.org", "page": "/", "visits": 40, "uniques": 20}
{"domain": "golang.org", "page": "/", "visits": 20, "uniques": 10}
{"domain": "golang.org", "page": "/blog", "visits": 45, "uniques": 25}
{"domain": "golang.org", "page": "/blog", "visits": 15, "uniques": 5}
{"domain": "blog.golang.org", "page": "/courses", "visits": 60, "uniques": 30}
{"domain": "blog.golang.org", "page": "/courses", "visits": 30, "uniques": 20}
{"domain": "blog.golang.org", "page": "/updates", "visits": 20, "uniques": 10}
{"domain": "blog.golang.org", "page": "/reference", "visits": 65, "uniques": 35}
{"domain": "blog.golang.org", "page": "/reference", "visits": 15, "uniques": 5}
{"domain": "inanc.io", "page": "/about", "visits": 30, "uniques": 15}
{"domain": "inanc.io", "page": "/about","visits": 70, "uniques": 35}

View File

@ -1,62 +0,0 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import "sort"
// analysis accumulates results per group key, after filtering them.
// transform feeds it one result at a time; each yields the accumulated
// results in sorted key order.
type analysis struct {
sum map[string]result // metrics per group key
keys []string // unique group keys
// groupKey computes the key a result is accumulated under.
groupKey groupFunc
// filter decides whether a result is accumulated at all; transform
// drops the result when it returns false.
filter filterFunc
}
// newAnalysis returns an analysis with an empty sum table that groups with
// domainGrouper and filters with noopFilter until groupBy or filterBy
// replace them.
func newAnalysis() *analysis {
	a := new(analysis)
	a.sum = make(map[string]result)
	a.groupKey, a.filter = domainGrouper, noopFilter
	return a
}
// transform folds r into the running sum of its group. Results rejected by
// the filter are ignored. A key seen for the first time is also appended to
// keys so that each can later walk the groups.
func (a *analysis) transform(r result) {
	if a.filter(r) {
		k := a.groupKey(r)
		// prev is the zero result when the key is new.
		prev, seen := a.sum[k]
		if !seen {
			a.keys = append(a.keys, k)
		}
		a.sum[k] = r.add(prev)
	}
}
// each sorts the group keys in place and then yields the accumulated result
// of every group in that order. It always returns nil.
func (a *analysis) each(yield resultFn) error {
	sort.Strings(a.keys)

	ordered := a.keys
	for i := range ordered {
		yield(a.sum[ordered[i]])
	}
	return nil
}
// groupBy replaces the grouping function. A nil g is ignored so the current
// grouping function stays in place.
func (a *analysis) groupBy(g groupFunc) {
	if g == nil {
		return
	}
	a.groupKey = g
}
// filterBy replaces the filter function. A nil f is ignored so the current
// filter stays in place.
func (a *analysis) filterBy(f filterFunc) {
	if f == nil {
		return
	}
	a.filter = f
}

View File

@ -8,7 +8,7 @@ package main
// width, height int
// }
// func (s *chartReport) report(results iterator) error {
// func (s *chartReport) digest(results iterator) error {
// w := os.Stdout
// donut := chart.DonutChart{

View File

@ -0,0 +1,43 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
// filter is a pipeline stage that forwards only the results accepted by
// every one of its filter functions.
type filter struct {
// src is the upstream iterator; digest sets it and each reads from it.
src iterator
// filters are the predicates a result has to pass; check stops at the
// first one that rejects.
filters []filterFunc
}
// filterBy returns a filter that applies the given filter functions.
// Its source is left unset until digest is called.
func filterBy(fn ...filterFunc) *filter {
	f := new(filter)
	f.filters = fn
	return f
}
// digest records results as the source to filter and returns nil.
// Nothing is read from results here; the records are pulled and checked
// later, when each is called.
func (f *filter) digest(results iterator) error {
f.src = results
return nil
}
// each walks the source set by digest and yields only the results that pass
// check. It returns whatever error the source's each returns.
func (f *filter) each(yield resultFn) error {
	forward := func(r result) {
		if f.check(r) {
			yield(r)
		}
	}
	return f.src.each(forward)
}
// check reports whether r passes every filter. It stops at the first filter
// that rejects r; with no filters at all it accepts everything.
func (f *filter) check(r result) bool {
	for i := range f.filters {
		if accept := f.filters[i]; !accept(r) {
			return false
		}
	}
	return true
}

View File

@ -0,0 +1,49 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import (
"sort"
)
// group is a pipeline stage that accumulates results per group key.
// digest fills it from an upstream iterator; each yields the accumulated
// results in sorted key order.
type group struct {
sum map[string]result // metrics per group key
keys []string // unique group keys
// key computes the group key of a result.
key groupFunc
}
// groupBy returns a group with an empty sum table that derives group keys
// with key.
func groupBy(key groupFunc) *group {
	g := &group{key: key}
	g.sum = make(map[string]result)
	return g
}
// digest reads every result from results and folds it into the sum of its
// group. A key seen for the first time is also appended to keys. It returns
// whatever error results.each returns.
func (g *group) digest(results iterator) error {
	accumulate := func(r result) {
		k := g.key(r)
		// prev is the zero result when the key is new.
		prev, seen := g.sum[k]
		if !seen {
			g.keys = append(g.keys, k)
		}
		g.sum[k] = r.add(prev)
	}
	return results.each(accumulate)
}
// each sorts the group keys in place and then yields the accumulated result
// of every group in that order. It always returns nil.
func (g *group) each(yield resultFn) error {
	sort.Strings(g.keys)

	ordered := g.keys
	for i := range ordered {
		yield(g.sum[ordered[i]])
	}
	return nil
}

View File

@ -8,10 +8,9 @@
package main
import (
"bufio"
"encoding/json"
"fmt"
"io"
"io/ioutil"
)
type jsonLog struct {
@ -25,37 +24,20 @@ func newJSONLog(r io.Reader) *jsonLog {
func (j *jsonLog) each(yield resultFn) error {
defer readClose(j.reader)
bytes, err := ioutil.ReadAll(j.reader)
if err != nil {
return err
}
dec := json.NewDecoder(bufio.NewReader(j.reader))
return extractJSON(bytes, yield)
}
for {
var r result
func extractJSON(bytes []byte, yield resultFn) error {
var rs []struct {
Domain string
Page string
Visits int
Uniques int
}
if err := json.Unmarshal(bytes, &rs); err != nil {
if serr, ok := err.(*json.SyntaxError); ok {
return fmt.Errorf("%v %q", serr, bytes[:serr.Offset])
err := dec.Decode(&r)
if err == io.EOF {
break
}
if err != nil {
return err
}
return err
}
for _, r := range rs {
yield(result{
domain: r.Domain,
page: r.Page,
visits: r.Visits,
uniques: r.Uniques,
})
yield(r)
}
return nil
}

View File

@ -0,0 +1,33 @@
// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//
package main
import "fmt"
// logCount wraps an iterator and counts the records it yields.
// The embedded iterator is the wrapped source. n is incremented by each,
// once per yielded record, and is reported by count.
type logCount struct {
iterator
n int
}
// each runs the wrapped iterator, bumping the counter for every record
// before passing it on to yield. An error from the wrapped iterator is
// returned prefixed with the number of the record it is attributed to.
func (lc *logCount) each(yield resultFn) error {
	counting := func(r result) {
		lc.n++
		yield(r)
	}
	if err := lc.iterator.each(counting); err != nil {
		// lc.n+1: the failing record was never yielded, so it is the one
		// right after the last counted record.
		return fmt.Errorf("record %d: %v", lc.n+1, err)
	}
	return nil
}
// count reports how many records each has yielded so far.
func (lc *logCount) count() int { return lc.n }

View File

@ -9,18 +9,11 @@ package main
import (
"log"
// "fmt"
"os"
)
func main() {
an := newAnalysis()
// an.filterBy(notUsing(domainExtFilter("io", "com")))
// an.filterBy(domainFilter("org"))
// an.groupBy(domainGrouper)
src := newTextLog(os.Stdin)
dst := newTextReport()
// newGrouper(domainGrouper)
// s := &chartReport{
// title: "visits per domain",
@ -28,7 +21,17 @@ func main() {
// height: 800,
// }
pipe := newPipeline(src, dst, an)
// pipe, err := fromFile("../logs/log.jsonl")
// if err != nil {
// log.Fatalln(err)
// }
pipe := newPipeline(
newTextLog(os.Stdin),
newTextReport(),
filterBy(domainExtFilter("com", "io")),
groupBy(domainGrouper),
)
if err := pipe.run(); err != nil {
log.Fatalln(err)

View File

@ -8,61 +8,71 @@
package main
import (
"fmt"
"os"
"strings"
)
type resultFn func(result)
type iterator interface {
each(resultFn) error
}
type iterator interface{ each(resultFn) error }
type digester interface{ digest(iterator) error }
type transformer interface {
transform(result)
type transform interface {
digester
iterator
}
type reporter interface {
report(iterator) error
}
type pipeline struct {
src iterator
dst reporter
tran transformer
src iterator
trans []transform
dst digester
}
func newPipeline(source iterator, r reporter, t transformer) *pipeline {
// run wires the pipeline stages together and executes them.
//
// Every transform digests the stage before it (the source for the first
// one) and then becomes the input of the next stage; finally dst digests the
// last stage. The first error returned by any digest stops the run and is
// returned.
//
// When the source is a *logCount, the number of records it has counted is
// printed to standard output on the way out, whether or not the run failed.
func (p *pipeline) run() error {
	defer func() {
		// Checked assertion: a pipeline whose source is not a *logCount has
		// nothing to report, and panicking here would hide run's own result.
		if lc, ok := p.src.(*logCount); ok {
			fmt.Printf("%d records processed.\n", lc.count())
		}
	}()

	last := p.src
	for _, t := range p.trans {
		if err := t.digest(last); err != nil {
			return err
		}
		last = t
	}
	return p.dst.digest(last)
}
func newPipeline(src iterator, dst digester, t ...transform) *pipeline {
return &pipeline{
src: source,
dst: r,
tran: t,
src: &logCount{iterator: src},
dst: dst,
trans: t,
}
}
// fromFile generates a default report
func fromFile(path string) (err error) {
func fromFile(path string) (*pipeline, error) {
f, err := os.Open(path)
if err != nil {
return err
return nil, err
}
var src iterator
switch {
case strings.HasSuffix(path, ".txt"):
src = newTextLog(f)
case strings.HasSuffix(path, ".json"):
case strings.HasSuffix(path, ".jsonl"):
src = newJSONLog(f)
}
p := newPipeline(src, newTextReport(), newAnalysis())
return p.run()
}
func (p *pipeline) run() error {
if err := p.src.each(p.tran.transform); err != nil {
return err
}
return p.dst.report(p.tran)
return newPipeline(
src,
newTextReport(),
groupBy(domainGrouper),
), nil
}

View File

@ -1,5 +1,12 @@
package main
import (
"encoding/json"
"fmt"
"strconv"
"strings"
)
const fieldsLength = 4
type result struct {
@ -14,3 +21,42 @@ func (r result) add(other result) result {
r.uniques += other.uniques
return r
}
// UnmarshalText parses one whitespace-separated log line of the form
//
//	domain page visits uniques
//
// into r. The line must contain exactly fieldsLength fields, and visits and
// uniques must be non-negative integers; otherwise an error describing the
// offending input is returned.
//
// r is only written once the whole line has been validated, so a failed
// call leaves the receiver exactly as it was.
func (r *result) UnmarshalText(p []byte) (err error) {
	fields := strings.Fields(string(p))
	if len(fields) != fieldsLength {
		return fmt.Errorf("wrong number of fields %q", fields)
	}

	// Parse into locals first: assigning straight into r would leave it
	// half-updated when a later field turns out to be invalid.
	visits, err := strconv.Atoi(fields[2])
	if err != nil || visits < 0 {
		return fmt.Errorf("wrong input %q", fields[2])
	}

	uniques, err := strconv.Atoi(fields[3])
	if err != nil || uniques < 0 {
		return fmt.Errorf("wrong input %q", fields[3])
	}

	r.domain, r.page = fields[0], fields[1]
	r.visits, r.uniques = visits, uniques
	return nil
}
// UnmarshalJSON decodes a JSON object with domain, page, visits and uniques
// keys into r. On a decoding error r is left untouched and the error from
// encoding/json is returned as is.
func (r *result) UnmarshalJSON(data []byte) error {
	// wire carries exported fields because encoding/json cannot fill the
	// unexported fields of result directly.
	wire := struct {
		Domain, Page    string
		Visits, Uniques int
	}{}

	err := json.Unmarshal(data, &wire)
	if err == nil {
		*r = result{wire.Domain, wire.Page, wire.Visits, wire.Uniques}
	}
	return err
}

View File

@ -9,10 +9,7 @@ package main
import (
"bufio"
"fmt"
"io"
"strconv"
"strings"
)
type textLog struct {
@ -26,41 +23,17 @@ func newTextLog(r io.Reader) *textLog {
func (p *textLog) each(yield resultFn) error {
defer readClose(p.reader)
var (
l = 1
in = bufio.NewScanner(p.reader)
)
in := bufio.NewScanner(p.reader)
for in.Scan() {
r, err := extractFields(in.Text())
if err != nil {
return fmt.Errorf("line %d: %v", l, err)
r := new(result)
if err := r.UnmarshalText(in.Bytes()); err != nil {
return err
}
yield(r)
l++
yield(*r)
}
return in.Err()
}
func extractFields(s string) (r result, err error) {
fields := strings.Fields(s)
if len(fields) != fieldsLength {
return r, fmt.Errorf("wrong number of fields %q", fields)
}
r.domain, r.page = fields[0], fields[1]
r.visits, err = strconv.Atoi(fields[2])
if err != nil || r.visits < 0 {
return r, fmt.Errorf("wrong input %q", fields[2])
}
r.uniques, err = strconv.Atoi(fields[3])
if err != nil || r.uniques < 0 {
return r, fmt.Errorf("wrong input %q", fields[3])
}
return r, nil
}

View File

@ -27,7 +27,7 @@ func newTextReport() *textReport {
return new(textReport)
}
func (s *textReport) report(results iterator) error {
func (s *textReport) digest(results iterator) error {
w := tabwriter.NewWriter(os.Stdout, minWidth, tabWidth, padding, ' ', flags)
write := fmt.Fprintf