add: new pipeline to log parser

This commit is contained in:
Inanc Gumus
2019-08-08 14:11:54 +03:00
parent 91b3258042
commit e4dae5e0d4
10 changed files with 112 additions and 107 deletions

View File

@ -0,0 +1,25 @@
# Run recipes with bash; the `load` recipe relies on {1..N} brace expansion.
SHELL := /bin/bash
# Shell snippet that prints the number of lines currently in log.txt.
LINES = echo -e ">> log.txt has $$(wc -l log.txt | cut -f1 -d' ') lines"
# n is the number of doubling passes performed by `load`; defaults to 18
# when it is not given on the command line.
ifeq ($(n),)
n := 18
endif
# s: run the program with log.txt on stdin and time the run.
s:
time go run . < log.txt
# r: run the program with log.txt on stdin (no timing).
r:
go run . < log.txt
# load: restore log.txt first, then concatenate it with itself n times, so
# every pass doubles the file. Presumably invoked as `make load n=18`.
load: restore
@echo "enlarging the file with itself, please wait..."
@for i in {1..$(n)}; do awk 1 log.txt log.txt > log_.txt; mv log_.txt log.txt; rm -f log_.txt; done
@$(LINES)
# restore: print the current line count, then reset log.txt to the version
# tracked by git.
restore:
@$(LINES)
git checkout log.txt
# lines: print the current line count of log.txt.
lines:
@$(LINES)

View File

@ -1,17 +0,0 @@
package main
// filterFunc reports whether a result should be included.
type filterFunc func(result) (include bool)
// filterBy keeps only the results for which filterer returns true.
//
// Filtering is done in place: the returned slice reuses the backing array
// of results, so the contents of the input slice are overwritten.
func filterBy(results []result, filterer filterFunc) []result {
	kept := results[:0]
	for i := range results {
		r := results[i]
		if filterer(r) {
			kept = append(kept, r)
		}
	}
	return kept
}

View File

@ -6,13 +6,13 @@ func noopFilter(r result) bool {
return true
}
func notUsing(filter filterFunc) filterFunc {
func notUsing(filter filterFn) filterFn {
return func(r result) bool {
return !filter(r)
}
}
func domainExtFilter(domains ...string) filterFunc {
func domainExtFilter(domains ...string) filterFn {
return func(r result) bool {
for _, domain := range domains {
if strings.HasSuffix(r.domain, "."+domain) {
@ -23,7 +23,7 @@ func domainExtFilter(domains ...string) filterFunc {
}
}
func domainFilter(domain string) filterFunc {
func domainFilter(domain string) filterFn {
return func(r result) bool {
return strings.Contains(r.domain, domain)
}

View File

@ -1,19 +0,0 @@
package main
// groupFunc returns the key under which a result is grouped.
type groupFunc func(result) (key string)
// groupBy merges the results that share the same key, as computed by keyer,
// into a single result per key using result.add.
//
// The returned slice reuses the backing array of results. Its order follows
// map iteration, which Go leaves unspecified.
func groupBy(results []result, keyer groupFunc) []result {
	totals := make(map[string]result, len(results))
	for i := range results {
		r := results[i]
		k := keyer(r)
		totals[k] = r.add(totals[k])
	}

	merged := results[:0]
	for k := range totals {
		merged = append(merged, totals[k])
	}
	return merged
}

View File

@ -14,7 +14,7 @@ import (
func main() {
defer recoverErr()
_, err := newReport().
_, err := newPipeline().
// from(fastTextReader(os.Stdin)).
filterBy(notUsing(domainExtFilter("com", "io"))).
groupBy(domainGrouper).

View File

@ -0,0 +1,75 @@
package main
import (
"fmt"
"os"
"time"
)
// Function types for the stages a pipeline is assembled from.
type (
// inputFn produces the results to process, or an error.
inputFn func() ([]result, error)
// outputFn consumes the processed results and may return an error.
outputFn func([]result) error
// filterFn reports whether a result should be included.
filterFn func(result) (include bool)
// groupFn returns the key under which a result is grouped.
groupFn func(result) (key string)
)
// pipeline bundles the stage functions that start runs: read the input,
// filter it, group it by key, and hand the outcome to the output.
type pipeline struct {
// input supplies the results to process.
input inputFn
// filter decides which results are kept.
filter filterFn
// groupKey computes the grouping key of a result.
groupKey groupFn
// output receives the filtered and grouped results.
output outputFn
}
// newPipeline returns a pipeline with default stages: noopFilter as the
// filter, noopGrouper as the grouper, a textReader over os.Stdin as the
// input and a textWriter over os.Stdout as the output.
func newPipeline() *pipeline {
	p := new(pipeline)
	p.filter = noopFilter
	p.groupKey = noopGrouper
	p.input = textReader(os.Stdin)
	p.output = textWriter(os.Stdout)
	return p
}
// from sets the function that supplies the pipeline's input and returns the
// pipeline so that calls can be chained.
func (p *pipeline) from(fn inputFn) *pipeline {
	p.input = fn
	return p
}
// to sets the function that receives the pipeline's output and returns the
// pipeline so that calls can be chained.
func (p *pipeline) to(fn outputFn) *pipeline {
	p.output = fn
	return p
}
// filterBy sets the function that decides which results are kept and
// returns the pipeline so that calls can be chained.
func (p *pipeline) filterBy(fn filterFn) *pipeline {
	p.filter = fn
	return p
}
// groupBy sets the function that computes the grouping key of a result and
// returns the pipeline so that calls can be chained.
func (p *pipeline) groupBy(fn groupFn) *pipeline {
	p.groupKey = fn
	return p
}
// start runs the pipeline: it reads the input, drops the results rejected
// by the filter, groups the rest by key and passes the outcome to the
// output.
//
// The first result seen for a key is appended to the outcome; later results
// with the same key are merged into it with result.add, so the outcome is
// ordered by first appearance of each key.
//
// An input error is returned with nil results. The grouped results are
// returned together with whatever error the output function reports.
func (p *pipeline) start() ([]result, error) {
	records, err := p.input()
	if err != nil {
		return nil, err
	}

	var grouped []result
	// position maps a group key to the index of its entry in grouped.
	position := make(map[string]int)

	for _, rec := range records {
		if p.filter(rec) {
			key := p.groupKey(rec)

			at, seen := position[key]
			if !seen {
				position[key] = len(grouped)
				grouped = append(grouped, rec)
				continue
			}
			grouped[at] = grouped[at].add(rec)
		}
	}

	err = p.output(grouped)
	return grouped, err
}
// measure notes the current time and returns a function that, each time it
// is called, prints to standard output how long has elapsed since then,
// labelled with name.
//
// TODO: remove me
func measure(name string) func() {
	began := time.Now()
	report := func() {
		elapsed := time.Since(began)
		fmt.Printf("%s took %v\n", name, elapsed)
	}
	return report
}

View File

@ -1,62 +0,0 @@
package main
import "os"
// Function types for the input and output stages of a report.
type (
// inputFunc produces the results to process, or an error.
inputFunc func() ([]result, error)
// outputFunc consumes the processed results and may return an error.
outputFunc func([]result) error
)
// report bundles the stage functions that start runs: read the input,
// filter it, group it, and hand the outcome to the output.
type report struct {
// input supplies the results to process.
input inputFunc
// filter decides which results are kept.
filter filterFunc
// group computes the grouping key of a result.
group groupFunc
// output receives the filtered and grouped results.
output outputFunc
}
// newReport returns a report with default stages: noopFilter as the filter,
// noopGrouper as the grouper, a textReader over os.Stdin as the input and a
// textWriter over os.Stdout as the output.
func newReport() *report {
	rep := new(report)
	rep.filter = noopFilter
	rep.group = noopGrouper
	rep.input = textReader(os.Stdin)
	rep.output = textWriter(os.Stdout)
	return rep
}
// from sets the function that supplies the report's input and returns the
// report so that calls can be chained.
func (r *report) from(fn inputFunc) *report { r.input = fn; return r }
// to sets the function that receives the report's output and returns the
// report so that calls can be chained.
func (r *report) to(fn outputFunc) *report { r.output = fn; return r }
// filterBy sets the function that decides which results are kept and
// returns the report so that calls can be chained.
func (r *report) filterBy(fn filterFunc) *report { r.filter = fn; return r }
// groupBy sets the function that computes the grouping key of a result and
// returns the report so that calls can be chained.
func (r *report) groupBy(fn groupFunc) *report { r.group = fn; return r }
// start runs the report end to end:
//
//	input() -> filterBy(filter) -> groupBy(group) -> output()
//
// An input error is returned with nil results. The grouped results are
// returned together with whatever error the output function reports.
func (r *report) start() ([]result, error) {
	loaded, err := r.input()
	if err != nil {
		return nil, err
	}

	filtered := filterBy(loaded, r.filter)
	grouped := groupBy(filtered, r.group)

	if err := r.output(grouped); err != nil {
		return grouped, err
	}
	return grouped, nil
}

View File

@ -12,10 +12,11 @@ import (
"bytes"
"fmt"
"io"
"os"
"strings"
)
func textReader(r io.Reader) inputFunc {
func textReader(r io.Reader) inputFn {
return func() ([]result, error) {
// first: count the lines, so the parseText can create
// enough buffer.
@ -49,7 +50,7 @@ func parseText(in *bufio.Scanner, nlines int) ([]result, error) {
func countLines(r io.Reader) (int, error) {
var (
lines int
buf = make([]byte, 1024<<4) // read via 16 KB blocks
buf = make([]byte, os.Getpagesize()) // read via blocks of one memory page
)
for {

View File

@ -26,7 +26,7 @@ import (
// + uses a manual atoi
// +
func fastTextReader(r io.Reader) inputFunc {
func fastTextReader(r io.Reader) inputFn {
return func() ([]result, error) {
// first: count the lines, so the parseText can create
// enough buffer.
@ -45,7 +45,9 @@ func fastParseText(in *bufio.Scanner, nlines int) ([]result, error) {
res := make([]result, 0, nlines)
for l := 0; in.Scan(); l++ {
_ = in.Bytes()
r, err := fastParseFields(in.Bytes())
// r, err := result{"foo.com", "/bar", 10, 10}, error(nil)
if err != nil {
return nil, fmt.Errorf("line %d: %v", l, err)

View File

@ -19,7 +19,7 @@ const (
dashLength = 58
)
func textWriter(w io.Writer) outputFunc {
func textWriter(w io.Writer) outputFn {
return func(results []result) error {
fmt.Fprintf(w, header, "DOMAINS", "PAGES", "VISITS", "UNIQUES")
fmt.Fprintln(w, strings.Repeat("-", dashLength))
@ -36,7 +36,7 @@ func textWriter(w io.Writer) outputFunc {
}
}
func noWhere() outputFunc {
func noWhere() outputFn {
return func(res []result) error {
return nil
}