diff --git a/27-functional-programming/log-parser-exp/Makefile b/27-functional-programming/log-parser-exp/Makefile new file mode 100644 index 0000000..87685d5 --- /dev/null +++ b/27-functional-programming/log-parser-exp/Makefile @@ -0,0 +1,25 @@ +SHELL := /bin/bash +LINES = echo -e ">> log.txt has $$(wc -l log.txt | cut -f1 -d' ') lines" + +ifeq ($(n),) + n := 18 +endif + +s: + time go run . < log.txt + +r: + go run . < log.txt + +# usage: make load n=18 (doubles log.txt n times; n defaults to 18) +load: restore + @echo "enlarging the file with itself, please wait..." + @for i in {1..$(n)}; do awk 1 log.txt log.txt > log_.txt; mv log_.txt log.txt; rm -f log_.txt; done + @$(LINES) + +restore: + @$(LINES) + git checkout log.txt + +lines: + @$(LINES) \ No newline at end of file diff --git a/27-functional-programming/log-parser-exp/filterby.go b/27-functional-programming/log-parser-exp/filterby.go deleted file mode 100644 index 5e88916..0000000 --- a/27-functional-programming/log-parser-exp/filterby.go +++ /dev/null @@ -1,17 +0,0 @@ -package main - -type filterFunc func(result) (include bool) - -func filterBy(results []result, filterer filterFunc) []result { - out := results[:0] - - for _, r := range results { - if !filterer(r) { - continue - } - - out = append(out, r) - } - - return out -} diff --git a/27-functional-programming/log-parser-exp/filters.go b/27-functional-programming/log-parser-exp/filters.go index c5032ef..fe16049 100644 --- a/27-functional-programming/log-parser-exp/filters.go +++ b/27-functional-programming/log-parser-exp/filters.go @@ -6,13 +6,13 @@ func noopFilter(r result) bool { return true } -func notUsing(filter filterFunc) filterFunc { +func notUsing(filter filterFn) filterFn { return func(r result) bool { return !filter(r) } } -func domainExtFilter(domains ...string) filterFunc { +func domainExtFilter(domains ...string) filterFn { return func(r result) bool { for _, domain := range domains { if strings.HasSuffix(r.domain, "."+domain) { @@ -23,7 +23,7 @@ func domainExtFilter(domains ...string) 
filterFunc { } } -func domainFilter(domain string) filterFunc { +func domainFilter(domain string) filterFn { return func(r result) bool { return strings.Contains(r.domain, domain) } diff --git a/27-functional-programming/log-parser-exp/groupby.go b/27-functional-programming/log-parser-exp/groupby.go deleted file mode 100644 index a7f069f..0000000 --- a/27-functional-programming/log-parser-exp/groupby.go +++ /dev/null @@ -1,19 +0,0 @@ -package main - -type groupFunc func(result) (key string) - -func groupBy(results []result, keyer groupFunc) []result { - grouped := make(map[string]result, len(results)) - - for _, cur := range results { - key := keyer(cur) - grouped[key] = cur.add(grouped[key]) - } - - out := results[:0] - for _, r := range grouped { - out = append(out, r) - } - - return out -} diff --git a/27-functional-programming/log-parser-exp/main.go b/27-functional-programming/log-parser-exp/main.go index dc8d46e..6f1524d 100644 --- a/27-functional-programming/log-parser-exp/main.go +++ b/27-functional-programming/log-parser-exp/main.go @@ -14,7 +14,7 @@ import ( func main() { defer recoverErr() - _, err := newReport(). + _, err := newPipeline(). // from(fastTextReader(os.Stdin)). filterBy(notUsing(domainExtFilter("com", "io"))). groupBy(domainGrouper). 
diff --git a/27-functional-programming/log-parser-exp/pipeline.go b/27-functional-programming/log-parser-exp/pipeline.go new file mode 100644 index 0000000..33635a5 --- /dev/null +++ b/27-functional-programming/log-parser-exp/pipeline.go @@ -0,0 +1,75 @@ +package main + +import ( + "fmt" + "os" + "time" +) + +type ( + inputFn func() ([]result, error) + outputFn func([]result) error + filterFn func(result) (include bool) + groupFn func(result) (key string) +) + +type pipeline struct { + input inputFn + filter filterFn + groupKey groupFn + output outputFn +} + +func newPipeline() *pipeline { + return &pipeline{ + filter: noopFilter, + groupKey: noopGrouper, + input: textReader(os.Stdin), + output: textWriter(os.Stdout), + } +} + +func (p *pipeline) from(fn inputFn) *pipeline { p.input = fn; return p } +func (p *pipeline) to(fn outputFn) *pipeline { p.output = fn; return p } +func (p *pipeline) filterBy(fn filterFn) *pipeline { p.filter = fn; return p } +func (p *pipeline) groupBy(fn groupFn) *pipeline { p.groupKey = fn; return p } + +func (p *pipeline) start() ([]result, error) { + res, err := p.input() + if err != nil { + return nil, err + } + + var ( + out []result + gres = make(map[string]int) + ) + + for _, r := range res { + if !p.filter(r) { + continue + } + + k := p.groupKey(r) + + if i, ok := gres[k]; ok { + out[i] = out[i].add(r) + continue + } + gres[k] = len(out) + + out = append(out, r) + } + + err = p.output(out) + + return out, err +} + +// TODO: remove me +func measure(name string) func() { + start := time.Now() + return func() { + fmt.Printf("%s took %v\n", name, time.Since(start)) + } +} diff --git a/27-functional-programming/log-parser-exp/report.go b/27-functional-programming/log-parser-exp/report.go deleted file mode 100644 index 324c70d..0000000 --- a/27-functional-programming/log-parser-exp/report.go +++ /dev/null @@ -1,62 +0,0 @@ -package main - -import "os" - -type ( - inputFunc func() ([]result, error) - outputFunc func([]result) error -) - 
-type report struct { - input inputFunc - filter filterFunc - group groupFunc - output outputFunc -} - -func newReport() *report { - return &report{ - filter: noopFilter, - group: noopGrouper, - input: textReader(os.Stdin), - output: textWriter(os.Stdout), - } -} - -func (r *report) from(fn inputFunc) *report { - r.input = fn - return r -} - -func (r *report) to(fn outputFunc) *report { - r.output = fn - return r -} - -func (r *report) filterBy(fn filterFunc) *report { - r.filter = fn - return r -} - -func (r *report) groupBy(fn groupFunc) *report { - r.group = fn - return r -} - -func (r *report) start() ([]result, error) { - // input filterBy groupBy - // scanner (result) bool map[string]result - // - // stdin -> []result -> []results -> []result -> output(stdout) - - res, err := r.input() - if err != nil { - return nil, err - } - - res = filterBy(res, r.filter) - res = groupBy(res, r.group) - err = r.output(res) - - return res, err -} diff --git a/27-functional-programming/log-parser-exp/textreader.go b/27-functional-programming/log-parser-exp/textreader.go index ea3eb1c..aaa35cb 100644 --- a/27-functional-programming/log-parser-exp/textreader.go +++ b/27-functional-programming/log-parser-exp/textreader.go @@ -12,10 +12,11 @@ import ( "bytes" "fmt" "io" + "os" "strings" ) -func textReader(r io.Reader) inputFunc { +func textReader(r io.Reader) inputFn { return func() ([]result, error) { // first: count the lines, so the parseText can create // enough buffer. 
@@ -49,7 +50,7 @@ func parseText(in *bufio.Scanner, nlines int) ([]result, error) { func countLines(r io.Reader) (int, error) { var ( lines int - buf = make([]byte, 1024<<4) // read via 16 KB blocks + buf = make([]byte, os.Getpagesize()) // read via page-sized blocks ) for { diff --git a/27-functional-programming/log-parser-exp/textreaderfast.go b/27-functional-programming/log-parser-exp/textreaderfast.go index 6f90666..4558398 100644 --- a/27-functional-programming/log-parser-exp/textreaderfast.go +++ b/27-functional-programming/log-parser-exp/textreaderfast.go @@ -26,7 +26,7 @@ import ( // + uses a manual atoi // + -func fastTextReader(r io.Reader) inputFunc { +func fastTextReader(r io.Reader) inputFn { return func() ([]result, error) { // first: count the lines, so the parseText can create // enough buffer. @@ -45,7 +45,9 @@ func fastParseText(in *bufio.Scanner, nlines int) ([]result, error) { res := make([]result, 0, nlines) for l := 0; in.Scan(); l++ { + _ = in.Bytes() r, err := fastParseFields(in.Bytes()) + // r, err := result{"foo.com", "/bar", 10, 10}, error(nil) if err != nil { return nil, fmt.Errorf("line %d: %v", l, err) diff --git a/27-functional-programming/log-parser-exp/textwriter.go b/27-functional-programming/log-parser-exp/textwriter.go index 32a61ab..3788f64 100644 --- a/27-functional-programming/log-parser-exp/textwriter.go +++ b/27-functional-programming/log-parser-exp/textwriter.go @@ -19,7 +19,7 @@ const ( dashLength = 58 ) -func textWriter(w io.Writer) outputFunc { +func textWriter(w io.Writer) outputFn { return func(results []result) error { fmt.Fprintf(w, header, "DOMAINS", "PAGES", "VISITS", "UNIQUES") fmt.Fprintln(w, strings.Repeat("-", dashLength)) @@ -36,7 +36,7 @@ func textWriter(w io.Writer) outputFunc { } } -func noWhere() outputFunc { +func noWhere() outputFn { return func(res []result) error { return nil }