diff --git a/27-functional-programming/log-parser-exp/groupers.go b/27-functional-programming/log-parser-exp/groupers.go index 40ee1cb..d46ad56 100644 --- a/27-functional-programming/log-parser-exp/groupers.go +++ b/27-functional-programming/log-parser-exp/groupers.go @@ -12,8 +12,7 @@ func pageGrouper(r result) string { return r.domain + r.page } -// you could have created a noopGrouper as well -// but it's not necessary i think (map allocation) +// groupBy allocates map unnecessarily func noopGrouper(r result) string { // with something like: // return randomStrings() diff --git a/27-functional-programming/log-parser-exp/main.go b/27-functional-programming/log-parser-exp/main.go index ccde00b..2a4092a 100644 --- a/27-functional-programming/log-parser-exp/main.go +++ b/27-functional-programming/log-parser-exp/main.go @@ -15,7 +15,7 @@ func main() { defer recoverErr() _, err := newReport(). - // filterBy(orgDomainsFilter). + // from(fastTextReader(os.Stdin)). filterBy(notUsing(domainExtFilter("com", "io"))). groupBy(domainGrouper). start() diff --git a/27-functional-programming/log-parser-exp/report.go b/27-functional-programming/log-parser-exp/report.go index e63a447..324c70d 100644 --- a/27-functional-programming/log-parser-exp/report.go +++ b/27-functional-programming/log-parser-exp/report.go @@ -17,6 +17,7 @@ type report struct { func newReport() *report { return &report{ filter: noopFilter, + group: noopGrouper, input: textReader(os.Stdin), output: textWriter(os.Stdout), } @@ -43,30 +44,19 @@ func (r *report) groupBy(fn groupFunc) *report { } func (r *report) start() ([]result, error) { - if r.input == nil { - panic("report input cannot be nil") - } + // input filterBy groupBy + // scanner (result) bool map[string]result + // + // stdin -> []result -> []results -> []result -> output(stdout) - results, err := r.input() + res, err := r.input() if err != nil { return nil, err } - // noop if filter is nil - results = filterBy(results, r.filter) + res = filterBy(res, r.filter) + res = groupBy(res, r.group) + err = r.output(res) - // group func is more tricky - // you don't want to create an unnecessary map - if r.group != nil { - results = groupBy(results, r.group) - } - - // TODO: prefer: noop writer - if r.output != nil { - if err := r.output(results); err != nil { - return nil, err - } - } - - return results, nil + return res, err } diff --git a/27-functional-programming/log-parser-exp/textreader.go b/27-functional-programming/log-parser-exp/textreader.go index c90528e..ea3eb1c 100644 --- a/27-functional-programming/log-parser-exp/textreader.go +++ b/27-functional-programming/log-parser-exp/textreader.go @@ -9,6 +9,7 @@ package main import ( "bufio" + "bytes" "fmt" "io" "strings" @@ -16,34 +17,51 @@ import ( func textReader(r io.Reader) inputFunc { return func() ([]result, error) { - in := bufio.NewScanner(r) - return parseText(in) - } -} - -func parseText(in *bufio.Scanner) ([]result, error) { - var ( - results []result - lines int - ) - - results = make([]result, 0, 5000000) - - for in.Scan() { - lines++ - - res, err := parseFields(strings.Fields(in.Text())) + // first: count the lines, so the parseText can create + // enough buffer. + var buf bytes.Buffer + lines, err := countLines(io.TeeReader(r, &buf)) if err != nil { - // TODO: custom error type for line information - return nil, fmt.Errorf("line %d: %v", lines, err) + return nil, err } - results = append(results, res) + return parseText(bufio.NewScanner(&buf), lines) + } +} + +// TODO: custom error type for line information +func parseText(in *bufio.Scanner, nlines int) ([]result, error) { + res := make([]result, 0, nlines) + + for l := 1; in.Scan(); l++ { + fields := strings.Fields(in.Text()) + r, err := parseFields(fields) + + if err != nil { + return nil, fmt.Errorf("line %d: %v", l, err) + } + res = append(res, r) + } + + return res, in.Err() +} + +func countLines(r io.Reader) (int, error) { + var ( + lines int + buf = make([]byte, 1024<<4) // read via 16 KB blocks + ) + + for { + n, err := r.Read(buf) + lines += bytes.Count(buf[:n], []byte{'\n'}) + + if err == io.EOF { + return lines, nil + } + + if err != nil { + return lines, err + } } - - if err := in.Err(); err != nil { - return nil, err - } - - return results, nil } diff --git a/27-functional-programming/log-parser-exp/textreaderfast.go b/27-functional-programming/log-parser-exp/textreaderfast.go new file mode 100644 index 0000000..6f90666 --- /dev/null +++ b/27-functional-programming/log-parser-exp/textreaderfast.go @@ -0,0 +1,106 @@ +// For more tutorials: https://blog.learngoprogramming.com +// +// Copyright © 2018 Inanc Gumus +// Learn Go Programming Course +// License: https://creativecommons.org/licenses/by-nc-sa/4.0/ +// + +package main + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" +) + +// this could be made faster. +// currently, it's 30-35% faster. +// +// so, what's different than the textreader? +// +// + creates the buffers specific to the input file/stdin size +// + manually parses the fields: instead of strings.Fields +// + gets the lines using scanner's Bytes() method: instead of Text() +// + uses a manual atoi +// + + +func fastTextReader(r io.Reader) inputFunc { + return func() ([]result, error) { + // first: count the lines, so the parseText can create + // enough buffer. + var buf bytes.Buffer + l, err := countLines(io.TeeReader(r, &buf)) + if err != nil { + return nil, err + } + + return fastParseText(bufio.NewScanner(&buf), l) + } +} + +func fastParseText(in *bufio.Scanner, nlines int) ([]result, error) { + // needs to know the number of total lines in the file + res := make([]result, 0, nlines) + + for l := 0; in.Scan(); l++ { + r, err := fastParseFields(in.Bytes()) + + if err != nil { + return nil, fmt.Errorf("line %d: %v", l, err) + } + res = append(res, r) + } + + return res, in.Err() +} + +func fastParseFields(data []byte) (res result, err error) { + var field int + + for i, last := 0, 0; i < len(data); i++ { + done := len(data) == i+1 + + if c := data[i]; c == ' ' || done { + if done { + i = len(data) + } + + switch field { + case 0: + res.domain = string(data[last:i]) + case 1: + res.page = string(data[last:i]) + case 2: + res.visits, err = atoi(data[last:i]) + case 3: + res.uniques, err = atoi(data[last:i]) + } + + if err != nil { + return res, err + } + + last = i + 1 + field++ + } + } + + if field != 4 { + return result{}, errors.New("wrong number of fields") + } + return res, nil +} + +func atoi(input []byte) (int, error) { + val := 0 + for i := 0; i < len(input); i++ { + char := input[i] + if char < '0' || char > '9' { + return 0, errors.New("invalid number") + } + val = val*10 + int(char) - '0' + } + return val, nil +} diff --git a/27-functional-programming/log-parser-exp/textwriter.go b/27-functional-programming/log-parser-exp/textwriter.go index b7a1326..32a61ab 100644 --- a/27-functional-programming/log-parser-exp/textwriter.go +++ b/27-functional-programming/log-parser-exp/textwriter.go @@ -35,3 +35,9 @@ func textWriter(w io.Writer) outputFunc { return nil } } + +func noWhere() outputFunc { + return func(res []result) error { + return nil + } +}