optimize: log parser
add: fast text reader to log parser
This commit is contained in:
@ -12,8 +12,7 @@ func pageGrouper(r result) string {
|
||||
return r.domain + r.page
|
||||
}
|
||||
|
||||
// you could have created a noopGrouper as well
|
||||
// but it's not necessary i think (map allocation)
|
||||
// groupBy allocates map unnecessarily
|
||||
func noopGrouper(r result) string {
|
||||
// with something like:
|
||||
// return randomStrings()
|
||||
|
@ -15,7 +15,7 @@ func main() {
|
||||
defer recoverErr()
|
||||
|
||||
_, err := newReport().
|
||||
// filterBy(orgDomainsFilter).
|
||||
// from(fastTextReader(os.Stdin)).
|
||||
filterBy(notUsing(domainExtFilter("com", "io"))).
|
||||
groupBy(domainGrouper).
|
||||
start()
|
||||
|
@ -17,6 +17,7 @@ type report struct {
|
||||
func newReport() *report {
|
||||
return &report{
|
||||
filter: noopFilter,
|
||||
group: noopGrouper,
|
||||
input: textReader(os.Stdin),
|
||||
output: textWriter(os.Stdout),
|
||||
}
|
||||
@ -43,30 +44,19 @@ func (r *report) groupBy(fn groupFunc) *report {
|
||||
}
|
||||
|
||||
func (r *report) start() ([]result, error) {
|
||||
if r.input == nil {
|
||||
panic("report input cannot be nil")
|
||||
}
|
||||
// input filterBy groupBy
|
||||
// scanner (result) bool map[string]result
|
||||
//
|
||||
// stdin -> []result -> []results -> []result -> output(stdout)
|
||||
|
||||
results, err := r.input()
|
||||
res, err := r.input()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// noop if filter is nil
|
||||
results = filterBy(results, r.filter)
|
||||
res = filterBy(res, r.filter)
|
||||
res = groupBy(res, r.group)
|
||||
err = r.output(res)
|
||||
|
||||
// group func is more tricky
|
||||
// you don't want to create an unnecessary map
|
||||
if r.group != nil {
|
||||
results = groupBy(results, r.group)
|
||||
}
|
||||
|
||||
// TODO: prefer: noop writer
|
||||
if r.output != nil {
|
||||
if err := r.output(results); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
return res, err
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
@ -16,34 +17,51 @@ import (
|
||||
|
||||
func textReader(r io.Reader) inputFunc {
|
||||
return func() ([]result, error) {
|
||||
in := bufio.NewScanner(r)
|
||||
return parseText(in)
|
||||
}
|
||||
}
|
||||
|
||||
func parseText(in *bufio.Scanner) ([]result, error) {
|
||||
var (
|
||||
results []result
|
||||
lines int
|
||||
)
|
||||
|
||||
results = make([]result, 0, 5000000)
|
||||
|
||||
for in.Scan() {
|
||||
lines++
|
||||
|
||||
res, err := parseFields(strings.Fields(in.Text()))
|
||||
// first: count the lines, so the parseText can create
|
||||
// enough buffer.
|
||||
var buf bytes.Buffer
|
||||
lines, err := countLines(io.TeeReader(r, &buf))
|
||||
if err != nil {
|
||||
// TODO: custom error type for line information
|
||||
return nil, fmt.Errorf("line %d: %v", lines, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results = append(results, res)
|
||||
return parseText(bufio.NewScanner(&buf), lines)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: custom error type for line information
|
||||
func parseText(in *bufio.Scanner, nlines int) ([]result, error) {
|
||||
res := make([]result, 0, nlines)
|
||||
|
||||
for l := 1; in.Scan(); l++ {
|
||||
fields := strings.Fields(in.Text())
|
||||
r, err := parseFields(fields)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("line %d: %v", l, err)
|
||||
}
|
||||
res = append(res, r)
|
||||
}
|
||||
|
||||
return res, in.Err()
|
||||
}
|
||||
|
||||
// countLines reads r until EOF and returns the number of lines it contains.
//
// Every '\n' ends a line; trailing data that is not terminated by a newline
// counts as one more line, so the result matches the number of lines a
// line-based bufio.Scanner yields for the same data.
//
// If r fails with an error other than io.EOF, that error is returned along
// with the number of newlines seen so far.
func countLines(r io.Reader) (int, error) {
	var (
		lines int
		last  byte = '\n'              // last byte read; '\n' means no pending partial line
		buf        = make([]byte, 16<<10) // read via 16 KB blocks
	)

	for {
		n, err := r.Read(buf)
		// a Read may return data together with an error: count first.
		if n > 0 {
			lines += bytes.Count(buf[:n], []byte{'\n'})
			last = buf[n-1]
		}

		if err == io.EOF {
			if last != '\n' {
				lines++ // final line without a trailing newline
			}
			return lines, nil
		}

		if err != nil {
			return lines, err
		}
	}
}
|
||||
|
106
27-functional-programming/log-parser-exp/textreaderfast.go
Normal file
106
27-functional-programming/log-parser-exp/textreaderfast.go
Normal file
@ -0,0 +1,106 @@
|
||||
// For more tutorials: https://blog.learngoprogramming.com
|
||||
//
|
||||
// Copyright © 2018 Inanc Gumus
|
||||
// Learn Go Programming Course
|
||||
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
|
||||
//
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// This could be made faster still.
// Currently, it's 30-35% faster than the textreader.
//
// So, what's different from the textreader?
//
// + creates the buffers specific to the input file/stdin size
// + manually parses the fields instead of using strings.Fields
// + gets the lines using the scanner's Bytes() method instead of Text()
// + uses a manual atoi
|
||||
|
||||
func fastTextReader(r io.Reader) inputFunc {
|
||||
return func() ([]result, error) {
|
||||
// first: count the lines, so the parseText can create
|
||||
// enough buffer.
|
||||
var buf bytes.Buffer
|
||||
l, err := countLines(io.TeeReader(r, &buf))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return fastParseText(bufio.NewScanner(&buf), l)
|
||||
}
|
||||
}
|
||||
|
||||
func fastParseText(in *bufio.Scanner, nlines int) ([]result, error) {
|
||||
// needs to know the number of total lines in the file
|
||||
res := make([]result, 0, nlines)
|
||||
|
||||
for l := 0; in.Scan(); l++ {
|
||||
r, err := fastParseFields(in.Bytes())
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("line %d: %v", l, err)
|
||||
}
|
||||
res = append(res, r)
|
||||
}
|
||||
|
||||
return res, in.Err()
|
||||
}
|
||||
|
||||
func fastParseFields(data []byte) (res result, err error) {
|
||||
var field int
|
||||
|
||||
for i, last := 0, 0; i < len(data); i++ {
|
||||
done := len(data) == i+1
|
||||
|
||||
if c := data[i]; c == ' ' || done {
|
||||
if done {
|
||||
i = len(data)
|
||||
}
|
||||
|
||||
switch field {
|
||||
case 0:
|
||||
res.domain = string(data[last:i])
|
||||
case 1:
|
||||
res.page = string(data[last:i])
|
||||
case 2:
|
||||
res.visits, err = atoi(data[last:i])
|
||||
case 3:
|
||||
res.uniques, err = atoi(data[last:i])
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
last = i + 1
|
||||
field++
|
||||
}
|
||||
}
|
||||
|
||||
if field != 4 {
|
||||
return result{}, errors.New("wrong number of fields")
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// atoi converts the unsigned decimal number in input to an int without
// allocating a string first.
//
// Only the ASCII digits '0'..'9' are accepted: no sign, no whitespace.
// It returns an error for empty input, for any non-digit byte, and for
// values that do not fit into an int.
func atoi(input []byte) (int, error) {
	const maxInt = int(^uint(0) >> 1)

	// an empty field is not a number; do not silently turn it into 0.
	if len(input) == 0 {
		return 0, errors.New("invalid number")
	}

	val := 0
	for _, c := range input {
		if c < '0' || c > '9' {
			return 0, errors.New("invalid number")
		}

		d := int(c - '0')
		// val*10 + d would exceed maxInt: report it instead of wrapping.
		if val > (maxInt-d)/10 {
			return 0, errors.New("number out of range")
		}
		val = val*10 + d
	}
	return val, nil
}
|
@ -35,3 +35,9 @@ func textWriter(w io.Writer) outputFunc {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func noWhere() outputFunc {
|
||||
return func(res []result) error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user