dpleylxzx47207117 2019-09-14 20:09
浏览 116
已采纳

bufio.Scanner:如何知道我们正在处理换行还是截断的字符串?

I basically need to process each string line from a limited buffer that is read from a stream. Using the bufio.Scanner I can go line-by-line with the scanner but had to use what seems to be an over-complicated solution to detect "truncation". Is there a better way of doing this? Thanks a lot. I'm not tight to any lib or anything.

func (p *Parser) Read(data []byte, tmpline *string, n int, bufSize int) {
    var line string
    strdata := string(data)
    scanner := bufio.NewScanner(strings.NewReader(strdata))
    line = ""
    for scanner.Scan() {
        if line != "" {
            if p.lineProcessor != nil {
                p.lineProcessor(line)
            }
        }
        line = scanner.Text()
        if *tmpline != "" {
            line = *tmpline + line //prepend line here
            *tmpline = ""
        }
    }
    if n == bufSize && data[bufSize-1] != '
' { //detecting a fragment here, overcomplicated?
        *tmpline = line
    } else {
            //fmt.Println("last line >[" + line + "]")
            if p.lineProcessor != nil {
                p.lineProcessor(line)
            }
    }
}

func (p *Parser) Handle(r io.Reader, bufSize int) ([]byte, error) {
    var out []byte
    buf := make([]byte, bufSize)
    var n int
    var err error
    tmpline := ""
    for {
        n, err = r.Read(buf[:])
        if n > 0 {
            data := buf[:n]
            p.Read(data, &tmpline, n, bufSize)
        }
        if err != nil {
            if err == io.EOF {
                err = nil
            }
            break
        }
    }
    return out, err
}

main
...
cmd := exec.Command("ping", "8.8.8.8")

//var stdout, stderr []byte
var errStdout, errStderr error
//this is the type of stream I want to process **stdoutIn** and **stderrIn**
stdoutIn, _ := cmd.StdoutPipe()
stderrIn, _ := cmd.StderrPipe()
// 10 is the bufSize
parser.Init(stdoutIn, stderrIn, wg, 10, lineProcessor)
...

EDIT: I tried to implement the proposed solution. Now I have broke what was working (even though seemed overcomplicated). The output seems to be very funky. Here's the FULL CODE and FULL OUTPUT:

package main
import (
    "bufio"
    "fmt"
    "io"
    "log"
    "os/exec"
    "sync"
)
type LineProcessor func(string)
type Parser struct {
    r             io.Reader
    lineProcessor LineProcessor
    scanner       bufio.Scanner
}

func (p *Parser) Init(stdoutIn io.Reader, stderrIn io.Reader,
    wg sync.WaitGroup,
    lineProcessor LineProcessor) {
    wg.Add(2)
    p.lineProcessor = lineProcessor
    p.scanner = *bufio.NewScanner(stdoutIn)
    go p.Handler(stdoutIn, wg)
    go p.Handler(stderrIn, wg)
}
func (p *Parser) Handler(r io.Reader, wg sync.WaitGroup) { // ([]byte, error)
    var line string
    for p.scanner.Scan() {
        line = p.scanner.Text()
        if p.lineProcessor != nil {
            p.lineProcessor(line)
        }
    }
    wg.Done()
}
func lineProcessor(line string) {
    fmt.Println(line)
}
func main() {
    var err error
    cmd := exec.Command("ping", "8.8.8.8") 

    var errStdout, errStderr error
    stdoutIn, _ := cmd.StdoutPipe()
    stderrIn, _ := cmd.StderrPipe()

    var parser Parser

    var wg sync.WaitGroup
    parser.Init(stdoutIn, stderrIn, wg, lineProcessor)

    err = cmd.Start()
    if err != nil {
        log.Fatalf("cmd.Start() failed with '%s'
", err)
    }

    fmt.Printf("Waiting
")
    wg.Wait()

    err = cmd.Wait()
    if err != nil {
        log.Fatalf("cmd.Run() failed with %s
", err)
    }
    if errStdout != nil || errStderr != nil {
        log.Fatal("failed to capture stdout or stderr
")
    }
}

$./buggysolution
Waiting
PING 8.8.8.8 (8.8.8.8): 56 data bytes
64 bytes from 8.8.8.8: icmp_seq=0 ttl=52 time=4.786 ms
64 bytes from 8.8.8.8: icmp_seq=2 ttl=52 time=3.661 ms
64 bytes from 8.8.8.8: icmp_seq=4 ttl=52 time=4.117 ms
64 bytes from 8.8.8.8: icmp_seq=6 ttl=52 time=4.172 ms
64 bytes from 8.8.8.8: icmp_seq=8 ttl=52 time=3.584 ms
64 bytes from 8.8.8.8: icmp_seq=10 ttl=52 time=4.301 ms
mp_seq=11 ttl=52 time=4.534 ms
64 bytes from 8.8.8.8: icmp_seq=12 ttl=52 time=4.349 ms
64 bytes from 8.8.8.8: icmp_seq=13 ttl=52 time=4.923 ms
64 bytes from 8.8.8.8: icmp_seq=14 ttl=52 time=4.349 ms
64 bytes from 8.8.8.8: icmp_seq=15 ttl=52 time=4.106 ms
64 bytes from 8.8.8.8: icmp_seq=16 ttl=52 time=4.270 ms
64 bytes from 8.8.8.8: icmp_seq=17 ttl=52 time=4.231 ms
64 bytes from 8.8.8.8: icmp_seq=18 ttl=52 time=4.915 ms
64 bytes from 8.8.8.8: icmp_seq=19 ttl=52 time=4.487 ms
64 bytes from 8.8.8.8: icmp_seq=20 ttl=52 time=4.182 ms
64 bytes from 8.8.8.8: icmp_seq=21 ttl=52 time=4.369 ms
64 bytes from 8.8.8.8: icmp_seq=22 ttl=52 time=4.287 ms
64 bytes from 8.8.8.8: icmp_seq=23 ttl=52 time=3.922 ms
64 bytes from 8.8.8.8: icmp_seq=24 ttl=52 time=4.905 ms
64 bytes from 8.8.8.8: icmp_seq=25 ttl=52 time=4.226 ms
64 bytes from 8.8.8.8: icmp_seq=27 ttl=52 time=4.052 ms
64 bytes from 8.8.8.8: icmp_seq=29 ttl=52 time=3.453 ms
64 bytes from 8.8.8.8: icmp_seq=31 ttl=52 time=5.103 ms
64 bytes from 8.8.8.8: icmp_seq=33 ttl=52 time=4.066 ms
64 bytes from 8.8.8.8: icmp_seq=35 ttl=52 time=4.128 ms
64 bytes from 8.8.8.8: icmp_seq=37 ttl=52 time=4.982 ms
64 bytes from 8.8.8.8: icmp_seq=1 ttl=52 time=4.206 ms
.64 bytes from 8.8.8.8: icmp_seq=39 ttl=52 time=4.215 ms
.8: icmp_seq=3 ttl=52 time=4.218 ms
tl=52 time=3.650 ms
8: icmp_seq=2 ttl=52 time=3.661 ms
ttl=52 time=4.791 ms
: icmp_seq=5 ttl=52 time=3.581 ms
 ttl=52 time=4.211 ms
 icmp_seq=4 ttl=52 time=4.117 ms
4 ttl=52 time=4.245 ms
icmp_seq=7 ttl=52 time=4.955 ms
45 ttl=52 time=4.518 ms
cmp_seq=6 ttl=52 time=4.172 ms
=46 ttl=52 time=4.764 ms
seq 9
^C

Thanks for any fix.

  • 写回答

1条回答 默认 最新

  • doutang7383 2019-09-14 20:21
    关注

    this is not how a bufio.Scanner should be used.

    The bufio.Scanner takes a reader and can return lines directly out of it.

    bufio.Scanner will fail if the token is larger than bufio.Scanner.maxTokenSize which by default is MaxScanTokenSize but it won't return truncated results.

    You can also check the test suite at https://golang.org/src/bufio/scan_test.go#L214 (not that the MaxTokenSize method being used is defined with a special scheme to exists only during tests see https://golang.org/src/bufio/export_test.go#L16)

    You can also define your own underlying buffer before scanning to change that value https://golang.org/pkg/bufio/#Scanner.Buffer

    to roll your own version it might look like this https://play.golang.org/p/kDcO6eZPVhY however going down that path requires you write additional tests and benchmarks.


    After multiple comments, here what OP was looking for:

    package main
    
    import (
        "bufio"
        "io"
        "log"
        "os/exec"
    )
    
    func main() {
    
        cmd := exec.Command("ping", "8.8.8.8")
        var out io.Reader
        {
            stdout, err := cmd.StdoutPipe()
            if err != nil {
                log.Fatal(err)
            }
            stderr, err := cmd.StderrPipe()
            if err != nil {
                log.Fatal(err)
            }
            out = io.MultiReader(stdout, stderr)
        }
        if err := cmd.Start(); err != nil {
            log.Fatal(err)
        }
        // defer cmd.Process.Kill()
        s := bufio.NewScanner(out)
        for s.Scan() {
            log.Println(s.Text())
        }
        // if out closes, cmd closed.
        log.Println("all done")
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 乌班图ip地址配置及远程SSH
  • ¥15 怎么让点阵屏显示静态爱心,用keiluVision5写出让点阵屏显示静态爱心的代码,越快越好
  • ¥15 PSPICE制作一个加法器
  • ¥15 javaweb项目无法正常跳转
  • ¥15 VMBox虚拟机无法访问
  • ¥15 skd显示找不到头文件
  • ¥15 机器视觉中图片中长度与真实长度的关系
  • ¥15 fastreport table 怎么只让每页的最下面和最顶部有横线
  • ¥15 java 的protected权限 ,问题在注释里
  • ¥15 这个是哪里有问题啊?