dpleylxzx47207117 2019-09-14 20:09
浏览 114
已采纳

bufio.Scanner:如何知道我们正在处理换行还是截断的字符串?

I basically need to process each string line from a limited buffer that is read from a stream. Using the bufio.Scanner I can go line-by-line with the scanner but had to use what seems to be an over-complicated solution to detect "truncation". Is there a better way of doing this? Thanks a lot. I'm not tight to any lib or anything.

func (p *Parser) Read(data []byte, tmpline *string, n int, bufSize int) {
    var line string
    strdata := string(data)
    scanner := bufio.NewScanner(strings.NewReader(strdata))
    line = ""
    for scanner.Scan() {
        if line != "" {
            if p.lineProcessor != nil {
                p.lineProcessor(line)
            }
        }
        line = scanner.Text()
        if *tmpline != "" {
            line = *tmpline + line //prepend line here
            *tmpline = ""
        }
    }
    if n == bufSize && data[bufSize-1] != '
' { //detecting a fragment here, overcomplicated?
        *tmpline = line
    } else {
            //fmt.Println("last line >[" + line + "]")
            if p.lineProcessor != nil {
                p.lineProcessor(line)
            }
    }
}

func (p *Parser) Handle(r io.Reader, bufSize int) ([]byte, error) {
    var out []byte
    buf := make([]byte, bufSize)
    var n int
    var err error
    tmpline := ""
    for {
        n, err = r.Read(buf[:])
        if n > 0 {
            data := buf[:n]
            p.Read(data, &tmpline, n, bufSize)
        }
        if err != nil {
            if err == io.EOF {
                err = nil
            }
            break
        }
    }
    return out, err
}

main
...
cmd := exec.Command("ping", "8.8.8.8")

//var stdout, stderr []byte
var errStdout, errStderr error
//this is the type of stream I want to process **stdoutIn** and **stderrIn**
stdoutIn, _ := cmd.StdoutPipe()
stderrIn, _ := cmd.StderrPipe()
// 10 is the bufSize
parser.Init(stdoutIn, stderrIn, wg, 10, lineProcessor)
...

EDIT: I tried to implement the proposed solution. Now I have broke what was working (even though seemed overcomplicated). The output seems to be very funky. Here's the FULL CODE and FULL OUTPUT:

package main
import (
    "bufio"
    "fmt"
    "io"
    "log"
    "os/exec"
    "sync"
)
type LineProcessor func(string)
type Parser struct {
    r             io.Reader
    lineProcessor LineProcessor
    scanner       bufio.Scanner
}

func (p *Parser) Init(stdoutIn io.Reader, stderrIn io.Reader,
    wg sync.WaitGroup,
    lineProcessor LineProcessor) {
    wg.Add(2)
    p.lineProcessor = lineProcessor
    p.scanner = *bufio.NewScanner(stdoutIn)
    go p.Handler(stdoutIn, wg)
    go p.Handler(stderrIn, wg)
}
func (p *Parser) Handler(r io.Reader, wg sync.WaitGroup) { // ([]byte, error)
    var line string
    for p.scanner.Scan() {
        line = p.scanner.Text()
        if p.lineProcessor != nil {
            p.lineProcessor(line)
        }
    }
    wg.Done()
}
func lineProcessor(line string) {
    fmt.Println(line)
}
func main() {
    var err error
    cmd := exec.Command("ping", "8.8.8.8") 

    var errStdout, errStderr error
    stdoutIn, _ := cmd.StdoutPipe()
    stderrIn, _ := cmd.StderrPipe()

    var parser Parser

    var wg sync.WaitGroup
    parser.Init(stdoutIn, stderrIn, wg, lineProcessor)

    err = cmd.Start()
    if err != nil {
        log.Fatalf("cmd.Start() failed with '%s'
", err)
    }

    fmt.Printf("Waiting
")
    wg.Wait()

    err = cmd.Wait()
    if err != nil {
        log.Fatalf("cmd.Run() failed with %s
", err)
    }
    if errStdout != nil || errStderr != nil {
        log.Fatal("failed to capture stdout or stderr
")
    }
}

$./buggysolution
Waiting
PING 8.8.8.8 (8.8.8.8): 56 data bytes
64 bytes from 8.8.8.8: icmp_seq=0 ttl=52 time=4.786 ms
64 bytes from 8.8.8.8: icmp_seq=2 ttl=52 time=3.661 ms
64 bytes from 8.8.8.8: icmp_seq=4 ttl=52 time=4.117 ms
64 bytes from 8.8.8.8: icmp_seq=6 ttl=52 time=4.172 ms
64 bytes from 8.8.8.8: icmp_seq=8 ttl=52 time=3.584 ms
64 bytes from 8.8.8.8: icmp_seq=10 ttl=52 time=4.301 ms
mp_seq=11 ttl=52 time=4.534 ms
64 bytes from 8.8.8.8: icmp_seq=12 ttl=52 time=4.349 ms
64 bytes from 8.8.8.8: icmp_seq=13 ttl=52 time=4.923 ms
64 bytes from 8.8.8.8: icmp_seq=14 ttl=52 time=4.349 ms
64 bytes from 8.8.8.8: icmp_seq=15 ttl=52 time=4.106 ms
64 bytes from 8.8.8.8: icmp_seq=16 ttl=52 time=4.270 ms
64 bytes from 8.8.8.8: icmp_seq=17 ttl=52 time=4.231 ms
64 bytes from 8.8.8.8: icmp_seq=18 ttl=52 time=4.915 ms
64 bytes from 8.8.8.8: icmp_seq=19 ttl=52 time=4.487 ms
64 bytes from 8.8.8.8: icmp_seq=20 ttl=52 time=4.182 ms
64 bytes from 8.8.8.8: icmp_seq=21 ttl=52 time=4.369 ms
64 bytes from 8.8.8.8: icmp_seq=22 ttl=52 time=4.287 ms
64 bytes from 8.8.8.8: icmp_seq=23 ttl=52 time=3.922 ms
64 bytes from 8.8.8.8: icmp_seq=24 ttl=52 time=4.905 ms
64 bytes from 8.8.8.8: icmp_seq=25 ttl=52 time=4.226 ms
64 bytes from 8.8.8.8: icmp_seq=27 ttl=52 time=4.052 ms
64 bytes from 8.8.8.8: icmp_seq=29 ttl=52 time=3.453 ms
64 bytes from 8.8.8.8: icmp_seq=31 ttl=52 time=5.103 ms
64 bytes from 8.8.8.8: icmp_seq=33 ttl=52 time=4.066 ms
64 bytes from 8.8.8.8: icmp_seq=35 ttl=52 time=4.128 ms
64 bytes from 8.8.8.8: icmp_seq=37 ttl=52 time=4.982 ms
64 bytes from 8.8.8.8: icmp_seq=1 ttl=52 time=4.206 ms
.64 bytes from 8.8.8.8: icmp_seq=39 ttl=52 time=4.215 ms
.8: icmp_seq=3 ttl=52 time=4.218 ms
tl=52 time=3.650 ms
8: icmp_seq=2 ttl=52 time=3.661 ms
ttl=52 time=4.791 ms
: icmp_seq=5 ttl=52 time=3.581 ms
 ttl=52 time=4.211 ms
 icmp_seq=4 ttl=52 time=4.117 ms
4 ttl=52 time=4.245 ms
icmp_seq=7 ttl=52 time=4.955 ms
45 ttl=52 time=4.518 ms
cmp_seq=6 ttl=52 time=4.172 ms
=46 ttl=52 time=4.764 ms
seq 9
^C

Thanks for any fix.

  • 写回答

1条回答 默认 最新

  • doutang7383 2019-09-14 20:21
    关注

    this is not how a bufio.Scanner should be used.

    The bufio.Scanner takes a reader and can return lines directly out of it.

    bufio.Scanner will fail if the token is larger than bufio.Scanner.maxTokenSize which by default is MaxScanTokenSize but it won't return truncated results.

    You can also check the test suite at https://golang.org/src/bufio/scan_test.go#L214 (not that the MaxTokenSize method being used is defined with a special scheme to exists only during tests see https://golang.org/src/bufio/export_test.go#L16)

    You can also define your own underlying buffer before scanning to change that value https://golang.org/pkg/bufio/#Scanner.Buffer

    to roll your own version it might look like this https://play.golang.org/p/kDcO6eZPVhY however going down that path requires you write additional tests and benchmarks.


    After multiple comments, here what OP was looking for:

    package main
    
    import (
        "bufio"
        "io"
        "log"
        "os/exec"
    )
    
    func main() {
    
        cmd := exec.Command("ping", "8.8.8.8")
        var out io.Reader
        {
            stdout, err := cmd.StdoutPipe()
            if err != nil {
                log.Fatal(err)
            }
            stderr, err := cmd.StderrPipe()
            if err != nil {
                log.Fatal(err)
            }
            out = io.MultiReader(stdout, stderr)
        }
        if err := cmd.Start(); err != nil {
            log.Fatal(err)
        }
        // defer cmd.Process.Kill()
        s := bufio.NewScanner(out)
        for s.Scan() {
            log.Println(s.Text())
        }
        // if out closes, cmd closed.
        log.Println("all done")
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥100 支付宝网页转账系统不识别账号
  • ¥15 基于单片机的靶位控制系统
  • ¥15 AT89C51控制8位八段数码管显示时钟。
  • ¥15 真我手机蓝牙传输进度消息被关闭了,怎么打开?(关键词-消息通知)
  • ¥15 下图接收小电路,谁知道原理
  • ¥15 装 pytorch 的时候出了好多问题,遇到这种情况怎么处理?
  • ¥20 IOS游览器某宝手机网页版自动立即购买JavaScript脚本
  • ¥15 手机接入宽带网线,如何释放宽带全部速度
  • ¥30 关于#r语言#的问题:如何对R语言中mfgarch包中构建的garch-midas模型进行样本内长期波动率预测和样本外长期波动率预测
  • ¥15 ETLCloud 处理json多层级问题