dpleylxzx47207117 2019-09-14 20:09
浏览 114
已采纳

bufio.Scanner:如何知道我们正在处理换行还是截断的字符串?

I basically need to process each string line from a limited buffer that is read from a stream. Using the bufio.Scanner I can go line-by-line with the scanner but had to use what seems to be an over-complicated solution to detect "truncation". Is there a better way of doing this? Thanks a lot. I'm not tight to any lib or anything.

func (p *Parser) Read(data []byte, tmpline *string, n int, bufSize int) {
    var line string
    strdata := string(data)
    scanner := bufio.NewScanner(strings.NewReader(strdata))
    line = ""
    for scanner.Scan() {
        if line != "" {
            if p.lineProcessor != nil {
                p.lineProcessor(line)
            }
        }
        line = scanner.Text()
        if *tmpline != "" {
            line = *tmpline + line //prepend line here
            *tmpline = ""
        }
    }
    if n == bufSize && data[bufSize-1] != '
' { //detecting a fragment here, overcomplicated?
        *tmpline = line
    } else {
            //fmt.Println("last line >[" + line + "]")
            if p.lineProcessor != nil {
                p.lineProcessor(line)
            }
    }
}

func (p *Parser) Handle(r io.Reader, bufSize int) ([]byte, error) {
    var out []byte
    buf := make([]byte, bufSize)
    var n int
    var err error
    tmpline := ""
    for {
        n, err = r.Read(buf[:])
        if n > 0 {
            data := buf[:n]
            p.Read(data, &tmpline, n, bufSize)
        }
        if err != nil {
            if err == io.EOF {
                err = nil
            }
            break
        }
    }
    return out, err
}

main
...
cmd := exec.Command("ping", "8.8.8.8")

//var stdout, stderr []byte
var errStdout, errStderr error
//this is the type of stream I want to process **stdoutIn** and **stderrIn**
stdoutIn, _ := cmd.StdoutPipe()
stderrIn, _ := cmd.StderrPipe()
// 10 is the bufSize
parser.Init(stdoutIn, stderrIn, wg, 10, lineProcessor)
...

EDIT: I tried to implement the proposed solution. Now I have broke what was working (even though seemed overcomplicated). The output seems to be very funky. Here's the FULL CODE and FULL OUTPUT:

package main
import (
    "bufio"
    "fmt"
    "io"
    "log"
    "os/exec"
    "sync"
)
type LineProcessor func(string)
type Parser struct {
    r             io.Reader
    lineProcessor LineProcessor
    scanner       bufio.Scanner
}

func (p *Parser) Init(stdoutIn io.Reader, stderrIn io.Reader,
    wg sync.WaitGroup,
    lineProcessor LineProcessor) {
    wg.Add(2)
    p.lineProcessor = lineProcessor
    p.scanner = *bufio.NewScanner(stdoutIn)
    go p.Handler(stdoutIn, wg)
    go p.Handler(stderrIn, wg)
}
func (p *Parser) Handler(r io.Reader, wg sync.WaitGroup) { // ([]byte, error)
    var line string
    for p.scanner.Scan() {
        line = p.scanner.Text()
        if p.lineProcessor != nil {
            p.lineProcessor(line)
        }
    }
    wg.Done()
}
func lineProcessor(line string) {
    fmt.Println(line)
}
func main() {
    var err error
    cmd := exec.Command("ping", "8.8.8.8") 

    var errStdout, errStderr error
    stdoutIn, _ := cmd.StdoutPipe()
    stderrIn, _ := cmd.StderrPipe()

    var parser Parser

    var wg sync.WaitGroup
    parser.Init(stdoutIn, stderrIn, wg, lineProcessor)

    err = cmd.Start()
    if err != nil {
        log.Fatalf("cmd.Start() failed with '%s'
", err)
    }

    fmt.Printf("Waiting
")
    wg.Wait()

    err = cmd.Wait()
    if err != nil {
        log.Fatalf("cmd.Run() failed with %s
", err)
    }
    if errStdout != nil || errStderr != nil {
        log.Fatal("failed to capture stdout or stderr
")
    }
}

$./buggysolution
Waiting
PING 8.8.8.8 (8.8.8.8): 56 data bytes
64 bytes from 8.8.8.8: icmp_seq=0 ttl=52 time=4.786 ms
64 bytes from 8.8.8.8: icmp_seq=2 ttl=52 time=3.661 ms
64 bytes from 8.8.8.8: icmp_seq=4 ttl=52 time=4.117 ms
64 bytes from 8.8.8.8: icmp_seq=6 ttl=52 time=4.172 ms
64 bytes from 8.8.8.8: icmp_seq=8 ttl=52 time=3.584 ms
64 bytes from 8.8.8.8: icmp_seq=10 ttl=52 time=4.301 ms
mp_seq=11 ttl=52 time=4.534 ms
64 bytes from 8.8.8.8: icmp_seq=12 ttl=52 time=4.349 ms
64 bytes from 8.8.8.8: icmp_seq=13 ttl=52 time=4.923 ms
64 bytes from 8.8.8.8: icmp_seq=14 ttl=52 time=4.349 ms
64 bytes from 8.8.8.8: icmp_seq=15 ttl=52 time=4.106 ms
64 bytes from 8.8.8.8: icmp_seq=16 ttl=52 time=4.270 ms
64 bytes from 8.8.8.8: icmp_seq=17 ttl=52 time=4.231 ms
64 bytes from 8.8.8.8: icmp_seq=18 ttl=52 time=4.915 ms
64 bytes from 8.8.8.8: icmp_seq=19 ttl=52 time=4.487 ms
64 bytes from 8.8.8.8: icmp_seq=20 ttl=52 time=4.182 ms
64 bytes from 8.8.8.8: icmp_seq=21 ttl=52 time=4.369 ms
64 bytes from 8.8.8.8: icmp_seq=22 ttl=52 time=4.287 ms
64 bytes from 8.8.8.8: icmp_seq=23 ttl=52 time=3.922 ms
64 bytes from 8.8.8.8: icmp_seq=24 ttl=52 time=4.905 ms
64 bytes from 8.8.8.8: icmp_seq=25 ttl=52 time=4.226 ms
64 bytes from 8.8.8.8: icmp_seq=27 ttl=52 time=4.052 ms
64 bytes from 8.8.8.8: icmp_seq=29 ttl=52 time=3.453 ms
64 bytes from 8.8.8.8: icmp_seq=31 ttl=52 time=5.103 ms
64 bytes from 8.8.8.8: icmp_seq=33 ttl=52 time=4.066 ms
64 bytes from 8.8.8.8: icmp_seq=35 ttl=52 time=4.128 ms
64 bytes from 8.8.8.8: icmp_seq=37 ttl=52 time=4.982 ms
64 bytes from 8.8.8.8: icmp_seq=1 ttl=52 time=4.206 ms
.64 bytes from 8.8.8.8: icmp_seq=39 ttl=52 time=4.215 ms
.8: icmp_seq=3 ttl=52 time=4.218 ms
tl=52 time=3.650 ms
8: icmp_seq=2 ttl=52 time=3.661 ms
ttl=52 time=4.791 ms
: icmp_seq=5 ttl=52 time=3.581 ms
 ttl=52 time=4.211 ms
 icmp_seq=4 ttl=52 time=4.117 ms
4 ttl=52 time=4.245 ms
icmp_seq=7 ttl=52 time=4.955 ms
45 ttl=52 time=4.518 ms
cmp_seq=6 ttl=52 time=4.172 ms
=46 ttl=52 time=4.764 ms
seq 9
^C

Thanks for any fix.

  • 写回答

1条回答 默认 最新

  • doutang7383 2019-09-14 20:21
    关注

    this is not how a bufio.Scanner should be used.

    The bufio.Scanner takes a reader and can return lines directly out of it.

    bufio.Scanner will fail if the token is larger than bufio.Scanner.maxTokenSize which by default is MaxScanTokenSize but it won't return truncated results.

    You can also check the test suite at https://golang.org/src/bufio/scan_test.go#L214 (not that the MaxTokenSize method being used is defined with a special scheme to exists only during tests see https://golang.org/src/bufio/export_test.go#L16)

    You can also define your own underlying buffer before scanning to change that value https://golang.org/pkg/bufio/#Scanner.Buffer

    to roll your own version it might look like this https://play.golang.org/p/kDcO6eZPVhY however going down that path requires you write additional tests and benchmarks.


    After multiple comments, here what OP was looking for:

    package main
    
    import (
        "bufio"
        "io"
        "log"
        "os/exec"
    )
    
    func main() {
    
        cmd := exec.Command("ping", "8.8.8.8")
        var out io.Reader
        {
            stdout, err := cmd.StdoutPipe()
            if err != nil {
                log.Fatal(err)
            }
            stderr, err := cmd.StderrPipe()
            if err != nil {
                log.Fatal(err)
            }
            out = io.MultiReader(stdout, stderr)
        }
        if err := cmd.Start(); err != nil {
            log.Fatal(err)
        }
        // defer cmd.Process.Kill()
        s := bufio.NewScanner(out)
        for s.Scan() {
            log.Println(s.Text())
        }
        // if out closes, cmd closed.
        log.Println("all done")
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 改算法,照着压缩包里边,参考其他代码封装的格式 写到main函数里
  • ¥15 用windows做服务的同志有吗
  • ¥60 求一个简单的网页(标签-安全|关键词-上传)
  • ¥35 lstm时间序列共享单车预测,loss值优化,参数优化算法
  • ¥15 Python中的request,如何使用ssr节点,通过代理requests网页。本人在泰国,需要用大陆ip才能玩网页游戏,合法合规。
  • ¥100 为什么这个恒流源电路不能恒流?
  • ¥15 有偿求跨组件数据流路径图
  • ¥15 写一个方法checkPerson,入参实体类Person,出参布尔值
  • ¥15 我想咨询一下路面纹理三维点云数据处理的一些问题,上传的坐标文件里是怎么对无序点进行编号的,以及xy坐标在处理的时候是进行整体模型分片处理的吗
  • ¥15 一直显示正在等待HID—ISP