doujianjian2060
2014-07-19 04:53
浏览 233
已采纳

正则表达式-解析正则表达式时出错:无效的转义序列:`\K`

I'm trying to compile a regex so that I can extract an 8-digit number, with or without spaces between the digits, from a string using Go. For some reason the compilation fails. What should I replace \K with?

validAcc, err := regexp.Compile(`[ ]\K(?<!\d )(?=(?: ?\d){8})(?!(?: ?\d){9})\d[ \d]+\d`)
if err != nil {
    return
}

Play it here

More code with sample data

package main

import "strings"
import "regexp"
import "fmt"

// main runs accFromText over two sample messages and prints a line for every
// result that differs from the expected account number. Any error returned by
// accFromText aborts the program with a panic.
func main() {
    // Both samples are expected to yield the same account number.
    const want = "12345678"

    // Raw strings: the line breaks and leading spaces below are part of the data.
    samples := []string{
        ` 12 34 56 78 //the number we need
 12 3455678 90123455 // the number we don't need`,
        `
More details here
1234567 12345 123456789 asd
12000000000 a number we don't need 
 12 3456 78 //this is the kind of number we need
 12 3455678 90123455 // the number we don't need`,
    }

    for _, text := range samples {
        got, err := accFromText(text)
        if err != nil {
            panic(err)
        }
        if got != want {
            fmt.Printf("expected %v, received %v", want, got)
        }
    }
}

// accFromText tries to extract an account number from msg by compiling the
// pattern below, taking its first match and removing the spaces from it.
//
// NOTE: Go's regexp package implements RE2 syntax, which has no \K and no
// lookbehind/lookahead. regexp.Compile therefore rejects this pattern, so the
// function returns an empty accNumber together with the compile error.
func accFromText(msg string) (accNumber string, err error) {
    const pattern = `[ ]\K(?<!\d )(?=(?: ?\d){8})(?!(?: ?\d){9})\d[ \d]+\d`

    re, compileErr := regexp.Compile(pattern)
    if compileErr != nil {
        return "", compileErr
    }

    // FindString yields "" when nothing matches, so the result is then empty.
    firstMatch := re.FindString(msg)
    return strings.Replace(firstMatch, " ", "", -1), nil
}
  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 邀请回答

3条回答 默认 最新

  • doutang6130 2014-07-19 05:03
    已采纳

    Considering that Go's regexp package (which uses RE2 syntax) doesn't support any lookbehind/lookahead, could you try a simpler expression first:

    c, err := regexp.Compile(`\b\d{8}\b`)
    

    In your case (playground), this would work

    (\d\d ){4}
    validAcc, err := regexp.Compile(`(\d\d ){4}`)
    

    Or:

    (\d\d ?){4} # matches '33 1133 06 Oth'
    validAcc, err := regexp.Compile(`(\d\d ?){4}`)
    

    Again, I would try a simple regexp first, before moving on to a more complex option: it will depend on the data you have to parse.


    For a more complex case, the regexp alone can help you capture the data in a group, and then you need to extract the number found (meaning you need to add post-processing to your regexp):

    validAcc, err := regexp.Compile(`[^\d]((\d\d ?){4})[^\d]`)
    if err != nil {
        return
    }
    // Submatch 1 is the outer capture group: the digit pairs without the
    // non-digit delimiters that surround them in the full match. Reading the
    // group avoids trimming the match by hand, which would cut a multi-byte
    // delimiter in half.
    m := validAcc.FindStringSubmatch(msg)
    if m == nil {
        // Nothing matched: leave accNumber empty rather than slicing an
        // empty string, which would panic.
        return
    }
    accNumber = strings.Replace(m[1], " ", "", -1)
    

    See playground

    点赞 打赏 评论
  • douzhi1937 2014-07-19 05:47

    I suggest you take two steps:

    1) use regexp find all matches: \d[\d ]+\d

    2) keep only the matches that contain exactly 8 digits

    (I don’t think you can do this by a single regex in golang)

    点赞 打赏 评论
  • dongwu9972 2014-07-19 16:49

    This will do the job (and faster, since no regexp is needed)

        package main
    
        import "fmt"
        import "unicode"
        import "strings"
    
        // main feeds two sample messages to accFromText and prints a line for
        // every result that is not the expected account number. An error from
        // accFromText aborts the program with a panic.
        func main() {
            // Raw strings: line breaks and leading whitespace are part of the data.
            cases := []struct {
                text string
                want string
            }{
                {
                    text: ` 12 34 56 78 //the number we need
         12 3455678 90123455 // the number we don't need`,
                    want: "12345678",
                },
                {
                    text: `
        More details here
        1234567 12345 123456789 asd
        12000000000 a number we don't need 
         12 3456 78 //this is the kind of number we need
         12 3455678 90123455 // the number we don't need`,
                    want: "12345678",
                },
            }
            for _, tc := range cases {
                got, err := accFromText(tc.text)
                if err != nil {
                    panic(err)
                }
                if got != tc.want {
                    fmt.Printf("expected %v, received %v", tc.want, got)
                }
            }
        }
    
        // accFromText returns the first account number found in msg.
        //
        // msg is examined line by line. On each line the leading ASCII digits are
        // collected, skipping whitespace between them, and the scan stops at the
        // first character that is neither a digit nor whitespace. The first line
        // whose leading digits number exactly accDigits wins; its digits are
        // returned with the whitespace removed.
        //
        // A non-nil error (and an empty accNumber) is returned when no line
        // qualifies.
        func accFromText(msg string) (accNumber string, err error) {
            // Number of digits a valid account number must have.
            const accDigits = 8

            // Split on line breaks only: a tab is whitespace inside a line, not a
            // line separator, so it must not cut a number in two.
            lines := strings.FieldsFunc(msg, func(c rune) bool {
                return c == '\n' || c == '\r'
            })

            // leadingDigits collects the digits at the start of ln, ignoring
            // whitespace, up to the first other character.
            leadingDigits := func(ln string) string {
                digits := make([]byte, 0, len(ln))
                for _, c := range ln {
                    switch {
                    case c >= '0' && c <= '9':
                        // Only ASCII digits are accepted, so one byte is one
                        // digit and len() of the result is the digit count.
                        digits = append(digits, byte(c))
                    case unicode.IsSpace(c):
                        // Separators between digit groups are skipped.
                    default:
                        return string(digits)
                    }
                }
                return string(digits)
            }

            for _, line := range lines {
                if num := leadingDigits(line); len(num) == accDigits {
                    return num, nil
                }
            }
            return "", fmt.Errorf("no %d-digit account number found", accDigits)
        }
    

    http://play.golang.org/p/yVDgDWO9hE

    点赞 打赏 评论

相关推荐 更多相似问题