drhe80011 2014-11-24 11:48
浏览 41
已采纳

Go 中 CSV 与 map 的性能不佳

I need to write a Go script that will open a big CSV file, and create new, separate CSVs based on the value of the first element of each line.

The CSV file looks like this:

"country", "otherfield", "otherfield1", "otherfield2", "etc"
"AT", "otherfield", "otherfield1", "otherfield2", "etc"
"AT", "otherfield", "otherfield1", "otherfield2", "etc"
"DE", "otherfield", "otherfield1", "otherfield2", "etc"
"DE", "otherfield", "otherfield1", "otherfield2", "etc"

So, what I am trying to do is create one file per distinct value of the first field (e.g. AT.csv), containing all the lines that start with that value.

The following is the script that I have written so far:

package main

import (
    "encoding/csv"
    "fmt"
    "os"
)

// main is the program entry point. It hands the hard-coded input file
// name to createContent, which does all of the work.
func main() {
    const inputFile = "union_exp.csv"

    createContent(inputFile)
}

func createContent(csvfilename string) {

    keys := ""

    content := make(map[string]string)

    csvfile, err := os.Open(csvfilename)

    if err != nil {
        fmt.Println(err)
    }

    defer csvfile.Close()

    reader := csv.NewReader(csvfile)

    reader.FieldsPerRecord = -1

    rawCSVdata, err := reader.ReadAll()

    if err != nil {
        fmt.Println(err)
        os.Exit(1)
    }

    for i, each := range rawCSVdata {

        if i == 0 {
            keys = "\"" + each[0] + "\",\"" + each[1] + "\",\"" + each[2] + "\",\"" + each[3] + "\",\"" + each[4] + "\"
"
        } else {

            stringtoadd := "\"" + each[0] + "\",\"" + each[1] + "\",\"" + each[2] + "\",\"" + each[3] + "\",\"" + each[4] + "\"
"

            if i%10000 == 0 {
                fmt.Println(i)
            }

            exists := Exists(content, each[0])
            if !exists {
                content[each[0]] = keys
            }

            content[each[0]] += stringtoadd

            createFile(each[0], content[each[0]])

        }
    }

}

// createFile writes content to the file name + ".csv", creating the file
// or truncating it if it already exists.
//
// The function has no error return, so failures to create, write or close
// the file are printed to standard output instead of being silently
// discarded. A failed create returns without attempting the write.
func createFile(name, content string) {
    f, err := os.Create(name + ".csv")
    if err != nil {
        fmt.Println(err)
        return
    }

    if _, err := f.WriteString(content); err != nil {
        fmt.Println(err)
    }

    // Close can report a delayed write failure, so its error matters too.
    if err := f.Close(); err != nil {
        fmt.Println(err)
    }
}

// Exists reports whether name is a key of content. A key that maps to the
// empty string still counts as present.
func Exists(content map[string]string, name string) bool {
    _, present := content[name]

    return present
}

The problem I am having at the moment is that the performance is quite poor. I even have a similar script written in PHP that performs the same operation much faster than this, which makes me think that there must be something wrong with my Go script.

Can someone help me to understand what is wrong with it?

Thank you!

  • 写回答

2条回答 默认 最新

  • douzhimao8656 2014-11-24 12:45
    关注

    You are (unnecessarily) loading the complete CSV file at once and overwriting the output files every time their contents change.

    Try the following:

    package main
    
    import (
        "encoding/csv"
        "fmt"
        "io"
        "os"
        "sync"
    )
    
    // main streams union_exp.csv record by record and fans the records out
    // to one writer goroutine per distinct value of the first column. Each
    // goroutine receives the header record first, then every data record
    // for its key, over its own buffered channel.
    //
    // All channels are closed and all writers awaited before main returns,
    // including when reading stops on an error, so that whatever was read
    // successfully is flushed to disk.
    func main() {

        input, err := os.Open("union_exp.csv")
        if err != nil {
            fmt.Println("Error while opening CSV file.")
            return
        }
        defer input.Close()

        reader := csv.NewReader(input)
        // Accept records with a varying number of fields.
        reader.FieldsPerRecord = -1

        // The first record is the header; every output file starts with it.
        keys, err := reader.Read()
        if err != nil {
            fmt.Println("Error while reading CSV file.")
            return
        }

        files := make(map[string]chan []string)
        wg := &sync.WaitGroup{}

        var line []string
        for line, err = reader.Read(); err == nil; line, err = reader.Read() {

            ch, ok := files[line[0]]
            if !ok {
                // First record for this key: start its writer and send
                // the header ahead of any data.
                ch = make(chan []string, 8)
                files[line[0]] = ch
                wg.Add(1)
                go fileWriter(line[0], ch, wg)
                ch <- keys
            }
            // Sent for new and existing keys alike, so the record that
            // introduces a key is written too.
            ch <- line

        }

        // Closing the channels ends each writer's loop; waiting lets the
        // writers flush before the program exits.
        for _, ch := range files {
            close(ch)
        }
        wg.Wait()

        if err != io.EOF {
            fmt.Println("Error while reading CSV file.")
            return
        }

        fmt.Println("Done!")
    }
    
    // fileWriter creates "x" + fileName + ".csv" and writes every record
    // received on ch to it as a CSV line, until ch is closed. It calls
    // wg.Done when it returns.
    //
    // A failure to create, write, flush or close the output file prints a
    // message and terminates the whole process with status 1, so a
    // truncated file is never reported as success.
    func fileWriter(fileName string, ch chan []string, wg *sync.WaitGroup) {
        defer wg.Done()

        file, err := os.Create("x" + fileName + ".csv")
        if err != nil {
            fmt.Println("Error while creating output file.")
            os.Exit(1) // Kill the whole app
        }

        writer := csv.NewWriter(file)

        for line := range ch {
            // Not checked per record: writer.Error below reports any error
            // from an earlier Write or Flush, and the loop keeps draining
            // so the sender is never blocked.
            writer.Write(line)
        }

        writer.Flush()
        err = writer.Error()
        // Close explicitly rather than with defer: os.Exit skips deferred
        // calls, and the Close error is part of the outcome.
        if closeErr := file.Close(); err == nil {
            err = closeErr
        }
        if err != nil {
            fmt.Println("Error while writing output file.")
            os.Exit(1) // Kill the whole app
        }

    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥15 求螺旋焊缝的图像处理
  • ¥15 blast算法(相关搜索:数据库)
  • ¥15 请问有人会紧聚焦相关的matlab知识嘛?
  • ¥15 网络通信安全解决方案
  • ¥50 yalmip+Gurobi
  • ¥20 win10修改放大文本以及缩放与布局后蓝屏无法正常进入桌面
  • ¥15 itunes恢复数据最后一步发生错误
  • ¥15 关于#windows#的问题:2024年5月15日的win11更新后资源管理器没有地址栏了顶部的地址栏和文件搜索都消失了
  • ¥100 H5网页如何调用微信扫一扫功能?
  • ¥15 讲解电路图,付费求解