I need to write a Go script that will open a big CSV file, and create new, separate CSVs based on the value of the first element of each line.
The CSV file looks like this:
"country", "otherfield", "otherfield1", "otherfield2", "etc"
"AT", "otherfield", "otherfield1", "otherfield2", "etc"
"AT", "otherfield", "otherfield1", "otherfield2", "etc"
"DE", "otherfield", "otherfield1", "otherfield2", "etc"
"DE", "otherfield", "otherfield1", "otherfield2", "etc"
So, what I am trying to do is creating a file with the first field's value (e.g. AT.csv
), containing all the lines that start with that value.
The following is the script that I have written so far:
package main
import (
"encoding/csv"
"fmt"
"os"
)
func main() {
// contentCreated := make(chan map[string]string)
createContent("union_exp.csv")
}
func createContent(csvfilename string) {
keys := ""
content := make(map[string]string)
csvfile, err := os.Open(csvfilename)
if err != nil {
fmt.Println(err)
}
defer csvfile.Close()
reader := csv.NewReader(csvfile)
reader.FieldsPerRecord = -1
rawCSVdata, err := reader.ReadAll()
if err != nil {
fmt.Println(err)
os.Exit(1)
}
for i, each := range rawCSVdata {
if i == 0 {
keys = "\"" + each[0] + "\",\"" + each[1] + "\",\"" + each[2] + "\",\"" + each[3] + "\",\"" + each[4] + "\"
"
} else {
stringtoadd := "\"" + each[0] + "\",\"" + each[1] + "\",\"" + each[2] + "\",\"" + each[3] + "\",\"" + each[4] + "\"
"
if i%10000 == 0 {
fmt.Println(i)
}
exists := Exists(content, each[0])
if !exists {
content[each[0]] = keys
}
content[each[0]] += stringtoadd
createFile(each[0], content[each[0]])
}
}
}
func createFile(name, content string) {
f, _ := os.Create(name + ".csv")
f.WriteString(content)
f.Close()
}
func Exists(content map[string]string, name string) bool {
_, exists := content[name]
return exists
}
The problem I am having at the moment is that the performances are quite slow. I even have a similar script written in PHP which is executing the same operation way faster than this. And that obviously makes me think that there must be something wrong with my Go script.
Can someone help me to understand what is wrong with it?
Thank you!