duanna3634 2018-10-05 17:16
浏览 107
已采纳

在目录中查找重复的文件

This is my first Go program. I'm learning the language, but it's a bit difficult to understand all the concepts, so in order to practice I wrote some code to detect identical files. It's a simple program that recursively checks for duplicate files in a directory.

but:

How can I detect duplicate files among the files in a directory?

The problem isn't walking the directory recursively; the problem is how to compare the files.

  • 写回答

2条回答 默认 最新

  • douzheng5717 2018-10-05 18:34
    关注

    Use SHA-256 hashes to compare the files.

    example:

    package main
    
    import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
        "os"
        "path/filepath"
        "sync"
        "flag"
        "runtime"
        "io"
    )
    
    // Command-line settings; main binds both to flags before any work starts.
    var (
        // dir is the root directory that search walks.
        dir string
        // workers is the number of worker goroutines main launches.
        workers int
    )
    
    // Result pairs a file path with the SHA-256 digest of that file's contents.
    // Workers send one Result per successfully hashed file.
    type Result struct {
        // file is the path of the hashed file, as received on the input channel.
        file   string
        // sha256 is the 32-byte SHA-256 digest of the file's contents.
        sha256 [32]byte
    }
    
    // worker receives file paths from input until the channel is closed, hashes
    // each file, and sends one *Result per successfully hashed file on results.
    // Files that cannot be opened or read are reported on stderr and skipped.
    // wg.Done is called when the worker returns.
    func worker(input chan string, results chan<- *Result, wg *sync.WaitGroup) {
        defer wg.Done()

        for path := range input {
            digest, err := hashFile(path)
            if err != nil {
                fmt.Fprintln(os.Stderr, err)
                continue
            }
            results <- &Result{file: path, sha256: digest}
        }
    }

    // hashFile streams the file at path through SHA-256 and returns the digest.
    // The file is closed before hashFile returns, whether or not hashing succeeds.
    func hashFile(path string) ([32]byte, error) {
        var digest [32]byte

        f, err := os.Open(path)
        if err != nil {
            return digest, err
        }
        defer f.Close()

        hasher := sha256.New()
        if _, err := io.Copy(hasher, f); err != nil {
            return digest, err
        }

        copy(digest[:], hasher.Sum(nil))
        return digest, nil
    }
    
    // search walks the file tree rooted at the package-level dir and sends the
    // path of every regular file on input. Errors reported by the walk are
    // printed to stderr and the walk continues. input is closed when the walk
    // is finished, which lets the workers' range loops end.
    func search(input chan string) {
        defer close(input)

        visit := func(path string, info os.FileInfo, err error) error {
            if err != nil {
                fmt.Fprintln(os.Stderr, err)
                return nil
            }
            if info.Mode().IsRegular() {
                input <- path
            }
            return nil
        }

        filepath.Walk(dir, visit)
    }
    
    func main() {
    
        flag.StringVar(&dir, "dir", ".", "directory to search")
        flag.IntVar(&workers, "workers", runtime.NumCPU(), "number of workers")
        flag.Parse()
    
        fmt.Printf("Searching in %s using %d workers...
    ", dir, workers)
    
        input := make(chan string)
        results := make(chan *Result)
    
        wg := sync.WaitGroup{}
        wg.Add(workers)
    
        for i := 0; i < workers; i++ {
            go worker(input, results, &wg)
        }
    
        go search(input)
        go func() {
            wg.Wait()
            close(results)
        }()
    
        counter := make(map[[32]byte][]string)
        for result := range results {
            counter[result.sha256] = append(counter[result.sha256], result.file)
        }
    
        for sha, files := range counter {
            if len(files) > 1 {
                fmt.Printf("Found %d duplicates for %s: 
    ", len(files), hex.EncodeToString(sha[:]))
                for _, f := range files {
                    fmt.Println("-> ", f)
                }
            }
        }
    
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥30 这是哪个作者做的宝宝起名网站
  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题
  • ¥15 C#算法问题, 不知道怎么处理这个数据的转换
  • ¥15 YoloV5 第三方库的版本对照问题
  • ¥15 请完成下列相关问题!