duanna3634 2018-10-05 17:16
浏览 107
已采纳

在目录中查找重复的文件

This is my first Go program. I'm learning the language, but it's a bit difficult to understand all the concepts, so in order to practice I wrote a program to detect identical files. It's a simple program that recursively checks for duplicate files in a directory.

but:

How can I detect duplicate files among the files in a directory?

The problem isn't walking the directory recursively; the problem is how to compare the files.

  • 写回答

2条回答 默认 最新

  • douzheng5717 2018-10-05 18:34
    关注

    Use SHA-256 hashes to compare the files.

    example:

    package main
    
    import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
        "os"
        "path/filepath"
        "sync"
        "flag"
        "runtime"
        "io"
    )
    
    // Command-line settings; main binds them to the -dir and -workers flags.
    var (
        dir     string // root directory handed to the file walk
        workers int    // number of worker goroutines to start
    )
    
    // Result pairs a file path with the SHA-256 digest of that file's contents.
    // Workers send one Result per successfully hashed file.
    type Result struct {
        // file is the path of the file that was hashed.
        file   string
        // sha256 is the 32-byte SHA-256 digest of the file's contents.
        sha256 [32]byte
    }
    
    // hashFile streams the file at path through SHA-256 and returns the digest.
    // The file is closed before hashFile returns, whether or not reading succeeded.
    func hashFile(path string) ([32]byte, error) {
        var digest [32]byte

        src, err := os.Open(path)
        if err != nil {
            return digest, err
        }
        defer src.Close()

        hasher := sha256.New()
        if _, err := io.Copy(hasher, src); err != nil {
            return digest, err
        }
        copy(digest[:], hasher.Sum(nil))
        return digest, nil
    }

    // worker receives file paths from input until the channel is closed, hashes
    // each file, and sends one Result per successfully hashed file on results.
    // Files that cannot be opened or read are reported on stderr and skipped.
    // wg.Done is called once, after input has been drained.
    func worker(input chan string, results chan<- *Result, wg *sync.WaitGroup) {
        for path := range input {
            digest, err := hashFile(path)
            if err != nil {
                fmt.Fprintln(os.Stderr, err)
                continue
            }
            results <- &Result{file: path, sha256: digest}
        }
        wg.Done()
    }
    
    // search walks the tree rooted at dir and sends the path of every regular
    // file on input. When the walk is finished it closes input so that readers
    // ranging over the channel terminate.
    func search(input chan string) {
        visit := func(path string, info os.FileInfo, err error) error {
            if err != nil {
                // Report the problem and keep walking the rest of the tree.
                fmt.Fprintln(os.Stderr, err)
                return nil
            }
            if info.Mode().IsRegular() {
                input <- path
            }
            return nil
        }

        // visit reports every error itself and never returns one, so the
        // result of Walk is intentionally discarded.
        _ = filepath.Walk(dir, visit)
        close(input)
    }
    
    func main() {
    
        flag.StringVar(&dir, "dir", ".", "directory to search")
        flag.IntVar(&workers, "workers", runtime.NumCPU(), "number of workers")
        flag.Parse()
    
        fmt.Printf("Searching in %s using %d workers...
    ", dir, workers)
    
        input := make(chan string)
        results := make(chan *Result)
    
        wg := sync.WaitGroup{}
        wg.Add(workers)
    
        for i := 0; i < workers; i++ {
            go worker(input, results, &wg)
        }
    
        go search(input)
        go func() {
            wg.Wait()
            close(results)
        }()
    
        counter := make(map[[32]byte][]string)
        for result := range results {
            counter[result.sha256] = append(counter[result.sha256], result.file)
        }
    
        for sha, files := range counter {
            if len(files) > 1 {
                fmt.Printf("Found %d duplicates for %s: 
    ", len(files), hex.EncodeToString(sha[:]))
                for _, f := range files {
                    fmt.Println("-> ", f)
                }
            }
        }
    
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥15 phython路径名过长报错 不知道什么问题
  • ¥15 深度学习中模型转换该怎么实现
  • ¥15 HLs设计手写数字识别程序编译通不过
  • ¥15 Stata外部命令安装问题求帮助!
  • ¥15 从键盘随机输入A-H中的一串字符串,用七段数码管方法进行绘制。提交代码及运行截图。
  • ¥15 TYPCE母转母,插入认方向
  • ¥15 如何用python向钉钉机器人发送可以放大的图片?
  • ¥15 matlab(相关搜索:紧聚焦)
  • ¥15 基于51单片机的厨房煤气泄露检测报警系统设计
  • ¥15 Arduino无法同时连接多个hx711模块,如何解决?