2.* sliding block chunking; performance is a big issue due to too many hash lookups.
3. */
4.static int file_chunk_sb(int fd, int fd_ldata, int fd_bdata, unsigned int *pos, unsigned int *block_num,
5. block_id_t **metadata, hashtable *htable, char *last_block_buf, unsigned int *last_block_len)
6.{
7. char buf[BUF_MAX_SIZE] = {0};
8. char win_buf[BLOCK_MAX_SIZE + 1] = {0};
9. char block_buf[BLOCK_MAX_SIZE] = {0};
10. char adler_pre_char;
11. unsigned char md5_checksum[16 + 1] = {0};
12. unsigned char md5_checksum1[16 + 1] = {0};
13. unsigned char crc_checksum[16] = {0};
14. unsigned int bpos = 0;
15. unsigned int slide_sz = 0;
16. unsigned int rwsize = 0;
17. unsigned int exp_rwsize = BUF_MAX_SIZE;
18. unsigned int head, tail;
19. unsigned int hkey = 0;
20. unsigned int bflag = 0;
21. int ret = 0;
22.
23. while(rwsize = read(fd, buf + bpos, exp_rwsize))
24. {
25. /* last chunk */
26. if ((rwsize + bpos + slide_sz) < g_block_size)
27. break;
28.
29. head = 0;
30. tail = bpos + rwsize;
31. while ((head + g_block_size) <= tail)
32. {
33. memcpy(win_buf, buf + head, g_block_size);
34. hkey = (slide_sz == 0) ? adler32_checksum(win_buf, g_block_size) :
35. adler32_rolling_checksum(hkey, g_block_size, adler_pre_char, buf[head+g_block_size-1]);
36. uint_2_str(hkey, crc_checksum);
37. bflag = 0;
38.
39. /* this block maybe is duplicate */
40. if (hash_exist(g_sb_htable_crc, crc_checksum))
41. {
42. bflag = 2;
43. md5(win_buf, g_block_size, md5_checksum);
44. if (hash_exist(htable, md5_checksum))
45. {
46. /* insert fragment */
47. if (slide_sz != 0)
48. {
49. md5(block_buf, slide_sz, md5_checksum1);
50. if (0 != (ret = dedup_regfile_block_process(block_buf, slide_sz, md5_checksum1,
51. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
52. {
53. perror("dedup_regfile_block_process in file_chunk_sb");
54. goto _FILE_CHUNK_SB_EXIT;
55. }
56. }
57. /* insert fixed-size block */
58. if (0 != (ret = dedup_regfile_block_process(win_buf, g_block_size, md5_checksum,
59. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
60. {
61. perror("dedup_regfile_block_process in file_chunk_sb");
62. goto _FILE_CHUNK_SB_EXIT;
63. }
64.
65. head += g_block_size;
66. slide_sz = 0;
67. bflag = 1;
68. }
69. }
70.
71. /* this block is not duplicate */
72. if (bflag != 1)
73. {
74. block_buf[slide_sz] = buf[head];
75. head++;
76. slide_sz++;
77. if (slide_sz == g_block_size)
78. {
79. if (bflag != 2) md5(block_buf, g_block_size, md5_checksum);
80. if (0 != (ret = dedup_regfile_block_process(block_buf, g_block_size, md5_checksum,
81. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
82. {
83. perror("dedup_regfile_block_process in file_chunk_sb");
84. goto _FILE_CHUNK_SB_EXIT;
85. }
86. hash_checkin(g_sb_htable_crc, crc_checksum);
87. slide_sz = 0;
88. }
89. }
90.
91. adler_pre_char = buf[head - 1];
92. }
93.
94. /* read expected data from file to full up buf */
95. bpos = tail - head;
96. exp_rwsize = BUF_MAX_SIZE - bpos;
97. adler_pre_char = buf[head - 1];
98. memmove(buf, buf + head, bpos);
99. }
100. /* last chunk */
101. *last_block_len = ((rwsize + bpos + slide_sz) > 0) ? rwsize + bpos + slide_sz : 0;
102. if (*last_block_len > 0)
103. {
104. memcpy(last_block_buf, block_buf, slide_sz);
105. memcpy(last_block_buf + slide_sz, buf, rwsize + bpos);
106. }
107.
108._FILE_CHUNK_SB_EXIT:
109. lseek(fd, 0, SEEK_SET);
110. return ret;
111.}
请帮忙看一下这段C代码的实现思路,最好能写出对应的Java代码,非常感谢。
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
4条回答 默认 最新
悬赏问题
- ¥20 求一个html代码,有偿
- ¥100 关于使用MATLAB中copularnd函数的问题
- ¥20 在虚拟机的pycharm上
- ¥15 jupyterthemes 设置完毕后没有效果
- ¥15 matlab图像高斯低通滤波
- ¥15 针对曲面部件的制孔路径规划,大家有什么思路吗
- ¥15 钢筋实图交点识别,机器视觉代码
- ¥15 如何在Linux系统中,但是在window系统上idea里面可以正常运行?(相关搜索:jar包)
- ¥50 400g qsfp 光模块iphy方案
- ¥15 两块ADC0804用proteus仿真时,出现异常