2.* sliding block chunking; performance is a big issue due to too many hash lookups.
3. */
4.static int file_chunk_sb(int fd, int fd_ldata, int fd_bdata, unsigned int *pos, unsigned int *block_num,
5. block_id_t **metadata, hashtable *htable, char *last_block_buf, unsigned int *last_block_len)
6.{
7. char buf[BUF_MAX_SIZE] = {0};
8. char win_buf[BLOCK_MAX_SIZE + 1] = {0};
9. char block_buf[BLOCK_MAX_SIZE] = {0};
10. char adler_pre_char;
11. unsigned char md5_checksum[16 + 1] = {0};
12. unsigned char md5_checksum1[16 + 1] = {0};
13. unsigned char crc_checksum[16] = {0};
14. unsigned int bpos = 0;
15. unsigned int slide_sz = 0;
16. unsigned int rwsize = 0;
17. unsigned int exp_rwsize = BUF_MAX_SIZE;
18. unsigned int head, tail;
19. unsigned int hkey = 0;
20. unsigned int bflag = 0;
21. int ret = 0;
22.
23. while(rwsize = read(fd, buf + bpos, exp_rwsize))
24. {
25. /* last chunk */
26. if ((rwsize + bpos + slide_sz) < g_block_size)
27. break;
28.
29. head = 0;
30. tail = bpos + rwsize;
31. while ((head + g_block_size) <= tail)
32. {
33. memcpy(win_buf, buf + head, g_block_size);
34. hkey = (slide_sz == 0) ? adler32_checksum(win_buf, g_block_size) :
35. adler32_rolling_checksum(hkey, g_block_size, adler_pre_char, buf[head+g_block_size-1]);
36. uint_2_str(hkey, crc_checksum);
37. bflag = 0;
38.
39. /* this block maybe is duplicate */
40. if (hash_exist(g_sb_htable_crc, crc_checksum))
41. {
42. bflag = 2;
43. md5(win_buf, g_block_size, md5_checksum);
44. if (hash_exist(htable, md5_checksum))
45. {
46. /* insert fragment */
47. if (slide_sz != 0)
48. {
49. md5(block_buf, slide_sz, md5_checksum1);
50. if (0 != (ret = dedup_regfile_block_process(block_buf, slide_sz, md5_checksum1,
51. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
52. {
53. perror("dedup_regfile_block_process in file_chunk_sb");
54. goto _FILE_CHUNK_SB_EXIT;
55. }
56. }
57. /* insert fixed-size block */
58. if (0 != (ret = dedup_regfile_block_process(win_buf, g_block_size, md5_checksum,
59. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
60. {
61. perror("dedup_regfile_block_process in file_chunk_sb");
62. goto _FILE_CHUNK_SB_EXIT;
63. }
64.
65. head += g_block_size;
66. slide_sz = 0;
67. bflag = 1;
68. }
69. }
70.
71. /* this block is not duplicate */
72. if (bflag != 1)
73. {
74. block_buf[slide_sz] = buf[head];
75. head++;
76. slide_sz++;
77. if (slide_sz == g_block_size)
78. {
79. if (bflag != 2) md5(block_buf, g_block_size, md5_checksum);
80. if (0 != (ret = dedup_regfile_block_process(block_buf, g_block_size, md5_checksum,
81. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
82. {
83. perror("dedup_regfile_block_process in file_chunk_sb");
84. goto _FILE_CHUNK_SB_EXIT;
85. }
86. hash_checkin(g_sb_htable_crc, crc_checksum);
87. slide_sz = 0;
88. }
89. }
90.
91. adler_pre_char = buf[head - 1];
92. }
93.
94. /* read expected data from file to full up buf */
95. bpos = tail - head;
96. exp_rwsize = BUF_MAX_SIZE - bpos;
97. adler_pre_char = buf[head - 1];
98. memmove(buf, buf + head, bpos);
99. }
100. /* last chunk */
101. *last_block_len = ((rwsize + bpos + slide_sz) > 0) ? rwsize + bpos + slide_sz : 0;
102. if (*last_block_len > 0)
103. {
104. memcpy(last_block_buf, block_buf, slide_sz);
105. memcpy(last_block_buf + slide_sz, buf, rwsize + bpos);
106. }
107.
108._FILE_CHUNK_SB_EXIT:
109. lseek(fd, 0, SEEK_SET);
110. return ret;
111.}
请帮忙看一下这段 C 代码的思路，最好能给出对应的 Java 实现，非常感谢。
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
4条回答
悬赏问题
- ¥35 平滑拟合曲线该如何生成
- ¥100 c语言,请帮蒟蒻写一个题的范例作参考
- ¥15 名为“Product”的列已属于此 DataTable
- ¥15 安卓adb backup备份应用数据失败
- ¥15 eclipse运行项目时遇到的问题
- ¥15 关于#c##的问题:最近需要用CAT工具Trados进行一些开发
- ¥15 南大pa1 小游戏没有界面,并且报了如下错误,尝试过换显卡驱动,但是好像不行
- ¥15 自己瞎改改,结果现在又运行不了了
- ¥15 链式存储应该如何解决
- ¥15 没有证书,nginx怎么反向代理到只能接受https的公网网站