2.* sliding block chunking; performance is a big issue due to too many hash lookups.
3. */
4.static int file_chunk_sb(int fd, int fd_ldata, int fd_bdata, unsigned int *pos, unsigned int *block_num,
5. block_id_t **metadata, hashtable *htable, char *last_block_buf, unsigned int *last_block_len)
6.{
7. char buf[BUF_MAX_SIZE] = {0};
8. char win_buf[BLOCK_MAX_SIZE + 1] = {0};
9. char block_buf[BLOCK_MAX_SIZE] = {0};
10. char adler_pre_char;
11. unsigned char md5_checksum[16 + 1] = {0};
12. unsigned char md5_checksum1[16 + 1] = {0};
13. unsigned char crc_checksum[16] = {0};
14. unsigned int bpos = 0;
15. unsigned int slide_sz = 0;
16. unsigned int rwsize = 0;
17. unsigned int exp_rwsize = BUF_MAX_SIZE;
18. unsigned int head, tail;
19. unsigned int hkey = 0;
20. unsigned int bflag = 0;
21. int ret = 0;
22.
23. while(rwsize = read(fd, buf + bpos, exp_rwsize))
24. {
25. /* last chunk */
26. if ((rwsize + bpos + slide_sz) < g_block_size)
27. break;
28.
29. head = 0;
30. tail = bpos + rwsize;
31. while ((head + g_block_size) <= tail)
32. {
33. memcpy(win_buf, buf + head, g_block_size);
34. hkey = (slide_sz == 0) ? adler32_checksum(win_buf, g_block_size) :
35. adler32_rolling_checksum(hkey, g_block_size, adler_pre_char, buf[head+g_block_size-1]);
36. uint_2_str(hkey, crc_checksum);
37. bflag = 0;
38.
39. /* this block maybe is duplicate */
40. if (hash_exist(g_sb_htable_crc, crc_checksum))
41. {
42. bflag = 2;
43. md5(win_buf, g_block_size, md5_checksum);
44. if (hash_exist(htable, md5_checksum))
45. {
46. /* insert fragment */
47. if (slide_sz != 0)
48. {
49. md5(block_buf, slide_sz, md5_checksum1);
50. if (0 != (ret = dedup_regfile_block_process(block_buf, slide_sz, md5_checksum1,
51. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
52. {
53. perror("dedup_regfile_block_process in file_chunk_sb");
54. goto _FILE_CHUNK_SB_EXIT;
55. }
56. }
57. /* insert fixed-size block */
58. if (0 != (ret = dedup_regfile_block_process(win_buf, g_block_size, md5_checksum,
59. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
60. {
61. perror("dedup_regfile_block_process in file_chunk_sb");
62. goto _FILE_CHUNK_SB_EXIT;
63. }
64.
65. head += g_block_size;
66. slide_sz = 0;
67. bflag = 1;
68. }
69. }
70.
71. /* this block is not duplicate */
72. if (bflag != 1)
73. {
74. block_buf[slide_sz] = buf[head];
75. head++;
76. slide_sz++;
77. if (slide_sz == g_block_size)
78. {
79. if (bflag != 2) md5(block_buf, g_block_size, md5_checksum);
80. if (0 != (ret = dedup_regfile_block_process(block_buf, g_block_size, md5_checksum,
81. fd_ldata, fd_bdata, pos, block_num, metadata, htable)))
82. {
83. perror("dedup_regfile_block_process in file_chunk_sb");
84. goto _FILE_CHUNK_SB_EXIT;
85. }
86. hash_checkin(g_sb_htable_crc, crc_checksum);
87. slide_sz = 0;
88. }
89. }
90.
91. adler_pre_char = buf[head - 1];
92. }
93.
94. /* read expected data from file to full up buf */
95. bpos = tail - head;
96. exp_rwsize = BUF_MAX_SIZE - bpos;
97. adler_pre_char = buf[head - 1];
98. memmove(buf, buf + head, bpos);
99. }
100. /* last chunk */
101. *last_block_len = ((rwsize + bpos + slide_sz) > 0) ? rwsize + bpos + slide_sz : 0;
102. if (*last_block_len > 0)
103. {
104. memcpy(last_block_buf, block_buf, slide_sz);
105. memcpy(last_block_buf + slide_sz, buf, rwsize + bpos);
106. }
107.
108._FILE_CHUNK_SB_EXIT:
109. lseek(fd, 0, SEEK_SET);
110. return ret;
111.}
请帮忙看一下这段 C 代码的实现思想,最好能写出对应的 Java 代码,非常感谢。
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
4条回答 默认 最新
悬赏问题
- ¥15 飞机曲面部件如机翼,壁板等具体的孔位模型
- ¥15 vs2019中数据导出问题
- ¥20 云服务Linux系统TCP-MSS值修改?
- ¥20 关于#单片机#的问题:项目:使用模拟iic与ov2640通讯环境:F407问题:读取的ID号总是0xff,自己调了调发现在读从机数据时,SDA线上并未有信号变化(语言-c语言)
- ¥20 怎么在stm32门禁成品上增加查询记录功能
- ¥15 Source insight编写代码后使用CCS5.2版本import之后,代码跳到注释行里面
- ¥50 NT4.0系统 STOP:0X0000007B
- ¥15 想问一下stata17中这段代码哪里有问题呀
- ¥15 flink cdc无法实时同步mysql数据
- ¥100 有人会搭建GPT-J-6B框架吗?有偿