表中的每一条记录和其后的每条记录进行比较,根据前后记录中字段的相似度然后删除前后的某条记录
我用的双重游标 表是千万的数据根本跑不出来 又什么其他的处理方式或者优化方法吗
for a_cur in sub_cur1 loop
exit when sub_cur1%notfound;
start_num1 := sub_cur1%ROWCOUNT;
--DBMS_OUTPUT.put_line('外循环第'||start_num1||'次循环');
insert into dzdp_ywb_tmp_log values ('外循环第'||start_num1||'次循环');
--判断当前记录在表中是否存在
select count(shop_id) into ctnum1 from dzdp_ywb where shop_id = a_cur.shop_id;
if(ctnum1=0) then
continue;
end if;
select shop_id, baidu_longitude, baidu_latitude, name
into sid, jd, wd, mname
from dzdp_ywb
where shop_id = a_cur.shop_id;
for b_cur in sub_cur2 loop
exit when sub_cur2%notfound;
start_num2:=sub_cur2%ROWCOUNT;
--判断当前记录在表中是否存在
select count(shop_id) into ctnum2 from dzdp_ywb where shop_id = b_cur.shop_id;
if (start_num2 <= start_num1 or ctnum2=0 ) then
continue;
end if;
--调用函数 计算距离
distance := get_distance(jd,wd,b_cur.baidu_longitude,b_cur.baidu_latitude);
--调用函数 计算距离
if (distance < 1) then
--如果距离小于1km 则计算相似度
similarity := SYS.UTL_MATCH.edit_distance_similarity(mname,b_cur.name);
if (similarity > 50) then
-- 记录名称较长的商户
if (length(mname) > length(b_cur.name)) then
--插入要删除的数据到dzdp_ywb_tmp表中
insert into dzdp_ywb_tmp01
(shop_id_b,shop_id_e)
values
(sid,b_cur.shop_id);
--删除 dzdp_ywb 表数据
delete from dzdp_ywb where shop_id=sid;
commit;
exit;--跳出循环
else
insert into dzdp_ywb_tmp01
(shop_id_b,shop_id_e)
values
(b_cur.shop_id,sid);
delete from dzdp_ywb where shop_id=b_cur.shop_id;
commit;
--cnt := cnt + 1;
end if;
end if;
end if;