下面FP-Growth算法是我的主算法调用的一个算法,请帮我分析以下代码运行内存太大而无法运行的原因,并给出修改(其中输入变量Dataset的维度可能在1000左右):
function [Freq,Freq_weight] = FPGrowth(FrontNo,Weight,Dataset,MST)
NumEvent=1;
T = cell(1,1);
[nRows,~]=size(Dataset);
for i = 1:nRows
c = find(Dataset(i,:));
if (isempty(c))
continue; %空行,重新读下一行
end
T{NumEvent,1}=c;
NumEvent = NumEvent+1;
end
NumEvent = NumEvent-1;
%存放当前频繁项,item
L=zeros(1,2);
%第一次扫描Dataset,统计每一个候选项,并存在二维数组中
%对所有item进行统计,并按支持度递减排序
NumEvent=size(T,1);
for i=1:NumEvent
for j=1:length(T{i})
%寻找当前项在L中是否已经存在
result=find(L(:,1)==T{i}(1,j));
if isempty(result)
%如不存在,则新建一行,支持度为1
L=cat(1,L,[T{i}(1,j),1]); %串联数组
else
%如存在,则支持度加1
L(result,2)=L(result,2)+1;
end
end
end
%将L的第一行赋空
L(1,:)=[];
%按照支持度递减排序,
L=sortrows(L,-2);%L:所有项的降序排列
%idx = L(:, 2)/NumEvent < MST;
%L(idx, :) = [];
%存储FP树
tree=cell(1,2);
%第二次扫描事务数据库Dataset,构造FP树,
for i=1:NumEvent
L_temp=[];
%把每一个事务中的item按照其支持度进行排序
for j=1:length(T{i})
result=find(L(:,1)==T{i}(1,j));
if isempty(result)==0
L_temp=cat(1,L_temp,L(result,:));
end
end
if ~isempty(L_temp)
L_temp=sortrows(L_temp,-2);
L_temp=(L_temp(:,1))';
tree=treeFunc(L_temp,tree,1,i);
end
end
%进行模式挖掘
%对L按支持度的升序重新排序
L=sortrows(L,2);
%对L的每一项进行频繁项的挖掘
%最终的频繁项
freqItems=cell(1,1);
%频繁项计数
n=1;
%临时频繁项存储
freqItems_temp=cell(1,1);
for i=1:size(L,1)-1
frequentItem=cell(1,1);
item=[];
%找出所有包含L(i,1)项
frequentItem=ruleMining(L(i,1),tree,item,frequentItem);
frequentItem(1,:)=[];
if isempty(frequentItem)
continue;
end
%构造条件树
conditionTree=cell(1,2);
conditionItem=zeros(1,2);
for j=1:size(frequentItem,1)
if size(frequentItem{j,1},2)~=0
flag_2=0;
p=0;
%conditionTree的行代表在条件树起始节点null下,有几个分支
if size(conditionTree{1,1},2)~=0
for x=1:size(conditionTree,1)
if conditionTree{x,1}==frequentItem{j,1}(1,1)
conditionTree{x,2}(1,2)=conditionTree{x,2}(1,2)+frequentItem{j,1}(1,2);
flag_2=1;
p=x;
break;
end
end
end
if flag_2==0
p=size(conditionTree,1)+1;
conditionTree{p,1}=frequentItem{j,1}(1,1);
conditionTree{p,2}=[frequentItem{j,1}(1,1),frequentItem{j,1}(1,size(frequentItem{j,1},2))];
if size(conditionTree{1,1},2)==0
conditionTree(1,:)=[];
p=p-1;
end
end
if size(conditionTree{p,2},2)==0
conditionTree{p,2}=conditionItem;
conditionItem=conditionTree{p,2};
else
conditionItem=conditionTree{p,2};
end
%验证conditionItem是否已经存在,如存在则直接将频繁项的数量可以直接相加
for m=2:size(frequentItem{j,1},2)-1
flag=0;
for n=1:size(conditionItem,1)
if conditionItem(n,1)==frequentItem{j,1}(1,m)
conditionItem(n,2)=conditionItem(n,2)+frequentItem{j,1}(1,size(frequentItem{j,1},2));
flag=1;
break;
end
end
if flag==0
conditionItem=cat(1,conditionItem,[frequentItem{j,1}(1,m),frequentItem{j,1}(1,size(frequentItem{j,1},2))]);
end
end
conditionTree{p,2}=conditionItem;
end
end
%对生成的条件树进行最小支持度过滤,
for k=1:size(conditionTree,1)
%对支持度小于MST的进行过滤
conditionItem=conditionTree{k,2};
if isempty(conditionItem)
continue;
end
result=find(conditionItem(:,2)/NumEvent<MST);
row=unique(result);
conditionItem(row,:)=[];
%对剩余item进行组合,需要把当前的
result=find(conditionItem(:,1)==L(i,1));
row=unique(result);
currentItem=conditionItem(row,:);
conditionItem(row,:)=[];
%取出所有item项
c=conditionItem(:,1)';
c_temp=cell(1,1);
%对Item进行组合,并生成频繁项
for m=1:length(c)
%组合
C=nchoosek(1:1:length(c),m);
%取最小值
for p=1:size(C,1)
cItem=currentItem;
for q=1:size(C,2)
cItem=cat(1,cItem,conditionItem(C(p,q),:));
end
%求支持度最小的项
[x,y]=min(cItem(:,2));
%得到频繁项
freqItems_temp{n,1}=cItem(:,1)';
fItem=cat(2,cItem(:,1)',cItem(y,2));
freqItems{n,1}=fItem;
n=n+1;
end
end
end
end
%------------------对重复的频繁项进行合并----------------------------------
%临时频繁项集,与上面的频繁项集基本一致,只是少了最后一列的支持度计数,目的
%是为了比较两个频繁项是否一致,以便进行合并
freqItems = freqItems(~cellfun('isempty', freqItems));
freqItems_temp = freqItems_temp(~cellfun('isempty', freqItems_temp));
while size(freqItems_temp,1)>1
for i=1:size(freqItems_temp,1)-1
flag=0;
for j=i+1:size(freqItems_temp,1)
if isequal(freqItems_temp{i,1},freqItems_temp{j,1})
%将临时频繁项的重复项进行合并
freqItems_temp(j,:)=[];
%将重复的频繁项最后的一列的支持度计数进行合并
freqItems{i,1}(1,size(freqItems{i,1},2))=freqItems{i,1}(1,size(freqItems{i,1},2))+freqItems{j,1}(1,size(freqItems{j,1},2));
%将频繁项中的重复项清除
freqItems(j,:)=[];
flag=1;
break;
end
end
if flag==1
break;
end
end
if i==size(freqItems_temp,1)-1
break;
end
end
i = size(freqItems_temp,1)-1;
j = i+1;
if i>0 && isequal(freqItems_temp{i,1},freqItems_temp{j,1})
freqItems_temp(j,:)=[];
freqItems{i,1}(1,size(freqItems{i,1},2))=freqItems{i,1}(1,size(freqItems{i,1},2))+freqItems{j,1}(1,size(freqItems{j,1},2));
freqItems(j,:)=[];
end
%显示频繁项集
%对每个频繁项集中的元素升序排列,方便后续权重更新
Freqset = freqItems_temp;
Freqset = cellfun(@(x) sort(x), freqItems_temp, 'UniformOutput', false);
disp(Freqset);
%%求每个频繁项来自的事务数据索引T_idx,及在种群(Weight)中的索引Pop_idx,并更新频繁项集的权重
T_idx = cell(size(Freqset));
Pop_idx = cell(size(Freqset));
Freq_weight =zeros(size(Freqset,1),size(Dataset,2));
for i = 1:numel(Freqset)
for j = 1:numel(T)
if isequal(Freqset{i,1}, intersect(Freqset{i,1}, T{j}))
T_idx{i,1} = [T_idx{i,1} j];
end
end
indices = find(FrontNo==1);
Pop_idx{i,1} = indices(1,T_idx{i,1});
for k = 1:size(Freqset{i,1},2)
SelectWeights = Weight(Pop_idx{i,1},Freqset{i,1}(1,k));
Freq_weight(i,Freqset{i,1}(1,k)) = mean(SelectWeights);
end
end
%%将实数矩阵转换为0,
% 创建全零矩阵
Freq = zeros(size(Freqset, 1), size(Dataset, 2));
% 对于每一行,将数据中的整数值对应的逻辑索引设为 1
for i = 1:size(Freqset, 1)
logicalIndexes = false(1, size(Dataset, 2));
% 遍历该行数据中的每个整数值,将其对应的逻辑索引设为 1
for j = 1:numel(Freqset{i})
logicalIndexes(Freqset{i}(j)) = true;
end
% 将该行数据中未包含的维度对应的逻辑索引设为 0
logicalIndexes(setdiff(1:size(Dataset, 2), Freqset{i})) = false;
% 将该行逻辑索引赋值给逻辑矩阵
Freq(i, :) = logicalIndexes;
end
% 输出逻辑矩阵
Freq
end