请教各位专家高手,帮我把这个算法转成JAVA,或者给我ID3或C4.5的java主要代码,不过要有详细的注释,谢谢!分数我会继续加的,只要能解决,我不会吝惜积分的。。
//*********************************************
//决策树ID3算生成决策树的源程序
//*********************************************
procedure TFrmShibie.Generate_decision_tree(b: TwoArr; bn: Integer;
attribute_test_list: array of integer; classCount, sn, ai, aj: Integer);
var
same_class,i,j,k,t,l,ll,lll,count_Positive,temp1:integer;
p1,p2,Entropy_Es:double;
temp:array[0..s_max-1] of integer;
b1:TwoArr;
temp_b:array[0..s_max-1,0..N-1,0..M+1] of integer;
TypeCountArr:array of Integer;
AttriArr:array of Integer;
len:Integer;
AttriArrCount:array of Integer;
q:TArr;
begin
//定义数组a记录目前待分的训练样本集,定义数组b记录目前要分结点中所含的训练样本集
//same_class用来记数,判别samples是否都属于同一个类
Trip:=Trip+1;
path_a[leaves][Trip]:=ai;
path_b[leaves][Trip]:=aj;
if(bn=0) then
begin//待分结点的样本集为空时,加上一个树叶,标记为训练集中最普通的类
//记录路径与前一路径相同
for i:=1 to Trip-1 do
begin
if(path_a[leaves][i]=0) then
begin
path_a[leaves][i]:=path_a[leaves-1][i];
path_b[leaves][i]:=path_b[leaves-1][i];
end;
end;
for i:=1 to Trip-1 do
begin
path_a[leaves][0]:=most;
end;
//修改树的深度
if(path_a[leaves][Trip]=av[path_b[leaves][Trip]-1]) then
begin
for i:=Trip downto 2 do
begin
if(path_a[leaves][i]=av[path_b[leaves][i]-1]) then
begin
dec(Trip);
end
else
begin
break;
end;
end;
end;
dec(Trip);
inc(leaves);
end
else
begin
same_class:=1; //相同类别的测试集数目
for i:=0 to bn-2 do
begin
if (b[i][0]=b[i+1][0]) then
inc(same_class);
end;
if(same_class=bn) then
begin //假如所有测试集在某个属性上取相同值
//记录路径与前一路径相同
for i:=1 to Trip-1 do
begin
if (path_a[leaves][i]=0) then
begin
path_a[leaves][i]:=path_a[leaves-1][i];
path_b[leaves][i]:=path_b[leaves-1][i];
end;
end;
path_a[leaves][0]:=b[0][0]; //如果所有的测试集都是属于同一个类型,就以该类别标记叶子
//修改树的深度
if (path_a[leaves][Trip]=av[path_b[leaves][Trip]-1]) then
begin
for i:=Trip to 2 do
begin
if(path_a[leaves][i]=av[path_b[leaves][i]-1]) then
dec(Trip)
else
break;
end;
end;
dec(Trip);
inc(leaves);
//未分类的样本集减少
l:=-1;
lll:=0;
for i:=0 to count do //count+1为训练集样本数
begin
for j:=0 to bn-1 do
begin
if(s[i][M+1]=b[j][M+1]) then //数组s[i][j]用来记录第i个训练样本的第j个属性值
//M定义为候选属性的个数
inc(lll);
end;
if(lll=0) then
begin
inc(l);
for ll:=0 to M+1 do
a[l][ll]:=s[i][ll];
end;
end;
k:=-1;
for i:=0 to l-1 do
begin
inc(k);
for ll:=0 to M+1 do
s[k][ll]:=a[i][ll];
end;
count:=count-bn;
end
else //假如不是所有测试集在某个属性上取相同值
begin
if sn=1 then
begin
for i:=0 to bn-1 do
begin
path[i][0]:=s[i][0];
path[i][1]:=s[i][1];
end;
end;
if(sn=0) then //属性个数为0时
begin
for i:=1 to Trip-1 do
begin
if(path_a[leaves][i]=0) then
begin
path_a[leaves][i]:=path_a[leaves-1][i];
path_b[leaves][i]:=path_b[leaves-1][i];
end;
end;
ll:=0;
lll:=0;
//判断类别
for i:=0 to bn-1 do
begin
if(b[i][0]=0) then
inc(ll)
else
inc(lll);
end;
if(ll>lll) then
begin
path_a[leaves][0]:=0;
end
else
begin
path_a[leaves][0]:=1;
end;
if(path_a[leaves][Trip]=av[path_b[leaves][Trip]-1]) then
begin
for i:=Trip downto 2 do
begin
if(path_a[leaves][i]=av[path_b[leaves][i]-1]) then
dec(Trip)
else
break;
end;
end;
dec(Trip);
inc(leaves);
//未分类的样本集减少
l:=-1;
lll:=0;
for i:=0 to count do
begin
for j:=0 to bn-1 do
if(s[i][M+1]=b[j][M+1]) then inc(lll);
if(lll=0) then
begin
inc(l);
for ll:=0 to M+1 do
a[l][ll]:=s[i][ll];
end;
end;
k:=-1;
for i:=0 to l-1 do
begin
inc(k);
for ll:=0 to M+1 do
s[k][ll]:=a[i][ll];
end;
count:=count-bn;
end
else
begin
count_Positive:=0;
SetLength(TypeCountArr,classCount);
for i:=0 to classCount-1 do
begin
for j:=0 to bn-1 do
begin
if s[j][0]=i then
inc(TypeCountArr[i]);
end;
end;
Entropy_Es:=Entropy(TypeCountArr,bn);
//计算信息增益
for i:=1 to sn-1 do
begin
len:=1;
setlength(AttriArr,len);
setlength(AttriArrCount,len);
AttriArr[0]:=s[0][attribute_test_list[i]];
AttriArrCount[0]:=1;
for k:=0 to bn-1 do
begin
for j:=0 to high(AttriArr) do
begin
if AttriArr[j]=s[k][attribute_test_list[i]] then
begin
inc(AttriArrCount[j]);
end
else
begin
inc(len);
setlength(AttriArr,len);
setlength(AttriArrCount,len);
AttriArr[len-1]:=s[k][attribute_test_list[i]];
AttriArrCount[len-1]:=1;
end;
end;
end;
setlength(q,sn*len*classCount);
for j:=0 to high(AttriArr) do
begin
for k:=0 to sn-1 do
begin
for t:=0 to bn-1 do
begin
if (AttriArr[j]=s[i][attribute_test_list[i]]) and
(attribute_test_list[k]=s[i][0]) then
begin
inc(q[i][j][k]);
end;
end;
end;
end;
Gain[attribute_test_list[i]-1]:=GainA(bn,TypeCountArr,AttriArrCount,q,i);
end;
//找出信息赢取最大值,用j记录哪个候选属性的信息赢取最大
max_Gain:=Gain[0];
j:=attribute_test_list[0]-1;
for i:=0 to sn-1 do
if(max_Gain<Gain[attribute_test_list[i]-1]) then
begin
max_Gain:=Gain[attribute_test_list[i]-1];
j:=attribute_test_list[i]-1;
end;
temp1:=-1;
for i:=1 to av[j] do
begin
temp[i]:=-1;
for k:=0 to bn-1 do
if(b[k][j+1]=i) then
begin
inc(temp[i]);
for l:=0 to M+1 do
temp_b[i][temp[i]][l]:=b[k][l];
end;
end;
//对于每一个分支使用递归函数重复生成树
for i:=1 to av[j] do
begin
for k:=0 to temp[i] do
for l:=0 to M+1 do
b1[k][l]:=temp_b[i][k][l];
if(i=1) then
begin
l:=0;
for ll:=0 to sn-1 do
begin
if(attribute_test_list[ll]-1<>j) then attribute_test_list[l]:=attribute_test_list[ll];
inc(l);
end;
Generate_decision_tree(b1,k,attribute_test_list,classCount,l,i,j+1);
dec(sn);
end
else
begin
Generate_decision_tree(b1,k,attribute_test_list,classCount,sn,i,j+1);
if(i=av[j]) then
attribute_test_list[sn]:=j+1;
end;
end;
end;
end;
end;
end;
//*********************************************
//决策树ID3算生成决策树的源程序
//*********************************************
procedure TFrmShibie.Generate_decision_tree(b: TwoArr; bn: Integer;
attribute_test_list: array of integer; classCount, sn, ai, aj: Integer);
var
same_class,i,j,k,t,l,ll,lll,count_Positive,temp1:integer;
p1,p2,Entropy_Es:double;
temp:array[0..s_max-1] of integer;
b1:TwoArr;
temp_b:array[0..s_max-1,0..N-1,0..M+1] of integer;
TypeCountArr:array of Integer;
AttriArr:array of Integer;
len:Integer;
AttriArrCount:array of Integer;
q:TArr;
begin
//定义数组a记录目前待分的训练样本集,定义数组b记录目前要分结点中所含的训练样本集
//same_class用来记数,判别samples是否都属于同一个类
Trip:=Trip+1;
path_a[leaves][Trip]:=ai;
path_b[leaves][Trip]:=aj;
if(bn=0) then
begin//待分结点的样本集为空时,加上一个树叶,标记为训练集中最普通的类
//记录路径与前一路径相同
for i:=1 to Trip-1 do
begin
if(path_a[leaves][i]=0) then
begin
path_a[leaves][i]:=path_a[leaves-1][i];
path_b[leaves][i]:=path_b[leaves-1][i];
end;
end;
for i:=1 to Trip-1 do
begin
path_a[leaves][0]:=most;
end;
//修改树的深度
if(path_a[leaves][Trip]=av[path_b[leaves][Trip]-1]) then
begin
for i:=Trip downto 2 do
begin
if(path_a[leaves][i]=av[path_b[leaves][i]-1]) then
begin
dec(Trip);
end
else
begin
break;
end;
end;
end;
dec(Trip);
inc(leaves);
end
else
begin
same_class:=1; //相同类别的测试集数目
for i:=0 to bn-2 do
begin
if (b[i][0]=b[i+1][0]) then
inc(same_class);
end;
if(same_class=bn) then
begin //假如所有测试集在某个属性上取相同值
//记录路径与前一路径相同
for i:=1 to Trip-1 do
begin
if (path_a[leaves][i]=0) then
begin
path_a[leaves][i]:=path_a[leaves-1][i];
path_b[leaves][i]:=path_b[leaves-1][i];
end;
end;
path_a[leaves][0]:=b[0][0]; //如果所有的测试集都是属于同一个类型,就以该类别标记叶子
//修改树的深度
if (path_a[leaves][Trip]=av[path_b[leaves][Trip]-1]) then
begin
for i:=Trip to 2 do
begin
if(path_a[leaves][i]=av[path_b[leaves][i]-1]) then
dec(Trip)
else
break;
end;
end;
dec(Trip);
inc(leaves);
//未分类的样本集减少
l:=-1;
lll:=0;
for i:=0 to count do //count+1为训练集样本数
begin
for j:=0 to bn-1 do
begin
if(s[i][M+1]=b[j][M+1]) then //数组s[i][j]用来记录第i个训练样本的第j个属性值
//M定义为候选属性的个数
inc(lll);
end;
if(lll=0) then
begin
inc(l);
for ll:=0 to M+1 do
a[l][ll]:=s[i][ll];
end;
end;
k:=-1;
for i:=0 to l-1 do
begin
inc(k);
for ll:=0 to M+1 do
s[k][ll]:=a[i][ll];
end;
count:=count-bn;
end
else //假如不是所有测试集在某个属性上取相同值
begin
if sn=1 then
begin
for i:=0 to bn-1 do
begin
path[i][0]:=s[i][0];
path[i][1]:=s[i][1];
end;
end;
if(sn=0) then //属性个数为0时
begin
for i:=1 to Trip-1 do
begin
if(path_a[leaves][i]=0) then
begin
path_a[leaves][i]:=path_a[leaves-1][i];
path_b[leaves][i]:=path_b[leaves-1][i];
end;
end;
ll:=0;
lll:=0;
//判断类别
for i:=0 to bn-1 do
begin
if(b[i][0]=0) then
inc(ll)
else
inc(lll);
end;
if(ll>lll) then
begin
path_a[leaves][0]:=0;
end
else
begin
path_a[leaves][0]:=1;
end;
if(path_a[leaves][Trip]=av[path_b[leaves][Trip]-1]) then
begin
for i:=Trip downto 2 do
begin
if(path_a[leaves][i]=av[path_b[leaves][i]-1]) then
dec(Trip)
else
break;
end;
end;
dec(Trip);
inc(leaves);
//未分类的样本集减少
l:=-1;
lll:=0;
for i:=0 to count do
begin
for j:=0 to bn-1 do
if(s[i][M+1]=b[j][M+1]) then inc(lll);
if(lll=0) then
begin
inc(l);
for ll:=0 to M+1 do
a[l][ll]:=s[i][ll];
end;
end;
k:=-1;
for i:=0 to l-1 do
begin
inc(k);
for ll:=0 to M+1 do
s[k][ll]:=a[i][ll];
end;
count:=count-bn;
end
else
begin
count_Positive:=0;
SetLength(TypeCountArr,classCount);
for i:=0 to classCount-1 do
begin
for j:=0 to bn-1 do
begin
if s[j][0]=i then
inc(TypeCountArr[i]);
end;
end;
Entropy_Es:=Entropy(TypeCountArr,bn);
//计算信息增益
for i:=1 to sn-1 do
begin
len:=1;
setlength(AttriArr,len);
setlength(AttriArrCount,len);
AttriArr[0]:=s[0][attribute_test_list[i]];
AttriArrCount[0]:=1;
for k:=0 to bn-1 do
begin
for j:=0 to high(AttriArr) do
begin
if AttriArr[j]=s[k][attribute_test_list[i]] then
begin
inc(AttriArrCount[j]);
end
else
begin
inc(len);
setlength(AttriArr,len);
setlength(AttriArrCount,len);
AttriArr[len-1]:=s[k][attribute_test_list[i]];
AttriArrCount[len-1]:=1;
end;
end;
end;
setlength(q,sn*len*classCount);
for j:=0 to high(AttriArr) do
begin
for k:=0 to sn-1 do
begin
for t:=0 to bn-1 do
begin
if (AttriArr[j]=s[i][attribute_test_list[i]]) and
(attribute_test_list[k]=s[i][0]) then
begin
inc(q[i][j][k]);
end;
end;
end;
end;
Gain[attribute_test_list[i]-1]:=GainA(bn,TypeCountArr,AttriArrCount,q,i);
end;
//找出信息赢取最大值,用j记录哪个候选属性的信息赢取最大
max_Gain:=Gain[0];
j:=attribute_test_list[0]-1;
for i:=0 to sn-1 do
if(max_Gain<Gain[attribute_test_list[i]-1]) then
begin
max_Gain:=Gain[attribute_test_list[i]-1];
j:=attribute_test_list[i]-1;
end;
temp1:=-1;
for i:=1 to av[j] do
begin
temp[i]:=-1;
for k:=0 to bn-1 do
if(b[k][j+1]=i) then
begin
inc(temp[i]);
for l:=0 to M+1 do
temp_b[i][temp[i]][l]:=b[k][l];
end;
end;
//对于每一个分支使用递归函数重复生成树
for i:=1 to av[j] do
begin
for k:=0 to temp[i] do
for l:=0 to M+1 do
b1[k][l]:=temp_b[i][k][l];
if(i=1) then
begin
l:=0;
for ll:=0 to sn-1 do
begin
if(attribute_test_list[ll]-1<>j) then attribute_test_list[l]:=attribute_test_list[ll];
inc(l);
end;
Generate_decision_tree(b1,k,attribute_test_list,classCount,l,i,j+1);
dec(sn);
end
else
begin
Generate_decision_tree(b1,k,attribute_test_list,classCount,sn,i,j+1);
if(i=av[j]) then
attribute_test_list[sn]:=j+1;
end;
end;
end;
end;
end;
end;
直接用jni调用算了
回复内容太短了!
若是的话,你可以用html来做,也是很容易的事。
attribute_test_list: array of integer; classCount, sn, ai, aj: Integer);public void Generate_decision_tree(
TwoArr b
,int bn
,int[] attribute_test_list
,int classCount
,int sn
,int ai
,int aj
)
{
....
}