a[t_List,x_List]:=E^(t.Prepend[x,1])
hypothethis[t_List, k_, x_]:= Table[a[t[[j]], x], {j, 1, k-1}]/(1 +Sum[a[t[[j]],x],{j, 1, k-1}])
indicatorFunc[eqn_]:=If[eqn,1,0]
SoftmaxRegression[x_List, y_List]:=Module[{J, vars, b, k, n},
k = Length@Union[y];
n = Length[Transpose[x]]+1;
vars = Array[b, {k - 1, n}];
J[t_]:= (-1/Length[y])*(Sum[indicatorFunc[y[[i]] == k]*Log[1/(1 +Sum[a[t[[j]],x[[i]]],{j, 1, k-1}])]+Sum[indicatorFunc[y[[i]] ==j]*Log[a[t[[j]],x[[i]]]/(1 + Sum[a[t[[j]],x[[i]]],{j, 1, k-1}])],{j,1,k-1}],{i, 1,Length[y]}])+Sum[Sum[t[[i,j]]^2,{j, 1, n}],{i, 1, k-1}];
vars/.NMinimize[J[vars],Flatten@vars][[2]]
]
http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression
I am trying to implement the softmax regression, which returns a list of coefficient for hypothethis function. And hypothethis function will gives the first n - 1 classes probability, which we can use to find the nth class's probability.
I used FisherIris data, in which there are 3 classes: virsicolor, verginica, setosa. I don't know why the 2nd class and the 3rd are combined somehow. There is something wrong in the implementation of my code but I don't know where is wrong.
iris = ExampleData[{"Statistics", "FisherIris"}];
rs = RandomSample[Range[150]];
n = Length[iris];
cut = Ceiling[0.8 n];
train = iris[[rs[[1 ;; cut]]]];
test = iris[[rs[[cut + 1 ;;]]]];
x = train[[All, 1 ;; 4]];
y = train[[All, -1]] /. {"setosa" -> 1, "versicolor" -> 2, "virginica" -> 3}
tList = SoftmaxRegression[x, y]
the result is:
{{0.0268716, 0.0458991, 0.132301, -0.209938, -0.0960074}, {-0.00348727, -0.0128626, -0.0458174, 0.0371426, 0.00473217}}
p = hypothethis[tList, 3, #] & /@ test[[All, 1 ;; 4]]
{{0.218183, 0.390056}, {0.221959, 0.387587}, {0.251179,
0.367642}, {0.270846, 0.354952}, {0.452594, 0.249346}, {0.279229,
0.352813}, {0.220481, 0.385259}, {0.212727, 0.392914}, {0.433949,
0.25978}, {0.258379, 0.360879}, {0.264716, 0.362466}, {0.235144,
0.377701}, {0.450472, 0.249237}, {0.456505, 0.246573}, {0.199231,
0.401248}, {0.251868, 0.367067}, {0.194777, 0.399115}, {0.256978,
0.366888}, {0.200664, 0.399031}, {0.20611, 0.394495}, {0.190138,
0.407413}, {0.438089, 0.257827}, {0.450832, 0.249554}, {0.233257,
0.376698}, {0.247638, 0.372358}, {0.273605, 0.352006}, {0.208238,
0.392663}, {0.270954, 0.356693}, {0.202797, 0.394089}, {0.210708,
0.388326}}
AllClassProb[prob_List]:= Append[prob, 1 - Total[prob]]
probability = AllClassProb /@ p
We found the 3rd class's probability by the previous two.
{{0.218183, 0.390056, 0.391761}, {0.221959, 0.387587, 0.390454},
{0.251179, 0.367642, 0.381179}, {0.270846, 0.354952,
0.374202}, {0.452594, 0.249346, 0.29806}, {0.279229, 0.352813,
0.367959}, {0.220481, 0.385259, 0.39426}, {0.212727, 0.392914,
0.394358}, {0.433949, 0.25978, 0.306271}, {0.258379, 0.360879,
0.380741}, {0.264716, 0.362466, 0.372818}, {0.235144, 0.377701,
0.387155}, {0.450472, 0.249237, 0.300291}, {0.456505, 0.246573,
0.296921}, {0.199231, 0.401248, 0.399521}, {0.251868, 0.367067,
0.381066}, {0.194777, 0.399115, 0.406108}, {0.256978, 0.366888,
0.376134}, {0.200664, 0.399031, 0.400304}, {0.20611, 0.394495,
0.399395}, {0.190138, 0.407413, 0.402448}, {0.438089, 0.257827,
0.304084}, {0.450832, 0.249554, 0.299614}, {0.233257, 0.376698,
0.390045}, {0.247638, 0.372358, 0.380003}, {0.273605, 0.352006,
0.374389}, {0.208238, 0.392663, 0.399099}, {0.270954, 0.356693,
0.372353}, {0.202797, 0.394089, 0.403113}, {0.210708, 0.388326,
0.400966}}
augmentProb = MapThread[
Transpose[{#1, #2}] &, {Table[Range[Length[Transpose[probability]]], {Length[probability]}], probability}]
result = MaximalBy[#, Last] & /@ augmentProb
theoreticalResult = test /. {"setosa" -> 1, "versicolor" -> 2, "virginica" -> 3};
theoreticalResult[[All, -1]]
{3, 2, 2, 2, 1, 2, 3, 3, 1, 2, 2, 3, 1, 1, 3, 2, 3, 2, 3, 3, 3, 1, 1, 3, 2, 2, 3, 2, 3, 3}
exprimentalResult = Transpose[Flatten[result, 1]];
exprimentalResult[[1]]
{3, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 1, 2, 3, 3, 3, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3, 3, 3}
Comments
Post a Comment