==> compute_kernel.m <==

function K = compute_kernel(ind1,ind2,hp)

global X

switch hp.type
  case 'linear'
    K = X(ind1,:)*X(ind2,:)';
  case 'rbf'
    if isempty(ind2)
      K = ones(length(ind1),1);
      return;
    end;
    normX = sum(X(ind1,:).^2,2);
    normY = sum(X(ind2,:).^2,2);
    K = exp(-0.5/hp.sig^2*(repmat(normX ,1,length(ind2)) + ...
                           repmat(normY',length(ind1),1) - ...
                           2*X(ind1,:)*X(ind2,:)'));
  otherwise
    error('Unknown kernel');
end;
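
The RBF branch above reads the inputs from the global X and expects a struct with fields type and sig. The following is an illustrative usage sketch, not one of the package files; the toy sizes and variable names are made up, and hp follows the fields used in test_sparse_primal_svm.m:

  global X
  X = randn(50,3);                  % toy inputs; compute_kernel reads them from the global X
  hp.type = 'rbf';                  % or 'linear'
  hp.sig  = sqrt(3);                % RBF bandwidth, chosen as sqrt(d) in the test script

  Ktr = compute_kernel(1:40,1:40,hp);    % 40 x 40 kernel submatrix
  Kte = compute_kernel(41:50,1:40,hp);   % cross-kernel between two index sets
  dK  = compute_kernel(1:40,[],hp);      % empty second argument: RBF diagonal, a vector of ones
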
==> primal_svm.m <==

function [sol,b,obj] = primal_svm(linear,Y,lambda,opt)
% [SOL, B] = PRIMAL_SVM(LINEAR,Y,LAMBDA,OPT)
% Solves the SVM optimization problem in the primal (with quadratic
% penalization of the training errors).
%
% If LINEAR is 1, a global variable X containing the training inputs
%   should be defined. X is an n x d matrix (n = number of points).
% If LINEAR is 0, a global variable K (the n x n kernel matrix) should be defined.
% Y is the target vector (+1 or -1, length n).
% LAMBDA is the regularization parameter ( = 1/C).
%
% If LINEAR is 0, SOL is the expansion of the solution (vector beta of length n).
% If LINEAR is 1, SOL is the hyperplane w (vector of length d).
% B is the bias.
% The outputs on the training points are either K*SOL+B or X*SOL+B.
% OBJ is the objective function value.
%
% OPT is a structure containing the options (default values in brackets):
%   cg: Do not use Newton, but nonlinear conjugate gradients [0]
%   lin_cg: Compute the Newton step with linear CG
%     [0 unless solving sparse linear SVM]
%   iter_max_Newton: Maximum number of Newton steps [20]
%   prec: Stopping criterion
%   cg_prec and cg_it: stopping criteria for the linear CG.

% Copyright Olivier Chapelle, olivier.chapelle@tuebingen.mpg.de
% Last modified 25/08/2006

if nargin < 4   % Assign the options to their default values
  opt = [];
end;
if ~isfield(opt,'cg'),              opt.cg = 0;               end;
if ~isfield(opt,'lin_cg'),          opt.lin_cg = 0;           end;
if ~isfield(opt,'iter_max_Newton'), opt.iter_max_Newton = 20; end;
if ~isfield(opt,'prec'),            opt.prec = 1e-6;          end;
if ~isfield(opt,'cg_prec'),         opt.cg_prec = 1e-4;       end;
if ~isfield(opt,'cg_it'),           opt.cg_it = 20;           end;

% Call the right function depending on problem type and CG / Newton.
% Also check that X / K exists and that the dimension of Y is correct.
if linear
  global X;
  if isempty(X), error('Global variable X undefined'); end;
  [n,d] = size(X);
  if issparse(X), opt.lin_cg = 1; end;
  if size(Y,1)~=n, error('Dimension error'); end;
  if ~opt.cg
    [sol,obj] = primal_svm_linear   (Y,lambda,opt);
  else
    [sol,obj] = primal_svm_linear_cg(Y,lambda,opt);
  end;
else
  global K;
  if isempty(K), error('Global variable K undefined'); end;
  n = size(Y,1);
  if any(size(K)~=n), error('Dimension error'); end;
  if ~opt.cg
    [sol,obj] = primal_svm_nonlinear   (Y,lambda,opt);
  else
    [sol,obj] = primal_svm_nonlinear_cg(Y,lambda,opt);
  end;
end;

% The last component of the solution is the bias b.
b = sol(end);
sol = sol(1:end-1);

fprintf('\n');


function [w,obj] = primal_svm_linear(Y,lambda,opt)
% -------------------------------
% Train a linear SVM using Newton
% -------------------------------
global X;
[n,d] = size(X);

w = zeros(d+1,1);  % The last component of w is b.
iter = 0;
out = ones(n,1);   % Vector containing 1-Y.*(X*w)

while 1
  iter = iter + 1;
  if iter > opt.iter_max_Newton
    warning(sprintf(['Maximum number of Newton steps reached. ' ...
                     'Try larger lambda']));
    break;
  end;

  [obj, grad, sv] = obj_fun_linear(w,Y,lambda,out);

  % Compute the Newton direction either exactly or by linear CG
  if opt.lin_cg
    % Advantage of linear CG when using sparse input: the Hessian is never
    % computed explicitly.
    [step, foo, relres] = minres(@hess_vect_mult, -grad,...
                                 opt.cg_prec,opt.cg_it,[],[],[],sv,lambda);
  else
    Xsv = X(sv,:);
    hess = lambda*diag([ones(d,1); 0]) + ...   % Hessian
           [[Xsv'*Xsv sum(Xsv,1)']; [sum(Xsv) length(sv)]];
    step = - hess \ grad;                      % Newton direction
  end;

  % Do an exact line search
  [t,out] = line_search_linear(w,step,out,Y,lambda);
  w = w + t*step;

  fprintf(['Iter = %d, Obj = %f, Nb of sv = %d, Newton decr = %.3f, ' ...
           'Line search = %.3f'],iter,obj,length(sv),-step'*grad/2,t);
  if opt.lin_cg
    fprintf(', Lin CG acc = %.4f \n',relres);
  else
    fprintf(' \n');
  end;

  if -step'*grad < opt.prec * obj
    % Stop when the Newton decrement is small enough
    break;
  end;
end;
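
% For reference, a sketch of the quantities handled by the Newton iteration
% above, in the notation of obj_fun_linear below (w has the bias as its last
% component, w0 is w with that component zeroed,
% out = max(0, 1 - Y.*(X*w(1:d) + w(d+1))) and sv = find(out>0)):
%   objective: lambda/2 * w0'*w0 + 1/2 * sum(out(sv).^2)
%   gradient : lambda*w0 - [X(sv,:)'*(out(sv).*Y(sv)); sum(out(sv).*Y(sv))]
%   Hessian  : lambda*diag([ones(d,1); 0]) + [Xsv'*Xsv sum(Xsv,1)'; sum(Xsv) length(sv)]
% The loop stops when the Newton decrement -step'*grad falls below prec * objective.
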
function [w, obj] = primal_svm_linear_cg(Y,lambda,opt)
% -----------------------------------------------------
% Train a linear SVM using nonlinear conjugate gradient
% -----------------------------------------------------
global X;
[n,d] = size(X);

w = zeros(d+1,1);  % The last component of w is b.
iter = 0;
out = ones(n,1);   % Vector containing 1-Y.*(X*w)

go = [X'*Y; sum(Y)];  % -gradient at w=0
s = go;               % The first search direction is given by the gradient

while 1
  iter = iter + 1;
  if iter > opt.cg_it * min(n,d)
    warning(sprintf(['Maximum number of CG iterations reached. ' ...
                     'Try larger lambda']));
    break;
  end;

  % Do an exact line search
  [t,out] = line_search_linear(w,s,out,Y,lambda);
  w = w + t*s;

  % Compute the new gradient
  [obj, gn] = obj_fun_linear(w,Y,lambda,out);  gn = -gn;

  fprintf('Iter = %d, Obj = %f, Norm of grad = %.3f \n',iter,obj,norm(gn));

  % Stop when the relative decrease in the objective function is small
  if t*s'*go < opt.prec*obj, break; end;

  % Fletcher-Reeves update. Change 0 into 1 for Polak-Ribiere
  be = (gn'*gn - 0*gn'*go) / (go'*go);
  s = be*s + gn;
  go = gn;
end;


function [obj, grad, sv] = obj_fun_linear(w,Y,lambda,out)
% Compute the objective function, its gradient and the set of support vectors.
% out is supposed to contain 1-Y.*(X*w)
global X
out = max(0,out);
w0 = w; w0(end) = 0;                              % Do not penalize b
obj = sum(out.^2)/2 + lambda*w0'*w0/2;            % L2 penalization of the errors
grad = lambda*w0 - [((out.*Y)'*X)'; sum(out.*Y)]; % Gradient
sv = find(out>0);


function y = hess_vect_mult(w,sv,lambda)
% Compute the Hessian times a given vector x.
% hess = lambda*diag([ones(d-1,1); 0]) + (X(sv,:)'*X(sv,:));
global X
y = lambda*w;
y(end) = 0;
z = (X*w(1:end-1)+w(end));  % Computing X(sv,:)*x takes more time in Matlab :-(
zz = zeros(length(z),1);
zz(sv) = z(sv);
y = y + [(zz'*X)'; sum(zz)];


function [t,out] = line_search_linear(w,d,out,Y,lambda)
% From the current solution w, do a line search in the direction d by
% 1D Newton minimization
global X
t = 0;
% Precompute some dot products
Xd = X*d(1:end-1)+d(end);
wd = lambda * w(1:end-1)'*d(1:end-1);
dd = lambda * d(1:end-1)'*d(1:end-1);
while 1
  out2 = out - t*(Y.*Xd);                     % The new outputs after a step of length t
  sv = find(out2>0);
  g = wd + t*dd - (out2(sv).*Y(sv))'*Xd(sv);  % The gradient (along the line)
  h = dd + Xd(sv)'*Xd(sv);                    % The second derivative (along the line)
  t = t - g/h;  % Take the 1D Newton step. Note that if d was an exact Newton
                % direction, t is 1 after the first iteration.
  if g^2/h < 1e-10, break; end;
  % fprintf('%f %f\n',t,g^2/h)
end;
out = out2;
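
% For reference, a sketch of the kernel case handled below: the solution is
% parametrized as f = K*beta + b, and the objective minimized is
%   lambda/2 * beta'*K*beta + 1/2 * sum_{i in sv} (1 - Y(i)*f(i))^2
% with sv = {i : Y(i)*f(i) < 1}. Each Newton iteration in primal_svm_nonlinear
% keeps beta supported on the current sv set and solves
%   (K(sv,sv) + lambda*eye(length(sv))) * beta(sv) + b = Y(sv),  sum(beta(sv)) = 0,
% written in augmented form with the constant cte_for_b, which only serves to
% keep the bias row/column well conditioned.
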
function [beta,obj] = primal_svm_nonlinear(Y,lambda,opt)
% -----------------------------------
% Train a non-linear SVM using Newton
% -----------------------------------
global K
training = find(Y);    % The points with 0 are ignored.
n = length(training);  % The real number of training points

if n>=1000  % Train on a subset first
  perm = randperm(n);
  ind = training(perm(1:round(.75*n)));  % Ignore a random 75% -> subset of size n/4
  Y2 = Y; Y2(ind) = 0;
  beta = primal_svm_nonlinear(Y2,lambda,opt);
  sv = find(beta(1:end-1)~=0);
  Kb = K(training,sv)*beta(sv);  % Kb will always contain K times the current beta
else
  sv = training;
  beta = zeros(length(Y)+1,1);   % The last component of beta is b.
  Kb = zeros(n,1);
end;

iter = 0;
% If the set of support vectors has changed, we need to reiterate.
while 1
  old_sv = sv;

  % Computing the objective function
  out = 1 - Y(training) .* (Kb+beta(end));
  sv = training(out > 0);
  obj = (lambda*beta(training)'*Kb + sum(max(0,out).^2)) / 2;

  iter = iter + 1;
  % If the set of support vectors doesn't change, we can't improve anymore
  if (iter > 1) & isempty(setxor(sv,old_sv)), break; end;
  if iter > opt.iter_max_Newton
    warning(sprintf(['Maximum number of Newton steps reached. ' ...
                     'Try larger lambda']));
    break;
  end;

  H = K(sv,sv) + lambda*eye(length(sv));
  cte_for_b = mean(diag(K));
  H(end+1,:) = cte_for_b;  % To take the bias into account
  H(:,end+1) = cte_for_b;  % The actual value of this constant does not matter.
  H(end,end) = 0;          % For numerical reasons, take it of the order of K.

  % beta_new would be the new vector beta if the full Newton step were taken
  beta_new = zeros(length(Y)+1,1);
  if opt.lin_cg
    [beta_new([sv; end]), foo1, relres] = minres(H,[Y(sv);0],opt.cg_prec,opt.cg_it);
  else
    beta_new([sv; end]) = H\[Y(sv);0];
  end;
  beta_new(end) = beta_new(end) * cte_for_b;

  % Do a line search, but with a preference for a full Newton step
  step = beta_new - beta;
  [t, Kb] = line_search_nonlinear(step([training; end]),Kb,beta(end),Y,lambda,1);
  beta = beta + t*step;

  fprintf('n = %d, iter = %d, obj = %f, nb of sv = %d, line srch = %.4f',...
          [n iter obj length(sv) t]);
  if opt.lin_cg
    fprintf(', Lin CG acc = %.4f \n',relres);
  else
    fprintf(' \n');
  end;
end;
sol = beta;


function [beta, obj] = primal_svm_nonlinear_cg(Y,lambda,opt)
% --------------------------------------------------------
% Train a non-linear SVM using nonlinear conjugate gradient
% --------------------------------------------------------
global K;
n = length(K);

beta = zeros(n+1,1);  % The last component of beta is b.
iter = 0;
Kb = zeros(n,1);      % Kb will always contain K times the current beta

go = [Y; sum(Y)];     % go = -gradient at beta=0
s = go;               % Initial search direction
Kgo = [K*Y; sum(Y)];  % We use the preconditioner [[K 0]; [0 1]]
Ks = Kgo(1:end-1);    % Ks will always contain K*s(1:end-1)

while 1
  iter = iter + 1;
  if iter > opt.cg_it * n
    warning(sprintf(['Maximum number of CG iterations reached. ' ...
                     'Try larger lambda']));
    break;
  end;

  % Do an exact line search
  [t,Kb] = line_search_nonlinear(s,Kb,beta(end),Y,lambda,0,Ks);
  beta = beta + t*s;

  % Compute new gradient and objective.
  % Note that the gradient is already "divided" by the preconditioner
  [obj, grad] = obj_fun_nonlinear(beta,Y,lambda,Kb);
  gn = -grad;

  fprintf('Iter = %d, Obj = %f, Norm grad = %f \n',iter,obj,norm(gn));

  % Stop when the relative decrease in the objective function is small
  if t*s'*Kgo < opt.prec*obj, break; end;

  Kgn = [K*gn(1:end-1); gn(end)];  % Multiply by the preconditioner
                                   % -> Kgn is the real gradient
  % Fletcher-Reeves update. Change 0 into 1 for Polak-Ribiere
  be = (Kgn'*gn - 0*Kgn'*go) / (Kgo'*go);
  % be = (gn'*gn - gn'*go) / (go'*go);
  s = be*s + gn;
  Ks = be*Ks + Kgn(1:end-1);
  go = gn; Kgo = Kgn;
end;


function [t, Kb] = line_search_nonlinear(step,Kb,b,Y,lambda,fullstep,Ks)
% Given the current solution (as given by Kb), do a line search in
% direction step. First try to take a full step if fullstep = 1.
global K;
training = find(Y~=0);
act = find(step(1:end-1));  % The set of points for which beta changes
if nargin<7
  Ks = K(training,training(act))*step(act);
end;
Kss = step(act)'*Ks(act);   % Precompute some dot products
Kbs = step(act)'*Kb(act);
t = 0;
Y = Y(training);

% Compute the objective function for t=1
out = 1-Y.*(Kb+b+Ks+step(end));
sv = out>0;
obj1 = (lambda*(2*Kbs+Kss)+sum(out(sv).^2))/2;

while 1
  out = 1-Y.*(Kb+b+t*(Ks+step(end)));
  sv = out>0;
  % The objective function and the first derivative (along the line)
  obj = (lambda*(2*t*Kbs+t^2*Kss)+sum(out(sv).^2))/2;
  g = lambda * (Kbs+t*Kss) - (Ks(sv)'+step(end))*(Y(sv).*out(sv));
  if fullstep & (t==0) & (obj-obj1 > -0.2*g)
    % First check t=1: if it works, keep it -> sparser solution
    t = 1;
    break;
  end;
  % The second derivative (along the line)
  h = lambda*Kss + norm(Ks(sv)+step(end))^2;
  % fprintf('%d %f %f %f\n',length(find(sv)),t,obj,g^2/h);
  % Take the 1D Newton step
  t = t - g/h;
  if g^2/h < 1e-10, break; end;
end;

Kb = Kb + t*Ks;


function [obj, grad] = obj_fun_nonlinear(beta,Y,lambda,Kb)
global K;
out = Kb+beta(end);
sv = find(Y.*out < 1);
% Objective function ...
obj = (lambda*beta(1:end-1)'*Kb + sum((1-Y(sv).*out(sv)).^2)) / 2;
% ... and preconditioned gradient
grad = [lambda*beta(1:end-1); sum(out(sv)-Y(sv))];
grad(sv) = grad(sv) + (out(sv)-Y(sv));
% To compute the real gradient, one would have to execute the following line:
% grad = [K*grad(1:end-1); grad(end)];
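
A minimal calling sketch for primal_svm, condensing what test_primal_svm.m further below does (the data sizes and variable names here are illustrative only):

  global X K
  X = randn(500,20);                % training inputs, read through the global X
  Y = sign(X(:,1));                 % labels in {-1,+1}
  lambda = 1;                       % regularization parameter ( = 1/C)

  [w, b]     = primal_svm(1,Y,lambda);   % linear SVM: w is d x 1
  out_linear = X*w + b;                  % training outputs

  K = X*X';                              % kernel matrix, read through the global K
  [beta, b2] = primal_svm(0,Y,lambda);   % kernel SVM: beta is n x 1
  out_kernel = K*beta + b2;              % identical outputs here, the kernel being linear
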
==> sparse_primal_svm.m <==

function [beta,b,S,obj,te,time] = sparse_primal_svm(Y,Yt,ker,kerparam,C,opt)
% [BETA,B,S] = SPARSE_PRIMAL_SVM(Y,YT,KER,KERPARAM,C,OPT)
% Approximates the SVM solution by expanding it on a small set of basis functions.
%
% Y is the target vector (+1 or -1, length n)
% YT is a test vector (length nt)
% KER is a function handle to a kernel function of the form
%   K = MY_KERNEL(IND1,IND2,KERPARAM) computing the kernel submatrix
%   between the points with indices IND1 and IND2. KERPARAM is an
%   additional argument containing for instance kernel
%   parameters. Indices are supposed to be between 1 and n+nt, an index
%   larger than n corresponding to a test point.
% KERPARAM: see above
% C is the constant penalizing the training errors
% OPT is a structure containing the following optional fields:
%   nb_cand (aka kappa): the number of candidates at each iteration
%   set_size (aka dmax): the final number of basis functions
%   maxiter: the maximum number of iterations for Newton
%   base_recomp: the solution is recomputed whenever the expansion size
%     crosses a power of base_recomp
%   verb: verbosity
%
% BETA is the vector of expansion coefficients
% B is the bias
% S contains the indices of the expansion (same size as BETA)
%
% [BETA,B,S,OBJ,TE,TIME] = SPARSE_PRIMAL_SVM(Y,YT,KER,KERPARAM,C,OPT)
%   OBJ,TE contain the objective function and the test error after
%   each retraining
%   TIME contains the time spent between each retraining (it does not
%   include the time for computing the kernel test matrix).

% Copyright Olivier Chapelle, olivier.chapelle@tuebingen.mpg.de
% Last modified Sep 11, 2006

global K hess sv

if nargin<6
  opt = [];
end;

n = length(Y);
% Set the parameters to their default value
if ~isfield(opt,'nb_cand'),     opt.nb_cand = 10;            end;
if ~isfield(opt,'set_size'),    opt.set_size = round(n/100); end;
if ~isfield(opt,'maxiter'),     opt.maxiter = 20;            end;
if ~isfield(opt,'base_recomp'), opt.base_recomp = 2^0.25;    end;
if ~isfield(opt,'verb'),        opt.verb = 1;                end;

% Memory allocation for K (size n times dmax) and Kt (size nt times dmax).
% The signed outputs are computed as K*[b; beta]. That's why the first
% column is Y (and just 1 for Kt).
K = zeros(n,opt.set_size+1);           K(:,1) = Y;
Kt = zeros(length(Yt),opt.set_size+1); Kt(:,1) = 1;

% hess is the Cholesky decomposition of the Hessian
hess = sqrt(C*n)*(1+1e-10);

% At the beginning x (which contains [b; beta]) equals 0 and all points
% are training errors. The set sv is the set of points for which y_i f(x_i) < 1.
x = 0;
sv = 1:n;

S = []; te = []; time = []; obj = [];
tic;

while 1  % Loop until all the basis functions are selected
  if retrain(length(S),opt)  % It's time to retrain ...
    d0 = size(hess,1);
    update_hess(S,Y,C,ker,kerparam);  % First, compute the new columns of K
                                      % and update the Hessian and its
                                      % Cholesky decomposition ...
    [x,out,obj2] = train_subset(S,C,opt,x);  % ... and then do a Newton optimization
    if obj2<0  % Newton didn't converge. Probably because the Hessian
               % is not well conditioned. 1/C should be not much
               % smaller than the diagonal elements of the kernel.
      mean_kernel = mean(feval(ker,1:size(K,1),[],kerparam));
      error(sprintf('Convergence problem. Try to take C of the order of 10^%d.\n',...
                    ceil(log10(1/mean_kernel))));
    end;
    out = out(sv);
    obj = [obj obj2];
    time = [time toc];
    if ~isempty(Yt)  % Compute the new test error
      nnt = length(S)-d0;
      if nnt>=0
        Kt(:,d0+1:d0+nnt+1) = feval(ker,n+[1:length(Yt)],S(end-nnt:end),kerparam);
      end;
      te = [te mean(Yt.*(Kt(:,1:length(x))*x)<0)];
      if opt.verb > 0, fprintf(' Test error = %.4f',te(end)); end;
    end;
    tic;
  end;

  if length(S)==opt.set_size  % We're done !
    break;
  end;

  % Choose a random subset of new candidates (excluding the points
  % which are already in the expansion)
  candidates = 1:n;
  candidates(S) = [];
  candidates = candidates(randperm(length(candidates)));
  candidates = candidates(1:opt.nb_cand);

  [ind,x,out] = choose_next_point(candidates,S,x,out,Y,ker,kerparam,C,opt);
  S = [S candidates(ind)];
end;

beta = x(2:end);
b = x(1);
if opt.verb>0, fprintf('\n'); end;
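
% A note on the schedule implemented by retrain() at the bottom of this file
% (a sketch, assuming the default base_recomp = 2^0.25): the expansion S grows
% by one basis function per pass through the loop above, but the full Newton
% retraining (update_hess followed by train_subset) is only triggered when
% length(S) crosses a power of base_recomp -- roughly four retrainings per
% doubling of the expansion size -- plus once when length(S) reaches set_size.
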
function [x,out,obj] = train_subset(S,C,opt,x)
global K hess sv
persistent old_obj old_out x_old;

if opt.verb>0, fprintf('\n'); end;
iter = 0;
d = length(hess);

% We start from the old solution; the new components are either set to 0
% or estimated (cf the end of the choose_next_point function), depending
% on what gives the smallest objective value.
old_sv = sv;
out2 = K(:,1:d)*x;
sv2 = find(out2'<1);
obj2 = 0.5*((x(2:end,:).*K(S,1))'*K(S,2:d)*x(2:end,:) + ...
            C*sum((1-out2(sv2)).^2));
if ~isempty(S) & (obj2 > old_obj)
  x_old = [x_old; zeros(length(x)-length(x_old),1)];
else
  x_old = x;
  old_obj = obj2;
  sv = sv2;
  old_out = out2;
end;

while 1
  if iter > opt.maxiter
    obj = -1;
    return;
  end;
  iter = iter + 1;

  % The set of errors has changed (and so has the Hessian). We update the
  % Cholesky decomposition of the Hessian accordingly.
  for i=setdiff(sv,old_sv)
    hess = cholupdate(hess,sqrt(C)*K(i,1:d)','+');
  end;
  for i=setdiff(old_sv,sv)
    hess = cholupdate(hess,sqrt(C)*K(i,1:d)','-');
  end;
  old_sv = sv;

  % Take a Newton step. By writing out the equations,
  % it simplifies to the following:
  x = C*(hess \ (hess' \ sum(K(sv,1:d),1)'));

  step = x-x_old;             % Newton step
  delta_out = K(:,1:d)*step;  % Change in the outputs

  while 1
    % Backtracking: if the Newton step is too long (resulting
    % in an increase of the objective value), it is divided by 2.
    x = x_old + step;
    out = old_out + delta_out;
    sv = find(out'<1);        % Identify the errors
    % Compute the objective function
    obj = 0.5*((x(2:end,:).*K(S,1))'*K(S,2:d)*x(2:end,:) + ...
               C*sum((1-out(sv)).^2));
    if obj < old_obj, break; end;
    % The step is too long: divide it by 2.
    step = step / 2;
    delta_out = delta_out / 2;
  end;

  x_old = x; old_obj = obj; old_out = out;

  if opt.verb>0
    fprintf(['\nNb basis = %d, iter Newton = %d, Obj = %.2f, ' ...
             'Nb errors = %d '],length(hess)-1,iter,obj,length(sv));
  end;

  if isempty(setxor(old_sv,sv))  % No more changes -> stop
    break;
  end;
end;
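
% For reference (a sketch in the notation used above): the global hess is the
% upper-triangular Cholesky factor R of the current Hessian, R'*R = H0 +
% C * K(sv,1:d)'*K(sv,1:d), where H0 is the regularization block assembled in
% update_hess below and sv is the current set of errors. This is why a point
% entering or leaving sv is handled by a rank-one cholupdate with the vector
% sqrt(C)*K(i,1:d)', and why the Newton step above only needs the two
% triangular solves hess \ (hess' \ rhs).
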
function update_hess(S,Y,C,ker,kerparam)
global K hess sv

d = length(S);
d0 = length(hess) - 1;
if d==d0, return; end;

% Compute the new columns of K corresponding to the basis functions that have
% just been added
K(:,d0+2:d+1) = feval(ker,1:length(Y),S(end-(d-d0-1):end),kerparam) ...
                .* repmat(Y,1,d-d0);

h = [zeros(1,d-d0); K(S,d0+2:d+1).*repmat(Y(S),1,d-d0)] + ...
    C * K(sv,1:d+1)' * K(sv,d0+2:d+1);
% The new Hessian would be [[old_hessian h2]; [h2' h3]]
h2 = h(d0+2:end,:);
h2 = h2 + 1e-10*mean(diag(h2))*eye(size(h,2));  % Ridge, only for numerical reasons
h3 = hess' \ h(1:d0+1,:);
h4 = chol(h2-h3'*h3);
% New Cholesky decomposition of the augmented Hessian
hess = [[hess h3]; [zeros(d-d0,d0+1) h4]];


function [select,x,out] = choose_next_point(candidates,S,x,out,Y,ker,kerparam,C,opt)
global K hess sv

% When we choose the next basis function, we don't do any retraining:
% we assume that everything is quadratic and that the other weights are fixed.
n = length(Y);
K2 = feval(ker,sv,candidates,kerparam).*repmat(Y(sv),1,length(candidates));
K3 = feval(ker,S, candidates,kerparam);
Kd = feval(ker,candidates,[],kerparam);

% If the point candidates(i) were added as a basis function, the first and
% second derivatives with respect to its weight would be g(i) and h(i)
h = Kd + C*sum(K2.^2,1)';
g = K3'*x(2:end,:) + C*K2'*(out-1);

score = g.^2./h;                   % Newton decrement
[max_score, select] = max(score);  % The larger the better

if max_score<1e-8
  warning('No good basis function');
end;

x = [x; -g(select)/h(select)];    % Still assuming that the other weights
                                  % are fixed, the estimated weight of the
                                  % new basis function is g/h
out = out + K2(:,select)*x(end);  % Update the outputs


function r = retrain(d,opt)
% Check if we should retrain
b = opt.base_recomp;
if d<2, r=1; return; end;
r = (floor(log(d)/log(b)) ~= floor(log(d-1)/log(b)));
% We always retrain at the end
r = r | (d==opt.set_size);
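
A minimal calling sketch for sparse_primal_svm, using the compute_kernel function at the top of this package as the kernel handle (it mirrors test_sparse_primal_svm.m further below; the sizes, C and set_size values are illustrative):

  global X
  n = 500; d = 10;
  X = randn(n,d);                        % inputs, read by compute_kernel through the global X
  Y = 2*(sum(X.^2,2) > d) - 1;           % labels in {-1,+1}

  hp.type = 'rbf'; hp.sig = sqrt(d);     % kernel parameters passed through to compute_kernel
  C = 1;
  opt.set_size = 20;                     % dmax: size of the basis-function expansion

  [beta,b,S] = sparse_primal_svm(Y,[],@compute_kernel,hp,C,opt);  % no test set (YT = [])
  out = compute_kernel(1:n,S,hp)*beta + b;   % decision values on the training points
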
==> test_primal_svm.m <==

global X K

% Generating 3000 points in 100 dimensions
X = randn(3000,100);
Y = sign(X(:,1));
K = X*X';
lambda = 1;

[w,   b0] = primal_svm(1,Y,lambda);
[beta,b ] = primal_svm(0,Y,lambda);

% The solutions are the same because the kernel is linear!
norm( K*beta+b - (X*w+b0))

% Now try to solve by conjugate gradient
opt.cg = 1;
[w,   b0] = primal_svm(1,Y,lambda,opt);
[beta,b ] = primal_svm(0,Y,lambda,opt);
norm( K*beta+b - (X*w+b0))

% Sparse linear problem
X = sprandn(1e5,1e4,1e-3);
Y = sign(sum(X,2)+randn(1e5,1));
[w,b] = primal_svm(1,Y,lambda);


==> test_sparse_primal_svm.m <==

clear; randn('state',1);
global X K

n = 1000;
nt = 3000;
d = 10;

fprintf('Problem of learning a spherical boundary\n');
fprintf('Generating %d noisy training points in %d dimensions\n',n,d)
X = randn(n+nt,d);
Y = 2*(sum(X.^2,2) > d*(1-0.1*randn(n+nt,1)))-1;
fprintf('Error Bayes classifier = %f\n',mean(Y.*(sum(X.^2,2)-d)<0));

hp.type = 'rbf';
hp.sig = sqrt(d);
C = 1;

% Full (non-sparse) kernel SVM for reference
tic;
K = compute_kernel(1:n,1:n,hp);
[alpha,b,obj0] = primal_svm(0,Y(1:n),1/C);
time0 = toc;
te0 = mean(Y(n+1:n+nt).*(compute_kernel(n+1:n+nt,1:n,hp)*alpha+b)<0);
fprintf('Test error full SVM = %f, Time = %2.2f sec\n',te0,time0);
nsv = length(find(alpha));

% Sparse approximation with 32 basis functions
opt.set_size = 32;
[alpha,b,S,obj,te,time] = sparse_primal_svm(Y(1:n),Y(n+1:end),...
                                            @compute_kernel,hp,C,opt);
fprintf('Time = %2.2f sec\n',sum(time));

sz = unique(round((2^0.25).^[1:10+length(obj)]));
scale = te(1)/obj(1);
semilogx(sz(1:length(obj)),[te; obj*scale]);
hold on
plot(nsv,[te0 obj0*scale*C],'o');
hold off
legend('Test error','Objective function (scaled)','Full SVM','Full SVM');
xlabel('Number of basis functions');

fprintf('About 10 basis functions are enough for this problem\n')


==> README <==

Links to the papers explaining both algorithms can be found at:
http://www.kyb.tuebingen.mpg.de/bs/people/chapelle/primal/