# Reading Data
rawdata <- read.table("wiki.txt", sep=";", skip=1)

# keep the questionnaire items used in the analysis and drop incomplete respondents
helpme <- data.matrix(rawdata[, c(11:13, 19:21, 27:42)]) - 1
helpme[helpme == 0] <- NaN
begdata <- helpme[complete.cases(helpme), ]

# item blocks of the questionnaire
quality  <- begdata[, c(1:6)]
image    <- begdata[, c(7:9, 21, 22)]
perschar <- begdata[, c(10:12, 18:20)]
behavior <- begdata[, c(13:17)]
percuse  <- begdata[, 1:3]
qual     <- begdata[, 4:6]
socim    <- begdata[, 7:9]
jobrel   <- begdata[, c(21, 22)]
shaatt   <- begdata[, 10:12]
prof     <- begdata[, 18:20]

# one aggregated (rounded-up average) score per construct
data <- cbind(matrix(ceiling(rowSums(percuse)/3), ncol=1),
              matrix(ceiling(rowSums(qual)/3), ncol=1),
              matrix(ceiling(rowSums(socim)/3), ncol=1),
              matrix(ceiling(rowSums(jobrel)/2), ncol=1),
              matrix(ceiling(rowSums(shaatt)/3), ncol=1),
              matrix(ceiling(rowSums(prof)/3), ncol=1),
              matrix(ceiling(rowSums(behavior)/5), ncol=1))
Q  <- ncol(data)   # number of aggregated items
TP <- nrow(data)   # number of respondents

# Mixture Model, Collapsed Gibbs Sampler
K     <- 4       # number of latent classes
alpha <- 1       # symmetric Dirichlet prior on the class proportions
eta   <- 1       # symmetric Dirichlet prior on the item-response probabilities
s     <- 5       # number of answer categories per item
NS    <- 500     # maximum number of Gibbs sweeps
tol   <- 1e-6    # convergence tolerance on the joint log-probability (assumed value)

z         <- matrix(sample(1:K, TP, replace=TRUE), ncol=1)   # initial class assignments
probz     <- matrix(0, nrow=TP, ncol=K)
jointprob <- -Inf
maxjointprob <- jointprob
zstar     <- z
probzstar <- probz
convergence <- 0
ni <- 1

# constant part of the collapsed joint log-probability p(data, z)
const <- lgamma(K*alpha) - K*lgamma(alpha) - lgamma(K*alpha + TP) +
         K*Q*lgamma(s*eta) - K*Q*s*lgamma(eta)

# Gibbs sweeps; the single-site step below is the standard collapsed update for
# the Dirichlet-categorical mixture (the body of the sweep is a reconstruction)
while (ni < NS & convergence == 0){
  for (i in 1:TP){
    zminus <- z[-i]
    for (k in 1:K){
      nk <- sum(zminus == k)                  # class size without respondent i
      lp <- log(nk + alpha)
      for (q in 1:Q){
        nkq <- sum(data[-i, ][zminus == k, q] == data[i, q])
        lp  <- lp + log(nkq + eta) - log(nk + s*eta)
      }
      probz[i, k] <- lp
    }
    probz[i, ] <- exp(probz[i, ] - max(probz[i, ]))
    probz[i, ] <- probz[i, ]/sum(probz[i, ])
    z[i] <- sample(1:K, 1, prob=probz[i, ])
  }
  # collapsed joint log-probability of the current assignment
  logprob <- const
  for (k in 1:K){
    nk <- sum(z == k)
    logprob <- logprob + lgamma(nk + alpha)
    for (q in 1:Q){
      logprob <- logprob - lgamma(nk + s*eta)
      for (v in 1:s){
        logprob <- logprob + lgamma(sum(data[z == k, q] == v) + eta)
      }
    }
  }
  if (logprob > maxjointprob){
    maxjointprob <- logprob
    zstar        <- z
    probzstar    <- probz
  }
  jointprob <- c(jointprob, logprob)
  ni <- ni + 1
  if (abs(jointprob[length(jointprob)] - jointprob[length(jointprob)-1]) < tol){
    convergence <- 1
  }
}

# Cross Validation over the number of classes K
# (the fold construction and the candidate values of K below are assumptions:
#  a random 5-fold split and K = 2,...,6)
Kvalues <- 2:6
nfold   <- 5
combcrvl <- matrix(sample(1:TP, nfold*floor(TP/nfold)), ncol=nfold)   # test indices, one fold per column

jointprobtraining <- matrix(0, nrow=ncol(combcrvl), ncol=length(Kvalues))
jointprobtesting  <- matrix(0, nrow=ncol(combcrvl), ncol=length(Kvalues))
averagejointprobtraining <- matrix(0, nrow=1, ncol=length(Kvalues))
averagejointprobtesting  <- matrix(0, nrow=1, ncol=length(Kvalues))
sdjointprobtraining <- matrix(0, nrow=1, ncol=length(Kvalues))
sdjointprobtesting  <- matrix(0, nrow=1, ncol=length(Kvalues))
maxtest <- -Inf

for (pop in 1:length(Kvalues)){
  K <- Kvalues[pop]
  distbeta  <- array(0, dim=c(s, Q, K, ncol(combcrvl)))
  disttheta <- matrix(0, nrow=K, ncol=ncol(combcrvl))

  for (fold in 1:ncol(combcrvl)){
    testdata  <- data[combcrvl[, fold], ]
    traindata <- data[-combcrvl[, fold], ]
    NT <- nrow(traindata)

    # rerun the collapsed Gibbs sampler of the previous section on the training fold
    z         <- matrix(sample(1:K, NT, replace=TRUE), ncol=1)
    probz     <- matrix(0, nrow=NT, ncol=K)
    jointprob <- -Inf
    maxjointprob <- jointprob
    zstar     <- z
    probzstar <- probz
    convergence <- 0
    ni <- 1
    const <- lgamma(K*alpha) - K*lgamma(alpha) - lgamma(K*alpha + NT) +
             K*Q*lgamma(s*eta) - K*Q*s*lgamma(eta)
    while (ni < NS & convergence == 0){
      for (i in 1:NT){
        zminus <- z[-i]
        for (k in 1:K){
          nk <- sum(zminus == k)
          lp <- log(nk + alpha)
          for (q in 1:Q){
            nkq <- sum(traindata[-i, ][zminus == k, q] == traindata[i, q])
            lp  <- lp + log(nkq + eta) - log(nk + s*eta)
          }
          probz[i, k] <- lp
        }
        probz[i, ] <- exp(probz[i, ] - max(probz[i, ]))
        probz[i, ] <- probz[i, ]/sum(probz[i, ])
        z[i] <- sample(1:K, 1, prob=probz[i, ])
      }
      logprob <- const
      for (k in 1:K){
        nk <- sum(z == k)
        logprob <- logprob + lgamma(nk + alpha)
        for (q in 1:Q){
          logprob <- logprob - lgamma(nk + s*eta)
          for (v in 1:s){
            logprob <- logprob + lgamma(sum(traindata[z == k, q] == v) + eta)
          }
        }
      }
      if (logprob > maxjointprob){
        maxjointprob <- logprob
        zstar        <- z
        probzstar    <- probz
      }
      jointprob <- c(jointprob, logprob)
      ni <- ni + 1
      if (abs(jointprob[length(jointprob)] - jointprob[length(jointprob)-1]) < tol){
        convergence <- 1
      }
    }

    # parameter estimates from the best training assignment zstar
    # (posterior means under the Dirichlet priors; the theta formula and the
    #  membership matrix help1 are assumed reconstructions)
    theta <- matrix(0, nrow=K, ncol=1)
    beta  <- array(0, dim=c(s, Q, K))
    help1 <- matrix(rep(zstar, times=K), ncol=K) ==
             matrix(rep(1:K, times=NT), ncol=K, byrow=TRUE)   # class membership indicators
    for (i in 1:K){
      classi   <- sum(help1[, i])
      theta[i] <- (classi + alpha)/(NT + K*alpha)
      if (classi > 0){
        for (j in 1:Q){
          beta[, j, i] <- t((colSums(matrix(rep(traindata[help1[, i], j], times=s), ncol=s) ==
                                     matrix(rep(1:s, times=classi), ncol=s, byrow=TRUE)) + eta)/
                            (classi + s*eta))
        }
      } else {
        beta[, , i] <- 1/s   # empty class: uniform response probabilities
      }
    }

    # held-out log-likelihood on the test fold
    testjointprob <- 0
    for (i in 1:nrow(testdata)){
      personprob <- 0
      for (j in 1:K){
        classprob <- theta[j]
        for (q in 1:Q){
          classprob <- classprob*beta[testdata[i, q], q, j]
        }
        personprob <- personprob + classprob
      }
      testjointprob <- testjointprob + log(personprob)
    }

    distbeta[, , , fold] <- beta
    disttheta[, fold]    <- theta
    jointprobtraining[fold, pop] <- maxjointprob
    jointprobtesting[fold, pop]  <- testjointprob
  }

  averagejointprobtraining[1, pop] <- sum(jointprobtraining[, pop])/ncol(combcrvl)
  averagejointprobtesting[1, pop]  <- sum(jointprobtesting[, pop])/ncol(combcrvl)
  sdjointprobtraining[1, pop] <- sqrt(sum((jointprobtraining[, pop] - averagejointprobtraining[1, pop])^2)/ncol(combcrvl))
  sdjointprobtesting[1, pop]  <- sqrt(sum((jointprobtesting[, pop] - averagejointprobtesting[1, pop])^2)/ncol(combcrvl))

  # keep the parameters of the K with the best average held-out log-likelihood
  if (averagejointprobtesting[1, pop] > maxtest){
    maxtest  <- averagejointprobtesting[1, pop]
    betaopt  <- array(0, dim=c(s, Q, K))
    thetaopt <- matrix(0, nrow=K, ncol=1)
    for (a in 1:ncol(combcrvl)){
      betaopt <- betaopt + distbeta[, , , a]
    }
    betaopt  <- betaopt/ncol(combcrvl)
    thetaopt <- rowMeans(disttheta)
    Kopt     <- K
  }
}

# Spectral Clustering
A <- matrix(0, nrow=TP, ncol=TP)
D <- matrix(0, nrow=TP, ncol=TP)
sigma2 <- 0.06
K <- 4
for (i in 1:TP){
  # similarity of every respondent to respondent i (Gaussian kernel on L1 distances)
  d <- rowSums(abs(data - matrix(rep(data[i, ], times=TP), ncol=Q, nrow=TP, byrow=TRUE)))
  A[, i] <- exp(-d/(2*sigma2))
  A[i, i] <- 0
}
D <- diag(rowSums(A))
Dinverse <- solve(D)
Dinverse.eig  <- eigen(Dinverse)
Dinverse.sqrt <- Dinverse.eig$vectors %*% diag(sqrt(Dinverse.eig$values)) %*% solve(Dinverse.eig$vectors)
L <- Dinverse.sqrt %*% A %*% Dinverse.sqrt       # normalized affinity matrix D^(-1/2) A D^(-1/2)
L.eig <- eigen(L)
X <- L.eig$vectors[, 1:K]                        # leading K eigenvectors
Y <- X / matrix(rep(sqrt(rowSums(X*X)), times=K), ncol=K, nrow=TP)   # row-normalized eigenvectors
Ycluster <- kmeans(Y, K, iter.max=100, nstart=100, algorithm="Hartigan-Wong", trace=FALSE)

# class membership indicators and per-class response profiles
help1 <- matrix(rep(1:K, times=TP), ncol=K, byrow=TRUE)
help2 <- matrix(rep(Ycluster$cluster, times=K), ncol=K)
help  <- (help2 == help1)
classresponse <- matrix(0, ncol=Q, nrow=2*K)     # rows 1..K: means, rows K+1..2K: standard deviations
for (i in 1:K){
  response <- data[help[, i], ]
  classresponse[i, ]   <- colSums(response)/nrow(response)
  average <- matrix(rep(classresponse[i, ], times=nrow(response)), ncol=Q, byrow=TRUE)
  classresponse[i+K, ] <- sqrt(colSums((response - average)*(response - average))/nrow(response))
}
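# The cross-validated estimates can also be used to read off a soft class
# membership for every respondent. The block below is a minimal sketch, not
# part of the original script: it only assumes betaopt, thetaopt and Kopt from
# the cross-validation step above; postclass is an illustrative name.
postclass <- matrix(0, nrow=TP, ncol=Kopt)
for (i in 1:TP){
  for (j in 1:Kopt){
    classprob <- thetaopt[j]
    for (q in 1:Q){
      classprob <- classprob*betaopt[data[i, q], q, j]
    }
    postclass[i, j] <- classprob
  }
  postclass[i, ] <- postclass[i, ]/sum(postclass[i, ])   # posterior p(class | responses)
}
table(apply(postclass, 1, which.max))                    # class sizes under the MAP assignment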