library(MoTBFs)
library(deal)

## ---------------------------------------------------------------------------
## Internal helpers (shared by the classification and regression code below)
## ---------------------------------------------------------------------------

## Returns the position in `model` of the node whose `Child` field equals
## `var`. If several nodes match, the last one is returned. Stops with an
## explicit error when no node matches.
.nb_find_node <- function(model, var) {
  hits <- which(vapply(
    model,
    function(node) isTRUE(node$Child == var),
    logical(1)
  ))
  if (length(hits) == 0) {
    stop("Variable '", var, "' not found in the model", call. = FALSE)
  }
  hits[length(hits)]
}

## TRUE when `value` must be looked up in a table of named coefficients,
## i.e. when its class is anything other than exactly "numeric".
.nb_is_discrete <- function(value) {
  !identical(class(value), "numeric")
}

## Picks from `px$coeff` the entry whose name equals `value`.
## Returns a zero-length vector when no name matches.
.nb_coeff_for <- function(px, value) {
  px$coeff[which(names(px$coeff) == value)]
}

## Emits `...` through cat() only when `verbose` is TRUE.
.nb_log <- function(verbose, ...) {
  if (verbose) {
    cat(...)
  }
  invisible(NULL)
}

## Builds the network that links the class variable with every other column,
## plots it, and fits the MoTBF potentials.
.nb_learn <- function(data, class, disc, type_pot) {
  data2 <- discreteVariables_as.character(data, discreteVariables = disc)

  index_class <- which(names(data2) == class)
  if (length(index_class) != 1) {
    # Without this guard a misspelled class name silently produced a
    # network with no arcs at all.
    stop("Class variable '", class, "' must match exactly one column",
         call. = FALSE)
  }

  net <- network(data2)
  for (i in setdiff(seq_along(data2), index_class)) {
    net <- getnetwork(insert(net, index_class, i, nocalc = TRUE))
  }
  plot(net)

  MoTBFs_Learning(
    graph = net, data = data2, numIntervals = 3,
    POTENTIAL_TYPE = type_pot, maxParam = 5
  )
}

## ---------------------------------------------------------------------------
## Classification
## ---------------------------------------------------------------------------

## Learns a naive Bayes model from a dataset for classification.
## data     : the data frame
## class    : a string with the name of the class variable
## disc     : a vector with the indices of the discrete variables
## type_pot : "MOP" or "MTE"
## Returns the model produced by MoTBFs_Learning(); plots the network as a
## side effect.
learnMoTBFnb_classification <- function(data, class, disc, type_pot) {
  .nb_learn(data, class, disc, type_pot)
}

## Predicts the class variable with a naive Bayes model for classification.
## model : the naive Bayes model
## data  : the data frame with the features (it must also contain the class
##         column, which is used to recover the class states)
## class : a string with the name of the class variable
## disc  : a vector with the indices of the discrete variables in `data`
## Returns one predicted class state per row of `data`.
predict.nbClass <- function(model, data, class, disc) {
  data2 <- discreteVariables_as.character(data, discreteVariables = disc)
  index_class <- .nb_find_node(model, class)

  priors <- model[[index_class]]$functions[[1]]$coeff
  # The class states do not depend on the row, so look them up once.
  states <- discreteVariablesStates(class, data2)[[1]]$states
  features <- names(data2)[names(data2) != class]

  n_rows <- nrow(data2)
  best <- integer(n_rows)

  for (i in seq_len(n_rows)) {
    row <- data2[i, , drop = FALSE]
    scores <- numeric(length(priors))

    # For every class state: product of the feature likelihoods and the prior.
    for (j in seq_along(priors)) {
      likelihood <- 1
      for (feature in features) {
        likelihood <- likelihood * getValue(model, row, feature, j)
      }
      scores[j] <- likelihood * priors[j]
    }
    best[i] <- which.max(scores)
  }

  states[best]
}

## Computes p(var | class = state number indexStateClass).
## model           : the model where the distributions are stored
## data            : a data frame with just one row
## var             : the name of the variable whose probability is computed
## indexStateClass : the index of the class state to condition on
## Returns the density value (continuous variable) or the matching
## coefficient (discrete variable).
getValue <- function(model, data, var, indexStateClass) {
  index_var <- .nb_find_node(model, var)
  value <- data[1, which(names(data) == var)]
  px <- model[[index_var]]$functions[[indexStateClass]]$Px

  if (.nb_is_discrete(value)) {
    .nb_coeff_for(px, value)
  } else {
    evalJointFunction(px, value)
  }
}

## ---------------------------------------------------------------------------
## Regression
## ---------------------------------------------------------------------------

## Learns a naive Bayes model from a dataset for regression.
## data     : the data frame
## class    : a string with the name of the class (response) variable
## disc     : a vector with the indices of the discrete variables
## type_pot : "MOP" or "MTE"
## Returns the model produced by MoTBFs_Learning(); plots the network as a
## side effect.
learnMoTBFnb_regression <- function(data, class, disc, type_pot) {
  .nb_learn(data, class, disc, type_pot)
}

## Predicts a continuous class variable with a naive Bayes model for
## regression, using a Monte Carlo estimate of the mean.
## model     : the naive Bayes model
## data      : the data frame with the features
## class     : a string with the name of the class variable
## disc      : a vector with the indices of the discrete variables in `data`
## verbose   : when TRUE, progress messages are written with cat()
## n_samples : number of uniform random draws used for the estimate
## Returns one numeric prediction per row of `data`.
predict.nbReg <- function(model, data, class, disc,
                          verbose = FALSE, n_samples = 3000) {
  data2 <- discreteVariables_as.character(data, discreteVariables = disc)
  index_class <- .nb_find_node(model, class)
  class_density <- model[[index_class]]$functions[[1]]

  # Collect the distinct cut points: every lower limit stored in the model
  # plus the upper limit of the last function of each node.
  cut_points <- model[[1]]$functions[[1]]$interval[1]
  for (node in model) {
    fns <- node$functions
    if (length(fns) == 0) {
      next
    }
    lowers <- unlist(lapply(fns, function(fn) fn$interval[1]))
    upper <- fns[[length(fns)]]$interval[2]
    cut_points <- c(cut_points, lowers, upper)
  }
  cut_points <- sort(unique(cut_points))
  .nb_log(verbose, "Este es el range: ", cut_points)

  n_intervals <- length(cut_points) - 1
  if (n_intervals < 1) {
    stop("The model does not define any interval for the class variable",
         call. = FALSE)
  }
  lower <- cut_points[1]
  upper <- cut_points[length(cut_points)]
  features <- names(data2)[names(data2) != class]

  n_rows <- nrow(data2)
  result <- numeric(n_rows)

  for (i in seq_len(n_rows)) {
    .nb_log(verbose, "Predecimos el dato ", i, "\n")
    row <- data2[i, , drop = FALSE]

    # Constant contributed by the features on each interval of the class.
    weights <- numeric(n_intervals)
    for (j in seq_len(n_intervals)) {
      .nb_log(verbose, "Primer intervalo: ", cut_points[j], "-",
              cut_points[j + 1], "\n")
      likelihood <- 1
      for (feature in features) {
        likelihood <- likelihood * getValues(
          model, row, feature, cut_points[j], cut_points[j + 1],
          verbose = verbose
        )
      }
      weights[j] <- likelihood
    }

    # NOTE(review): the weights are normalised by their plain sum, which does
    # not account for the probability mass of each interval - confirm this is
    # the intended normalisation of the final density.
    weights <- weights / sum(weights)

    # Monte Carlo estimate of the mean over [lower, upper].
    .nb_log(verbose, "YA estamos calculando la media\n")
    draws <- runif(n_samples, lower, upper)
    total <- 0
    for (j in seq_len(n_intervals)) {
      inside <- draws >= cut_points[j] & draws <= cut_points[j + 1]
      for (draw in draws[inside]) {
        total <- total + draw * weights[j] *
          evalJointFunction(class_density, values = list(x = draw))
      }
    }

    # Rescale once, after all intervals have been accumulated. Doing it
    # inside the interval loop rescaled the earlier intervals repeatedly.
    result[i] <- (upper - lower) * total / length(draws)
  }

  result
}

## Computes p(var | c) for c inside the interval [range1, range2] of the
## class variable.
## model   : the model where the distributions are stored
## data    : a data frame with just one row
## var     : the name of the variable whose probability is computed
## range1  : lower limit of the interval for the class variable
## range2  : upper limit of the interval for the class variable
## verbose : when TRUE, reports the matching interval with cat()
## Returns the density value (continuous variable) or the matching
## coefficient (discrete variable). When several stored intervals contain
## [range1, range2], the last one is used.
getValues <- function(model, data, var, range1, range2, verbose = FALSE) {
  index_var <- .nb_find_node(model, var)
  value <- data[1, which(names(data) == var)]
  discrete <- .nb_is_discrete(value)

  sol <- NULL
  for (fn in model[[index_var]]$functions) {
    a <- fn$interval[1]
    b <- fn$interval[2]
    if (range1 >= a && range1 <= b && range2 >= a && range2 <= b) {
      .nb_log(verbose, "Hemos encotrado el intervalo: El del potencial[",
              a, ",", b, "]")
      .nb_log(verbose, " El del calculo[", range1, ",", range2, "]")
      sol <- if (discrete) {
        .nb_coeff_for(fn$Px, value)
      } else {
        evalJointFunction(fn$Px, value)
      }
    }
  }

  if (is.null(sol)) {
    stop("No interval of '", var, "' contains [", range1, ", ", range2, "]",
         call. = FALSE)
  }
  sol
}