#
# SOLUTIONS
#
##########################################################
#
# Helper functions (copied from the course script)
#

# Convert observed and predicted labels to character vectors and keep only
# the pairs in which both labels are present (table() drops NA pairs too).
.paired_labels <- function(obs, pred) {
  stopifnot(length(obs) == length(pred))
  obs <- as.character(obs)
  pred <- as.character(pred)
  complete <- !is.na(obs) & !is.na(pred)
  list(obs = obs[complete], pred = pred[complete])
}

# Classification accuracy: the share of cases whose predicted label equals
# the observed label.
#
# obs, pred: label vectors (factor or atomic) of equal length.
#
# Labels are compared directly rather than through diag(table(obs, pred)):
# when `pred` does not contain every class the table is not square and its
# diagonal no longer lines up with the correctly classified cases.
CA <- function(obs, pred) {
  labels <- .paired_labels(obs, pred)
  mean(labels$obs == labels$pred)
}

# Sensitivity (true positive rate) with respect to `pos.class`: the share of
# observed positives that were also predicted as positive.
# Returns NaN when there are no observed positives.
Sensitivity <- function(obs, pred, pos.class) {
  labels <- .paired_labels(obs, pred)
  is_pos <- labels$obs == pos.class
  sum(is_pos & labels$pred == pos.class) / sum(is_pos)
}

# Specificity (true negative rate) with respect to `pos.class`: the share of
# observed non-positives that were predicted as anything but `pos.class`.
# Returns NaN when there are no observed negatives.
Specificity <- function(obs, pred, pos.class) {
  labels <- .paired_labels(obs, pred)
  is_neg <- labels$obs != pos.class
  sum(is_neg & labels$pred != pos.class) / sum(is_neg)
}

# Brier score: the sum of squared differences between the class-indicator
# matrix `obsMat` and the predicted-probability matrix `predMat`, divided by
# the number of rows of `predMat`.
brier.score <- function(obsMat, predMat) {
  sum((obsMat - predMat)^2) / nrow(predMat)
}

#########################################################

library(rpart)
library(rpart.plot)
library(nnet)

mdata <- read.table("movies.txt", header = TRUE, sep = ",")

# Columns 18 to 24 and "mpaa" are treated as categorical attributes.
for (i in 18:24) {
  mdata[, i] <- as.factor(mdata[, i])
}
mdata$mpaa <- as.factor(mdata$mpaa)

mdata$title <- NULL
mdata$budget <- NULL

summary(mdata)

# Movies made before 2004 form the learning set, the rest the test set.
learn <- mdata[mdata$year < 2004, ]
test <- mdata[mdata$year >= 2004, ]

# The "year" attribute is of no further use, so it is removed.
learn$year <- NULL
test$year <- NULL

# Decision tree that predicts Comedy from all remaining attributes.
dt <- rpart(Comedy ~ ., data = learn)
rpart.plot(dt)

observed <- test$Comedy
predicted <- predict(dt, test, type = "class")

CA(observed, predicted)
Sensitivity(observed, predicted, "1")
Specificity(observed, predicted, "1")

# Class probabilities are needed for the Brier score and for the ROC curve,
# so they are computed once and reused.
predMat <- predict(dt, test, type = "prob")
obsMat <- class.ind(test$Comedy)
brier.score(obsMat, predMat)

# Candidate thresholds: the midpoints between consecutive distinct predicted
# probabilities of class "1", plus -Inf and Inf. Sorting them in decreasing
# order makes the curve start with "nothing predicted as 1" and end with
# "everything predicted as 1".
p <- unique(sort(predMat[, "1"]))
threshVec <- sort(c(-Inf, p[-length(p)] + diff(p) / 2, Inf), decreasing = TRUE)

# Preallocate the result vectors instead of growing them inside the loop.
sensVec <- numeric(length(threshVec))
specVec <- numeric(length(threshVec))

for (k in seq_along(threshVec)) {
  th <- threshVec[k]
  predicted <- ifelse(predMat[, "1"] > th, "1", "0")
  predicted <- factor(predicted, levels = levels(mdata$Comedy))

  sensVec[k] <- Sensitivity(observed, predicted, "1")
  specVec[k] <- Specificity(observed, predicted, "1")
}

plot(
  x = 1 - specVec, y = sensVec, type = "l",
  xlab = "1-Specificity", ylab = "Sensitivity", main = "ROC krivulja"
)
# Grey diagonal with intercept 0 and slope 1 as a reference line.
abline(a = 0, b = 1, col = "grey")

# ROC curves can also be drawn with the pROC library
library(pROC)

# Build the ROC object from the true Comedy labels of the test set and the
# predicted probability of class "1", then draw the curve.
rocobj <- roc(response = test$Comedy, predictor = predMat[, "1"])
plot(rocobj)

################################################################################
#
# Solution of the calculation exercise:
#
#
# a) classification accuracy:
#
#    (300 + 120) / (300 + 0 + 80 + 120) = 0.84
#
#
# b) expected accuracy of the majority classifier:
#
#    the majority class is "0"
#
#    confusion matrix of the majority classifier
#    (rows: actual class, columns: predicted class):
#
#        0   1
#    --+---+---+
#    0 |300| 0 |
#    --+---+---+
#    1 |200| 0 |
#    --+---+---+
#
#    300 / 500 = 0.6
#
# c) sensitivity ("0" is the positive class)
#
#    TP / POS = 300 / 300 = 1
#
# d) specificity ("0" is the positive class)
#
#    TN / NEG = 120 / 200 = 0.6
#
################################################################################