A kódoláshoz ChatGPT-t is használtunk
adatelÅ‘készÃtés (benne numerikus változók kiválasztása, standardizálás, mintavétel-az eredetiben messze túl sok megfigyelés volt, még Ãgy is nagyon lassú volt)
set.seed(2024)
library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(GenSA)
## Warning: package 'GenSA' was built under R version 4.3.3
loan_data <- read.csv("C:/Users/BF/Downloads/loan_data.csv")
szurt<-loan_data[, c(1,4,5,7,9:12,14)]
random_indices <- sample(1:nrow(szurt), size = 3000, replace = FALSE)
sampled_df <- szurt[random_indices, ]
szurt_st<-data.frame(scale(sampled_df[,1:8]),sampled_df$loan_status)
summary(szurt_st$sampled_df.loan_status)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2273 0.0000 1.0000
table(szurt_st$sampled_df.loan_status)
##
## 0 1
## 2318 682
cor(as.numeric(szurt_st$predicted_1),as.numeric(szurt_st$predicted_legjobb))
## [1] NA
szurt_st <- data.frame(szurt_st, y=ifelse(szurt_st$sampled_df.loan_status=="1", 1, -1))
averages <- sampled_df %>%
group_by(loan_status) %>%
summarise(across(everything(), mean, na.rm = TRUE))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(everything(), mean, na.rm = TRUE)`.
## ℹ In group 1: `loan_status = 0`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
set.seed(2024)
svm1 <- svm(as.factor(y)~loan_int_rate+loan_percent_income, data=szurt_st, kernel="radial")
plot(svm1, loan_int_rate~loan_percent_income, data=szurt_st, grid=1000, svSymbol="o")
szurt_st$predicted_1<-predict(svm1,szurt_st)
table(szurt_st$predicted_1,szurt_st$sampled_df.loan_status)
##
## 0 1
## -1 2182 349
## 1 136 333
set.seed(2024)
svm2 <- svm(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, kernel="radial")
plot(svm2, loan_int_rate~loan_percent_income, data=szurt_st, grid=1000, svSymbol="o")
szurt_st$predicted_2<-predict(svm2,szurt_st)
table(szurt_st$predicted_2,szurt_st$sampled_df.loan_status)
##
## 0 1
## -1 2228 376
## 1 90 306
hiperparaméter hangolás 1
set.seed(2024)
keresztval <- trainControl(method="cv", number=10)
racs1 <- expand.grid(C=2^(0:4), sigma=2^(-4:4))
hang1 <- train(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, tuneGrid=racs1, method="svmRadial")
eredmeny1 <- hang1$results
ggplot(eredmeny1, aes(x=as.factor(C), y=as.factor(sigma), fill=Accuracy))+geom_tile()
hiperparaméter hangolás 2
set.seed(2024)
racs2 <- expand.grid(C=2^(-3:1), sigma=2^(-6:-2))
hang2 <- train(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, tuneGrid=racs2, method="svmRadial")
eredmeny2 <- hang2$results
ggplot(eredmeny2, aes(x=as.factor(C), y=as.factor(sigma), fill=Accuracy))+geom_tile()
hiperparaméter hangolás 3
set.seed(2024)
racs3 <- expand.grid(C=2^seq(from=-1, to=1, by=0.5), sigma=2^seq(from=-4, to=-1, by=0.5))
hang3 <- train(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, tuneGrid=racs3, method="svmRadial")
eredmeny3 <- hang3$results
ggplot(eredmeny3, aes(x=as.factor(C), y=as.factor(sigma), fill=Accuracy))+geom_tile()
legjobb modell manuális hiperparaméter hangolás
set.seed(2024)
svm_legjobb<-svm(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, kernel="radial", cost=1.4142136, sigma=0.125)
plot(svm_legjobb, loan_int_rate~loan_percent_income, data=szurt_st, grid=1000, svSymbol="o")
szurt_st$predicted_legjobb<-predict(svm_legjobb,szurt_st)
table(szurt_st$predicted_legjobb,szurt_st$sampled_df.loan_status)
##
## 0 1
## -1 2228 363
## 1 90 319
automatikus hiperparaméterhangolás
set.seed(2024)
svm_cv <- function(hiperparam){
modell <- svm(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, kernel="radial", cost=hiperparam[1], sigma=hiperparam[2], cross=10)
return(-mean(modell$tot.accuracy))}
szim_hutes <- GenSA(fn=svm_cv, lower=c(0.01, 0.01), upper=c(10, 10), control=list(maxit=100))
szim_hutes$par
## [1] 4.7641574 0.7125827
-szim_hutes$value
## [1] 83.7
legjobb modell automatikus hiperparaméter hangolással
set.seed(2024)
svm_legjobb_2<-svm(as.factor(y)~person_age+person_income+person_emp_exp+loan_amnt+loan_int_rate+loan_percent_income+cb_person_cred_hist_length+credit_score, data=szurt_st, kernel="radial", cost=4.7641574, sigma=0.7125827)
plot(svm_legjobb_2, loan_int_rate~loan_percent_income, data=szurt_st, grid=1000, svSymbol="o")
szurt_st$predicted_legjobb_2<-predict(svm_legjobb_2,szurt_st)
table(szurt_st$predicted_legjobb_2,szurt_st$sampled_df.loan_status)
##
## 0 1
## -1 2237 342
## 1 81 340