Regressão Logística

Author

Ricardo Accioly

Published

October 28, 2025

Carregando Bibliotecas

library(tidyverse)
library(caret)
library(patchwork)
library(pROC)
library(ISLR)
data(Default)
summary(Default)

#>  default    student       balance           income     
#>  No :9667   No :7056   Min.   :   0.0   Min.   :  772  
#>  Yes: 333   Yes:2944   1st Qu.: 481.7   1st Qu.:21340  
#>                        Median : 823.6   Median :34553  
#>                        Mean   : 835.4   Mean   :33517  
#>                        3rd Qu.:1166.3   3rd Qu.:43808  
#>                        Max.   :2654.3   Max.   :73554

str(Default)

#> 'data.frame':    10000 obs. of  4 variables:
#>  $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>  $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
#>  $ balance: num  730 817 1074 529 786 ...
#>  $ income : num  44362 12106 31767 35704 38463 ...

head(Default)

#>   default student   balance    income
#> 1      No      No  729.5265 44361.625
#> 2      No     Yes  817.1804 12106.135
#> 3      No      No 1073.5492 31767.139
#> 4      No      No  529.2506 35704.494
#> 5      No      No  785.6559 38463.496
#> 6      No     Yes  919.5885  7491.559

Manipulando os dados

credito <- tibble(Default)
summary(credito)

#>  default    student       balance           income     
#>  No :9667   No :7056   Min.   :   0.0   Min.   :  772  
#>  Yes: 333   Yes:2944   1st Qu.: 481.7   1st Qu.:21340  
#>                        Median : 823.6   Median :34553  
#>                        Mean   : 835.4   Mean   :33517  
#>                        3rd Qu.:1166.3   3rd Qu.:43808  
#>                        Max.   :2654.3   Max.   :73554

# renomeando colunas
credito <- credito %>% 
                rename( inadimplente = default, estudante = student, balanco = balance,
                receita = income)
credito <- credito %>% mutate( inadimplente =  case_when(
                           inadimplente == "No"  ~ "Nao",
                           inadimplente == "Yes" ~ "Sim"
                          )) %>% mutate(inadimplente = factor(inadimplente))
credito <- credito %>% mutate( estudante =  case_when(
                           estudante == "No"  ~ 0,
                           estudante == "Yes" ~ 1
                          )) 

str(credito)

#> tibble [10,000 × 4] (S3: tbl_df/tbl/data.frame)
#>  $ inadimplente: Factor w/ 2 levels "Nao","Sim": 1 1 1 1 1 1 1 1 1 1 ...
#>  $ estudante   : num [1:10000] 0 1 0 0 0 1 0 1 0 0 ...
#>  $ balanco     : num [1:10000] 730 817 1074 529 786 ...
#>  $ receita     : num [1:10000] 44362 12106 31767 35704 38463 ...

summary(credito)

#>  inadimplente   estudante         balanco          receita     
#>  Nao:9667     Min.   :0.0000   Min.   :   0.0   Min.   :  772  
#>  Sim: 333     1st Qu.:0.0000   1st Qu.: 481.7   1st Qu.:21340  
#>               Median :0.0000   Median : 823.6   Median :34553  
#>               Mean   :0.2944   Mean   : 835.4   Mean   :33517  
#>               3rd Qu.:1.0000   3rd Qu.:1166.3   3rd Qu.:43808  
#>               Max.   :1.0000   Max.   :2654.3   Max.   :73554

Graficos de Densidade

Vamos agora explorar os dados originais para termos algum visão do comportamento das variáveis explicativas e a variável dependente.

featurePlot(x = credito[, c("balanco", "receita", "estudante")], 
            y = credito$inadimplente,
            plot = "density", 
            scales = list(x = list(relation = "free"), 
                          y = list(relation = "free")), 
            adjust = 1.5, 
            pch = "|", 
            layout = c(2, 1), 
            auto.key = list(columns = 2))

Avaliando o comportamento das variáveis em função do status (inadimplente / estudante)

p1 <- ggplot(credito, aes(x=inadimplente, y=balanco, color=inadimplente)) +
  geom_boxplot()
p2 <- ggplot(credito, aes(x=inadimplente, y=receita, color=inadimplente)) +
  geom_boxplot()
p3 <- ggplot(credito, aes(x=as.factor(estudante), y=balanco, color=as.factor(estudante))) +
  geom_boxplot()
p4 <- ggplot(credito, aes(x=as.factor(estudante), y=receita, color=as.factor(estudante))) +
  geom_boxplot()
(p1 + p2) / (p3 + p4)

Balanço vs Receita

ggplot(data = credito, aes(x=balanco,  y = receita, col = inadimplente)) +
  geom_point()

Avaliando comportamento

# proporção de inadimplentes
credito %>% select(inadimplente, balanco) %>% summarize(prop = mean(inadimplente == "Sim"))

#> # A tibble: 1 × 1
#>     prop
#>    <dbl>
#> 1 0.0333

# media do balanço dos inadimplentes 
credito %>% filter(inadimplente == "Sim") %>% summarize(valor= mean(balanco))

#> # A tibble: 1 × 1
#>   valor
#>   <dbl>
#> 1 1748.

quantis <- quantile(credito$balanco, probs = c(.1,.25, .50, .75, .9, .95, 0.97, 0.99))
quantis

#>       10%       25%       50%       75%       90%       95%       97%       99% 
#>  180.5753  481.7311  823.6370 1166.3084 1471.6253 1665.9626 1793.2910 2008.4709

credito %>% 
            mutate(grupo_balanco = case_when(
               balanco<=quantis[1] ~ quantis[1],
               balanco>quantis[1] & balanco<=quantis[2] ~ quantis[2],
               balanco>quantis[2] & balanco<=quantis[3]  ~ quantis[3],
               balanco>quantis[3] & balanco<=quantis[4]  ~ quantis[4],
               balanco>quantis[4] & balanco<=quantis[5]  ~ quantis[5],
               balanco>quantis[5] & balanco<=quantis[6]  ~ quantis[6],
               balanco>quantis[6] & balanco<=quantis[7]  ~ quantis[7],
               balanco>quantis[7] ~ quantis[8])) %>%
           group_by(grupo_balanco) %>%
           summarize(prop = mean(inadimplente == "Sim")) %>%
           ggplot(aes(grupo_balanco, prop)) +
           geom_point() +
           geom_line()

Treino e Teste

set.seed(2025)
y <- credito$inadimplente
credito_split <- createDataPartition(y, times = 1, p = 0.10, list = FALSE)

conj_treino <- credito[-credito_split,]
conj_teste <- credito[credito_split,]
                           
summary(conj_treino)

#>  inadimplente   estudante         balanco          receita     
#>  Nao:8700     Min.   :0.0000   Min.   :   0.0   Min.   :  772  
#>  Sim: 299     1st Qu.:0.0000   1st Qu.: 483.6   1st Qu.:21343  
#>               Median :0.0000   Median : 821.3   Median :34576  
#>               Mean   :0.2946   Mean   : 836.3   Mean   :33556  
#>               3rd Qu.:1.0000   3rd Qu.:1167.1   3rd Qu.:43854  
#>               Max.   :1.0000   Max.   :2654.3   Max.   :73554

summary(conj_teste)

#>  inadimplente   estudante         balanco          receita     
#>  Nao:967      Min.   :0.0000   Min.   :   0.0   Min.   : 5386  
#>  Sim: 34      1st Qu.:0.0000   1st Qu.: 453.6   1st Qu.:21319  
#>               Median :0.0000   Median : 839.9   Median :34086  
#>               Mean   :0.2927   Mean   : 827.4   Mean   :33170  
#>               3rd Qu.:1.0000   3rd Qu.:1156.3   3rd Qu.:42856  
#>               Max.   :1.0000   Max.   :2221.0   Max.   :70701

1a Regressão logística: só balanço

mod1 <- glm(inadimplente ~ balanco,data=conj_treino,family=binomial)
summary(mod1)

#> 
#> Call:
#> glm(formula = inadimplente ~ balanco, family = binomial, data = conj_treino)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -1.081e+01  3.900e-01  -27.72   <2e-16 ***
#> balanco      5.594e-03  2.375e-04   23.55   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 2623.8  on 8998  degrees of freedom
#> Residual deviance: 1415.7  on 8997  degrees of freedom
#> AIC: 1419.7
#> 
#> Number of Fisher Scoring iterations: 8

coef(mod1)

#>   (Intercept)       balanco 
#> -10.810958936   0.005594178

summary(mod1)$coef

#>                  Estimate   Std. Error   z value      Pr(>|z|)
#> (Intercept) -10.810958936 0.3900304107 -27.71825 4.208540e-169
#> balanco       0.005594178 0.0002374991  23.55452 1.128292e-122

Avaliando o modelo

p_chapeu <- predict(mod1, newdata = conj_teste, type = "response")
y_chapeu <- ifelse(p_chapeu > 0.5, "Sim", "Nao") %>% factor(levels = levels(conj_teste$inadimplente))
confusionMatrix(y_chapeu, conj_teste$inadimplente, positive="Sim")

#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction Nao Sim
#>        Nao 958  25
#>        Sim   9   9
#>                                           
#>                Accuracy : 0.966           
#>                  95% CI : (0.9529, 0.9764)
#>     No Information Rate : 0.966           
#>     P-Value [Acc > NIR] : 0.5454          
#>                                           
#>                   Kappa : 0.3304          
#>                                           
#>  Mcnemar's Test P-Value : 0.0101          
#>                                           
#>             Sensitivity : 0.264706        
#>             Specificity : 0.990693        
#>          Pos Pred Value : 0.500000        
#>          Neg Pred Value : 0.974568        
#>              Prevalence : 0.033966        
#>          Detection Rate : 0.008991        
#>    Detection Prevalence : 0.017982        
#>       Balanced Accuracy : 0.627699        
#>                                           
#>        'Positive' Class : Sim             
#>

Veja as probabilidade de inadimplencia para balanços de 1000, 2000 e 3000

predict(mod1, newdata = data.frame(balanco = c(1000,2000,3000)), type="response")

#>           1           2           3 
#> 0.005395494 0.593245119 0.997456265

Curva S

# Mostrar a curva S com o resultado da regressão logística
# Salvar o gráfico em um arquivo .png
png("regressao_logistica.png", width = 800, height = 600)
inadimpl <- as.numeric(conj_treino$inadimplente) - 1
plot(inadimpl ~ balanco, data = conj_treino, col = "darkorange", pch = "|", ylim = c(0, 1), main = "Regressão Logistica - Classificacão")
abline(h = 0, lty = 3)
abline(h = 1, lty = 3)
abline(h = 0.5, lty = 2)
curve(predict(mod1, data.frame(balanco = x), type = "response"), add = TRUE, lwd = 3, col = "dodgerblue")
abline(v = -coef(mod1)[1] / coef(mod1)[2], lwd = 2)
dev.off()

#> png 
#>   2

knitr::include_graphics("regressao_logistica.png")

Valor de balanço com probabilidade de 50%

-\(\beta_0\)/\(\beta_1\)

-coef(mod1)[1] / coef(mod1)[2]

#> (Intercept) 
#>    1932.538

2a Regressão logística: todas as variáveis

mod2 <- glm(inadimplente ~ balanco + receita + estudante,data=conj_treino,family=binomial)
summary(mod2)

#> 
#> Call:
#> glm(formula = inadimplente ~ balanco + receita + estudante, family = binomial, 
#>     data = conj_treino)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -1.100e+01  5.277e-01 -20.850   <2e-16 ***
#> balanco      5.810e-03  2.488e-04  23.356   <2e-16 ***
#> receita      2.751e-06  8.799e-06   0.313   0.7546    
#> estudante   -5.942e-01  2.516e-01  -2.362   0.0182 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 2623.8  on 8998  degrees of freedom
#> Residual deviance: 1396.8  on 8995  degrees of freedom
#> AIC: 1404.8
#> 
#> Number of Fisher Scoring iterations: 8

coef(mod2)

#>   (Intercept)       balanco       receita     estudante 
#> -1.100205e+01  5.810150e-03  2.750839e-06 -5.942048e-01

summary(mod2)$coef

#>                  Estimate   Std. Error     z value      Pr(>|z|)
#> (Intercept) -1.100205e+01 5.276811e-01 -20.8498149  1.530134e-96
#> balanco      5.810150e-03 2.487659e-04  23.3558946 1.200621e-120
#> receita      2.750839e-06 8.798705e-06   0.3126414  7.545531e-01
#> estudante   -5.942048e-01 2.515801e-01  -2.3618911  1.818198e-02

É possível se ver que receita não é significativa

3a Regressão Logística (sem receita)

mod3 <- glm(inadimplente ~ balanco + estudante,data=conj_treino,family=binomial)
summary(mod3)

#> 
#> Call:
#> glm(formula = inadimplente ~ balanco + estudante, family = binomial, 
#>     data = conj_treino)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -1.089e+01  3.974e-01 -27.416  < 2e-16 ***
#> balanco      5.812e-03  2.487e-04  23.368  < 2e-16 ***
#> estudante   -6.561e-01  1.550e-01  -4.234  2.3e-05 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 2623.8  on 8998  degrees of freedom
#> Residual deviance: 1396.9  on 8996  degrees of freedom
#> AIC: 1402.9
#> 
#> Number of Fisher Scoring iterations: 8

coef(mod3)

#>   (Intercept)       balanco     estudante 
#> -10.894208021   0.005811976  -0.656054023

summary(mod3)$coef

#>                  Estimate   Std. Error   z value      Pr(>|z|)
#> (Intercept) -10.894208021 0.3973729389 -27.41558 1.788553e-165
#> balanco       0.005811976 0.0002487146  23.36806 9.031818e-121
#> estudante    -0.656054023 0.1549533866  -4.23388  2.296937e-05

Comparando os modelos

anova(mod2,mod3,test='LR')

#> Analysis of Deviance Table
#> 
#> Model 1: inadimplente ~ balanco + receita + estudante
#> Model 2: inadimplente ~ balanco + estudante
#>   Resid. Df Resid. Dev Df  Deviance Pr(>Chi)
#> 1      8995     1396.8                      
#> 2      8996     1396.9 -1 -0.097739   0.7546

StepAIC

Ao invé de usarmos a estatística de Wald para selecionar as variáveis significativas, podemos usar o AIC (equivalente ao Cp) como usamos na regressão múltipla para selecionar as variáveis explicativas.

A função stepAIC tem um parametro k que define se vamos usar o AIC ou o BIC para fazer a seleção. Quando k=2 temos o AIC e quando k=log(n) temos o BIC.

library(MASS)
mod3a <- stepAIC(mod2, k=2, trace=FALSE)
summary(mod3a)

#> 
#> Call:
#> glm(formula = inadimplente ~ balanco + estudante, family = binomial, 
#>     data = conj_treino)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -1.089e+01  3.974e-01 -27.416  < 2e-16 ***
#> balanco      5.812e-03  2.487e-04  23.368  < 2e-16 ***
#> estudante   -6.561e-01  1.550e-01  -4.234  2.3e-05 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 2623.8  on 8998  degrees of freedom
#> Residual deviance: 1396.9  on 8996  degrees of freedom
#> AIC: 1402.9
#> 
#> Number of Fisher Scoring iterations: 8

(k <- log(nrow(conj_treino)))

#> [1] 9.104869

mod3b <- stepAIC(mod2, k=k, trace=FALSE)
summary(mod3b)

#> 
#> Call:
#> glm(formula = inadimplente ~ balanco + estudante, family = binomial, 
#>     data = conj_treino)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept) -1.089e+01  3.974e-01 -27.416  < 2e-16 ***
#> balanco      5.812e-03  2.487e-04  23.368  < 2e-16 ***
#> estudante   -6.561e-01  1.550e-01  -4.234  2.3e-05 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 2623.8  on 8998  degrees of freedom
#> Residual deviance: 1396.9  on 8996  degrees of freedom
#> AIC: 1402.9
#> 
#> Number of Fisher Scoring iterations: 8

Avaliando o modelo novamente

p_chapeu <- predict(mod3, newdata = conj_teste, type = "response")
y_chapeu <- ifelse(p_chapeu > 0.5, "Sim", "Nao") %>% factor(levels = levels(conj_teste$inadimplente))
confusionMatrix(y_chapeu, conj_teste$inadimplente, positive="Sim")

#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction Nao Sim
#>        Nao 960  24
#>        Sim   7  10
#>                                           
#>                Accuracy : 0.969           
#>                  95% CI : (0.9563, 0.9789)
#>     No Information Rate : 0.966           
#>     P-Value [Acc > NIR] : 0.339451        
#>                                           
#>                   Kappa : 0.3781          
#>                                           
#>  Mcnemar's Test P-Value : 0.004057        
#>                                           
#>             Sensitivity : 0.29412         
#>             Specificity : 0.99276         
#>          Pos Pred Value : 0.58824         
#>          Neg Pred Value : 0.97561         
#>              Prevalence : 0.03397         
#>          Detection Rate : 0.00999         
#>    Detection Prevalence : 0.01698         
#>       Balanced Accuracy : 0.64344         
#>                                           
#>        'Positive' Class : Sim             
#>

Mudando a probabilidade (limite) para aumentar a sensibilidade

p_chapeu <- predict(mod3, newdata = conj_teste, type = "response")
y_chapeu <- ifelse(p_chapeu > 0.1, "Sim", "Nao") %>% 
             factor(levels = levels(conj_teste$inadimplente))
confusionMatrix(y_chapeu, conj_teste$inadimplente, positive="Sim")

#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction Nao Sim
#>        Nao 912  11
#>        Sim  55  23
#>                                           
#>                Accuracy : 0.9341          
#>                  95% CI : (0.9169, 0.9486)
#>     No Information Rate : 0.966           
#>     P-Value [Acc > NIR] : 1               
#>                                           
#>                   Kappa : 0.3815          
#>                                           
#>  Mcnemar's Test P-Value : 1.204e-07       
#>                                           
#>             Sensitivity : 0.67647         
#>             Specificity : 0.94312         
#>          Pos Pred Value : 0.29487         
#>          Neg Pred Value : 0.98808         
#>              Prevalence : 0.03397         
#>          Detection Rate : 0.02298         
#>    Detection Prevalence : 0.07792         
#>       Balanced Accuracy : 0.80980         
#>                                           
#>        'Positive' Class : Sim             
#>

Curva ROC modelo só com balanço

p_chapeu_log <- predict(mod1, newdata = conj_teste, type = "response")
head(p_chapeu_log)

#>            1            2            3            4            5            6 
#> 0.0104785745 0.0002668725 0.0029780343 0.0042745671 0.0073908814 0.0005687871

roc_log <- roc(conj_teste$inadimplente ~ p_chapeu_log, print.auc=FALSE)

# Visualização com ggroc
ggroc(roc_log) +
  ggplot2::labs(title = "ROC - Regressão Logística", x = "1 - Especificidade", y = "Sensibilidade") +
  ggplot2::theme_minimal()

# Area debaixo da curva
as.numeric(roc_log$auc)

#> [1] 0.9291015

Curva ROC 2: Modelo com balanço + estudante

p_chapeu_log <- predict(mod3, newdata = conj_teste, type = "response")
head(p_chapeu_log)

#>            1            2            3            4            5            6 
#> 0.0064107246 0.0002715269 0.0017294056 0.0048430770 0.0085502222 0.0005960041

roc_log2 <- roc(conj_teste$inadimplente ~ p_chapeu_log, print.auc=FALSE)

# Visualização com ggroc
ggroc(roc_log2) +
  ggplot2::labs(title = "ROC - Regressão Logística", x = "1 - Especificidade", y = "Sensibilidade") +
  ggplot2::theme_minimal()

AUC

# Area debaixo da curva
as.numeric(roc_log2$auc)

#> [1] 0.9325385

Melhor limite

m_limite <- coords(roc_log2, "best", ret = "threshold")$threshold
m_limite

#> [1] 0.03721192

p_chapeu <- predict(mod3, newdata = conj_teste, type = "response")
y_chapeu <- ifelse(p_chapeu > m_limite, "Sim", "Nao") %>% 
             factor(levels = levels(conj_teste$inadimplente))
confusionMatrix(y_chapeu, conj_teste$inadimplente, positive="Sim")

#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction Nao Sim
#>        Nao 857   5
#>        Sim 110  29
#>                                           
#>                Accuracy : 0.8851          
#>                  95% CI : (0.8637, 0.9042)
#>     No Information Rate : 0.966           
#>     P-Value [Acc > NIR] : 1               
#>                                           
#>                   Kappa : 0.2969          
#>                                           
#>  Mcnemar's Test P-Value : <2e-16          
#>                                           
#>             Sensitivity : 0.85294         
#>             Specificity : 0.88625         
#>          Pos Pred Value : 0.20863         
#>          Neg Pred Value : 0.99420         
#>              Prevalence : 0.03397         
#>          Detection Rate : 0.02897         
#>    Detection Prevalence : 0.13886         
#>       Balanced Accuracy : 0.86959         
#>                                           
#>        'Positive' Class : Sim             
#>

Duas ROCs juntas

# Visualização com ggroc
ggroc(list(reglog1= roc_log, reglog2= roc_log2)) +
  ggplot2::labs(title = "ROC - Regressão Logística", x = "1 - Especificidade", y = "Sensibilidade") +
  ggplot2::theme_minimal()

# Area debaixo da curva
as.numeric(roc_log$auc)

#> [1] 0.9291015

# Area debaixo da curva
as.numeric(roc_log2$auc)

#> [1] 0.9325385

Curva ROC 3 com o KNN

# Ajustando KNN 
conj_treino[,3:4] <- scale(conj_treino[,3:4]) # scale normaliza
conj_teste[,3:4] <- scale(conj_teste[, 3:4])
set.seed(2025)
treina_knn <- knn3(inadimplente ~ balanco + receita + estudante, data = conj_treino, k = 20)
# treina_knn
prev_knn <- predict(treina_knn, conj_teste,type = "prob")

## ROC
roc_log2 <- roc(conj_teste$inadimplente ~ p_chapeu_log, print.auc=FALSE) 
roc_knn1 <- roc(conj_teste$inadimplente ~ prev_knn[,2], print.auc=FALSE)

# Visualização com ggroc
ggroc(list(KNN= roc_knn1, RegLog= roc_log2)) +
  ggplot2::labs(title = "ROC - KNN vs Regressão Logística", x = "1 - Especificidade", y = "Sensibilidade") +
  ggplot2::theme_minimal()

# Area abaixo da curva
# Regressão Logística
as.numeric(roc_log2$auc)

#> [1] 0.9325385

## KNN
as.numeric(roc_knn1$auc)

#> [1] 0.9041304

Reprodutibilidade

sessionInfo()

#> R version 4.5.1 (2025-06-13 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 11 x64 (build 26200)
#> 
#> Matrix products: default
#>   LAPACK version 3.12.1
#> 
#> locale:
#> [1] LC_COLLATE=Portuguese_Brazil.utf8  LC_CTYPE=Portuguese_Brazil.utf8   
#> [3] LC_MONETARY=Portuguese_Brazil.utf8 LC_NUMERIC=C                      
#> [5] LC_TIME=Portuguese_Brazil.utf8    
#> 
#> time zone: America/Sao_Paulo
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] MASS_7.3-65     ISLR_1.4        pROC_1.19.0.1   patchwork_1.3.1
#>  [5] caret_7.0-1     lattice_0.22-7  lubridate_1.9.4 forcats_1.0.0  
#>  [9] stringr_1.5.1   dplyr_1.1.4     purrr_1.1.0     readr_2.1.5    
#> [13] tidyr_1.3.1     tibble_3.3.0    ggplot2_3.5.2   tidyverse_2.0.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.6         xfun_0.52            htmlwidgets_1.6.4   
#>  [4] recipes_1.3.1        tzdb_0.5.0           vctrs_0.6.5         
#>  [7] tools_4.5.1          generics_0.1.4       stats4_4.5.1        
#> [10] parallel_4.5.1       proxy_0.4-27         ModelMetrics_1.2.2.2
#> [13] pkgconfig_2.0.3      Matrix_1.7-3         data.table_1.17.8   
#> [16] RColorBrewer_1.1-3   lifecycle_1.0.4      compiler_4.5.1      
#> [19] farver_2.1.2         codetools_0.2-20     htmltools_0.5.8.1   
#> [22] class_7.3-23         yaml_2.3.10          prodlim_2025.04.28  
#> [25] pillar_1.11.0        gower_1.0.2          iterators_1.0.14    
#> [28] rpart_4.1.24         foreach_1.5.2        nlme_3.1-168        
#> [31] parallelly_1.45.1    lava_1.8.1           tidyselect_1.2.1    
#> [34] digest_0.6.37        stringi_1.8.7        future_1.67.0       
#> [37] reshape2_1.4.4       listenv_0.9.1        labeling_0.4.3      
#> [40] splines_4.5.1        fastmap_1.2.0        grid_4.5.1          
#> [43] cli_3.6.5            magrittr_2.0.3       dichromat_2.0-0.1   
#> [46] survival_3.8-3       e1071_1.7-16         future.apply_1.20.0 
#> [49] withr_3.0.2          scales_1.4.0         timechange_0.3.0    
#> [52] rmarkdown_2.29       globals_0.18.0       nnet_7.3-20         
#> [55] timeDate_4041.110    hms_1.1.3            evaluate_1.0.4      
#> [58] knitr_1.50           hardhat_1.4.1        rlang_1.1.6         
#> [61] Rcpp_1.1.0           glue_1.8.0           ipred_0.9-15        
#> [64] rstudioapi_0.17.1    jsonlite_2.0.0       R6_2.6.1            
#> [67] plyr_1.8.9