Chapter 18 Multivariate distributions
18.1 Overview
18.1.1 Variable correlation
Compute correlations between the independent variables or their suggested transformations.
# Names of all candidate predictors: the union of the variable groups stored
# in `bact_transformed`, with duplicates removed.
variables <- unique(c(
  bact_transformed$demog_vars,
  bact_transformed$pivotal_vars,
  bact_transformed$vip_vars,
  bact_transformed$leuko_related_vars,
  bact_transformed$leuko_ratio_vars,
  bact_transformed$kidney_related_vars,
  bact_transformed$acute_related_vars,
  bact_transformed$remaining_vars
))
# Pearson correlation matrix of the selected variables; each pairwise
# correlation uses all observations that are complete for that pair.
corrp <- c_bact %>%
  dplyr::select(all_of(variables)) %>%
  cor(use = "pairwise.complete.obs", method = "pearson")
# Spearman (rank) correlation matrix of the selected variables; each pairwise
# correlation uses all observations that are complete for that pair.
corrs <- c_bact %>%
  dplyr::select(all_of(variables)) %>%
  cor(use = "pairwise.complete.obs", method = "spearman")
# Element-wise difference between the Pearson and Spearman correlation
# matrices; large differences are used as a check for outliers.
corrd <- corrp - corrs
Next, we display the Pearson correlation coefficients as a square heat map:
# Heat map of the Pearson correlation matrix; small, vertically rotated
# axis labels keep the variable names legible.
ggcorrplot(
  corr = corrp,
  tl.cex = 5,
  tl.srt = 90
)
Explore whether there are clusters of variables. Such clusters may suggest groups of variables for which only a summary or a single representative needs to be considered in modeling:
# Cluster the candidate predictors with Hmisc::varclus() and plot the
# resulting clustering.
vc_bact <- Hmisc::varclus(as.matrix(c_bact[, variables]))
plot(vc_bact, cex = 0.7)
Some of the clusters that pop up here are related to width/volume of blood cells (MPV, PDW) and red blood cells (RBC, HGB, HCT; MCV, MCH), as well as some further ‘known’ associations such as that between KREA and eGFR (which follows from the construction of eGFR), between ASAT and ALAT, between AMY and PAMY, and between TP and ALB.
In the following scatterplots we have a look at those associations:
# Scatterplot of PDW against MPV with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = MPV, y = PDW)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1102 rows containing non-finite values (stat_smooth).
## Warning: Removed 1102 rows containing missing values (geom_point).
# Scatterplot of HGB against RBC with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = RBC, y = HGB)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 461 rows containing non-finite values (stat_smooth).
## Warning: Removed 461 rows containing missing values (geom_point).
# Scatterplot of HCT against RBC with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = RBC, y = HCT)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 461 rows containing non-finite values (stat_smooth).
## Removed 461 rows containing missing values (geom_point).
# Scatterplot of HCT against HGB with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = HGB, y = HCT)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 42 rows containing non-finite values (stat_smooth).
## Warning: Removed 42 rows containing missing values (geom_point).
# Scatterplot of MCH against MCV with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = MCV, y = MCH)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 42 rows containing non-finite values (stat_smooth).
## Removed 42 rows containing missing values (geom_point).
# Scatterplot of eGFR against t_KREA with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = t_KREA, y = eGFR)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 159 rows containing non-finite values (stat_smooth).
## Warning: Removed 159 rows containing missing values (geom_point).
# Scatterplot of t_ALAT against t_ASAT with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = t_ASAT, y = t_ALAT)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1195 rows containing non-finite values (stat_smooth).
## Warning: Removed 1195 rows containing missing values (geom_point).
# Scatterplot of t_PAMY against t_AMY with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = t_AMY, y = t_PAMY)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7182 rows containing non-finite values (stat_smooth).
## Warning: Removed 7182 rows containing missing values (geom_point).
# Scatterplot of t_NEU against t_WBC with a smoothed trend.
ggplot(data = c_bact, mapping = aes(x = t_WBC, y = t_NEU)) +
  geom_point(shape = 20, alpha = alpha_value) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 728 rows containing non-finite values (stat_smooth).
## Warning: Removed 728 rows containing missing values (geom_point).
Create scatterplots for pairs of variables with a large distance between Spearman and Pearson correlations (could be an indication of nonlinear association):
# Draw a scatterplot with a smoothed trend for every pair of variables whose
# Pearson and Spearman correlations differ by more than 0.1 in absolute value.
# `corrd` is indexed by position, so its rows/columns must follow the order of
# `variables`.
# Dropping the last index (instead of `1:(n - 1)`) keeps the outer loop empty
# when fewer than two variables are available.
for (j in seq_along(variables)[-length(variables)]) {
  for (jj in (j + 1):length(variables)) {
    # isTRUE() skips pairs whose correlation difference is NA instead of
    # stopping with an error in `if`.
    if (isTRUE(abs(corrd[j, jj]) > 0.1)) {
      print(
        ggplot(
          data = c_bact,
          mapping = aes(x = .data[[variables[j]]], y = .data[[variables[jj]]])
        ) +
          geom_point(alpha = alpha_value, shape = 20) +
          geom_smooth()
      )
    }
  }
}
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 174 rows containing non-finite values (stat_smooth).
## Warning: Removed 174 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1279 rows containing non-finite values (stat_smooth).
## Warning: Removed 1279 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Warning: Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1580 rows containing non-finite values (stat_smooth).
## Warning: Removed 1580 rows containing missing values (geom_point).
18.1.3 Distribution of leukocytes by age, coloured by sex
# Recode sex (1/2) into a labelled factor used for colouring and faceting.
c_bact$gender <- factor(c_bact$sex, levels = c(1, 2), labels = c("male", "female"))

# t_WBC against age (Alter), with points and smoothed trends coloured by gender.
c_bact %>%
  ggplot(mapping = aes(x = Alter, y = t_WBC, color = gender)) +
  geom_point(shape = 20) +
  geom_smooth()
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
## Warning: Removed 462 rows containing missing values (geom_point).
18.1.4 Plot all variables vs. WBC in age/sex groups
# Bin age (Alter) into three groups: up to 50, 50-65, and above 65.
# NOTE(review): with cut()'s defaults the intervals are left-open, so
# observations at the minimum age fall outside the first interval and get
# Agegroup = NA; the plots below drop them via filter(!is.na(Agegroup)).
# Kept as is so that the printed table remains unchanged.
c_bact$Agegroup <- factor(
  cut(c_bact$Alter, c(min(c_bact$Alter), 50, 65, max(c_bact$Alter)))
)
# Cross-tabulate gender by age group.
table(c_bact$gender, c_bact$Agegroup)
##
## (16,50] (50,65] (65,101]
## male 2900 2671 2962
## female 2460 1579 2114
# For each variable from the fourth entry of `variables` onwards, plot it
# against t_WBC in a grid of gender (rows) by age group (columns).
# NOTE(review): presumably the first three entries are age, sex and t_WBC
# itself -- confirm against the definition of `variables`.
# Negative indexing (instead of `4:length(variables)`) yields an empty loop
# when `variables` has fewer than four entries.
for (j in seq_along(variables)[-(1:3)]) {
  p1 <- c_bact %>%
    # Rows without an age group cannot be assigned to a facet column.
    dplyr::filter(!is.na(Agegroup)) %>%
    ggplot(mapping = aes(x = t_WBC, y = .data[[variables[j]]])) +
    geom_point(alpha = alpha_value, shape = 20) +
    geom_smooth() +
    geom_rug(alpha = alpha_value) +
    facet_grid(gender ~ Agegroup)
  # Explicit print() is required to render ggplot objects inside a loop.
  print(p1)
}
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 634 rows containing non-finite values (stat_smooth).
## Warning: Removed 634 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 620 rows containing non-finite values (stat_smooth).
## Warning: Removed 620 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 728 rows containing non-finite values (stat_smooth).
## Warning: Removed 728 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
## Warning: Removed 462 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 552 rows containing non-finite values (stat_smooth).
## Warning: Removed 552 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 557 rows containing non-finite values (stat_smooth).
## Warning: Removed 557 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 666 rows containing non-finite values (stat_smooth).
## Warning: Removed 666 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 648 rows containing non-finite values (stat_smooth).
## Warning: Removed 648 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Warning: Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## Removed 732 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2428 rows containing non-finite values (stat_smooth).
## Warning: Removed 2428 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 620 rows containing non-finite values (stat_smooth).
## Warning: Removed 620 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 635 rows containing non-finite values (stat_smooth).
## Warning: Removed 635 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2990 rows containing non-finite values (stat_smooth).
## Warning: Removed 2990 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 614 rows containing non-finite values (stat_smooth).
## Warning: Removed 614 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1610 rows containing non-finite values (stat_smooth).
## Warning: Removed 1610 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1446 rows containing non-finite values (stat_smooth).
## Warning: Removed 1446 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1718 rows containing non-finite values (stat_smooth).
## Warning: Removed 1718 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## Warning: Removed 463 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
## Warning: Removed 462 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## Warning: Removed 463 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## Removed 463 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## Removed 463 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 476 rows containing non-finite values (stat_smooth).
## Warning: Removed 476 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1104 rows containing non-finite values (stat_smooth).
## Warning: Removed 1104 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2887 rows containing non-finite values (stat_smooth).
## Warning: Removed 2887 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2955 rows containing non-finite values (stat_smooth).
## Warning: Removed 2955 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1724 rows containing non-finite values (stat_smooth).
## Warning: Removed 1724 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1733 rows containing non-finite values (stat_smooth).
## Warning: Removed 1733 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1696 rows containing non-finite values (stat_smooth).
## Warning: Removed 1696 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2313 rows containing non-finite values (stat_smooth).
## Warning: Removed 2313 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 3476 rows containing non-finite values (stat_smooth).
## Warning: Removed 3476 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1892 rows containing non-finite values (stat_smooth).
## Warning: Removed 1892 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2008 rows containing non-finite values (stat_smooth).
## Warning: Removed 2008 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2108 rows containing non-finite values (stat_smooth).
## Warning: Removed 2108 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4323 rows containing non-finite values (stat_smooth).
## Warning: Removed 4323 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7410 rows containing non-finite values (stat_smooth).
## Warning: Removed 7410 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4108 rows containing non-finite values (stat_smooth).
## Warning: Removed 4108 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2865 rows containing non-finite values (stat_smooth).
## Warning: Removed 2865 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1851 rows containing non-finite values (stat_smooth).
## Warning: Removed 1851 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2149 rows containing non-finite values (stat_smooth).
## Warning: Removed 2149 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2514 rows containing non-finite values (stat_smooth).
## Warning: Removed 2514 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4572 rows containing non-finite values (stat_smooth).
## Warning: Removed 4572 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5440 rows containing non-finite values (stat_smooth).
## Warning: Removed 5440 rows containing missing values (geom_point).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5417 rows containing non-finite values (stat_smooth).
## Warning: Removed 5417 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1104 rows containing non-finite values (stat_smooth).
## Warning: Removed 1104 rows containing missing values (geom_point).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## Warning: Removed 463 rows containing missing values (geom_point).
#+
# theme_minimal()
18.1.5 Plot all variables vs. WBC in age/sex groups: smoothed curves only
# Same faceted display as in the previous section (gender rows by age-group
# columns), but showing only the smoothed curves; the point and rug layers
# are deliberately left commented out.
# Negative indexing (instead of `4:length(variables)`) yields an empty loop
# when `variables` has fewer than four entries.
for (j in seq_along(variables)[-(1:3)]) {
  p1 <- c_bact %>%
    # Rows without an age group cannot be assigned to a facet column.
    dplyr::filter(!is.na(Agegroup)) %>%
    ggplot(mapping = aes(x = t_WBC, y = .data[[variables[j]]])) +
    # geom_point(alpha = alpha_value) +
    geom_smooth() +
    # geom_rug() +
    facet_grid(gender ~ Agegroup)
  # Explicit print() is required to render ggplot objects inside a loop.
  print(p1)
}
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 634 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 620 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 728 rows containing non-finite values (stat_smooth).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 552 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 557 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 666 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 648 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 732 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2428 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 620 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 635 rows containing non-finite values (stat_smooth).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2990 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 614 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1610 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1446 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1718 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 462 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 476 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1104 rows containing non-finite values (stat_smooth).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2887 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2955 rows containing non-finite values (stat_smooth).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1724 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1733 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1696 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2313 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 3476 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1892 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2008 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2108 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4323 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7410 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4108 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2865 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1851 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2149 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 2514 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4572 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5440 rows containing non-finite values (stat_smooth).
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5417 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1104 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 463 rows containing non-finite values (stat_smooth).
#+
# theme_minimal()
18.2 Variable redundancy
18.2.1 Redundancy among very important predictors
We start with a redundancy analysis of the predictors deemed important by preceding studies.
# One-sided formula with age (Alter) plus the pivotal and very important
# predictors, joined by "+".
formula <- as.formula(
  paste(
    "~Alter+",
    paste(
      unique(c(
        bact_transformed$pivotal_vars,
        bact_transformed$vip_vars
      )),
      collapse = "+"
    )
  )
)
# Redundancy analysis: how well can each variable be predicted from the others?
Hmisc::redun(formula, data = c_bact)
##
## Redundancy Analysis
##
## Hmisc::redun(formula = formula, data = c_bact)
##
## n: 13793 p: 6 nk: 3
##
## Number of NAs: 898
## Frequencies of Missing Values Due to Each Variable
## Alter t_WBC t_BUN t_KREA t_NEU PLT
## 0 462 172 159 728 42
##
##
## Transformation of target variables forced to be linear
##
## R-squared cutoff: 0.9 Type: ordinary
##
## R^2 with which each variable can be predicted from all other variables:
##
## Alter t_WBC t_BUN t_KREA t_NEU PLT
## 0.196 0.934 0.610 0.574 0.933 0.161
##
## Rendundant variables:
##
## t_WBC
##
## Predicted from variables:
##
## Alter t_BUN t_KREA t_NEU PLT
##
## Variable Deleted R^2 R^2 after later deletions
## 1 t_WBC 0.934
This analysis suggests that WBC is redundant once NEU is in the predictor set. We investigate this further by looking only at leukocyte-related variables.
18.2.3 Redundancy among all potential predictors
Now we perform a full redundancy analysis, omitting WBC, the leukocyte ratio variables, and BUN and KREA (because they are used to construct BUN_KREA and eGFR).
# One-sided formula with the individual leukocyte variables, the demographic
# variables, t_BUN_KREA, eGFR and K, and the acute-related and remaining
# variables, joined by "+".
formula <- as.formula(
  paste(
    "~I(t_EOS)+I(t_BASO)+t_LYM+MONO+t_NEU+",
    paste(
      unique(c(
        bact_transformed$demog_vars,
        c("t_BUN_KREA", "eGFR", "K"),
        bact_transformed$acute_related_vars,
        bact_transformed$remaining_vars
      )),
      collapse = "+"
    )
  )
)

# Redundancy analysis over the full predictor set defined above.
Hmisc::redun(formula, data = c_bact)
##
## Redundancy Analysis
##
## Hmisc::redun(formula = formula, data = c_bact)
##
## n: 3980 p: 44 nk: 3
##
## Number of NAs: 10711
## Frequencies of Missing Values Due to Each Variable
## I(t_EOS) I(t_BASO) t_LYM MONO t_NEU Alter sex
## 135 146 262 246 728 0 0
## t_BUN_KREA eGFR K FIB CRP t_ASAT t_ALAT
## 174 159 2008 2567 155 1154 987
## t_GGT MCV HGB HCT MCH MCHC RDW
## 1262 42 41 42 42 42 56
## MPV NT t_APTT NA. CA t_PHOS MG
## 702 2467 2549 1282 1276 1242 1869
## HS t_GBIL TP ALB t_AMY t_PAMY t_LIP
## 3061 1441 1583 1676 3913 7114 3699
## CHE t_AP t_LDH t_CK t_GLU t_TRIG CHOL
## 2447 1400 1714 2080 4192 5061 5045
## PDW RBC
## 1102 461
##
##
## Transformation of target variables forced to be linear
##
## R-squared cutoff: 0.9 Type: ordinary
##
## R^2 with which each variable can be predicted from all other variables:
##
## I(t_EOS) I(t_BASO) t_LYM MONO t_NEU Alter sex
## 0.254 0.244 0.391 0.426 0.425 0.612 0.321
## t_BUN_KREA eGFR K FIB CRP t_ASAT t_ALAT
## 0.376 0.637 0.300 0.670 0.660 0.874 0.762
## t_GGT MCV HGB HCT MCH MCHC RDW
## 0.643 0.993 0.996 0.996 0.995 0.984 0.530
## MPV NT t_APTT NA. CA t_PHOS MG
## 0.905 0.340 0.257 0.335 0.530 0.319 0.244
## HS t_GBIL TP ALB t_AMY t_PAMY t_LIP
## 0.482 0.393 0.787 0.855 0.667 0.722 0.590
## CHE t_AP t_LDH t_CK t_GLU t_TRIG CHOL
## 0.658 0.615 0.588 0.585 0.263 0.385 0.498
## PDW RBC
## 0.899 0.977
##
## Rendundant variables:
##
## HCT MCH HGB MPV
##
## Predicted from variables:
##
## I(t_EOS) I(t_BASO) t_LYM MONO t_NEU Alter sex t_BUN_KREA eGFR K FIB CRP t_ASAT t_ALAT t_GGT MCV MCHC RDW NT t_APTT NA. CA t_PHOS MG HS t_GBIL TP ALB t_AMY t_PAMY t_LIP CHE t_AP t_LDH t_CK t_GLU t_TRIG CHOL PDW RBC
##
## Variable Deleted R^2 R^2 after later deletions
## 1 HCT 0.996 0.996 0.975 0.975
## 2 MCH 0.994 0.994 0.994
## 3 HGB 0.975 0.975
## 4 MPV 0.904
This analysis suggests that HCT, MCH, HGB and MPV may be redundant on top of the other variables. Note that BUN, KREA and WBC were already omitted from this redundancy analysis.
18.3 Section session info
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Austria.1252 LC_CTYPE=English_Austria.1252
## [3] LC_MONETARY=English_Austria.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Austria.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] mice_3.14.0 ggcorrplot_0.1.3 gtsummary_1.5.2 Hmisc_4.6-0
## [5] Formula_1.2-4 survival_3.2-13 lattice_0.20-45 plotly_4.10.0
## [9] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4
## [13] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5
## [17] tidyverse_1.3.1 here_1.0.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-155 fs_1.5.2 lubridate_1.8.0
## [4] RColorBrewer_1.1-2 httr_1.4.2 rprojroot_2.0.2
## [7] tools_4.1.3 backports_1.4.1 bslib_0.3.1
## [10] utf8_1.2.2 R6_2.5.1 rpart_4.1.16
## [13] mgcv_1.8-39 DBI_1.1.2 lazyeval_0.2.2
## [16] colorspace_2.0-3 nnet_7.3-17 withr_2.5.0
## [19] tidyselect_1.1.2 gridExtra_2.3 compiler_4.1.3
## [22] cli_3.2.0 rvest_1.0.2 gt_0.4.0
## [25] htmlTable_2.4.0 xml2_1.3.3 labeling_0.4.2
## [28] bookdown_0.25 sass_0.4.1 checkmate_2.0.0
## [31] scales_1.1.1 digest_0.6.29 foreign_0.8-82
## [34] rmarkdown_2.13 base64enc_0.1-3 jpeg_0.1-9
## [37] pkgconfig_2.0.3 htmltools_0.5.2 highr_0.9
## [40] dbplyr_2.1.1 fastmap_1.1.0 htmlwidgets_1.5.4
## [43] rlang_1.0.2 readxl_1.3.1 rstudioapi_0.13
## [46] farver_2.1.0 jquerylib_0.1.4 generics_0.1.2
## [49] jsonlite_1.8.0 crosstalk_1.2.0 magrittr_2.0.2
## [52] Matrix_1.4-0 Rcpp_1.0.8.3 munsell_0.5.0
## [55] fansi_1.0.3 lifecycle_1.0.1 stringi_1.7.6
## [58] yaml_2.3.5 plyr_1.8.7 grid_4.1.3
## [61] crayon_1.5.1 haven_2.4.3 splines_4.1.3
## [64] hms_1.1.1 knitr_1.38 pillar_1.7.0
## [67] reshape2_1.4.4 reprex_2.0.1 glue_1.6.2
## [70] evaluate_0.15 latticeExtra_0.6-29 broom.helpers_1.6.0
## [73] data.table_1.14.2 modelr_0.1.8 vctrs_0.3.8
## [76] png_0.1-7 tzdb_0.2.0 cellranger_1.1.0
## [79] gtable_0.3.0 assertthat_0.2.1 xfun_0.30
## [82] broom_0.7.12 viridisLite_0.4.0 cluster_2.1.2
## [85] ellipsis_0.3.2