library(tidyr)
library(ggplot2)
library(stringr)
library(gridExtra)
library(gtable)
library(grid)
library(ggpubr)
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_value"]<-"value"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_level"]<-"level"
names(dataset)[names(dataset)=="DB_Standard_Indicators/modality_intervention"]<-"modality"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_code"]<-"code"
names(dataset)[names(dataset)=="_id"]<-"id"
names(dataset)[names(dataset)=="Selected_Y"]<-"Y"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_analisis_unit"]<-"analisis_unit"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_Geo1"]<-"Geo1"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_Geo2"]<-"Geo2"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_Geo3"]<-"Geo3"
names(dataset)[names(dataset)=="DB_Standard_Indicators/INDICATOR_Geo4"]<-"Geo4"
dataset$Y<-str_replace_all(dataset$Y, "%", "Perc")
dataset$code<-str_replace_all(dataset$code, "%", "Perc")
dataset$Y<-str_replace_all(dataset$Y, "[[:punct:]]", "")
dataset$code<-str_replace_all(dataset$code, "[[:punct:]]", "")
dataset$Y<-str_replace_all(dataset$Y, " ", "_")
dataset$code<-str_replace_all(dataset$code, " ", "_")
Info_Y<-head(subset(dataset,dataset$code==as.character(Y),select=c(level, modality, code,analisis_unit)),1)
DependentVar<-as.character(Info_Y$code[1])
Selected_Level_Analisis=(Info_Y[1,1])
Analised_by_Level=ifelse(Selected_Level_Analisis=="Impact","Outcome",ifelse(Selected_Level_Analisis=="Outcome","Output","Gestion"))
# empezamos con analyzar el outcome seleccionado por los output del mismo nivel de analisis
df_Y<-subset(dataset,dataset$code==as.character(Y),select=c(value, Y,Geo4,id))
df_Y$Y<-"Y"
df_Y_BXPL<-subset(dataset,dataset$code==as.character(Y),select=c(value, Y,Geo1,Geo2,Geo3,Geo4,id))
# Try to perform t-test and handle errors due to insufficient observations
ttest_result <- tryCatch(t.test(value ~ Geo1, data = df_Y_BXPL), error = function(e) NULL)
if (is.null(ttest_result)) {
p_value <- NA
significance_sentence <- "There are not sufficient observations to perform the t-test."
} else {
p_value <- ttest_result$p.value
significance_sentence <- ifelse(p_value < 0.05, "There is a significant difference among the categories (p < 0.05).", "There is no significant difference among the categories (p >= 0.05).")
}
bxplot <- ggplot(df_Y_BXPL, aes(x=Geo1, y=value)) +
geom_boxplot() +
stat_compare_means(method = "t.test", label = "p.format") +
ggtitle(significance_sentence)
Spreaddf_Y<-spread(df_Y,Y,value)
df_X<-subset(dataset,level==Analised_by_Level,select=c(value, level, modality, code,id,analisis_unit,Geo1,Geo2,Geo3,Geo4))
df_X<-subset(df_X,analisis_unit==as.character(Info_Y$analisis_unit[1]),select=c(value, level, modality, code,id,analisis_unit,Geo1,Geo2,Geo3,Geo4))
Spreaddf_X<-spread(df_X,code,value)
final<-merge(Spreaddf_Y,Spreaddf_X,by="Geo4")
final<-subset(final,select=-c(Geo4,id.x,level,modality,id.y,analisis_unit,Geo1,Geo2,Geo3))
model<-lm(Y~.,data=final)
resumen<-summary(model)
r2<-round(resumen$r.squared,2)
coefficient<-round(resumen$coefficients,3)
coefficient<-subset(coefficient,select=c("Estimate","Pr(>|t|)"))
colnames(coefficient)[1]<-"Estimate"
colnames(coefficient)[2]<-"Significance"
coefficient <- transform(coefficient, Significance = ifelse(Significance < 0.05, "95%", ifelse(Significance < 0.1, "90%", ifelse(Significance < 0.15, "85%", ifelse(Significance < 0.20, "80%", "Not significant")))))
colnames(coefficient)[2]<-"Confidence level"
Num_Obs<-length(fitted(model))
# Create title, footnote, and table using ggplot2
title_plot <- ggplot() +
annotate("text", x = 1, y = 1, label = paste(DependentVar, "explained by ", Analised_by_Level, "level, Observations:", Num_Obs), size = 6) +
theme_void()
footnote_plot <- ggplot() +
annotate("text", x = 1, y = 1, label = paste("Coefficient of determination R^2: ", r2), size = 5, fontface = "italic") +
theme_void()
table_plot <- ggplot() +
theme_void() +
annotation_custom(tableGrob(coefficient, theme = ttheme_minimal(colhead = list(fg_params = list(col = "navyblue")))), xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf)
# Arrange the plots using gridExtra
final_plot <- grid.arrange(
title_plot,
table_plot,
footnote_plot,
bxplot,
ncol = 1,
heights = c(0.1, 0.4, 0.1, 0.4)
)
# Print the final plot
print(final_plot)