# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:
# dataset <- data.frame(ProjectName, Commodity, WBS_code, Months, Running_Total_V2, Total_Scope_V2, RTPercentCompleteV2, Project_Description)
# dataset <- unique(dataset)
# Paste or type your script code here:
library(dplyr)
library(ggplot2)
library(RColorBrewer)
# Data preparation ----
# Values of Project_Description are matched against ProjectName below to
# separate the proposal project from the reference projects.
proposal_project <- unique(dataset[["Project_Description"]])

# Dropping WBS_code leaves repeated rows, so collapse them.
df <- dataset %>%
  select(ProjectName, Commodity, Months, RTPercentCompleteV2) %>%
  distinct()

# For projects with multiple entries for the same percent complete, keep the
# row with the highest Months value. which.max() ignores NA, so a group whose
# Months are all NA contributes no row.
df <- df %>%
  group_by(ProjectName, Commodity, RTPercentCompleteV2) %>%
  slice(which.max(Months)) %>%
  ungroup()

# Interpolate Months onto a common percent-complete grid (10% to 100% in 5%
# steps) for every project/commodity pair. The result has columns
# x (percent complete) and y (interpolated months); approx() yields NA for grid
# points outside the range observed for that pair.
pctcomplete <- seq(0.1, 1, by = 0.05)
df <- df %>%
  group_by(ProjectName, Commodity) %>%
  group_modify(function(rows, key) {
    usable <- !is.na(rows$RTPercentCompleteV2) & !is.na(rows$Months)
    # approx() stops with an error when fewer than two distinct points are
    # available; skip such pairs instead of failing the whole visual.
    if (length(unique(rows$RTPercentCompleteV2[usable])) < 2L) {
      return(data.frame(x = numeric(0), y = numeric(0)))
    }
    data.frame(approx(rows$RTPercentCompleteV2, rows$Months, xout = pctcomplete))
  }) %>%
  ungroup()

# Split out proposal data after the transformations. %in% is used rather than
# == / != so that more than one distinct Project_Description value does not
# trigger silent element-wise recycling. Rows with a missing ProjectName are
# excluded from both subsets.
prop_df <- df %>%
  filter(!is.na(ProjectName), ProjectName %in% proposal_project)
df2df <- df %>%
  filter(!is.na(ProjectName), !(ProjectName %in% proposal_project))

# Average duration of the reference projects per commodity and grid point.
# NOTE(review): mean() is called without na.rm, so a grid point is NA as soon
# as one reference project does not cover it -- confirm this is intended.
data_wo_proposal <- df2df %>%
  group_by(Commodity, x) %>%
  summarise(mean_duration = mean(y)) %>%
  ungroup()
# Plot ----
# Duration axis: ticks every 2 months from 0 up to the largest plotted value
# (reference averages and proposal values combined).
mindiff <- 0
maxdiff <- suppressWarnings(
  max(data_wo_proposal$mean_duration, prop_df$y, na.rm = TRUE)
)
# With no non-missing values max() returns -Inf and seq() would fail; fall
# back to a single tick at 0 so an empty chart is drawn instead of an error.
if (!is.finite(maxdiff)) {
  maxdiff <- mindiff
}
ticks <- round(seq(mindiff, maxdiff, by = 2))

# Reference averages: dashed lines with points, one colour per Commodity.
g <- ggplot(
  data = data_wo_proposal,
  aes(x = mean_duration, y = x, colour = Commodity)
) +
  geom_line(linetype = 2) +
  geom_point()

# Proposal project: solid lines with points on the same axes.
final_plot <- g +
  geom_line(data = prop_df, aes(x = y, y = x, colour = Commodity)) +
  geom_point(data = prop_df, aes(x = y, y = x, colour = Commodity))

# Titles, theme, colour palette and axis formatting.
final_plot_label <- final_plot +
  labs(
    title = "Average Duration of Reference Projects vs Proposal Project",
    subtitle = paste(
      "Proposal Project:",
      prop_df$ProjectName[1],
      "(solid line in figure below)"
    ),
    x = "Duration (Months)",
    y = "Percent Complete"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_colour_brewer(palette = "Set1") +
  scale_x_continuous(breaks = ticks) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = seq(0, 1, by = .05)
  )

# Auto-printing at top level renders the plot for the host.
final_plot_label