#============================================================================# ##################### Velkommen til Benchmarking ########################### #============================================================================# # Fjerner alle objekter fra arbejdsomraadet rm(list=ls()) # Saetter en fast tilfaeldig startvaerdi for at sikre reproducerbare resultater set.seed(23) # Indlaeser noedvendige pakker required_packages <- c("Benchmarking", "frontier","ggplot2","lmtest","sandwich", "readxl") for (i in required_packages){ if( !(i %in% installed.packages()) ){install.packages(i)} } library(Benchmarking) ; library(readxl) ; library(ggplot2) ; library(lmtest) ; library(sandwich) # Indlaeser data som baserer sig paa indberetninger for 2023 og 2024 setwd(choose.dir()) Data <- read_xlsx("Bilag 1 - Data til brug for fastlaeggelse af de individuelle effektiviseringskrav.xlsx", sheet = "Til R-koder") #======================================================================# ################### KORRIGEREDE NETVOLUMEN ########################## #======================================================================# # Data til korrektion af netvolumenm?lene Data_korrektion <- Data[-which(Data$TYPE=="r"),] #----------------------------------------------------------------------# ####--------------------- OPEX korrektion --------------------------#### #----------------------------------------------------------------------# ####---- OPEX-regressionen med alle selskaber ----#### OPEX_Fit<-lm(Data_korrektion$FADO_FROSSET/Data_korrektion$OPEX_U_FROSSET~Data_korrektion$ALDER_FROSSET+Data_korrektion$TAETHED_FROSSET) summary(OPEX_Fit) # Finder mulige outliers med Cook's Distance cbind(Data_korrektion$NAVN,Data_korrektion$ID,cooks.distance(OPEX_Fit))[order(cooks.distance(OPEX_Fit)),] # Endelige regressionsresultater for OPEX round(coef(OPEX_Fit),6) ; summary(OPEX_Fit) # Automatisk beregning af OPEX_KOR Data$OPEX_KOR <- (summary(OPEX_Fit)$coef[1] + summary(OPEX_Fit)$coef[2]*Data$ALDER + summary(OPEX_Fit)$coef[3]*Data$TAETHED) * Data$OPEX_U Data$OPEX_KOR_FROSSET <- (summary(OPEX_Fit)$coef[1] + summary(OPEX_Fit)$coef[2]*Data$ALDER_FROSSET + summary(OPEX_Fit)$coef[3]*Data$TAETHED_FROSSET) * Data$OPEX_U_FROSSET #----------------------------------------------------------------------# ####--------------------- CAPEX korrektion -------------------------#### #----------------------------------------------------------------------# ####---- CAPEX-regressionen med alle selskaber ----#### CAPEX_Fit<-lm(Data_korrektion$INV_OMK_FROSSET/Data_korrektion$CAPEX_U_FROSSET~Data_korrektion$ALDER_FROSSET+Data_korrektion$TAETHED_FROSSET) # Finder mulige outliers med Cook's Distance cbind(Data_korrektion$NAVN,Data_korrektion$ID,cooks.distance(CAPEX_Fit))[order(cooks.distance(CAPEX_Fit)),] ####---- CAPEX-regressionener - Iterativ process indtil alle outliers er identificeret ----#### CAPEX_outliers_1 <- c("S045","S011") Out_1 <- which(Data_korrektion$ID %in% CAPEX_outliers_1) CAPEX_Fit_out_1<-lm(Data_korrektion$INV_OMK_FROSSET[-Out_1]/Data_korrektion$CAPEX_U_FROSSET[-Out_1] ~Data_korrektion$ALDER_FROSSET[-Out_1]+Data_korrektion$TAETHED_FROSSET[-Out_1]) cbind(Data_korrektion$NAVN[-Out_1],Data_korrektion$ID[-Out_1],cooks.distance(CAPEX_Fit_out_1))[order(cooks.distance(CAPEX_Fit_out_1)),] CAPEX_outliers_2 <- c("S045","S011", "S024") Out_2 <- which(Data_korrektion$ID %in% CAPEX_outliers_2) CAPEX_Fit_out_2<-lm(Data_korrektion$INV_OMK_FROSSET[-Out_2]/Data_korrektion$CAPEX_U_FROSSET[-Out_2] ~Data_korrektion$ALDER_FROSSET[-Out_2]+Data_korrektion$TAETHED_FROSSET[-Out_2]) cbind(Data_korrektion$NAVN[-Out_2],Data_korrektion$ID[-Out_2],cooks.distance(CAPEX_Fit_out_2))[order(cooks.distance(CAPEX_Fit_out_2)),] CAPEX_outliers_3 <- c("S045","S011", "S024", "S029") Out_3 <- which(Data_korrektion$ID %in% CAPEX_outliers_3) CAPEX_Fit_out_3<-lm(Data_korrektion$INV_OMK_FROSSET[-Out_3]/Data_korrektion$CAPEX_U_FROSSET[-Out_3] ~Data_korrektion$ALDER_FROSSET[-Out_3]+Data_korrektion$TAETHED_FROSSET[-Out_3]) cbind(Data_korrektion$NAVN[-Out_3],Data_korrektion$ID[-Out_3],cooks.distance(CAPEX_Fit_out_3))[order(cooks.distance(CAPEX_Fit_out_3)),] #Endelige outliers CAPEX_outliers <- c("S045","S011","S024","S029") # "S045" - HOFOR Spildevand København A/S # "S011" - HOFOR SPILDEVAND BRØNDBY A/S # "S024" - Frederiksberg Kloak A/S # "S029" - Novafos Spildevand Gladsaxe A/S # Beregning af regressionsmodel for CAPEX CAPEX_Fit_out<-lm(Data_korrektion$INV_OMK_FROSSET[-which(Data_korrektion$ID %in% CAPEX_outliers)]/Data_korrektion$CAPEX_U_FROSSET[-which(Data_korrektion$ID %in% CAPEX_outliers)] ~Data_korrektion$ALDER_FROSSET[-which(Data_korrektion$ID %in% CAPEX_outliers)]+Data_korrektion$TAETHED_FROSSET[-which(Data_korrektion$ID %in% CAPEX_outliers)]) # Cook's Distance cbind(Data_korrektion$NAVN[-which(Data_korrektion$ID %in% CAPEX_outliers)],Data_korrektion$ID[-which(Data_korrektion$ID %in% CAPEX_outliers)],cooks.distance(CAPEX_Fit_out))[order(cooks.distance(CAPEX_Fit_out)),] # Endelige regressionsresultater for CAPEX round(coef(CAPEX_Fit_out),6) ; summary(CAPEX_Fit_out) # Automatisk beregning af CAPEX_KOR Data$CAPEX_KOR <- (summary(CAPEX_Fit_out)$coef[1] + summary(CAPEX_Fit_out)$coef[2]*Data$ALDER + summary(CAPEX_Fit_out)$coef[3]*Data$TAETHED) * Data$CAPEX_U Data$CAPEX_KOR_FROSSET <- (summary(CAPEX_Fit_out)$coef[1] + summary(CAPEX_Fit_out)$coef[2]*Data$ALDER_FROSSET + summary(CAPEX_Fit_out)$coef[3]*Data$TAETHED_FROSSET) * Data$CAPEX_U_FROSSET #======================================================================# ######################## DEA-MODELLEN ############################### #======================================================================# #Selskaber som VST konkret vurderer skal vaere outliers i aarets benchmarking uanset udfaldet af outlier tests OUT <-c("S007","S014","S024","S028","S041","S045","S075","S095","S111") # "S007" - Billund Spildevand A/S # "S014" - Energi Viborg Vand A/S # "S024" - Frederiksberg Kloak A/S # "S028" - Novafos Spildevand Gentofte A/S # "S041" - HOFOR Spildevand Albertslund A/S # "S045" - HOFOR Spildevand København A/S # "S075" - Vandmiljø Randers A/S # "S095" - Thisted Spildevand Transport A/S # "S111" - RINGKØBING-SKJERN RENSEANLÆG A/S #======================================================================# # Funktion til at beregne Mahalanobis Distance og Superefficiens calculate_outliers <- function(ID,CompanyName, Input, Output, OUT, p_value_cutoff = 0.95, Supereff = TRUE, SE_cutoff = 1.0, RTS = "crs", ORIENTATION = "in") { require("Benchmarking") require("ggplot2") # Opretter dataframe data <- na.omit(data.frame(Selskabsnavn = CompanyName, ID = ID, Input,Output)) # Beregner Mahalanobis-afstand data$distance <- mahalanobis(data[, -c(1, 2)], colMeans(data[, -c(1, 2)]), cov(data[, -c(1, 2)])) # Beregner cutoff-vaerdi i forhold til specificeret p-vaerdi cutoff <- qchisq(p = p_value_cutoff , df = ncol(data)-2) # Beregner p-vaerdier for Mahalanobis-afstande data$p_values <- pchisq(data$distance, df = ncol(data)-2, lower.tail = FALSE) # Fjerner observationer med p-vaerdi mindre end 1 - p_value_cutoff samt observationer, som på forhånd vurderes til at være outliers MahalanobisOut <- which(data$p_values <= (1 - p_value_cutoff) | data$ID %in% OUT) if (Supereff) { data$SE <- sapply(1:nrow(data),function(x){ dea(Input[x], cbind(Output[x,1],Output[x,2]), RTS = RTS, ORIENTATION = ORIENTATION, XREF = Input[-c(MahalanobisOut,x)], YREF = Output[-c(MahalanobisOut,x),])$eff }) # Fletter outliers baseret paa ID Outliers <- which(data$SE > SE_cutoff | data$ID %in% OUT) # Graf af Mahalanobis og SE Graph <- ggplot(data, aes(x = seq_along(distance), y = distance, color = SE>SE_cutoff)) + geom_point() + geom_hline(yintercept = cutoff, linetype = "dashed", color = "red") + geom_text(aes(label = ID), hjust = 1, vjust = -1.5, size = 2.5) + labs(x = "Observationer", y = "Mahalanobis Distance") + scale_color_manual(values = c("TRUE" = "red", "FALSE" = "blue"), labels = c("Non-outlier", "Outlier")) } else { # Graf af Mahalanobis Graph <- ggplot(data, aes(x = seq_along(distance), y = distance, color = distance > cutoff)) + geom_point() + geom_hline(yintercept = cutoff, linetype = "dashed", color = "red") + geom_text(aes(label = ID), hjust = 1, vjust = -1.5, size = 2.5) + labs(x = "Observationer", y = "Mahalanobis Distance") + scale_color_manual(values = c("TRUE" = "red", "FALSE" = "blue"), labels = c("Above Cutoff", "Below Cutoff")) Outliers <- which(data$distance > cutoff | data$ID == OUT) } return(list(Outliers = Outliers, Results = data,Graph = Graph,mahalanobisCutOff = cutoff)) } #======================================================================# #----------------------------------------------------------------------# ####------------------------ trans+rens ----------------------------#### #----------------------------------------------------------------------# Data_tr <-Data[which(Data$TYPE=="t+r"),] ####---- Input og output til benchmarking ----#### Input_tr <- as.matrix(Data_tr$FATO) Output_tr <- as.matrix(cbind(Data_tr$OPEX_KOR, Data_tr$CAPEX_KOR)) Input_FROSSET_tr <- as.matrix(Data_tr$FATO_FROSSET) Output_FROSSET_tr <- as.matrix(cbind(Data_tr$OPEX_KOR_FROSSET, Data_tr$CAPEX_KOR_FROSSET)) # Beregninger statistiske outliers Outliers_tr <- calculate_outliers(ID = Data_tr$ID, CompanyName = Data_tr$NAVN, Input = Input_FROSSET_tr, Output = Output_FROSSET_tr, OUT=OUT) Outliers_tr # Identificeret statistiske outliers til brug for aarets benchmarking Data_tr[Outliers_tr$Outliers,c("NAVN","ID")] #----------------------------------------------------------------------# ####------------------------- transport ----------------------------#### #----------------------------------------------------------------------# Data_t <- Data[Data$TYPE %in% c("t", "t+r"), ] ####---- Input og output til benchmarking ----#### Input_t <- as.matrix(Data_t$FATO) Output_t <- as.matrix(cbind(Data_t$OPEX_KOR, Data_t$CAPEX_KOR)) Input_FROSSET_t <- as.matrix(Data_t$FATO_FROSSET) Output_FROSSET_t <- as.matrix(cbind(Data_t$OPEX_KOR_FROSSET, Data_t$CAPEX_KOR_FROSSET)) # Beregner statistiske outliers Outliers_t <- calculate_outliers(ID = Data_t$ID, CompanyName = Data_t$NAVN, Input = Input_FROSSET_t, Output = Output_FROSSET_t, OUT=OUT) Outliers_t # Identificeret statistiske outliers til brug for aarets benchmarking Data_t[Outliers_t$Outliers,c("NAVN","ID")] #----------------------------------------------------------------------# ####------------------------ renseanlaeg ----------------------------#### #----------------------------------------------------------------------# Data_r <- Data[Data$TYPE %in% c("r", "t+r"), ] ####---- Input og output til benchmarking ----#### Input_r <- as.matrix(Data_r$FATO) Output_r <- as.matrix(cbind(Data_r$OPEX_KOR, Data_r$CAPEX_KOR)) Input_FROSSET_r <- as.matrix(Data_r$FATO_FROSSET) Output_FROSSET_r <- as.matrix(cbind(Data_r$OPEX_KOR_FROSSET, Data_r$CAPEX_KOR_FROSSET)) # Beregninger statistiske outliers Outliers_r <- calculate_outliers(ID = Data_r$ID, CompanyName = Data_r$NAVN, Input = Input_FROSSET_r, Output = Output_FROSSET_r, OUT=OUT) Outliers_r # Identificeret statistiske outliers til brug for aarets benchmarking Data_r[Outliers_r$Outliers,c("NAVN","ID")] #----------------------------------------------------------------------# ####-------------------------- GRAFER ------------------------------#### #----------------------------------------------------------------------# # GRAF for trans+rens par(mfrow=c(1,1)) dea.plot(Output_FROSSET_tr[-Outliers_tr$Outliers,1]/Input_FROSSET_tr[-Outliers_tr$Outliers], Output_FROSSET_tr[-Outliers_tr$Outliers,2]/Input_FROSSET_tr[-Outliers_tr$Outliers], RTS="crs",ORIENTATION="out",ylim=c(0,1.2),xlim=c(0,1.2)) points(Output_FROSSET_tr[Outliers_tr$Outliers,1]/Input_FROSSET_tr[Outliers_tr$Outliers], Output_FROSSET_tr[Outliers_tr$Outliers,2]/Input_FROSSET_tr[Outliers_tr$Outliers], col="red", pch=10) # GRAF for transport dea.plot(Output_FROSSET_t[-Outliers_t$Outliers,1]/Input_FROSSET_t[-Outliers_t$Outliers], Output_FROSSET_t[-Outliers_t$Outliers,2]/Input_FROSSET_t[-Outliers_t$Outliers], RTS="crs",ORIENTATION="out",ylim=c(0,1.2),xlim=c(0,1.2)) points(Output_FROSSET_t[Outliers_t$Outliers,1]/Input_FROSSET_t[Outliers_t$Outliers], Output_FROSSET_t[Outliers_t$Outliers,2]/Input_FROSSET_t[Outliers_t$Outliers], col="red", pch=10) # GRAF for renseanlaeg dea.plot(Output_FROSSET_r[-Outliers_r$Outliers,1]/Input_FROSSET_r[-Outliers_r$Outliers], Output_FROSSET_r[-Outliers_r$Outliers,2]/Input_FROSSET_r[-Outliers_r$Outliers], RTS="crs",ORIENTATION="out",ylim=c(0,1.2),xlim=c(0,1.2)) points(Output_FROSSET_r[Outliers_r$Outliers,1]/Input_FROSSET_r[Outliers_r$Outliers], Output_FROSSET_r[Outliers_r$Outliers,2]/Input_FROSSET_r[Outliers_r$Outliers], col="red", pch=10) #======================================================================# ###################### Order-M-MODELLEN ############################# #======================================================================# #======================================================================# # Funktion til at beregne Order-M med tilhoerende lambdavaerdier OrderM <- function(ID, Navn, Input, Output, Input_FROSSET, Output_FROSSET, Outliers, Data, B=3, SUPEREFF = "FALSE", RTS = "crs", ORIENTATION = "in"){ require("Benchmarking") require("dplyr") # Specificerer "Data" M=round((dim(Data)[1]-length(Outliers))/3) # Definerer printet OrderM_Eff_Scorer <- matrix(0, nrow = B, ncol = length(ID)) MyLambda <- list() for (i in 1:B){ M_bar <- unique(sample((1:length(ID))[-Outliers], M, replace = TRUE)) # Koerer Order-M for det valgte udsnit OrderM_Res <- dea(Input, Output, RTS = RTS, ORIENTATION = ORIENTATION, XREF = Input_FROSSET[M_bar, , drop = FALSE], YREF = Output_FROSSET[M_bar, , drop = FALSE]) # Samler Order-M efficiensscorer OrderM_Eff_Scorer[i,] <- round(OrderM_Res$eff,digits = 7) #Beregning af lambda til brug for costdriveranalysen senere MyLambda[[i]] <- matrix(0,length(ID),length(ID),byrow = T) # Danner matrix MyLambda[[i]][,M_bar] <- OrderM_Res$lambda # Indsaetter lambdaer paa de rigtige pladser i matricen } OrderM_Scorer <- colMeans(OrderM_Eff_Scorer) LambdaMean <- Reduce("+",MyLambda)/B #Finder de gennemsnitlige lambdaer over alle iterationerne # Saetter OrderM-efficiensscorer over 1 ned til 1 if(SUPEREFF == "FALSE"){ OrderM_Scorer <- pmin(OrderM_Scorer,1) } return(list("Eff"=OrderM_Scorer,"LambdaMean"=LambdaMean)) cat("Gennemsnitlig effektivitet:", mean(OrderM_Scorer), "/n") } #======================================================================# #----------------------------------------------------------------------# ####------------------------ trans+rens ----------------------------#### #----------------------------------------------------------------------# #Kører Order-M funktionen OrderM_Res_tr <- OrderM(ID=Data_tr$ID, Navn=Data_tr$NAVN, Input=Input_tr, Output=Output_tr, Data = Data_tr, Input_FROSSET=Input_FROSSET_tr, Output_FROSSET=Output_FROSSET_tr, Outliers=Outliers_tr$Outliers, B=10000,SUPEREFF = T) OrderM_Eff_tr <- ifelse(OrderM_Res_tr$Eff > 1, 1, OrderM_Res_tr$Eff) # Scorene fra Order-M bindes paa datasaettet Data_tr$OrderM_score <- OrderM_Eff_tr # Efficiente selskaber Data_tr[Data_tr$OrderM_score==1,c("NAVN","ID")] #----------------------------------------------------------------------# ####------------------------- transport ----------------------------#### #----------------------------------------------------------------------# #Kører Order-M funktionen OrderM_Res_t <- OrderM(ID=Data_t$ID, Navn=Data_t$NAVN, Input=Input_t, Output=Output_t, Data = Data_t, Input_FROSSET=Input_FROSSET_t, Output_FROSSET=Output_FROSSET_t, Outliers=Outliers_t$Outliers, B=10000,SUPEREFF = T) OrderM_Eff_t <- ifelse(OrderM_Res_t$Eff > 1, 1, OrderM_Res_t$Eff) # Scorene fra Order-M bindes paa datasaettet Data_t$OrderM_score <- OrderM_Eff_t # Efficiente selskaber Data_t[Data_t$OrderM_score==1,c("NAVN","ID")] #----------------------------------------------------------------------# ####------------------------ renseanl?g ----------------------------#### #----------------------------------------------------------------------# #Kører Order-M funktionen OrderM_Res_r <- OrderM(ID=Data_r$ID, Navn=Data_r$NAVN, Input=Input_r, Output=Output_r, Data = Data_r, Input_FROSSET=Input_FROSSET_r, Output_FROSSET=Output_FROSSET_r, Outliers=Outliers_r$Outliers, B=10000,SUPEREFF = T) OrderM_Eff_r <- ifelse(OrderM_Res_r$Eff > 1, 1, OrderM_Res_r$Eff) # Scorene fra Order-M bindes paa datasaettet Data_r$OrderM_score <- OrderM_Eff_r # Efficiente selskaber Data_r[Data_r$OrderM_score==1,c("NAVN","ID")] #----------------------------------------------------------------------# ####----------------- Lambda-vaerdier - ikke frosset ----------------#### #----------------------------------------------------------------------# #Matrice der indeholder de ikke-frosset lambda-vaerdierne, som bruges i costdriveranalysen LambdaMean_full <- matrix(0, nrow = length(Data$ID), ncol = length(Data$ID)) LambdaMean_full[Data$TYPE=="t+r",Data$TYPE=="t+r"] <- OrderM_Res_tr$LambdaMean #Indsætter lambda for t+r LambdaMean_full[Data$TYPE=="t",Data$TYPE=="t+r" | Data$TYPE=="t"] <- OrderM_Res_t$LambdaMean[Data$TYPE[Data$TYPE!= "r"]=="t",] #Indsætter lambda for t LambdaMean_full[Data$TYPE=="r",Data$TYPE=="t+r" | Data$TYPE=="r"] <- OrderM_Res_r$LambdaMean[Data$TYPE[Data$TYPE!= "t"]=="r",] #Indsætter lambda for r #======================================================================# #################### Costdriver-analysen ########################### #======================================================================# #===================== Koersel af FROSSET Order-M =====================# # Koersel af Order-M igen, for at opnaa frosset lambda-vaerdier # Saetter en fast tilfaeldig startvaerdi for at sikre reproducerbare resultater set.seed(23) # Koer Order-M med frosset data for trans+rens selskaber OrderM_Res_FROSSET_tr <- OrderM(ID=Data_tr$ID, Navn=Data_tr$NAVN, Input=Input_FROSSET_tr, Output=Output_FROSSET_tr, Data=Data_tr, Input_FROSSET=Input_FROSSET_tr, Output_FROSSET=Output_FROSSET_tr, Outliers=Outliers_tr$Outliers, B=10000,SUPEREFF = T) OrderM_Res_FROSSET_Eff_tr <- ifelse(OrderM_Res_FROSSET_tr$Eff > 1, 1, OrderM_Res_FROSSET_tr$Eff) Data_tr$OrderM_score_frosset <- OrderM_Res_FROSSET_Eff_tr # Koer Order-M med frosset data for transport selskaber OrderM_Res_FROSSET_t <- OrderM(ID=Data_t$ID, Navn=Data_t$NAVN, Input=Input_FROSSET_t, Output=Output_FROSSET_t, Data=Data_t, Input_FROSSET=Input_FROSSET_t, Output_FROSSET=Output_FROSSET_t, Outliers=Outliers_t$Outliers, B=10000,SUPEREFF = T) OrderM_Res_FROSSET_Eff_t <- ifelse(OrderM_Res_FROSSET_t$Eff > 1, 1, OrderM_Res_FROSSET_t$Eff) Data_t$OrderM_score_frosset <- OrderM_Res_FROSSET_Eff_t # Koer Order-M med frosset data for renseanl?g selskaber OrderM_Res_FROSSET_r <- OrderM(ID=Data_r$ID, Navn=Data_r$NAVN, Input=Input_FROSSET_r, Output=Output_FROSSET_r, Data=Data_r, Input_FROSSET=Input_FROSSET_r, Output_FROSSET=Output_FROSSET_r, Outliers=Outliers_r$Outliers, B=10000,SUPEREFF = T) OrderM_Res_FROSSET_Eff_r <- ifelse(OrderM_Res_FROSSET_r$Eff > 1, 1, OrderM_Res_FROSSET_r$Eff) Data_r$OrderM_score_frosset <- OrderM_Res_FROSSET_Eff_r #----------------------------------------------------------------------# ####------------------- Lambda-vaerdier - frosset -------------------#### #----------------------------------------------------------------------# #Opretter en matrice der indeholder de frosne lambda-v?rdierne, som bruges i costdriveranalysen LambdaMean_full_FROSSET <- matrix(0, nrow = length(Data$ID), ncol = length(Data$ID)) LambdaMean_full_FROSSET[Data$TYPE=="t+r",Data$TYPE=="t+r"] <- OrderM_Res_FROSSET_tr$LambdaMean #Indsætter lambda for t+r LambdaMean_full_FROSSET[Data$TYPE=="t",Data$TYPE=="t+r" | Data$TYPE=="t"] <- OrderM_Res_FROSSET_t$LambdaMean[Data$TYPE[Data$TYPE!= "r"]=="t",] #Indsætter lambda for t LambdaMean_full_FROSSET[Data$TYPE=="r",Data$TYPE=="t+r" | Data$TYPE=="r"] <- OrderM_Res_FROSSET_r$LambdaMean[Data$TYPE[Data$TYPE!= "t"]=="r",] #Indsætter lambda for r #============== Beregning af Ligning (5) i Bilag 3 ==============# # Vigtige variable der er givet paa forhaand z_u_FROSSET <- as.matrix(Data[, c("Ledninger_FROSSET", "Pumpe_FROSSET", "Regnbas_FROSSET", "Spildebas_FROSSET", "Rense_FROSSET", "Minirense_FROSSET", "Behandling_FROSSET", "Disponering_FROSSET", "Kunder_FROSSET")]) z_l_FROSSET <- sapply(c(1:ncol(z_u_FROSSET)),function(x){Data$Adm}) # Relative costdrivere Zstar_FROSSET <- sapply(1:ncol(z_u_FROSSET),function(x){ #Finder de relative costdrivere for hvert selskabs benchmark (vaegtet peers) rowSums(sweep(LambdaMean_full_FROSSET , 2, z_u_FROSSET[,x], "*"))/rowSums(sweep(LambdaMean_full_FROSSET , 2, z_l_FROSSET[,x], "*")) }) # Navngiver costdriverne igen colnames(Zstar_FROSSET) <- colnames(z_u_FROSSET) # Angiver differencen mellem selskabernes relative costdrivere og deres benchmark - Se ligning (5) i Bilag 3. ZDiff_FROSSET <- Zstar_FROSSET-(z_u_FROSSET/z_l_FROSSET) as.numeric(ZDiff_FROSSET) #======== Opretter dataframe med de nødvendige kolonner til brug i costdriveranalysen ==========# #Henter NAVN, ID og TYPE kolonnerne Data_CD <- data.frame(NAVN = Data$NAVN, ID = Data$ID, TYPE = Data$TYPE, ZDiff_FROSSET) #Henter efficiensscore eff_tr <- Data_tr[, c("ID", "OrderM_score_frosset")] eff_t <- Data_t[, c("ID", "OrderM_score_frosset")] eff_r <- Data_r[, c("ID", "OrderM_score_frosset")] #Kobler efficiensscorerne på datasættet Data_CD$OrderM_score_frosset[Data_CD$TYPE == "t+r"] <- eff_tr[[ "OrderM_score_frosset" ]][ match(Data_CD$ID[Data_CD$TYPE == "t+r"], eff_tr[[ "ID" ]]) ] Data_CD$OrderM_score_frosset[Data_CD$TYPE == "t"] <- eff_t[[ "OrderM_score_frosset" ]][ match(Data_CD$ID[Data_CD$TYPE == "t"], eff_t[[ "ID" ]]) ] Data_CD$OrderM_score_frosset[Data_CD$TYPE == "r"] <- eff_r[[ "OrderM_score_frosset" ]][ match(Data_CD$ID[Data_CD$TYPE == "r"], eff_r[[ "ID" ]]) ] #Laver kolonnen for selskabstype til en dummy Data_CD$TYPE <- as.factor(Data_CD$TYPE) #===================== Samler alle Order_M scorerne (frosset og ikke-frosset) =====================# Data$OrderM_score <- NA Data$OrderM_score_frosset <- NA score_tr <- setNames(Data_tr$OrderM_score, Data_tr$ID) score_frosset_tr <- setNames(Data_tr$OrderM_score_frosset, Data_tr$ID) score_t <- setNames(Data_t$OrderM_score, Data_t$ID) score_frosset_t <- setNames(Data_t$OrderM_score_frosset, Data_t$ID) score_r <- setNames(Data_r$OrderM_score, Data_r$ID) score_frosset_r <- setNames(Data_r$OrderM_score_frosset, Data_r$ID) # For TYPE == "t+r" Data$OrderM_score[Data$TYPE == "t+r"] <- score_tr[as.character(Data$ID[Data$TYPE == "t+r"])] Data$OrderM_score_frosset[Data$TYPE == "t+r"] <- score_frosset_tr[as.character(Data$ID[Data$TYPE == "t+r"])] # For TYPE == "t" Data$OrderM_score[Data$TYPE == "t"] <- score_t[as.character(Data$ID[Data$TYPE == "t"])] Data$OrderM_score_frosset[Data$TYPE == "t"] <- score_frosset_t[as.character(Data$ID[Data$TYPE == "t"])] # For TYPE == "r" Data$OrderM_score[Data$TYPE == "r"] <- score_r[as.character(Data$ID[Data$TYPE == "r"])] Data$OrderM_score_frosset[Data$TYPE == "r"] <- score_frosset_r[as.character(Data$ID[Data$TYPE == "r"])] #===================== Funktion til Ligning (6)-(7) ==================# # Funktion til at beregne regressioner inkl. automatisk outliertest Regressioner <- function(Costdriver, CutOff, Data) { formula_text <- paste("OrderM_score_frosset ~", paste(Costdriver, collapse = " + "), "+ TYPE") MyFormula <- as.formula(formula_text) Out <- NULL MyLm <- lm(MyFormula, data = as.data.frame(Data)) while (sum(cooks.distance(MyLm) > CutOff) > 0) { Out <- c(Out, as.numeric(names(which(cooks.distance(MyLm) > CutOff)))) MyLm <- lm(MyFormula, data = as.data.frame(Data), subset = c(-Out)) } return(summary(MyLm)) #Returnerer summary af den lineaere regression } #============================ Regressioner med type dummies ==================================# # Regressioner (med dummy) for hver enkelt OPEX costdriver - Se ligning (6) i Bilag 3. for (i in colnames(z_u_FROSSET)) { print(i) print(Regressioner(Costdriver = i, CutOff = 0.5, Data = Data_CD)) } # Samlet regression baseret paa de costdrivere der er signifikante hver for sig ovenfor - Se ligning (7) i Bilag 3. SignifikanteCostdrivere <- NULL for(i in colnames(z_u_FROSSET)){ #Undersoeger hvilke regressioner ovenfor der er signifikante if(Regressioner(Costdriver = i,CutOff = 0.5, Data=Data_CD)$coefficients[2,4]<=0.05){ SignifikanteCostdrivere <- c(SignifikanteCostdrivere,i) } } # Tjekker om der er signifikante costdrivere SamletRegression <- Regressioner(Costdriver = SignifikanteCostdrivere, CutOff = 0.5, Data=Data_CD) SamletRegression #=========== Beregning af endelig cost-driver-kompensation ============# # Beregner relative costdrivere paa endelig data, saa kun koefficienter til costdriver er frosset # Vigtige variable der er givet paa forhaand z_u <- as.matrix(Data[,c("Ledninger","Pumpe","Regnbas","Spildebas","Rense","Minirense","Behandling","Disponering","Kunder")]) z_l <- sapply(c(1:ncol(z_u)),function(x){Data$Adm}) # Relative costdrivere Zstar <- sapply(1:ncol(z_u),function(x){ #Finder de relative costdrivere for hvert selskabs benchmark (vaegtet peers) rowSums(sweep(LambdaMean_full, 2, z_u_FROSSET[,x], "*"))/rowSums(sweep(LambdaMean_full, 2, z_l_FROSSET[,x], "*")) }) # Navngiver costdriverne igen colnames(Zstar) <- colnames(z_u) # Angiver differencen mellem selskabernes relative costdrivere og deres benchmark - Se ligning (5) i Bilag 3. ZDiff <- Zstar-(z_u/z_l) # Beregner selskabernes kompensation paa baggrund af costdriveranalysen (med dummy) Kompensation <- rowSums(sweep(-ZDiff[, sub("_FROSSET", "", SignifikanteCostdrivere), drop = FALSE], 2, SamletRegression$coefficients[SignifikanteCostdrivere, 1], "*")) # Selskaberne kan kun kompenseres opad, da costdriveranalysen skal betragtes som et forsigtighedshensyn Kompensation[Kompensation<0] <- 0 # Scorene efter costdriveranalysen saettes til maksimalt 1 Data$OrderM_score_Efter_CD <- ifelse(Data$OrderM_score+Kompensation>1,1,Data$OrderM_score+Kompensation) # Binder resultater paa datasaettets Data <- cbind(Data,Kompensation) #======================================================================# ########################## Endelige scorer ############################# #======================================================================# Data[,c("NAVN","ID","OPEX_KOR","CAPEX_KOR","FATO")] Data[,c("NAVN","ID","OrderM_score","Kompensation","OrderM_score_Efter_CD")]