Plotting UFC Data Using Highcharter
Richard O’Brien
2020-03-24
LOADING IN THE NECESSARY PACKAGES
# Check whether each package is already installed; install it if it is not,
# then load it.
#
# Args:
#   ...: Package names, each given as a character string (e.g. "dplyr").
#
# Returns:
#   NULL, invisibly. Called for its side effect of attaching the packages.
#
# Raises:
#   An error naming the package if it still cannot be loaded after the
#   installation attempt.
pkgTest <- function(...) {
  pkg <- list(...)
  # seq_along() gives an empty sequence for zero arguments, whereas
  # 1:length(pkg) would iterate over c(1, 0) and fail on pkg[[1]].
  for (i in seq_along(pkg)) {
    # require() returns FALSE instead of erroring when the package is missing,
    # which is what makes it usable as the "is it installed?" check here.
    if (!require(pkg[[i]], character.only = TRUE)) {
      install.packages(pkg[[i]], dependencies = TRUE)
      if (!require(pkg[[i]], character.only = TRUE)) {
        stop("Package not found: ", pkg[[i]], call. = FALSE)
      }
    }
  }
  invisible(NULL)
}
# Install (if needed) and attach every package this script relies on,
# in the same order as before.
pkgTest("purrr", "highcharter", "dplyr", "stringr")
PLOTTING FINISHING METHODS
This plot lets you drill down into each finishing method by clicking on its bar.
#================================================================================================#
# Attach each fighter's weight class to the fight-level records
#================================================================================================#
career_fighter_details <- merge(
  x = fighter_details_data2,
  y = data_clean2[, c("NAME", "WeightClass")],
  by.x = "fighter_one",
  by.y = "NAME",
  all.x = TRUE # keep every row of fighter_details_data2, matched or not
)
# Coerce Round to numeric
career_fighter_details$Round <- as.numeric(career_fighter_details$Round)
# Drop columns 11 and 13 through 21 (selected by position)
career_fighter_details <- career_fighter_details[, c(-11, -(13:21))]
#================================================================================================#
# Derive the general finishing method for every fight and tabulate it
#================================================================================================#
# unique(career_fighter_details$Method)
# Alternation regex; str_extract() returns the first of these labels it finds in Method
pattern <- paste(
  c("DEC", "KO/TKO", "SUB", "Overturned", "DQ", "Decision", "CNC"),
  collapse = "|"
)
career_fighter_details$general_method <- str_extract(
  as.character(career_fighter_details$Method),
  pattern
)
# Percentage of fights per general method, rounded to 2 decimals
overall_stats <- as.data.frame(
  round(100 * prop.table(table(career_fighter_details$general_method)), 2)
)
# Percentage of fights per (general method, specific method) pair
specific_stats <- as.data.frame(
  round(
    100 * prop.table(
      table(career_fighter_details$general_method, career_fighter_details$Method)
    ),
    2
  )
)
# Clean both label columns: trim the ends, then strip every newline and space
specific_stats[, 1:2] <- lapply(
  specific_stats[, 1:2],
  function(labels) str_remove_all(str_remove_all(trimws(labels), "\n"), " ")
)
# Labels that became identical after cleaning are merged by summing their percentages
specific_stats <- specific_stats %>%
  group_by(Var1, Var2) %>%
  summarise_all(sum) %>%
  as.data.frame()
# Top-level chart data: one row per general finishing method.
#   name      - general method label taken from overall_stats$Var1
#   y         - the matching value from the second column of overall_stats
#               (percentage of fights)
#   drilldown - lower-cased label; this is the id that the drilldown series
#               are matched against
# tibble() replaces data_frame(), which is deprecated; dplyr re-exports it.
# local() keeps the helper mask out of the global environment.
df <- local({
  # Compute the row filter once instead of repeating it for every column
  is_charted <- overall_stats$Var1 %in%
    c("DEC", "KO/TKO", "SUB", "DQ", "CNC", "Overturned")
  tibble(
    name = overall_stats[is_charted, 1],
    y = overall_stats[is_charted, 2],
    drilldown = tolower(name)
  )
})
# Base column chart: one coloured column per general finishing method.
hc <- highchart() %>%
  hc_chart(type = "column") %>%
  hc_title(text = "Finishing Methods") %>%
  hc_xAxis(type = "category") %>%
  hc_legend(enabled = TRUE) %>%
  hc_plotOptions(
    series = list(
      # Was misspelled "boderWidth", so the option never reached Highcharts
      borderWidth = 0,
      dataLabels = list(enabled = TRUE)
    )
  ) %>%
  hc_add_series(
    data = df,
    name = "General Method",
    colorByPoint = TRUE
  )
#================================================================================================#
#CREATE THE DATAFRAMES THAT ARE GOING TO BE DRILLED DOWN TO
#================================================================================================#
# Build the drilldown data for one general finishing method.
#
# Args:
#   stats: Data frame whose first three columns are the general method (Var1),
#     the specific method (Var2) and the value to plot.
#   general_method: Label of the general method, e.g. "SUB". It is compared
#     with Var1 for equality and passed to str_detect() as a regular
#     expression against Var2.
#
# Returns:
#   A tibble with columns `method` (specific method label) and `value`.
build_drilldown_data <- function(stats, general_method) {
  # Keep rows of this general method whose specific label also contains it;
  # the mask is computed once and reused for both columns.
  keep <- stats$Var1 == general_method & str_detect(stats$Var2, general_method)
  # tibble() replaces data_frame(), which is deprecated; dplyr re-exports it.
  tibble(
    method = stats[keep, 2],
    value = stats[keep, 3]
  )
}

#Finding the subset of data relating to submissions
sub <- build_drilldown_data(specific_stats, "SUB")
#Finding the subset of data relating to kos/tkos
ko <- build_drilldown_data(specific_stats, "KO/TKO")
#Finding the subset of data relating to decisions
dec <- build_drilldown_data(specific_stats, "DEC")
#Finding the subset of data relating to dqs
dq <- build_drilldown_data(specific_stats, "DQ")
#Finding the subset of data relating to overturned
overturned <- build_drilldown_data(specific_stats, "Overturned")
#Finding the subset of data relating to cnc
cnc <- build_drilldown_data(specific_stats, "CNC")
#================================================================================================#
# Attach the drilldown series to the base chart
#================================================================================================#
# Each series id must equal the `drilldown` value of the column it expands.
# Map() pairs every id with its data frame; unname() keeps the result an
# unnamed list of series, exactly like a literal list(list(...), ...).
hc <- hc_drilldown(
  hc,
  allowPointDrilldown = TRUE,
  series = unname(Map(
    function(series_id, series_data) {
      list(id = series_id, data = list_parse2(series_data))
    },
    c("sub", "ko/tko", "dec", "dq", "overturned", "cnc"),
    list(sub, ko, dec, dq, overturned, cnc)
  ))
)
# Print the finished chart
hc
# rmarkdown::render(paste0(
# "C:/Users/rjobrien/Desktop/Personal Projects/Personal Website/",
# "2020-03-24-Interactive Highchart.Rmd"
# ))