library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.0
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the player table. The file is semicolon-delimited, and read_csv2()
# parses "," as the decimal mark and "." as the grouping mark.
fifa <- read_csv2("FIFA-21-Complete.csv")
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 17981 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (4): name, nationality, position, team
## dbl (5): player_id, overall, age, hits, potential
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add each player's remaining growth (potential minus current overall rating)
# and split the "|"-separated `position` string into up to five columns,
# pos1..pos5, in the order the positions are listed. The original `position`
# column is kept.
fifa <- fifa %>%
  mutate(growth = potential - overall) %>%
  separate(
    position,
    into = paste0("pos", 1:5),
    sep = "\\|",
    remove = FALSE,
    # Most players list fewer than five positions; pad the trailing columns
    # with NA explicitly rather than through the warning-emitting default.
    fill = "right"
  )
# fifa$pos_f <- factor(fifa$pos1,levels = c("GK","LB","RB","CB","LWB","RWB","CDM","LM","RM","CM","CAM","LW","RW","CF","ST"))

# ordered_pos <- c("GK","LB","RB","CB","LWB","RWB","CDM","LM","RM","CM","CAM","LW","RW","CF","ST")

# Primary positions sorted by ascending median growth; used as the factor
# level order when faceting.
ordered_pos <- local({
  median_growth <- summarise(group_by(fifa, pos1), med = median(growth))
  # order() is stable and puts NA last, matching arrange(med).
  median_growth$pos1[order(median_growth$med)]
})

# Position codes that make up each broad role.
gk <- "GK"
defense <- c("LB", "RB", "CB", "LWB", "RWB")
midfield <- c("CDM", "LM", "RM", "CM", "CAM")
forward <- c("LW", "RW", "CF", "ST")

# Label every player by the role their `pos1` code belongs to. Codes found in
# none of the vectors above fall through every branch and are left as NA.
fifa <- mutate(
  fifa,
  broad_pos = case_when(
    pos1 %in% gk ~ "Goal Keeper",
    pos1 %in% defense ~ "Defense",
    pos1 %in% midfield ~ "Midfielder",
    pos1 %in% forward ~ "Forward"
  )
)
# Scatter of growth against age, coloured by broad position category, with one
# facet per `pos1` value. Facets follow the level order given by `ordered_pos`,
# and the vertical line in each facet marks the median age of that facet.
fifa %>%
  # Build the ordered facet factor once so the point layer, the line layer and
  # the facet specification all refer to the same column.
  mutate(pos1_ordered = factor(pos1, levels = ordered_pos)) %>%
  ggplot() +
  geom_point(aes(x = age, y = growth, color = broad_pos)) +
  geom_vline(
    # Layer data is derived from the plot data: one median age per facet.
    data = function(d) {
      d %>%
        group_by(pos1_ordered) %>%
        summarise(line = median(age), .groups = "drop")
    },
    mapping = aes(xintercept = line)
  ) +
  facet_wrap(~pos1_ordered, nrow = 5) +
  theme_minimal() +
  # theme() must follow theme_minimal(), which would otherwise reset these.
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Player's growth potential at current age",
    color = "position category"
  )