Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

shawnsanto/scoutr

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

25 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

scoutr

Lifecycle: experimental | R build status

Overview

A complete and consistent set of functions for reading, manipulating, and visualizing Wyscout soccer data in R.

All public Wyscout data is available at https://figshare.com/collections/Soccer_match_event_dataset/4415000/2

Installation

The package is not available on CRAN; please install the development version from GitHub.

# install.packages("devtools")
devtools::install_github("shawnsanto/scoutr")

Usage

Data manipulation

library(scoutr)
library(dplyr)
library(ggplot2)

# read and preview some event data
path <- system.file("extdata", "events_england.json", package = "scoutr")
events <- fc_read_events(path)
#>   Step (1/3): Reading JSON data and converting to tibble...
#>   Step (2/3): Tidying tag variables...
#>   Step (3/3): Tidying event pitch locations...
#>   Happy scouting!

events %>%
  select(event_sec:end_y)
#> # A tibble: 1,768 x 5
#>   event_sec start_x start_y end_x end_y
#>       <dbl>   <int>   <int> <int> <int>
#> 1      2.76      49      49    31    78
#> 2      4.95      31      78    51    75
#> 3      6.54      51      75    35    71
#> 4      8.14      35      71    41    95
#> 5     10.3       41      95    72    88
#> # … with 1,763 more rows

# transform pitch locations and create sf object
events %>%
  select(event_sec:end_y) %>%
  fc_locations_transform(x = c("start_x", "end_x"), 
                         y = c("start_y", "end_y"),
                         dim = c(105, 70), units = "meters") %>% 
  fc_locations_link(start_loc = c("start_x", "start_y"), 
                    end_loc   = c("end_x", "end_y"))
#> Attributes added: 'units', 'pitch_dimensions'
#> Pitch dimensions: (105 X 70) meters
#> Simple feature collection with 1768 features and 5 fields
#> geometry type:  LINESTRING
#> dimension:      XY
#> bbox:           xmin: 0 ymin: 0 xmax: 105 ymax: 70
#> CRS:            NA
#> # A tibble: 1,768 x 6
#>   event_sec start_x start_y end_x end_y                 geometry
#>       <dbl>   <dbl>   <dbl> <dbl> <dbl>             <LINESTRING>
#> 1      2.76    51.4    34.3  32.6  54.6 (51.45 34.3, 32.55 54.6)
#> 2      4.95    32.6    54.6  53.6  52.5 (32.55 54.6, 53.55 52.5)
#> 3      6.54    53.6    52.5  36.8  49.7 (53.55 52.5, 36.75 49.7)
#> 4      8.14    36.8    49.7  43.0  66.5 (36.75 49.7, 43.05 66.5)
#> 5     10.3     43.0    66.5  75.6  61.6  (43.05 66.5, 75.6 61.6)
#> # … with 1,763 more rows

# define possessions
events %>% 
  select(match_id, event_name, team_id) %>% 
  fc_sequence_possession(event_var = "event_name", team_var = "team_id") %>% 
  print(n = 20)
#> # A tibble: 1,768 x 5
#>    match_id event_name team_id possession_id possession_seq
#>    <chr>    <chr>      <chr>           <dbl>          <int>
#>  1 2499719  Pass       1609                1              1
#>  2 2499719  Pass       1609                1              2
#>  3 2499719  Pass       1609                1              3
#>  4 2499719  Pass       1609                1              4
#>  5 2499719  Pass       1609                1              5
#>  6 2499719  Pass       1609                1              6
#>  7 2499719  Pass       1631                2              1
#>  8 2499719  Duel       1631                2              2
#>  9 2499719  Duel       1609                2              3
#> 10 2499719  Pass       1609                3              1
#> 11 2499719  Pass       1609                3              2
#> 12 2499719  Pass       1609                3              3
#> 13 2499719  Duel       1631                3              4
#> 14 2499719  Duel       1609                3              5
#> 15 2499719  Pass       1609                3              6
#> 16 2499719  Pass       1631                4              1
#> 17 2499719  Pass       1631                4              2
#> 18 2499719  Pass       1631                4              3
#> 19 2499719  Pass       1631                4              4
#> 20 2499719  Pass       1631                4              5
#> # … with 1,748 more rows

# compute velocities
events %>%
  select(-starts_with("tag_")) %>%
  fc_locations_transform(x = c("start_x", "end_x"), y = c("start_y", "end_y")) %>%
  fc_sequence_possession(event_var = "event_name", team_var = "team_id") %>%
  fc_velocity_event(start_loc = c("start_x", "start_y"), end_loc = c("end_x", "end_y")) %>%
  fc_locations_link(start_loc = c("start_x", "start_y"), end_loc = c("end_x", "end_y")) %>% 
  fc_velocity_polygon(metric = "east_west_velocity", shape = "square",
                      fcn = "median", na.rm = TRUE, size = 5)
#> Attributes added: 'units', 'pitch_dimensions'
#> Pitch dimensions: (105 X 70) meters
#> Simple feature collection with 294 features and 1 field
#> geometry type:  POLYGON
#> dimension:      XY
#> bbox:           xmin: 0 ymin: 0 xmax: 105 ymax: 70
#> CRS:            NA
#> # A tibble: 294 x 2
#>                           geometry median_east_west_velocity
#> *                        <POLYGON>                     <dbl>
#> 1      ((0 0, 5 0, 5 5, 0 5, 0 0))                      71.4
#> 2    ((5 0, 10 0, 10 5, 5 5, 5 0))                      73.3
#> 3 ((10 0, 15 0, 15 5, 10 5, 10 0))                      71.4
#> 4 ((15 0, 20 0, 20 5, 15 5, 15 0))                      24.8
#> 5 ((20 0, 25 0, 25 5, 20 5, 20 0))                      10.3
#> # … with 289 more rows

Data visualization

events %>% 
  fc_locations_transform(x = c("start_x", "end_x"), 
                         y = c("start_y", "end_y"),
                         dim = c(105, 70), units = "meters") %>% 
  filter(event_name == "Pass") %>% 
  ggplot(mapping = aes(x = start_x, y = start_y)) +
  fc_annotate_pitch(palette = "classic") +
  geom_point(color = "grey70") +
  fc_theme_classic()
#> Attributes added: 'units', 'pitch_dimensions'
#> Pitch dimensions: (105 X 70) meters


events %>% 
  filter(event_name %in% c("Pass", "Shot")) %>% 
  ggplot(mapping = aes(start_x, start_y)) +
  fc_annotate_pitch(dimensions = c(100, 100), palette = "bw", coord_flip = TRUE) +
  geom_point(aes(color = event_name), size = 2) +
  scale_color_manual(values = c("grey70", "red")) +
  labs(color = "Event") +
  coord_flip() +
  fc_theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 105 / 70)


events %>% 
  filter(event_name == "Shot") %>% 
  ggplot(mapping = aes(start_x, start_y)) +
  fc_annotate_pitch(dimensions = c(100, 100)) +
  fc_annotate_arrow(x = 50, y = -10) +
  geom_point(aes(color = team_id), size = 3) +
  labs(color = "Team ID", title = "Shots on goal") +
  fc_theme_gw() +
  theme(legend.position = "bottom")


Themes

References

Pappalardo, L., Cintia, P., Rossi, A. et al. A public data set of spatio-temporal match events in soccer competitions. Sci Data 6, 236 (2019). https://doi.org/10.1038/s41597-019-0247-7

About

Soccer data manipulation and visualization in R

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages