Sankey Plot with Plotly

sankey_ly(x, cat_cols, num_col, title = NULL)

Arguments

x

A data.frame input. It must have at least two categorical columns and one numeric column.

cat_cols

A vector of at least two categorical column names

num_col

A single numeric column name

title

Optional. A string to use as the plot title, passed to the plotly layout function

Details

A customized function that transforms the input data and plots a Sankey diagram with Plotly

Examples

data("sfo_passengers")

library(dplyr)
#> 
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union

d <- sfo_passengers %>%
  filter(activity_period >= 202201 & activity_period < 202301)

head(d)
#>   activity_period    operating_airline operating_airline_iata_code
#> 1          202212          EVA Airways                          BR
#> 2          202212          EVA Airways                          BR
#> 3          202212             Emirates                          EK
#> 4          202212             Emirates                          EK
#> 5          202212 Flair Airlines, Ltd.                          F8
#> 6          202212 Flair Airlines, Ltd.                          F8
#>      published_airline published_airline_iata_code   geo_summary  geo_region
#> 1          EVA Airways                          BR International        Asia
#> 2          EVA Airways                          BR International        Asia
#> 3             Emirates                          EK International Middle East
#> 4             Emirates                          EK International Middle East
#> 5 Flair Airlines, Ltd.                          F8 International      Canada
#> 6 Flair Airlines, Ltd.                          F8 International      Canada
#>   activity_type_code price_category_code      terminal boarding_area
#> 1           Deplaned               Other International             G
#> 2           Enplaned               Other International             G
#> 3           Deplaned               Other International             A
#> 4           Enplaned               Other International             A
#> 5           Deplaned            Low Fare International             A
#> 6           Enplaned            Low Fare International             A
#>   passenger_count
#> 1           12405
#> 2           15151
#> 3           13131
#> 4           14985
#> 5            2543
#> 6            2883

d %>%
  filter(operating_airline == "United Airlines") %>%
  mutate(terminal = ifelse(terminal == "International", "international", terminal)) %>%
  group_by(operating_airline,activity_type_code, geo_summary, geo_region,  terminal) %>%
  summarise(total = sum(passenger_count), .groups = "drop") %>%
  sankey_ly(cat_cols = c("operating_airline", "terminal","geo_summary", "geo_region", "activity_type_code"),
            num_col = "total",
            title = "Distribution of United Airlines Passengers at SFO During 2022")
#> Warning: `summarise_()` was deprecated in dplyr 0.7.0.
#> Please use `summarise()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#> Warning: `group_by_()` was deprecated in dplyr 0.7.0.
#> Please use `group_by()` instead.
#> See vignette('programming') for more help
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.