--- title: "Moving Averages for Trend Analysis" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Moving Averages for Trend Analysis} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} editor_options: markdown: wrap: 80 --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 4.5, message = FALSE, warning = FALSE ) ``` ```{r setup} #| include: false library(trendseries) library(dplyr) library(ggplot2) library(tidyr) ``` # Moving Averages Moving averages are one of the most intuitive and widely-used tools for extracting trends from time series data. The basic idea is simple: average nearby observations to smooth out random fluctuations. ## Simple example ```{r} library(trendseries) library(dplyr) ``` To recreate the plots from this tutorial use `theme_series` below. ```{r} #| code-fold: true library(ggplot2) theme_series <- theme_minimal(paper = "#fefefe") + theme( legend.position = "bottom", panel.grid.minor = element_blank(), strip.background = element_rect(fill = "#2c3e50"), strip.text = element_text(color = "#fefefe"), axis.ticks.x = element_line(color = "gray40", linewidth = 0.5), axis.line.x = element_line(color = "gray40", linewidth = 0.5), axis.title.x = element_blank(), palette.colour.discrete = c( "#2c3e50", "#e74c3c", "#f39c12", "#1abc9c", "#9b59b6" ) ) ``` Let's start with vehicle production data. This series is packaged with `trendseries` and shows the amount of vehicles produced in Brazil per month. ```{r vehicles-plot} # Using the 'vehicles' dataset (ships with trendseries) vehicles_recent <- vehicles |> # Only use data after 2018 filter(date >= as.Date("2018-01-01")) ggplot(vehicles_recent, aes(date, production)) + geom_line(lwd = 0.7) + theme_series ``` Applying `augment_trends()` to the `vehicles` dataset creates a new column called `trend_ma`. ```{r ma-basic} # Apply a moving average trend vehicles_trend <- augment_trends( vehicles_recent, value_col = "production", methods = "ma", window = 12 ) vehicles_trend ``` We can visualize this trend using `ggplot2`. ```{r} ggplot(vehicles_trend, aes(date)) + geom_line(aes(y = production, color = "Original"), lwd = 0.6, alpha = 0.8) + geom_line(aes(y = trend_ma, color = "Trend: 12-month MA"), lwd = 0.7) + scale_x_date(date_breaks = "1 year", date_labels = "%Y") + scale_y_continuous(labels = scales::label_comma()) + labs( title = "Vehicle Production: Simple Moving Average", y = "Vehicles produced", color = NULL) + theme_series ``` The `augment_trends` function accepts a vector of values for `window`. Each window size produces its own column: `trend_ma_3`, `trend_ma_6`, `trend_ma_12`, `trend_ma_24`. ```{r window-comparison} # Apply different window sizes windows_to_test <- c(3, 6, 12, 24) vehicles_trend <- vehicles_recent |> augment_trends( value_col = "production", methods = "ma", window = windows_to_test ) vehicles_trend ``` For plots with more series, reshaping the data to a "tidy" long format is more convenient. ```{r} # Prepare for plotting plot_data <- vehicles_trend |> pivot_longer( cols = c(production, starts_with("trend_ma")), names_to = "method", values_to = "value" ) |> mutate( method = factor( method, levels = c("production", paste0("trend_ma_", c(3, 6, 12, 24))), labels = c( "Original", "3-month MA", "6-month MA", "12-month MA", "24-month MA" ) ) ) ``` ```{r} #| code-fold: true #| fig-height: 6 #| fig-width: 9 ggplot() + geom_line( data = vehicles_trend, aes(date, production), color = "#2c3e50", alpha = 0.5, lwd = 0.7, layout = "fixed" ) + geom_line( data = subset(plot_data, method != "Original"), aes(date, value, color = method), lwd = 0.7 ) + facet_wrap(vars(method)) + labs( title = "Effect of Window Size on Moving Average", subtitle = "Larger windows = smoother trends, but slower to react", x = NULL, y = NULL, color = NULL ) + theme_series ``` Notice how the 24-month MA is very smooth but lags behind changes, while the 3-month MA tracks the data closely but still shows some fluctuation. `augment_trends` supports different alignments via the `align` parameter. ```{r} vehicles_trend <- augment_trends( vehicles_recent, value_col = "production", methods = "ma", window = 12, align = "right" ) ``` ```{r} #| code-fold: true ggplot(vehicles_trend, aes(date)) + geom_line(aes(y = production, color = "Original"), lwd = 0.6, alpha = 0.8) + geom_line(aes(y = trend_ma, color = "Trend: 12-month MA"), lwd = 0.7) + scale_x_date(date_breaks = "1 year", date_labels = "%Y") + scale_y_continuous(labels = scales::label_comma()) + labs( x = NULL, y = NULL, title = "Vehicle Production: Simple Moving Average" ) + theme_series ``` ## Grouped series Working with multiple time series is straightforward. The `augment_trends` function accepts a `group_cols` argument to apply methods to each group independently. The data must be in "tidy" long format. Here we use the `transit_london_monthly` dataset, which aggregates ridership by Bus and Train (tube). ```{r} transit <- transit_london_monthly ggplot(transit, aes(date_month, journey_monthly, color = transit_mode)) + geom_line(lwd = 0.7) + scale_x_date(date_breaks = "1 year", date_labels = "%Y") + scale_y_continuous(labels = scales::label_comma(scale = 1e-6)) + labs( y = "Journeys (million)", title = "Transit ridership in London", subtitle = "Monthly journey counts averaged across London's transit systems", color = NULL ) + theme_series ``` ```{r} transit_trends <- augment_trends( transit, date_col = "date_month", value_col = "journey_monthly", group_cols = "transit_mode", methods = "ma", window = 12 ) ``` ```{r} #| code-fold: true ggplot(transit_trends, aes(date_month, color = transit_mode)) + geom_line(aes(y = journey_monthly), lwd = 0.7, alpha = 0.8) + geom_line(aes(y = trend_ma), lwd = 0.7) + scale_x_date(date_breaks = "1 year", date_labels = "%Y") + scale_y_continuous(labels = scales::label_comma(scale = 1e-6)) + labs( x = NULL, y = "Journeys (million)", title = "Grouped series trends", subtitle = "Monthly journey counts averaged across London's transit systems", color = NULL ) + theme_series ``` ## Related methods Other window-based smoothing methods are available in `trendseries`, selected via the `methods` parameter: 1. Moving median `methods = "median"`. 2. Weighted moving average `methods = "wma"`. 3. Exponentially weighted moving average `methods = "ewma"`. 4. Spencer moving average `methods = "spencer"`. 5. Henderson moving average `methods = "henderson"`. 5. Triangular moving average `methods = "triangular"`. These different methods can be combined in a single call to `augment_trends()`. ```{r} transit_trends <- augment_trends( transit, date_col = "date_month", value_col = "journey_monthly", group_cols = "transit_mode", methods = c("ma", "median", "spencer") ) ``` As with multiple windows, `augment_trends` creates a new column for each method. ```{r} glimpse(transit_trends) ``` Finally, we can visualize these different trends. ```{r} #| code-fold: true #| fig-width: 9 #| fig-height: 6 transit_trends_long <- transit_trends |> pivot_longer( cols = c(starts_with("trend_")), names_to = "method", names_repair = "unique" ) |> mutate( method = factor( method, levels = c( "trend_ma", "trend_median", "trend_spencer" ), labels = c( "12-month MA", "5-month median", "15-term Spencer" ) ) ) ggplot() + geom_line( data = transit_trends, aes(date_month, journey_monthly), lwd = 0.5, alpha = 0.8 ) + geom_line( data = transit_trends_long, aes(date_month, value, color = method), lwd = 0.7 ) + facet_wrap(vars(transit_mode, method), ncol = 3) + scale_x_date(date_breaks = "1 year", date_labels = "%Y") + scale_y_continuous(labels = scales::label_comma(scale = 1e-6)) + labs( x = NULL, y = "Journeys (million)", title = "Grouped series trends", subtitle = "Monthly journey counts averaged across London's transit systems", color = NULL ) + theme_series + theme( axis.text.x = element_text(angle = 90) ) ```