Let’s say we have a crazy noisy time series: perhaps we have oversampled and are picking up spurious variability. Smoothing to the rescue!
1000 observations sampled from a random normal distribution: mean = 20, sd = 2.
library(dplyr)   # for %>% and mutate()
library(ggplot2) # for the plots below

test <- data.frame(noisydat = rnorm(1000, 20, 2))
# for plotting, we need a sequence variable
dat <- test %>%
  mutate(myseq = seq_along(noisydat))
head(dat, n = 15)
noisydat | myseq |
---|---|
20.68 | 1 |
17.46 | 2 |
21.72 | 3 |
19.12 | 4 |
17.90 | 5 |
17.04 | 6 |
18.80 | 7 |
21.08 | 8 |
20.03 | 9 |
20.91 | 10 |
22.13 | 11 |
18.19 | 12 |
16.79 | 13 |
18.81 | 14 |
17.70 | 15 |
ggplot(dat, aes(x = myseq, y = noisydat)) +
  geom_line(color = "red", alpha = 0.7) +
  ylim(c(15, 25)) +
  jamie.theme
# Rolling mean on the noisy noisydat vector

This computes a simple moving average for the window size (k) you specify. That is, each observation is averaged with its neighbors within a window of k observations (centered on it by default), and that rolling mean replaces the original observation, reducing its unique variability.
smoothed <- dat %>%
  mutate(smoothdat = zoo::rollmean(noisydat, k = 5, fill = NA)) # use a centered moving window of 5
head(smoothed, n = 15)
noisydat | myseq | smoothdat |
---|---|---|
20.68 | 1 | NA |
17.46 | 2 | NA |
21.72 | 3 | 19.38 |
19.12 | 4 | 18.65 |
17.90 | 5 | 18.92 |
17.04 | 6 | 18.79 |
18.80 | 7 | 18.97 |
21.08 | 8 | 19.57 |
20.03 | 9 | 20.59 |
20.91 | 10 | 20.47 |
22.13 | 11 | 19.61 |
18.19 | 12 | 19.37 |
16.79 | 13 | 18.73 |
18.81 | 14 | 18.77 |
17.70 | 15 | 18.23 |
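As a quick sanity check (a minimal sketch, reusing the dat and smoothed objects from above), the first non-NA value shows what the centered window is doing: the smoothed value at position 3 is simply the mean of observations 1 through 5. If you want a trailing average instead, where each point is averaged with the k - 1 observations before it, rollmean() takes an align = "right" argument (the trailing / smoothdat_right names below are just for illustration).

mean(dat$noisydat[1:5])   # 19.38 for the draw shown above
smoothed$smoothdat[3]     # same value: the centered window of 5 around position 3

# trailing window: each point averaged with the 4 preceding observations
trailing <- dat %>%
  mutate(smoothdat_right = zoo::rollmean(noisydat, k = 5, fill = NA, align = "right"))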
ggplot(smoothed, aes(x = myseq, y = smoothdat)) +
  geom_line(color = "goldenrod2", alpha = 0.7) +
  ylim(c(15, 25)) +
  jamie.theme
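To see what the smoothing bought us (a sketch, reusing the smoothed data frame and jamie.theme from above), the raw and smoothed series can be drawn on the same plot:

ggplot(smoothed, aes(x = myseq)) +
  geom_line(aes(y = noisydat), color = "red", alpha = 0.3) +
  geom_line(aes(y = smoothdat), color = "goldenrod2", alpha = 0.9) +
  ylim(c(15, 25)) +
  jamie.theme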