You can send me your lab results at the end of this lab at agathe.guilloux@math.cnrs.fr, this will add up to 2 points on your final grade.
I’ll only accept a .html file (which results from clickling the Knit button) not a .Rmd file
You’ll need the following R packages: ggfortify
, astsa
, forecast
.
We’ll work with the “AirPassengers” dataset (available in basic R distribution).
knitr::opts_chunk$set(fig.height=5, fig.width=7)
library(ggfortify) # for nice plots
## Loading required package: ggplot2
library(astsa) # for some of the data
library(forecast) # time series R package
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
##
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
##
## gas
## The following object is masked from 'package:ggfortify':
##
## gglagplot
data(AirPassengers) # load the data
?AirPassengers # get a description
print(AirPassengers)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
class(AirPassengers) # check the type of this data
## [1] "ts"
?ts # get help to learn what is a "ts" object
methods(class = "ts")
## [1] [ [<- aggregate as.data.frame as.Date
## [6] as.list as.zoo as.zooreg autoplot cbind
## [11] coerce coredata coredata<- cycle diff
## [16] diffinv forecast fortify index initialize
## [21] is.regular kernapply lines Math Math2
## [26] monthplot na.approx na.fill na.omit na.StructTS
## [31] na.trim Ops plot print rollapply
## [36] rollmax rollmean rollmedian rollsum show
## [41] slotsFromS3 subset t time window
## [46] window<- xblocks
## see '?methods' for accessing help and source code
autoplot(AirPassengers)
If you want to change the size of the graphic
autoplot(AirPassengers)
start(AirPassengers)
## [1] 1949 1
end(AirPassengers)
## [1] 1960 12
frequency(AirPassengers)
## [1] 12
deltat(AirPassengers)
## [1] 0.08333333
summary(AirPassengers)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 104.0 180.0 265.5 280.3 360.5 622.0
cycle(AirPassengers)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 1 2 3 4 5 6 7 8 9 10 11 12
## 1950 1 2 3 4 5 6 7 8 9 10 11 12
## 1951 1 2 3 4 5 6 7 8 9 10 11 12
## 1952 1 2 3 4 5 6 7 8 9 10 11 12
## 1953 1 2 3 4 5 6 7 8 9 10 11 12
## 1954 1 2 3 4 5 6 7 8 9 10 11 12
## 1955 1 2 3 4 5 6 7 8 9 10 11 12
## 1956 1 2 3 4 5 6 7 8 9 10 11 12
## 1957 1 2 3 4 5 6 7 8 9 10 11 12
## 1958 1 2 3 4 5 6 7 8 9 10 11 12
## 1959 1 2 3 4 5 6 7 8 9 10 11 12
## 1960 1 2 3 4 5 6 7 8 9 10 11 12
boxplot(AirPassengers~cycle(AirPassengers))
ggplot(data=AirPassengers, aes(cycle(AirPassengers),AirPassengers)) + geom_boxplot(aes(fill = factor(cycle(AirPassengers)))) #nicer boxplots !!
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
acf = acf(AirPassengers,plot=FALSE)
pacf = pacf(AirPassengers, plot =FALSE)
autoplot(acf)
autoplot(pacf)
5. Creating a
ts
object
Now suppose that you have a raw series as.numeric(AirPassengers)
and the information of the first time it had been measured and the frequency (start = c(1949,1)
, frequency=12
). Can you recreate a ts
object ?
attributes(AirPassengers)
## $tsp
## [1] 1949.000 1960.917 12.000
##
## $class
## [1] "ts"
raw_data = as.numeric(AirPassengers)
byhand_airpassengers = ts(raw_data, frequency=12, start=c(1949, 1))
attributes(byhand_airpassengers)
## $tsp
## [1] 1949.000 1960.917 12.000
##
## $class
## [1] "ts"
Simulez l’observation de 1200 variables aléatoires extraites d’un bruit blanc BB(0,1). Dans la suite, on note BB la série de données ainsi obtenue. Représentez-la graphiquement ainsi que ses ACF et PACF.
BB <- rnorm(1200, 0, 1)
plot(BB, type = "l")
Choose and add to \(BB\) a polynomial trend \(T\) and a seasonality \(S\) with amplitudes reasonable compared to the noise variance. Create a time series \(ST\) with the ts
function, assuming that its observation is monthly, and begins on January 1900.
Plot the series \(BB\),\(S\),\(T\),\(TS\) and compare with the results of slt
function. Comment
Choisissez, créez et ajoutez à BB une tendance polynomiale T et une saisonnalité S d’amplitudes respectives raisonnables par rapport à la variabilité du bruit blanc d’origine. Affectez à l’objet créé, qu’on notera ST, la classe ts en supposant qu’il correspond à un phénomène observé une fois par mois de 1900 à 1999.
Représentez graphiquement : la série BB ; La tendance T ; La saisonnalité S ; La série ST. Comparer avec les résultats de la function stl
. Commentez.
T = (1:1200-600)^3/10^8
#plot(T, type = "l")
S = sin(0.1*pi+2*pi/12*(1:1200))
#plot(S[1:100], type = "l")
ST = ts(BB + T + S, freq = 12, start = c(1900, 1), end = c(1999, 12))
BB = ts(BB, freq = 12, start = c(1900,1), end = c(1999,12))
T = ts(T, freq = 12, start = c(1900,1), end = c(1999,12))
S = ts(S, freq = 12, start = c(1900,1), end = c(1999,12))
par(mfrow = c(4,1))
plot(BB, type = "l")
plot(T, type = "l")
plot(S, type = "l")
plot(ST, type = "l")
autoplot(stl(ST,s.window = "periodic"))
monthplot
function ? What information can you gather from this plot ?Que représente la fonction monthplot ? Quelles informations en tirez-vous ?
monthplot(ST)
n = 500
T1 = arima.sim(n=n,list(ma = c(3, 1)),sd = sqrt(0.1796))
T2 = arima.sim(n=n,list( ar = c(-0.9,-0.5)),sd = sqrt(0.1796))
T3 = cumsum(rnorm(n) + 0.2)
T4 = 2*cos(2*pi*1:n/50 + .6*pi) + rnorm(n,0,0.5)
par(mfrow = c(1,2))
acf(T1)
pacf(T1)
par(mfrow = c(1,2))
acf(T2)
pacf(T2)
par(mfrow = c(1,2))
acf(T3)
pacf(T3)
par(mfrow = c(1,2))
acf(T4)
pacf(T4)
We consider a stationary AR(2) model \[X_t = \phi_1 X_{t-1} + \phi_2 X_{t-2} + \omega_t\] where \(\omega\) is a Gaussian white noise with variance \(1\). We denote by \(\xi_1\) and \(\xi_2\) the roots of \(1-\phi_1 z - \phi_2 z^2\). In this case the autocorrelation function verifies \[\rho(h) = \phi_1 \rho(h-1) + \phi_2 \rho(h-2)\] with initial conditions \(\rho(0)=1\) and \(\rho(1) = \phi_1/(1-\phi_2)\) and is then given by \[\rho(h) =\frac{(\xi_2^2 - 1) \xi_1^{1-h} - (\xi_1^2 - 1) \xi_2^{1-h}}{(\xi_1 \xi_2 + 1)(\xi_2 - \xi_1)} \] when \(\xi_1 \neq \xi_2\). We illustrate in this exercise the different behaviors of \(\gamma\).
Here is a code to generate a stem plot that you’ll nedd for plotting the true ACFs
stem <- function(x,y,pch=16,linecol=1,clinecol=1,...){
if (missing(y)){
y = x
x = 1:length(x) }
plot(x,y,pch=pch,...)
for (i in 1:length(x)){
lines(c(x[i],x[i]), c(0,y[i]),col=linecol)
}
lines(c(x[1]-2,x[length(x)]+2), c(0,0),col=clinecol)
}
#An example
x <- seq(0, 2*pi, by = 0.2)
stem(x,sin(x), main = 'Default style')
roots2ar
that calculates the coefficients \(\phi_1\) and \(\phi_2\) from the roots \(\xi_1\) and \(\xi_2\).roots2ar = function(xi1,xi2){
phi1 = 1/xi1 + 1/xi2
phi2 = - 1/(xi1 * xi2)
return(list(phi1=phi1,phi2=phi2))
}
true_acf
that calculates the acf at point h from \(\xi_1\) and \(\xi_2\)true_acf = function(xi1, xi2,h){
num = (xi2^2 - 1) * xi1^(1-h) - (xi1^2 - 1) * xi2^(1-h)
den = (xi1 * xi2 + 1) * (xi2 - xi1)
return(num /den )
}
Choose coefficients \(\phi_1\) and \(\phi_2\) such that the process is non-causal.
Try to simulate \(n=50\) observations from the arima.sim
function. Comment the comment.
Simulate \(n=50\) observations from the filter
function. You need to simulated \(n+20\) observations and keep only the last \(50\) to overcome the initialization problem.
Represent the series and its sample acf. What happens ?
xi1=0.65
xi2=0.6
phis = roots2ar(xi1,xi2)
#arima.sim(n=n,list( ar = c(phis$phi1,phis$phi2)),sd = sqrt(1))
w = rnorm(70)
ar2_noncausal = filter(w , c(phis$phi1,phis$phi2), method ="recursive")
autoplot(ts(ar2_noncausal[21:70] ))
par(mfrow=c(1,2))
autoplot(acf(ar2_noncausal,plot=FALSE))
stem(1:20,true_acf(xi1,xi2,c(1:20)))
Choose two pairs of complex conjugate roots one with moduli near \(1\) but such that the process is causal the other with moduli farther to \(1\) .
In each case, compute \(\phi_1\) and \(\phi_2\) and simulate 500 observations of the process and draw a plot.(Caution! If you chose two conjugate complex roots and compute \(\phi_1\) and \(\phi_2\), they can be complex with small imaginary part, due to numerical imprecision. In that case take the real part of it.)
++ Plot the true and sample autocorrelation functions and compare the plots.
xi1 = 1.005+0.1 * 1i
xi2 = 1.005-0.1 * 1i
Mod(xi1)
## [1] 1.009963
phis = roots2ar(xi1 ,xi2)
phis
## $phi1
## [1] 1.97054+0i
##
## $phi2
## [1] -0.9803681+0i
ar_causal_complex_conjugate = arima.sim(n=500,list( ar = c(phis$phi1,phis$phi2)),sd = sqrt(1))
autoplot(ar_causal_complex_conjugate)
par(mfrow=c(1,2))
plot(acf(ar2_noncausal,plot=FALSE))
stem(1:50,true_acf(xi1,xi2,c(1:50)))
xi1 = (2*(1+ 1i /sqrt(3))/3)^(-1)
xi2 = ( 2*(1 - 1i /sqrt(3))/3)^(-1)
phis = roots2ar(xi1 ,xi2)
phis
## $phi1
## [1] 1.333333+0i
##
## $phi2
## [1] -0.5925926+0i
ar_causal_complex_conjugate_2 = arima.sim(n=500,list( ar = c(phis$phi1,phis$phi2)),sd = sqrt(1))
autoplot(ar_causal_complex_conjugate_2)
par(mfrow=c(1,2))
acf(ar_causal_complex_conjugate_2)
stem(1:50,true_acf(xi1,xi2,c(1:50)))
Make plots of simulations, and autocorrelation functions as in question 3.
Compare the autocorrelation plots. What is the main qualitative difference?
xi1=1.005
xi2=1.001
phis = roots2ar(xi1 ,xi2)
phis
## $phi1
## [1] 1.994026
##
## $phi2
## [1] -0.9940308
ar_causal_real_1 = arima.sim(n=500,list( ar = c(phis$phi1,phis$phi2)),sd = sqrt(1))
par(mfrow=c(1,2))
autoplot(ar_causal_real_1)
par(mfrow=c(1,2))
acf(ar_causal_real_1)
stem(1:50,true_acf(xi1,xi2,c(1:50)))
xi1 = 2
xi2 = 5
phis = roots2ar(xi1 , xi2)
phis
## $phi1
## [1] 0.7
##
## $phi2
## [1] -0.1
ar_causal_real_2 = arima.sim(n=500,list( ar = c(phis$phi1,phis$phi2)),sd = sqrt(1))
par(mfrow=c(1,2))
autoplot(ar_causal_real_2)
par(mfrow=c(1,2))
acf(ar_causal_real_2)
stem(1:50,true_acf(xi1,xi2,c(1:50)))
We now work with the lh
dataset. On s’intéresse aux données lh
(voir leur description).
X <- lh
autoplot(X)
acf_lh = acf(lh , plot=FALSE)
pacf_lh = pacf(lh , plot=FALSE)
autoplot(acf_lh)
autoplot(pacf_lh)
We choose an AR(1) model \[X_t = \phi X_{t-1} + \omega_t\] for this series (cf Lab2 for a justification).
Estimate the mean.
Propose an estimation of \(\phi\) and \(\sigma^2\) based of the results of the acf
and pacf
functions (you’ll need the option type="correlation"
in the acf
function).
Compare your results with arima(lh, order = c(1, 0, 0))
. Why, in your opinion, is there a (slight) difference ?
mean(lh)
## [1] 2.4
hat_phi = pacf_lh$acf[1]
hat_gamma0 = acf(lh,type="covariance",plot=FALSE)$acf[1]
hat_sigma2 = hat_gamma0 *(1 - hat_phi^2)
print(c(hat_phi, hat_sigma2))
## [1] 0.5755245 0.1992382
arima(lh, order = c(1, 0, 0))
##
## Call:
## arima(x = lh, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.5739 2.4133
## s.e. 0.1161 0.1466
##
## sigma^2 estimated as 0.1975: log likelihood = -29.38, aic = 64.76
Using the prediction equations, the fact that, for an AR(1), \(\gamma(h)= \phi \gamma(h-1)\), and your estimations of \(\gamma(0)\) and \(\phi\)
compute the matrix \(\Gamma_n\) (use the toeplitz
function) and its inverse (function solve
)
and the vectors \(\mathbf{\gamma}_n^{(m)} = \big(\gamma(m), \ldots, \gamma(m+n-1)\big)^\top\) for \(m=1\) to 12
Propose a prediction for the next 12 values of the series (don’t forget to substract the mean)
Build prediction intervals.
Compare your results with the predict
method applied to the arima
function.
length(lh)
## [1] 48
Gamma_n = toeplitz(hat_gamma0 * exp((0:47)*log(hat_phi)))
Gamma_n_inv = solve(Gamma_n)
gamma_vectors = hat_gamma0 * exp((1:48)*log(hat_phi))
phis = Gamma_n_inv %*% gamma_vectors
predictions = exp((0:12) * log(hat_phi))*( t(phis) %*% rev((lh-mean(lh))))
print(predictions + 2.4)
## [1] 2.687762 2.565614 2.495315 2.454856 2.431571 2.418170 2.410457
## [8] 2.406018 2.403464 2.401993 2.401147 2.400660 2.400380
predict(arima(lh,c(1,0,0)),n.ahead=10)$pred
## Time Series:
## Start = 49
## End = 58
## Frequency = 1
## [1] 2.692626 2.573609 2.505301 2.466097 2.443597 2.430683 2.423271
## [8] 2.419018 2.416576 2.415175
We consider in this exercise the varve
dataset available in the astsa
package.
?varve
autoplot(varve)
autoplot(acf(varve,plot=FALSE))
autoplot(pacf(varve,plot=FALSE))
autoplot(log(varve))
autoplot(acf(log(varve),plot=FALSE))
autoplot(pacf(log(varve),plot=FALSE))
autoplot(diff(log(varve)))
autoplot(acf(diff(log(varve)),plot=FALSE))
autoplot(pacf(diff(log(varve)),plot=FALSE))
Estimations in the MA(1) \(X_t = \omega_t + \theta \omega_{t-1}\).
Estimate \(\theta\) (you can use the polyroot
function). Choose the solution that leads to an invertible MA.
and \(\sigma^2 = \text{Var}(\omega_t)\) (don’t forget the option type = "covariance"
of the acf
function)
Compare your estimations with the result of arima(lh, order = c(0, 0, 1))
rho_1 = acf(diff(log(varve)),plot=F)$acf[2]
hat_theta = Re(polyroot(c(rho_1,-1,rho_1))[1])
theta_sigma2 = acf(diff(log(varve)),type="covariance",plot=F)$acf[2]
hat_sigma2 = theta_sigma2 / hat_theta
print(c(hat_theta,hat_sigma2))
## [1] -0.4946886 0.2664769
arima(diff(log(varve)), order = c(0, 0, 1))
##
## Call:
## arima(x = diff(log(varve)), order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## -0.7710 -0.0013
## s.e. 0.0341 0.0044
##
## sigma^2 estimated as 0.2353: log likelihood = -440.68, aic = 887.36
The innovation algorithm for a MA(1) model
Code the innovation algorithm and deduce a prediction and a prediction interval for the next value.
Compare your results with the predict
method applied to the arima
function.
Admit that \(X_{n+m}^n = \theta_{n+m-1,1} X_{n}\) where the \(\theta_{n+m-1,1}\) are determined by the same algorithm. Deduce predictions and their errors for the next \(10\) values of the varve series itself.
Compare with the result of the predict
method applied to the arima
function.
n = length(diff(log(varve)))
predictions = rep(0,n+1)
predictions_errors = rep(0,n+1)
thetas_n1 = rep(0,n+1)
predictions_errors[1] = acf(diff(log(varve)),type="covariance",plot=F)$acf[1]
gamma_1 = acf(diff(log(varve)),type="covariance",plot=F)$acf[2]
for ( m in 1:n){
thetas_n1[m+1] = predictions_errors[m]^(-1) * gamma_1
predictions_errors[m+1] = ((1+ hat_theta^2) - predictions_errors[m]^(-1) * gamma_1^2)*hat_sigma2
predictions[m+1] = thetas_n1[m+1] * diff(log(varve))[m]
}
ts.plot( ts(predictions,start = 2, end = 635) , diff(log(varve)) , col = c("black","red"))
M=10
n = length(diff(log(varve)))
predictions = rep(0,n+M)
predictions_errors = rep(0,n+M)
thetas_n1 = rep(0,n+1)
predictions_errors[1] = acf(diff(log(varve)),type="covariance",plot=F)$acf[1]
gamma_1 = acf(diff(log(varve)),type="covariance",plot=F)$acf[2]
for ( m in 1:n){
thetas_n1[m+1] = predictions_errors[m]^(-1) * gamma_1
predictions_errors[m+1] = ((1+ hat_theta^2) - predictions_errors[m]^(-1) * gamma_1^2)*hat_sigma2
predictions[m+1] = thetas_n1[m+1] * diff(log(varve))[m]
}
for ( m in (n+1):M){
thetas_n1[m+1] = predictions_errors[m]^(-1) * gamma_1
predictions_errors[m+1] = ((1+ hat_theta^2) - predictions_errors[m]^(-1) * gamma_1^2)*hat_sigma2
}
Comments on R, RStudio versions and R notebook
I’m working on RStudio Version 1.0.136 with a R version 3.3.1.
This is an R Markdown document. When you execute code within the notebook, the results appear beneath the code.
You can execute the chunks by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Alt+Enter or Cmd+Shift+Enter.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I or Cmd+Option+I.
When you click the Knit button a html document will be generated that includes both content as well as the output of any embedded R code chunks within the document.
Check here http://rmarkdown.rstudio.com/authoring_basics.html for authoring basics.
If you experience trouble with the encoding: go the file menu and choose reopen with encoding, then choose UTF-8.