Source code for bekk.generate_data

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data generation
===============

"""

import numpy as np
import scipy.linalg as sl
import pandas as pd

from pandas_datareader import data
from skewstudent import SkewStudent

__all__ = ['simulate_bekk', 'download_data']


def simulate_bekk(param, nobs=1000, distr='normal', degf=10, lam=0):
    """Simulate data from a BEKK recursion.

    Standardized random shocks are drawn first, then the covariance
    recursion ``H[t] = CC' + A e[t-1]e[t-1]' A' + B H[t-1] B'`` is iterated
    and each shock vector is scaled by the lower Cholesky factor of ``H[t]``.

    Parameters
    ----------
    param : BEKKParams instance
        Attributes of this class hold parameter matrices. This function
        reads ``amat``, ``bmat``, ``cmat`` and calls ``get_uvar()``.
    nobs : int
        Number of observations to generate. Time series length
    distr : str
        Name of the distribution from which to generate innovations.
        Must be

            - 'normal'
            - 'student'
            - 'skewt'

    degf : int
        Degrees of freedom for Student or SkewStudent distributions
    lam : float
        Skewness parameter, only used when `distr` is 'skewt'.
        Must be between (-1, 1)

    Returns
    -------
    innov : (nobs, nstocks) array
        Multivariate innovation matrix. The first row is always zero:
        the recursion starts at the second observation.
    hvar : (nobs, nstocks, nstocks) array
        Covariance matrices used to scale the innovations.
        ``hvar[0]`` is the matrix returned by ``param.get_uvar()``.

    Raises
    ------
    ValueError
        If `distr` is not one of the supported distribution names.

    """
    nstocks = param.amat.shape[0]

    if distr == 'normal':
        # Normal innovations
        mean, cov = np.zeros(nstocks), np.eye(nstocks)
        error = np.random.multivariate_normal(mean, cov, nobs)
    elif distr == 'student':
        # Student innovations
        error = np.random.standard_t(degf, size=(nobs, nstocks))
    elif distr == 'skewt':
        # Skewed Student innovations
        error = SkewStudent(eta=degf, lam=lam).rvs(size=(nobs, nstocks))
    else:
        raise ValueError('Unknown distribution!')

    # Standardize innovations: every column gets sample mean 0 and
    # sample standard deviation 1, whatever the source distribution.
    error = (error - error.mean(0)) / error.std(0)

    hvar = np.empty((nobs, nstocks, nstocks))
    # NOTE(review): innov[0] is never overwritten, so the first simulated
    # observation is exactly zero. Kept as is; confirm this is intended.
    innov = np.zeros((nobs, nstocks))

    hvar[0] = param.get_uvar()
    # Loop invariants: parameter matrices and the constant term CC'.
    amat, bmat = param.amat, param.bmat
    intercept = param.cmat.dot(param.cmat.T)

    for i in range(1, nobs):
        # Outer product of the previous innovation with itself
        innov2 = np.outer(innov[i-1], innov[i-1])
        hvar[i] = intercept + amat.dot(innov2).dot(amat.T) \
            + bmat.dot(hvar[i-1]).dot(bmat.T)
        # Lower-triangular factor L with L L' = hvar[i]
        hvar12 = sl.cholesky(hvar[i], lower=True)
        innov[i] = hvar12.dot(error[i])

    return innov, hvar
def download_data(tickers=None, nobs=None,
                  start='2002-01-01', end='2015-12-31'):
    """Download stock market data and convert it to demeaned returns.

    Nothing is written to disk; the result is only returned.

    Parameters
    ----------
    tickers : list of str
        Tickers to download. At least one ticker is required.
    nobs : int, optional
        Number of observations in the time series. The last `nobs` rows
        are kept. If None, all available rows are returned.
    start : str
        First observation date
    end : str
        Last observation date

    Returns
    -------
    ret : DataFrame
        Log returns in percent, one column per ticker. Each column is
        demeaned over the full downloaded sample, i.e. before the
        truncation to the last `nobs` rows.

    Raises
    ------
    ValueError
        If `tickers` is None or empty.

    """
    # Fail early with a clear message instead of an opaque TypeError
    # (None) or a pandas concatenation error (empty list).
    tickers = [] if tickers is None else list(tickers)
    if not tickers:
        raise ValueError('At least one ticker is required!')

    colname = 'Adj Close'
    prices = []
    for tic in tickers:
        stock = data.DataReader(tic, 'yahoo', start, end)[colname]
        stock.name = tic
        prices.append(stock)
    # Inner join keeps only the dates present for every ticker
    prices = pd.concat(prices, axis=1, join='inner')

    # Log returns in percent; the first row has no predecessor and is dropped
    ret = (np.log(prices).diff() * 100).dropna()
    ret = ret - ret.mean()

    if nobs is not None:
        # tail() returns an empty frame for nobs == 0, whereas
        # iloc[-0:] would silently return every row.
        ret = ret.tail(nobs)
    return ret