# Source code for ModelSelection

"""
Time Series Tools
========================
"""
from abc import ABCMeta

from sklearn.model_selection import BaseCrossValidator


class TimeSeriesCV(BaseCrossValidator, metaclass=ABCMeta):
    """
    This is a very naive cross validator for time series.

    It simply sorts the given index (default 0) and splits the sorted index
    into a train and a test index set according to the given ratios. The
    earliest samples form the train set and the latest ones form the test
    set, so there is always exactly one split.

    If both ratios are given, `train_ratio` takes precedence and `test_ratio`
    is recomputed as `1. - train_ratio`.

    :param test_ratio: (default .2) float between 0. and 1., the portion of
        test data
    :param train_ratio: (default `None` -> .8) float between 0. and 1., the
        portion of train data
    :param index: (default 0) the index of the column that corresponds to a
        time parameter in the data
    :raises ValueError: if the ratio that is used is negative or larger
        than 1.
    """

    def __init__(self, test_ratio=.2, train_ratio=None, index=0):
        self.index = index
        if train_ratio is not None:
            self._check_ratio(train_ratio, "train_ratio")
            self.train_ratio = train_ratio
            self.test_ratio = 1. - train_ratio
        elif test_ratio is not None:
            self._check_ratio(test_ratio, "test_ratio")
            self.train_ratio = 1. - test_ratio
            self.test_ratio = test_ratio
        else:
            # Both ratios were explicitly set to `None`: use the defaults.
            self.train_ratio = .8
            self.test_ratio = .2

    @staticmethod
    def _check_ratio(value, name):
        """
        Make sure that a ratio lies in the closed interval [0., 1.].

        :param value: the ratio to be checked
        :param name: the name of the parameter, used in the error message
        :raises ValueError: if `value` is negative or larger than 1.
        """
        if value > 1.:
            raise ValueError(
                "the value of `{}` should be smaller than 1.".format(name)
            )
        if value < 0.:
            # A negative ratio would make the complementary ratio exceed 1.
            raise ValueError(
                "the value of `{}` should not be negative.".format(name)
            )

    def get_n_splits(self, X=None, y=None, groups=None):
        """
        Returns the number of splitting iterations in the cross-validator

        :param X: Always ignored, exists for compatibility.
        :param y: Always ignored, exists for compatibility.
        :param groups: Always ignored, exists for compatibility.
        :return: Returns the number of splitting iterations in the
            cross-validator which is 1 for time series.
        """
        return 1

    def split(self, X, y=None, groups=None):
        """
        Generate indices to split data into training and test set.

        The rows are ordered by the values of column `index`; the first
        `int(train_ratio * n_samples)` rows of that ordering are the training
        set and the remaining rows are the test set. Rows with equal time
        values keep their original relative order.

        :param X: array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples and
            n_features is the number of features. NumPy arrays, nested
            sequences and objects exposing `.iloc` (e.g. pandas DataFrames)
            are accepted.
        :param y: Always ignored, exists for compatibility.
        :param groups: Always ignored, exists for compatibility.
        :return: yields a single pair `train`, `test`;
            `train` The training set indices for that split.
            `test` The testing set indices for that split.
        """
        import numpy as np

        # Only the time column is needed; `argsort` never mutates its input,
        # so there is no reason to copy the whole data set.
        if hasattr(X, "iloc"):
            time_values = np.asarray(X.iloc[:, self.index])
        else:
            time_values = np.asarray(X)[:, self.index]
        # A stable sort keeps the split deterministic when time values tie.
        sorted_index = time_values.argsort(kind="stable")
        cut = int(self.train_ratio * sorted_index.shape[0])
        yield sorted_index[:cut], sorted_index[cut:]