Source code for xpandas.transformers.series_transformers.series_transformer

from functools import partial

import pandas as pd
from tsfresh.feature_extraction.extraction import _do_extraction_on_chunk
from tsfresh.feature_extraction.settings import ComprehensiveFCParameters

from ..transformer import XSeriesTransformer


[docs]class TimeSeriesTransformer(XSeriesTransformer): ''' Extract common features 'mean', 'std', 'max', 'min', 'median', 'quantile_25', 'quantile_75', 'quantile_90', 'quantile_95' from pandas.Series. Transform XSeries to XDataFrame. ''' FEATURES = [ 'mean', 'std', 'max', 'min', 'median', 'quantile_25', 'quantile_75', 'quantile_90', 'quantile_95' ] def __init__(self, features=None, **kwargs): ''' :param features: list of features from FEATURES property ''' accepted_types = [ pd.Series ] if features is None: features = self.FEATURES else: for f in features: if f not in self.FEATURES: raise ValueError('Unrecognized feature {}. Available features {}'.format(f, self.FEATURES)) def series_transform(series): transformed_series = {} for f in features: if f.startswith('quantile_'): quant_rate = int(f.split('_')[1]) / 100. transformed_series[f] = series.quantile(quant_rate) else: method_to_call = getattr(series, f) result = method_to_call() transformed_series[f] = result return transformed_series super(TimeSeriesTransformer, self).__init__(data_types=accepted_types, transform_function=series_transform)
[docs]class TimeSeriesWindowTransformer(XSeriesTransformer): ''' Calculate rolling mean over XSeries of pandas.Series. ''' def __init__(self, windows_size=3, **kwargs): ''' :param windows_size: size of window for rolling mean ''' accepted_types = [ pd.Series ] self.windows_size = windows_size def series_transform(series, **params): return series.rolling(window=self.windows_size).mean().dropna() super(TimeSeriesWindowTransformer, self).__init__(data_types=accepted_types, transform_function=series_transform)
[docs]class MeanSeriesTransformer(XSeriesTransformer): ''' Example transformer ''' def __init__(self, **kwargs): self.total_mean = None def mean_minus_mean_function(s, total_mean=None): if total_mean is None: total_mean = self.total_mean return s.mean() - total_mean accepted_types = [ pd.Series ] super(MeanSeriesTransformer, self).__init__(data_types=accepted_types, transform_function=mean_minus_mean_function)
[docs] def fit(self, X, y=None, **kwargs): super(MeanSeriesTransformer, self).fit(X, **kwargs) sum_and_size = X.apply(lambda s: (s.sum(), len(s))) sum_total = sum([x[0] for x in sum_and_size]) total_size = sum([x[1] for x in sum_and_size]) self.total_mean = sum_total / total_size return self
[docs]class TsFreshSeriesTransformer(XSeriesTransformer): ''' Performs transformation with tsfresh http://tsfresh.readthedocs.io/en/latest/ package over XSeries of pandas.Series. ''' def __init__(self, **kwargs): accepted_types = [ pd.Series ] default_fc_parameters = ComprehensiveFCParameters() extraction_function = partial(_do_extraction_on_chunk, default_fc_parameters=default_fc_parameters, kind_to_fc_parameters=None) def series_transform(series): series_name = series.name if series_name is None: series_name = self.name input_series = ( 1, series_name, series ) extracted_data = extraction_function(input_series) extracted_data_flat = { x['variable']: x['value'] for x in extracted_data } return extracted_data_flat super(TsFreshSeriesTransformer, self).__init__(data_types=accepted_types, columns=None, transform_function=series_transform)
[docs] def transform(self, X): self.name = X.name return super(TsFreshSeriesTransformer, self).transform(X)