# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: MIT

from dataclasses import dataclass, field
from enum import Enum, auto, Flag
from typing import List, Callable, Union, Dict, Type
from abc import abstractmethod, ABC

import numpy as np
import pandas as pd
import re

from mpp.core.metric_computer import MetricComputer, _SliceMetricComputer, CompiledMetric
from mpp.core.normalizer import Normalizer
from mpp.core.types import RawDataFrameColumns as rdc, RawDataFrame, SummaryViewDataFrameColumns as svdc, \
    MetricDefinition, EventInfoDataFrame, EventInfoDataFrameColumns as eidc, RetireLatencyCountDataFrame, \
    RetireLatencyCountDataFrameColumn as rlcdc
from mpp.core.devices import Device
from mpp.core.internals.types import StatisticsDataFrame, StatisticsDataFrameColumns as sdf

# Type alias: either None or a callable that takes a data frame and returns a list of strings
# (presumably the new column names -- confirm against callers)
RenameColumnsCallable = Union[None, Callable[[pd.DataFrame], List[str]]]

# Matches names that contain "STATIC:" followed later by "scope=", or that contain "REG_STATIC_".
# Used in this module to pick out static MSR event names.
STATIC_MSR_RE = re.compile(r'(?:STATIC:.*scope=(?:.*)|REG_STATIC_)')


class ViewType(Flag):
    """
    Supported view types. Can be combined using bitwise OR, e.g. SUMMARY | DETAILS

    Note: ALL is defined as SUMMARY | DETAILS only, so it does not include TPS_SUMMARY.
    """
    SUMMARY = auto()
    DETAILS = auto()
    TPS_SUMMARY = auto()
    ALL = SUMMARY | DETAILS


class ViewAggregationLevel(Enum):
    """
    Supported view aggregation levels

    Each member selects the granularity at which view data is aggregated. The member values are
    generated with auto() and carry no meaning beyond identity.
    """
    SYSTEM = auto()
    SOCKET = auto()
    CORE = auto()
    THREAD = auto()
    UNCORE = auto()


@dataclass(frozen=True)
class ViewAttributes:
    """
    Immutable set of attributes describing a single view.

    Instances are frozen; use `clone` to derive a modified copy.
    """
    view_name: str  # unique identifier of the view
    view_type: ViewType  # kind of view (Summary, Details, ...); a Flag, so values may be combined
    aggregation_level: ViewAggregationLevel  # level at which data is aggregated (System, Socket, Core, ...)
    device: Union[Device, None]  # device for this view # TODO: this may be required (can't be None)
    show_modules: bool  # whether module information is shown for this view
    metric_computer: Union[MetricComputer, None]  # metric computer assigned to the view, if any
    normalizer: Union[Normalizer, None]  # normalizer assigned to the view, if any
    required_events: Union[EventInfoDataFrame, None]  # events that must appear in the view data
    percentile: Union[int, None] = None  # percentile to compute for system summary; None means no percentile
    tps: Union[float, None] = None  # transactions per second, used by tps views

    def clone(self, update: Dict = None):
        """
        Build a new ViewAttributes object from this one, optionally overriding some attributes

        :param update: attribute names mapped to the values the copy should get instead of the current ones.
                       An empty or missing dict yields a plain copy.

        :return: a new ViewAttributes object; this object is left untouched
        """
        attribute_values = dict(self.__dict__)
        attribute_values.update(update or {})
        return ViewAttributes(**attribute_values)


@dataclass
class ViewData:
    """
    Bundles the data generated for a view together with the attributes of that view.
    """
    attributes: ViewAttributes  # attributes of the view this data belongs to
    data: pd.DataFrame  # the view's data
    # retire latency counts that accompany the data; a new empty data frame when not supplied
    retire_latency_counts: pd.DataFrame = field(default_factory=pd.DataFrame)


class ViewCollection:
    """
    Stores information about the views to generate
    """

    def __init__(self):
        """
        Initializes an empty view collection.
        """
        self.__view_configurations: List[ViewAttributes] = []

    @property
    def views(self) -> List[ViewAttributes]:
        """
        :return: a shallow copy of the stored view configurations, so callers cannot alter the
                 internal list by mutating the returned one
        """
        return self.__view_configurations.copy()

    def append_views(self, views: List[ViewAttributes]) -> None:
        """
        Appends a list of ViewAttributes to the stored view configurations.

        No duplicate-name check is performed here (unlike `add_view`).

        :param views: the ViewAttributes objects to append
        """
        self.__view_configurations.extend(views)

    def remove_views(self, view_name_filter) -> None:
        """
        Remove views from the collection by name.

        The match is a substring match: every view whose name *contains* `view_name_filter`
        is removed, not only a view whose name equals it.

        :param view_name_filter: text to look for in the view names
        """
        self.__view_configurations = [view for view in self.__view_configurations
                                      if view_name_filter not in view.view_name]

    def add_view(self,
                 view_name: str,
                 view_type: ViewType,
                 aggregation_level: ViewAggregationLevel,
                 device: Device,
                 show_modules: bool,
                 metric_computer: MetricComputer = None,
                 normalizer: Normalizer = None,
                 required_events: EventInfoDataFrame = None,
                 percentile: Union[int, None] = None) -> None:
        """
        Add a view configuration

        :param view_name: a name that uniquely identify this view
        :param view_type: the view type to add, e.g. Summary, Details. You can specify multiple types
                          (e.g. Summary and Details) by combining types using bitwise OR.
        :param aggregation_level: the level at which to aggregate data, e.g. System, Socket, Core...
        :param device: device to be filtered for this view
        :param show_modules: include module information if True
        :param metric_computer: the metric computer to use for the specified view
        :param normalizer: the normalizer to use for the specified view
        :param required_events: Events required to generate the specified view
        :param percentile: percentile to store in the view attributes, or None for no percentile

        :raises ValueError: if a view named `view_name` already exists in the collection
        """
        if any(config.view_name == view_name for config in self.__view_configurations):
            raise ValueError(f'a view with the name "{view_name}" already exists. '
                             f'Duplicate view names not allowed')

        # Drop events belonging to devices that the view's device excludes
        if device is not None and required_events is not None:
            required_events = self._filter_core_type_events(required_events, device)

        view_attr = ViewAttributes(view_name, view_type, aggregation_level, device, show_modules, metric_computer,
                                   normalizer, required_events, percentile)
        self.__view_configurations.append(view_attr)

    @staticmethod
    def _filter_core_type_events(event_info: EventInfoDataFrame, device: Device) -> EventInfoDataFrame:
        """
        Remove the events whose device is listed in `device.exclusions`.

        :param event_info: event information to filter
        :param device: device whose `exclusions` determine which rows are dropped

        :return: the filtered event information, or `event_info` unchanged when it has no device column
        """
        # TODO: change function name? see if this applies to all devices
        if eidc.DEVICE in event_info.columns:
            excluded = event_info[eidc.DEVICE].isin(device.exclusions)
            event_info = event_info.loc[~excluded]
        return event_info


class ViewInitializer:
    """
    Populates a ViewCollection with one Summary view (and, where applicable, one Details view) for every
    aggregation level of every device it was given.
    """

    def __init__(self, devices, api_args: 'ApiArgs'):
        """
        :param devices: iterable of devices to create views for
        :param api_args: source of the system information, event information, output prefix,
                         percentile and detail-view settings used for the views
        """
        system_information = api_args.system_information
        self.__devices = devices
        self.__show_modules: bool = system_information.has_modules
        self.__normalizer: Normalizer = Normalizer(system_information.ref_tsc)
        self.__event_info: EventInfoDataFrame = api_args.event_info
        self.__include_detail_views: bool = not api_args.no_detail_views
        self.__system_details_view_added: bool = False
        self.__view_collection: ViewCollection = ViewCollection()
        self.__output_file_prefix = api_args.output_prefix
        self._percentile = api_args.percentile

    @property
    def view_collection(self):
        """
        :return: the view collection being populated
        """
        return self.__view_collection

    def add_views(self) -> ViewCollection:
        """
        Add the views for all devices and all of their aggregation levels

        :return: the populated view collection
        """
        for device in self.__devices:
            for agg_level in device.aggregation_levels:
                self.add_view(device, agg_level, ViewType.SUMMARY)
                self._conditional_add_detail_view(device, agg_level)
        return self.__view_collection

    def add_view(self, device, agg_level: ViewAggregationLevel, view_type: ViewType):
        """
        Add a single view to the collection. Uncore views of devices with an empty label are not added.

        :param device: device the view is created for
        :param agg_level: aggregation level of the view
        :param view_type: type of the view
        """
        name = self._get_name(device, agg_level, view_type)
        if not self.__is_unlabeled_uncore_device(agg_level, device):
            self.__view_collection.add_view(view_name=name,
                                            view_type=view_type,
                                            aggregation_level=agg_level,
                                            device=device,
                                            show_modules=self.__show_modules,
                                            metric_computer=device.metric_computer,
                                            normalizer=self.__normalizer,
                                            required_events=self.__event_info,
                                            percentile=self._percentile)

    def _conditional_add_detail_view(self, device, agg_level):
        # Detail views are added for every level when enabled; the system level always gets one
        if self.__include_detail_views or agg_level == ViewAggregationLevel.SYSTEM:
            self.add_view(device, agg_level, ViewType.DETAILS)

    def _get_name(self, device, agg_level: ViewAggregationLevel, view_type: ViewType):
        # Fill the "{type}" placeholder of the name template with the lower-cased view type name
        template = self._get_name_template(device, agg_level)
        return template.format(type=view_type.name.lower())

    def _get_name_template(self, device, agg_level: ViewAggregationLevel):
        # The trailing "{type}" is left as a literal placeholder for _get_name to fill in
        device_label = device.decorate_label(prefix='_')
        level_name = agg_level.name.lower()
        return f"{self.__output_file_prefix}{device_label}_{level_name}_view_{{type}}"

    @staticmethod
    def __is_unlabeled_uncore_device(agg_level, device):
        # True only for the uncore aggregation level combined with an empty device label
        return bool(agg_level == ViewAggregationLevel.UNCORE and device.label == '')


class ViewGenerator:
    """
    Generate views with various levels of data aggregation (per system, socket, core, thread...)
    """

    def __init__(self, view_collection: ViewCollection = None):
        """
        Initialize view generator

        :param view_collection: views to generate

        :raises ValueError: if no view collection is given or the collection has no views
        """
        if view_collection is None or len(view_collection.views) == 0:
            raise ValueError('at least one view is required but none provided')

        self.__views: List[_DataView] = []
        self.__initialize_view_definitions(view_collection)

    @property
    def views(self):
        """
        :return: the data view objects created from the view collection
        """
        return self.__views

    def generate_detail_views(self, df: RawDataFrame) -> Dict[str, ViewData]:
        """
        Process the input dataframe and generate data for all Detail Views

        :param df: input data frame

        :return: a dict mapping view names to `ViewData` objects, one entry for each Details View that
                 produced non-empty data. The dict is empty when the input data frame is empty.
        """
        results = {}
        if df.empty:
            return results

        for view in self.__views:
            if ViewType.DETAILS not in view.attributes.view_type:
                continue
            details_view = view.generate_details(df)
            if details_view is None or details_view.data.empty:
                continue
            results[view.attributes.view_name] = details_view

        return results

    def compute_aggregates(self, df: RawDataFrame) -> List[ViewData]:
        """
        Computes aggregated sums of event values for a raw input dataframe, once per view that includes
        the Summary view type

        :param df: raw input data frame

        :return: one `ViewData` object per Summary view. For an empty input, each object simply carries the
                 input data frame and an empty retire latency count data frame.
        """
        input_is_empty = df.empty
        aggregates = []
        for view in self.__views:
            if ViewType.SUMMARY not in view.attributes.view_type:
                continue
            if input_is_empty:
                aggregates.append(ViewData(view.attributes, df, pd.DataFrame()))
            else:
                aggregates.append(view.compute_aggregate(df))
        return aggregates

    def __initialize_view_definitions(self, view_collection: ViewCollection):
        # Each aggregation level is served by a dedicated data view class
        view_class_by_level = {
            ViewAggregationLevel.SYSTEM: _SystemDataView,
            ViewAggregationLevel.SOCKET: _SocketDataView,
            ViewAggregationLevel.CORE: _CoreDataView,
            ViewAggregationLevel.THREAD: _ThreadDataView,
            ViewAggregationLevel.UNCORE: _UncoreDataView,
        }
        self.__views = [view_class_by_level[config.aggregation_level](config)
                        for config in view_collection.views]


class _BaseStat(ABC):
    """
    Interface for statistics that are accumulated over a sequence of data frames.

    Deriving from ABC makes the @abstractmethod markers effective: a subclass that does not
    implement both methods cannot be instantiated.
    """

    @abstractmethod
    def compute(self, df: pd.DataFrame) -> None:
        """
        Compute aggregated statistic/s for an input dataframe and update the object state

        :param df: input data frame
        """

    @abstractmethod
    def get_stats_values(self) -> pd.DataFrame:
        """
        Return the resulting dataframe after computing the statistic/s
        """


class _MinMax(_BaseStat):
    """
    Tracks the running minimum and maximum of every column across the data frames passed to `compute`.
    """

    def __init__(self):
        self.__columns = [sdf.MIN, sdf.MAX]
        self.__stats_df = None  # stays None until the first call to compute

    def compute(self, df: pd.DataFrame) -> None:
        """
        Fold the per-column min and max of the input data frame into the running values

        :param df: input data frame
        """
        block_stats_df = df.agg(['min', 'max'], axis='index').T
        block_stats_df.columns = self.__columns
        if self.__stats_df is None:
            # First block: its min/max are the running values
            self.__stats_df = block_stats_df.copy()
            return
        try:
            self.__get_min_max(block_stats_df)
        except TypeError:  # TypeError occurs when pd.NA is in the dataframe (only for bitwise metrics)
            self.__stats_df = self.__stats_df.fillna(np.nan)
            self.__get_min_max(block_stats_df.fillna(np.nan))

    def __get_min_max(self, block_stats_df):
        # fmin/fmax pick the non-NaN operand when only one of the two values is NaN
        for column, pick in ((sdf.MIN, np.fmin), (sdf.MAX, np.fmax)):
            self.__stats_df[column] = pick(self.__stats_df[column], block_stats_df[column])

    def get_stats_values(self) -> pd.DataFrame:
        """
        :return: the running min/max data frame, or None if `compute` was never called
        """
        return self.__stats_df


class _Percentile(_BaseStat):
    """
    Estimates a percentile of every column using one t-digest per column.
    """

    def __init__(self, percentile=95):
        """
        :param percentile: the percentile to report
        """
        from tdigest import tdigest
        self.__percentile = percentile
        self.__columns = f'{percentile}th percentile'
        self.__event_percentiles: Dict[str, tdigest.TDigest] = {}

    def compute(self, df: pd.DataFrame) -> None:
        """
        Feed the finite values of every column of the input data frame into that column's digest

        :param df: input data frame
        """
        from tdigest import tdigest
        digests = self.__event_percentiles
        for column in df:
            if column not in digests:
                digests[column] = tdigest.TDigest()
            samples = df[column].values
            finite_samples = samples[np.isfinite(samples)]  # remove nans and infinite values
            digests[column].batch_update(finite_samples)

    def get_stats_values(self) -> pd.DataFrame:
        """
        :return: a data frame indexed by column name, holding the requested percentile for every column
                 whose digest is not empty
        """
        rows = [[column, digest.percentile(self.__percentile)]
                for column, digest in self.__event_percentiles.items()
                if digest.C.count > 0]
        return pd.DataFrame(rows, columns=['name', self.__columns]).set_index('name')


class _NullPercentile(_Percentile):
    """
    Stand-in for `_Percentile` that computes nothing: every column seen is reported with a NaN value.
    """

    def __init__(self):
        # The parent initializer is not called; this class keeps its own state
        self.__columns = sdf.PERCENTILE
        self.__event_percentiles: Dict[str, int] = {}

    def compute(self, df: pd.DataFrame) -> None:
        """
        Record the columns of the input data frame, each with a NaN value

        :param df: input data frame
        """
        for column in df:
            self.__event_percentiles.setdefault(column, np.nan)

    def get_stats_values(self) -> pd.DataFrame:
        """
        :return: a data frame indexed by column name with a single all-NaN percentile column
        """
        rows = [[column, value] for column, value in self.__event_percentiles.items()]
        return pd.DataFrame(rows, columns=['name', self.__columns]).set_index('name')


class _Variation(_BaseStat):
    """
    Tracks, per column, a running variation value: the square root of M2 / count divided by the mean,
    where count, mean and M2 are maintained with Welford's online algorithm.
    """

    def __init__(self):
        self.__event_variance = {}  # column -> latest variation value
        self.__event_aggregate = {}  # column -> (count, mean, M2) carried over between calls to compute
        self.__tmp_aggregate = (0, 0, 0)  # working (count, mean, M2) of the column being processed
        self.__columns = [sdf.VARIATION]

    def compute(self, df: pd.DataFrame) -> None:
        """
        Update the running aggregate and the variation value of every column in the input data frame

        :param df: input data frame
        """
        # Implementation of Welford's Online Algorithm
        # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
        for column in df:
            # Resume from the aggregate saved for this column, or start from scratch
            self.__tmp_aggregate = self.__event_aggregate.get(column, (0, 0, 0))
            values = df[column].dropna()
            if len(values) > 0:
                # apply() advances __tmp_aggregate once per value; the last result is the up-to-date variation
                self.__event_variance[column] = values.apply(self.__compute_variance_value).iloc[-1]
            self.__event_aggregate[column] = self.__tmp_aggregate

    def get_stats_values(self) -> pd.DataFrame:
        """
        :return: a data frame with a single variation column, indexed by the columns that had values
        """
        stats_values_dict = dict(zip(self.__columns, [self.__event_variance]))
        stats_values_df = pd.DataFrame(stats_values_dict)
        return stats_values_df

    @staticmethod
    def __update_aggregate_value(existing_aggregate, new_value):
        # One step of Welford's algorithm: fold new_value into the (count, mean, M2) aggregate
        (count, mean, M2) = existing_aggregate
        count += 1
        delta = new_value - mean
        mean += delta / count
        delta2 = new_value - mean
        M2 += delta * delta2
        return count, mean, M2

    @staticmethod
    def __get_variation(existing_aggregate):
        # Derive the variation value, sqrt(M2 / count) / mean, from an aggregate
        (count, mean, M2) = existing_aggregate
        if count < 2:
            return 0  # should be null, but ruby outputs this as 0
        else:
            variance_over_mean = (np.sqrt(M2 / count)) / mean
        return variance_over_mean

    def __compute_variance_value(self, value):
        # Advance the working aggregate by one value and return the resulting variation
        self.__tmp_aggregate = self.__update_aggregate_value(self.__tmp_aggregate, value)
        return self.__get_variation(self.__tmp_aggregate)


class _Statistics:
    """
    Compute various statistics (min, max, percentile, variation) for events and metrics
    """

    def __init__(self, percentile: int = None):
        """
        :param percentile: percentile to compute. When it is None (or 0), a placeholder percentile
                           statistic is used instead.
        """
        self.__base_stats = [
            _MinMax(),
            _Percentile(percentile) if percentile else _NullPercentile(),
            _Variation(),
        ]
        self.__stats_df: Union[StatisticsDataFrame, None] = None

    def compute(self, df: pd.DataFrame) -> None:
        """
        Compute statistics from the input data frame and update object state. Empty input is ignored.

        :param df: input data frame
        """
        if not df.empty:
            for stat in self.__base_stats:
                stat.compute(df)

    def get_statistics(self) -> StatisticsDataFrame:
        """
        :return: a data frame with the computed events and metrics statistics (min, max, percentile...),
                 built by placing the results of the individual statistics side by side
        """
        per_stat_frames = [stat.get_stats_values() for stat in self.__base_stats]
        self.__stats_df = pd.concat(per_stat_frames, axis=1)
        return StatisticsDataFrame(self.__stats_df)


class Aggregator(ABC):
    """
    Abstract base class for Aggregations.

    An aggregator selects rows of a data frame with a boolean filter, aggregates the selected rows and
    hands back the unselected rows untouched.
    """

    def __init__(self, aggregation='sum'):
        """
        Member variables used in all Aggregation variants

        :param aggregation: name of the aggregation to apply; the default is sum
        """
        self.aggregation = aggregation
        self.filter = None  # assigned by set_filter on every call to aggregate

    @abstractmethod
    def get_aggregated_df(self, filtered_df, group_by: List[str]):
        """
        Aggregate the rows selected by the filter

        :param filtered_df: the rows of the data frame that passed the filter
        :param group_by: columns to group by
        """

    def set_filter(self, df):
        """
        Set the filter to be used during aggregation.
        The default filter is "no exclusions" (an array full of True); subclasses override this
        method to select a subset of the rows.

        :param df: Dataframe to filter
        """
        self.filter = np.ones(len(df), dtype=bool)

    def aggregate(self, df, group_by: List[str]):
        """
        Aggregate the dataframe using assigned filter, group_by, and aggregation type

        :param df: Dataframe to aggregate
        :param group_by: columns to group by (summary or details column groups)
        :returns: a tuple (aggregated_df, remaining_df) where
            aggregated_df: is the aggregation of the rows selected by the filter
            remaining_df: holds the rows of the original Dataframe that were not selected, unaltered
        """
        self.set_filter(df)
        selected_rows = df.loc[self.filter]
        aggregated_df = self.get_aggregated_df(selected_rows, group_by)
        return aggregated_df, df.loc[~self.filter]


class SummaryAggregator(Aggregator):
    """
    Aggregator for summary views: aggregates the value column per group.
    """

    def get_aggregated_df(self, filtered_df, group_by: List[str]):
        """
        :param filtered_df: rows selected by the filter
        :param group_by: columns to group by

        :return: a data frame, indexed by the group keys, with the aggregated value column
        """
        grouped = filtered_df.groupby(group_by, observed=True)
        return pd.DataFrame(grouped.agg({rdc.VALUE: self.aggregation}))


class DetailsAggregator(Aggregator):
    """
    Aggregator for details views: aggregates the numeric columns per group.
    """

    def get_aggregated_df(self, filtered_df, group_by: List[str]):
        """
        :param filtered_df: rows selected by the filter
        :param group_by: columns to group by

        :return: a data frame with the aggregated numeric columns, with its index reset
        """
        grouped = filtered_df.groupby(group_by, as_index=False, observed=True)
        aggregated = grouped.agg(self.aggregation, numeric_only=True)
        return pd.DataFrame(aggregated.reset_index())


class MinAggregator:
    # Event names matched exactly (via isin) by the Summary/Details min aggregators
    COLUMNS = ['$samplingTime', '$processed_samples']


class MeanAggregator:
    # Name fragments joined with '|' and used as a "contains" pattern by the Summary/Details mean aggregators
    COLUMNS = ['retire_latency']


class SummaryMinAggregator(SummaryAggregator):
    """
    Summary aggregator that takes the minimum of the events listed in MinAggregator.COLUMNS.
    """

    def __init__(self):
        super().__init__(aggregation='min')

    def set_filter(self, df):
        """
        Select the rows whose name index level is one of MinAggregator.COLUMNS

        :param df: Dataframe to filter
        """
        event_names = df.index.get_level_values(rdc.NAME)
        self.filter = event_names.isin(MinAggregator.COLUMNS)


class SummaryMeanAggregator(SummaryAggregator):
    """
    Summary aggregator that takes the mean of the events whose name matches MeanAggregator.COLUMNS.
    """

    def __init__(self):
        super().__init__(aggregation='mean')

    def set_filter(self, df):
        """
        Select the rows whose name index level contains any of the MeanAggregator.COLUMNS entries

        :param df: Dataframe to filter
        """
        name_pattern = '|'.join(MeanAggregator.COLUMNS)
        event_names = df.index.get_level_values(rdc.NAME)
        self.filter = event_names.str.contains(name_pattern)


class DetailsMinAggregator(DetailsAggregator):
    """
    Details aggregator that takes the minimum of the events listed in MinAggregator.COLUMNS.
    """

    def __init__(self):
        super().__init__(aggregation='min')

    def set_filter(self, df):
        """
        Select the rows whose name column is one of MinAggregator.COLUMNS

        :param df: Dataframe to filter
        """
        event_names = df[rdc.NAME]
        self.filter = event_names.isin(MinAggregator.COLUMNS)


class DetailsMeanAggregator(DetailsAggregator):
    """
    Details aggregator that takes the mean of the events whose name matches MeanAggregator.COLUMNS.
    """

    def __init__(self):
        super().__init__(aggregation='mean')

    def set_filter(self, df):
        """
        Select the rows whose name column contains any of the MeanAggregator.COLUMNS entries

        :param df: Dataframe to filter
        """
        name_pattern = '|'.join(MeanAggregator.COLUMNS)
        event_names = df[rdc.NAME]
        self.filter = event_names.str.contains(name_pattern)


class _DataView:
    """
    Define the common parameters and logic for generating a data view
    """

    def __init__(self,
                 config: ViewAttributes,
                 summary_group_by: List[str],
                 aggregator_columns: List[str],
                 aggregator_group_by: List[str],
                 details_group_by: List[str],
                 details_index: List[str],
                 device_filter=None,
                 exclude_any_device_containing=None,
                 device_filter_mode='include'
                 ):
        """
        Initialize view definition

        :param config: view attributes
        :param summary_group_by: columns to group by for summary view
        :param aggregator_columns: columns to use for computing aggregated values
        :param aggregator_group_by: columns to group by for computing aggregation (a subset of aggregator_columns)
        :param details_group_by: columns to group by for details view
        :param details_index: columns whose values will be used to determine the index of the details view
        :param device_filter: an optional device name (e.g., CORE, CHA...). When specified, only events related
                              to the specified device will be included in the generated view. This will also
                              limit the computed metrics to those that only use events related to the specified
                              device.
        :param exclude_any_device_containing: stored for the device filtering logic; presumably text that
                                              excludes any device whose name contains it -- TODO confirm
        :param device_filter_mode: stored for the device filtering logic; defaults to 'include'
        """
        self.__summary_group_by = summary_group_by
        self._aggregator_columns = aggregator_columns
        self._aggregator_group_by = aggregator_group_by
        self.__details_group_by = details_group_by
        self.__details_index = details_index
        self.__device_filter = device_filter
        self.__exclude_any_device_containing = exclude_any_device_containing
        self.__device_filter_mode = device_filter_mode
        # Optional config members get a fallback object as second argument
        # (presumably used when the config value is None -- TODO confirm)
        self.__normalizer = self.__get_config_value(config.normalizer, _NullNormalizer())
        self._metric_computer: MetricComputer = self.__get_config_value(config.metric_computer, _NullMetricComputer([]))
        # NOTE(review): called after _metric_computer is assigned; likely depends on it -- keep this order
        self.__metric_names = self.__get_metric_names()
        self.__metric_column_order = None  # computed on first use when ordering the summary view columns
        self.__required_events = self.__get_config_value(config.required_events,
                                                         pd.DataFrame())
        self._attributes = config
        self.__event_summary_values = pd.DataFrame()
        self.__filtered_events = set()  # names of events dropped so far when filtering events

    @property
    def attributes(self) -> ViewAttributes:
        """
        :return: the view attributes this data view was created with
        """
        return self._attributes

    def update_summary(self, df: RawDataFrame, event_summary_values: pd.DataFrame) -> Union[pd.DataFrame, None]:
        """
        Process the input dataframe and update summary statistics

        :param df: input data frame of new event values
        :param event_summary_values: persisted summary values from DataAccumulator

        :return: the updated summary values. The input data frame is returned as is when it is empty or
                 when this view does not include the Summary view type.
        """
        is_summary_view = ViewType.SUMMARY in self.attributes.view_type
        if df.empty or not is_summary_view:
            return df

        return self._update_summary_values(df, event_summary_values)

    def update_statistics(self, details_view_df: Union[RawDataFrame, pd.DataFrame]) -> pd.DataFrame:
        """
        Hook for updating statistics in summary views.

        This base implementation ignores its input and returns an empty data frame.

        :param details_view_df: details view data

        :return: an empty data frame
        """
        # A method that subclasses can override to update statistics in summary views
        return pd.DataFrame()

    @staticmethod
    def update_retire_latency_counts(new_retire_latency_counts: RetireLatencyCountDataFrame,
                                     retire_latency_counts: pd.DataFrame):
        """
        Accumulate retire latency counts (the number of samples per retire latency event).

        The counts are tracked so that retire latency means can be calculated after data
        accumulation (addition).

        :param new_retire_latency_counts: counts to add
        :param retire_latency_counts: counts accumulated so far

        :return: the accumulated counts. When there was nothing accumulated yet, this is a copy of the new counts.
        """
        if new_retire_latency_counts.empty:
            return retire_latency_counts
        if retire_latency_counts.empty:
            return new_retire_latency_counts.copy()
        # fill_value=0 keeps events that appear in only one of the two data frames
        return retire_latency_counts.add(new_retire_latency_counts, fill_value=0)

    @staticmethod
    def update_retire_latencies(view_data: ViewData) -> pd.DataFrame:
        """
        Turn accumulated retire latency sums into means.

        Retire latencies are added in the DataAccumulator; dividing each sum by its sample count
        makes the retire latencies aggregate correctly. The view data is modified in place.

        :param view_data: view data holding the event values and the retire latency counts

        :return: the (updated) data of the view
        """
        counts = view_data.retire_latency_counts
        if counts.empty:
            return view_data.data
        # TODO: Find df operation to replace iteration
        for name, row in counts.iterrows():
            view_data.data.loc[[name]] = view_data.data.loc[[name]] / row[rlcdc.COUNT]
        return view_data.data

    def generate_details(self, df: RawDataFrame) -> ViewData:
        """
        Process the input dataframe and return details data

        :param df: input data frame

        :return: a `ViewData` object whose attributes carry the Details view type. Its `data` member contains
                 the details view data, or an empty data frame if the view is not configured to generate
                 details data
        """
        if ViewType.DETAILS in self.attributes.view_type:
            details_view_df = self._generate_details_dataframe(df)
        else:
            details_view_df = pd.DataFrame()
        details_attributes = self.attributes.clone(update={'view_type': ViewType.DETAILS})
        return ViewData(details_attributes, details_view_df)

    def compute_aggregate(self, df: RawDataFrame) -> ViewData:
        """
        Sum the event values of the input data frame per aggregation group

        :param df: raw input data frame

        :return: a `ViewData` object with the Summary view type, holding the summed values and the retire
                 latency counts taken from the unfiltered input
        """
        # Counts are taken before any device/event filtering is applied
        rl_counts = self._get_retire_latency_counts(df)
        filtered_df = self.__filter_events(self.__filter_devices(df, rdc))
        grouped = filtered_df[self._aggregator_columns].groupby(self._aggregator_group_by, observed=True)
        summed_values = grouped.sum()
        summary_attributes = self.attributes.clone(update={'view_type': ViewType.SUMMARY})
        return ViewData(summary_attributes, summed_values, rl_counts)

    def generate_summary(self, event_df: pd.DataFrame, stats_df: pd.DataFrame = pd.DataFrame()) -> ViewData:
        """
        Normalize the event values and build the summary view from them

        :param event_df: event values to summarize
        :param stats_df: not used by this implementation

        :return: a `ViewData` object with the Summary view type. Its `data` member is an empty data frame
                 if the view is not configured to generate summary data
        """
        if ViewType.SUMMARY in self.attributes.view_type:
            normalized_df = self.__normalizer.normalize(event_df, event_axis='index')
            summary_view_df = self._generate_summary_dataframe(normalized_df)
        else:
            summary_view_df = pd.DataFrame()
        summary_attributes = self.attributes.clone(update={'view_type': ViewType.SUMMARY})
        return ViewData(summary_attributes, summary_view_df)

    @staticmethod
    def _aggregate_events(df: pd.DataFrame, aggregator_list: List[Aggregator], group_by) -> pd.DataFrame:
        """
        Run the data frame through a chain of aggregators. Each aggregator handles the rows it selects and
        passes the remaining rows on to the next one.

        :param df: data frame to aggregate
        :param aggregator_list: aggregators to apply, in order
        :param group_by: columns to group by

        :return: the concatenated results of all aggregators, or the input itself when there are no aggregators
        """
        if len(aggregator_list) == 0:
            return df
        aggregated_parts = []
        unprocessed_df = df
        # IMPORTANT (list order requirement): SummaryAggregator is what is leftover after all
        # custom aggregator cases have been handled. It MUST go last in the list.
        for aggregator in aggregator_list:
            aggregated_part, unprocessed_df = aggregator.aggregate(unprocessed_df, group_by)
            aggregated_parts.append(aggregated_part)
        return pd.concat(aggregated_parts)

    @staticmethod
    def _get_retire_latency_counts(df: pd.DataFrame):
        """
        Count, per retire latency event, the number of distinct timestamps it appears with

        :param df: raw input data frame

        :return: a data frame with the counts, indexed by event name
        """
        name_pattern = '|'.join(MeanAggregator.COLUMNS)
        is_retire_latency = df[rdc.NAME].str.contains(name_pattern)
        samples = df.loc[is_retire_latency, [rdc.TIMESTAMP, rdc.NAME]].drop_duplicates()
        return pd.DataFrame(samples[rdc.NAME].value_counts())

    def _generate_summary_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Build the summary view: metrics, sliced metrics and aggregated events, in that column order

        :param df: event values

        :return: the summary view data frame, or the input itself when it is empty
        """
        if df.empty:
            return df
        aggregators = [SummaryMinAggregator(), SummaryMeanAggregator(), SummaryAggregator()]
        # TODO: simplify slice metrics such that only one slice_metric_computer is generated (between details and
        #  summary)
        slice_metrics_df = self.__handle_sliced_metrics_summary(df, aggregators)
        events_df, metrics_df = self._aggregate_events_and_compute_summary_metrics(aggregators,
                                                                                   df,
                                                                                   self._metric_computer,
                                                                                   self.__summary_group_by)
        return self.__get_ordered_summary_view_df(events_df, metrics_df, slice_metrics_df)

    def __filter_events(self, df):
        # Static MSR events seen with more than one timestamp are remembered and dropped,
        # together with every event dropped on earlier calls
        unique_event_timestamps = df[[rdc.NAME, rdc.TIMESTAMP]].drop_duplicates()
        self.__filtered_events.update(self.__get_static_msr_to_filter(unique_event_timestamps))
        is_filtered = df[rdc.NAME].isin(self.__filtered_events)
        return df[~is_filtered]

    @staticmethod
    def __get_static_msr_to_filter(event_timestamp_df):
        # Return the names matching STATIC_MSR_RE that occur more than once in the given data frame
        is_static_msr = event_timestamp_df[rdc.NAME].str.contains(STATIC_MSR_RE)
        occurrences = event_timestamp_df.loc[is_static_msr, rdc.NAME].value_counts()
        repeated = occurrences[occurrences > 1]
        return list(repeated.index)

    def __get_ordered_summary_view_df(self, summary_events_df, summary_metrics_df, summary_slice_metrics_df):
        # Join metrics, sliced metrics and events side by side, then order the columns:
        # metric columns first (order computed once and reused), event columns after
        combined_df = pd.concat([summary_metrics_df, summary_slice_metrics_df, summary_events_df], axis=1)
        if not self.__metric_column_order:
            self.__metric_column_order = self.__get_metric_column_order(summary_metrics_df, summary_slice_metrics_df)
        event_columns = summary_events_df.columns.tolist()
        return combined_df[self.__metric_column_order + event_columns]

    def __handle_sliced_metrics_summary(self, df, aggregation_list) -> pd.DataFrame:
        """
        Compute the sliced metrics for the summary view

        :param df: event values
        :param aggregation_list: aggregators used to aggregate the events

        :return: the sliced metrics, or an empty data frame for uncore views and when there are no
                 compiled slice metrics
        """
        # TODO: Potentially make this function (and generate dataframe function) and the details one into two classes
        #  with a factory based on ViewType? (ViewGenerator based on ViewType)
        slice_columns = self.__get_summary_aggregator_sliced_metrics()
        base_computer = self._metric_computer
        slice_metric_computer = _SliceMetricComputer(base_computer.metric_definitions,
                                                     base_computer.symbol_table,
                                                     slice_columns)
        self.__filter_out_slice_metrics(slice_metric_computer)
        nothing_to_compute = isinstance(self, _UncoreDataView) or not slice_metric_computer.compiled_metrics
        if nothing_to_compute:
            return pd.DataFrame()  # TODO: support _UncoreDataView, or replace type checking
        # Sliced metrics are additionally grouped by unit
        slice_group_by = self.__summary_group_by + [rdc.UNIT]
        _, slice_metrics_df = self._aggregate_events_and_compute_summary_metrics(aggregation_list, df,
                                                                                 slice_metric_computer,
                                                                                 slice_group_by)
        return slice_metrics_df

    def __get_summary_aggregator_sliced_metrics(self):
        # The summary group-by columns without the name column; works on a copy so the original stays intact
        columns_without_name = list(self.__summary_group_by)
        columns_without_name.remove(rdc.NAME)
        return columns_without_name

    def _aggregate_events_and_compute_summary_metrics(self, aggregation_list, df, metric_computer: MetricComputer,
                                                      summary_group_by):
        """
        Aggregate the events, lay them out with one column per event and compute the metrics from them

        :param aggregation_list: aggregators used to aggregate the events
        :param df: event values
        :param metric_computer: metric computer used for the metrics
        :param summary_group_by: columns to group by

        :return: a tuple (summary events data frame, summary metrics data frame)
        """
        aggregated_df = self._aggregate_events(df, aggregation_list, summary_group_by)
        if aggregated_df.index.nlevels != 1:
            # Multi-level index: move the event names from the index into the columns
            summary_events_df = aggregated_df.unstack(level=rdc.NAME).droplevel(0, axis=1)
        else:
            summary_events_df = pd.DataFrame(aggregated_df).transpose()
        summary_metrics_df = self._compute_metrics(summary_events_df, metric_computer)
        return summary_events_df, summary_metrics_df

    def _compute_metrics(self, df: pd.DataFrame, metric_computer=None, calculate_block_level=False) -> pd.DataFrame:
        if not metric_computer:
            metric_computer = self._metric_computer
        constant_values = self._override_constant_values(metric_computer.symbol_table)
        metrics_df = metric_computer.compute_metric(
            df, constant_values, calculate_block_level=calculate_block_level)
        return metrics_df

    def _generate_details_dataframe(self, df: RawDataFrame) -> pd.DataFrame:
        """
        Build the details view dataframe (metrics merged with events) from raw event data.

        :param df: raw event data
        :return: the details view with the group index level dropped, or an empty dataframe when `df` is empty
        """
        if df.empty:
            return pd.DataFrame()

        normalized_events_df = self.__normalizer.normalize(self.__filter_devices(df, rdc))
        aggregators = [DetailsMinAggregator(), DetailsMeanAggregator(), DetailsAggregator()]

        # Sliced metrics are handled first: that step may remove them from the view's regular metric computer,
        # which is the computer used by the aggregation call below
        sliced_metrics_df = self.__handle_sliced_metrics_details(normalized_events_df)
        metrics_df, events_view_df = self._aggregate_events_and_compute_metrics(aggregators,
                                                                                normalized_events_df,
                                                                                self.__details_group_by,
                                                                                self.__details_index)
        metrics_df = self.__merge_slice_metrics_detail_views(metrics_df, sliced_metrics_df)
        merged_view_df = self.__merge_events_and_metrics_detail_views(events_view_df, metrics_df)
        return merged_view_df.reset_index(level=rdc.GROUP, drop=True)

    def __merge_slice_metrics_detail_views(self, details_metrics_df, slice_metrics_df):
        if not slice_metrics_df.empty:
            details_metrics_df = details_metrics_df.merge(slice_metrics_df, left_index=True, right_index=True)
            if not self.__metric_column_order:
                self.__metric_column_order = self.__get_metric_column_order(details_metrics_df, slice_metrics_df)
            details_metrics_df = details_metrics_df[self.__metric_column_order]
        return details_metrics_df

    def __get_metric_names(self):
        metric_names = []
        for metric in self._metric_computer.compiled_metrics:
            metric_name = metric.definition.name
            if metric_name in metric_names:
                continue
            metric_names.append(metric_name)
        return metric_names

    def __get_metric_column_order(self, metrics_df, slice_metrics_df):
        return [metric_name for metric_name in self.__metric_names if (metric_name in
                                                                       metrics_df.columns) or (
                        metric_name in slice_metrics_df.columns)]

    def __handle_sliced_metrics_details(self, details_events_df):
        """
        Compute the sliced metrics for the details view.

        When sliced metrics are computed, they are also removed from the view's regular metric computer.

        :param details_events_df: events to compute the sliced metrics from
        :return: the sliced metrics dataframe, or an empty dataframe when no sliced metric was compiled, when the
                 slice computer aggregates at core level, or for uncore views
        """
        slice_computer = _SliceMetricComputer(self._metric_computer.metric_definitions,
                                              self._metric_computer.symbol_table, self.__details_index)
        unsupported = (not slice_computer.compiled_metrics
                       or rdc.CORE in slice_computer.metric_compiler.aggregation_level_columns
                       or isinstance(self, _UncoreDataView))
        if unsupported:
            # TODO: Implement sliced metrics compatible with core/thread/uncore views
            return pd.DataFrame()
        # Work on a copy so the view's own details index is not modified
        slice_index = list(self.__details_index)
        if rdc.SOCKET not in slice_index:
            slice_index.append(rdc.SOCKET)
        self.__filter_out_slice_metrics(slice_computer)
        slice_metrics_df, _ = self._aggregate_events_and_compute_metrics([],
                                                                         details_events_df,
                                                                         self.__details_group_by + [rdc.UNIT],
                                                                         slice_index + [rdc.UNIT],
                                                                         slice_computer)
        return slice_metrics_df

    def __filter_out_slice_metrics(self, slice_metric_computer):
        non_slice_metrics = self.__get_non_sliced_metrics(slice_metric_computer)
        self._metric_computer.update_compiled_metrics(non_slice_metrics)

    def __get_non_sliced_metrics(self, slice_metric_computer):
        return [metric for metric in self._metric_computer.compiled_metrics if
                metric.definition not in [
                    compiled_metric.definition for compiled_metric
                    in slice_metric_computer.compiled_metrics]]

    @staticmethod
    def __filter_compiled_metrics(compiled_metrics: List[CompiledMetric]) -> List[CompiledMetric]:
        return list(filter(lambda x: re.search(r"\[(\d+:\d+|\d+|:\d+)\]", x.source_code) is not None, compiled_metrics))

    def _aggregate_events_and_compute_metrics(self,
                                              aggregation_list,
                                              details_events_df,
                                              details_group_by,
                                              details_index,
                                              metric_computer=None):
        """
        Aggregate the events, pivot them into the details view layout and compute block-level metrics.

        :param aggregation_list: aggregators forwarded to `_aggregate_events`
        :param details_events_df: events to aggregate
        :param details_group_by: columns to group the events by
        :param details_index: columns used as the index of the pivoted view
        :param metric_computer: computer forwarded to `_compute_metrics` (None selects the view's own computer there)
        :return: a tuple (details metrics dataframe, details events view dataframe)
        """
        # TODO: maybe make this an interface with separate implementation for metriccomputer, slicemetriccomputer
        aggregated_df = self._aggregate_events(details_events_df, aggregation_list, details_group_by)
        # One column per event name, one row per combination of the index columns
        pivoted_df = pd.pivot_table(aggregated_df, values=rdc.VALUE, index=details_index, columns=[rdc.NAME])
        events_view_df = self.__adjust_details_view_dataframe_columns(pivoted_df)
        metrics_df = self._compute_metrics(events_view_df, metric_computer, True)
        return metrics_df, events_view_df

    def _update_summary_values(self, df: pd.DataFrame, event_summary_values: pd.DataFrame) -> pd.DataFrame:
        new_event_summary_values = df.reset_index()[self._aggregator_columns].groupby(self._aggregator_group_by,
                                                                                      observed=True).sum()
        if event_summary_values.empty:
            event_summary_values = new_event_summary_values.copy()
        elif len(set(df.columns).difference(set(event_summary_values.columns))) > 0:
            event_summary_values = event_summary_values.merge(new_event_summary_values, how='left', left_index=True,
                                                              right_index=True)
        else:
            event_summary_values = event_summary_values.add(new_event_summary_values, fill_value=0)
        return event_summary_values

    @staticmethod
    def __merge_events_and_metrics_detail_views(details_view_df: pd.DataFrame,
                                                details_metrics_df: pd.DataFrame) -> pd.DataFrame:
        if details_metrics_df.empty:
            return details_view_df

        details_view_df = pd.merge(details_metrics_df, details_view_df, left_index=True, right_index=True)
        return details_view_df

    def _override_constant_values(self, symbol_table: Dict) -> Dict:
        # A method that subclasses can override to modify the constant values to be used for computing metrics
        return symbol_table

    def __adjust_details_view_dataframe_columns(self, details_view_df: pd.DataFrame):
        """
        Align the details view columns with the view's required events.

        :param details_view_df: details events view
        :return: the input unchanged when no required events are configured, otherwise the view reindexed to the
                 names of the required events that pass the device filter
        """
        if not self.__required_events.empty:
            # Force details view columns and their order based on the `ViewGenerator` configuration
            device_events_df = self.__filter_devices(self.__required_events, eidc)
            details_view_df = details_view_df.reindex(columns=device_events_df[eidc.NAME])
        return details_view_df

    @staticmethod
    def __get_config_value(value, alternate_value_if_none):
        return value if value is not None else alternate_value_if_none

    def __filter_devices(self, df: Union[pd.DataFrame, RawDataFrame], df_column_type: Union[Type[rdc],
    Type[eidc]]):
        if self.__device_filter_mode not in ['include', 'exclude']:
            raise ValueError("device_filter_mode must be either 'include' or 'exclude'")
        if self.__device_filter and self.__device_filter_mode == 'include':
            df = df.loc[df[df_column_type.DEVICE].isin(self.__device_filter)]
        elif (self.__device_filter or self.__exclude_any_device_containing) and self.__device_filter_mode == 'exclude':
            df = df.loc[~(df[df_column_type.DEVICE].isin(self.__device_filter))]
            df = df.loc[~(df[df_column_type.DEVICE].str.contains('|'.join(self.__exclude_any_device_containing)))]
        return df


class DataAccumulator:
    """
    Keeps running event summaries, and statistics, for every view of a view generator.

    Event counts are accumulated per view through `update_aggregates()`, statistics through
    `update_statistics()`. `generate_summary_views()` turns the accumulated data into summary views.
    """

    def __init__(self, view_generator: ViewGenerator):
        def empty_view_data():
            # One empty ViewData per view, keyed by view name
            return {view.attributes.view_name: ViewData(view.attributes, pd.DataFrame())
                    for view in view_generator.views}

        self.__views = {view.attributes.view_name: view for view in view_generator.views}
        self.__event_views = empty_view_data()
        self.__stats_views = empty_view_data()

    def get_event_summaries(self) -> Dict[str, ViewData]:
        """
        :return: the accumulated event data of each view, keyed by view name
        """
        return self.__event_views

    def get_statistics(self) -> Dict[str, ViewData]:
        """
        :return: the statistics data of each view, keyed by view name
        """
        return self.__stats_views

    def update_aggregates(self, summary_computations: List[ViewData]) -> None:
        """
        Fold new summary computations into the accumulated event data and retire latency counts.

        :param summary_computations: new view data; every entry must belong to a summary view
        :raises ValueError: when an entry does not belong to a summary view
        """
        for incoming in summary_computations:
            attributes = incoming.attributes
            if ViewType.SUMMARY not in attributes.view_type:
                raise ValueError('warning: request for details in summary function')
            view_name = attributes.view_name
            data_view = self.__views[view_name]
            accumulated = self.__event_views[view_name]
            accumulated.data = data_view.update_summary(incoming.data, accumulated.data)
            accumulated.retire_latency_counts = data_view.update_retire_latency_counts(
                incoming.retire_latency_counts, accumulated.retire_latency_counts)

    def update_statistics(self, detail_views: Dict[str, ViewData] = None, df: RawDataFrame = None) -> None:
        """
        Update the statistics of every view that is not a pure details view.

        Pass either the generated detail views or, when detail views are not generated, a raw dataframe.
        When detail views are given, the data for a view is looked up under the view's name with 'summary'
        replaced by 'details'; views without such an entry are skipped.

        :param detail_views: detail views keyed by view name
        :param df: a raw dataframe, used when no detail views are passed
        :raises ValueError: when both `detail_views` and `df` are None
        """
        if detail_views is None and df is None:
            raise ValueError('if no detail views are requested, then a dataframe must be passed into this method')

        for view_id, stats_view in self.__stats_views.items():
            if stats_view.attributes.view_type == ViewType.DETAILS:
                continue
            if detail_views:
                details_id = view_id.replace('summary', 'details')
                try:
                    df = detail_views[details_id].data
                except KeyError:
                    # Statistics don't need to be computed for any view other than system summary for now
                    continue
            stats_view.data = self.__views[view_id].update_statistics(df)

    def generate_summary_views(self) -> Dict[str, ViewData]:
        """
        Generate the summary view of every summary-type view from the accumulated events and statistics.

        :return: the generated summary views, keyed by view name
        """
        summary_sources = {view_id: (event_view, self.__views[view_id])
                           for view_id, event_view in self.__event_views.items()
                           if ViewType.SUMMARY in event_view.attributes.view_type}
        generated_views = {}
        for view_id, (event_view, data_view) in summary_sources.items():
            data_view.update_retire_latencies(event_view)
            generated = data_view.generate_summary(event_view.data, self.__stats_views[view_id].data)
            generated_views[generated.attributes.view_name] = generated
        return generated_views


class _SystemDataView(_DataView):
    """
    View definition for the System summary and detail views

    Overrides the behavior of _DataView

    The `update_statistics()` method always computes the data for the details view unless details view data is
    provided. This is needed to compute event and metric statistics for the summary view.

    The `generate_summary()` method adds event and metric statistics to the summary dataframe.
    """

    def __init__(self, config: ViewAttributes):
        super().__init__(config,
                         summary_group_by=[rdc.NAME],
                         aggregator_columns=[rdc.NAME, rdc.SOCKET, rdc.UNIT, rdc.TSC, rdc.VALUE],
                         aggregator_group_by=[rdc.NAME, rdc.SOCKET, rdc.UNIT],
                         details_group_by=[rdc.GROUP, rdc.TIMESTAMP, rdc.TSC, rdc.NAME],
                         details_index=[rdc.GROUP, rdc.TIMESTAMP],
                         device_filter=['PACKAGE'] + config.device.exclusions,
                         exclude_any_device_containing=['THREAD'],
                         device_filter_mode='exclude'
                         )
        self.__stats = _Statistics(config.percentile)

    def update_statistics(self, df: Union[RawDataFrame, pd.DataFrame]) -> StatisticsDataFrame:
        """
        Supports updating statistics when passed a raw EMON dataframe or an already generated details view.

        @param df: a raw EMON dataframe or details view dataframe
        @return: a summary dataframe with updated statistics
        """
        if self.__is_raw_emon_dataframe(df):
            details_view_df = self._generate_details_dataframe(df)
        else:
            details_view_df = df
        self.__stats.compute(details_view_df)
        return self.__stats.get_statistics()

    def generate_summary(self, event_df: pd.DataFrame, stats_df: Union[pd.DataFrame, None] = None) -> ViewData:
        """
        Generate the system summary view and extend it with statistics and metric descriptions.

        @param event_df: accumulated event data, passed to the base class `generate_summary()`
        @param stats_df: statistics to append to the summary; None or an empty dataframe appends nothing
        @return: the summary view; returned as produced by the base class when its data is empty
        """
        summary_view = super().generate_summary(event_df)
        if summary_view.data.empty:
            return summary_view
        summary_data_df = summary_view.data.rename(index={rdc.VALUE: svdc.AGGREGATED})
        # None is the default instead of a DataFrame instance shared by every call
        if stats_df is not None and not stats_df.empty:
            summary_data_df = pd.concat([summary_data_df, stats_df.dropna(axis=0, how='all').transpose()])
        metric_descriptions = self.__get_descriptions()
        summary_view.data = self.__add_descriptions(summary_data_df, metric_descriptions)
        return summary_view

    @staticmethod
    def __is_raw_emon_dataframe(df: Union[RawDataFrame, pd.DataFrame]):
        """
        Tell whether the columns of `df` are exactly the raw dataframe columns.

        @return: True when every column equals the corresponding raw column; False otherwise, including when the
                 comparison raises ValueError
        """
        try:
            return all(df.columns == rdc.COLUMNS)
        except ValueError:
            return False

    def __get_descriptions(self):
        """
        Collect the descriptions of the compiled metrics of this view.

        @return: a dict mapping metric name to description; empty when the view has no metric computer or a
                 `_NullMetricComputer`
        """
        if self._metric_computer and not isinstance(self._metric_computer, _NullMetricComputer):
            metrics_to_compute = self._metric_computer.compiled_metrics
            result_metric_descriptions = {compiled_metric.definition.name: compiled_metric.definition.description
                                          for compiled_metric in metrics_to_compute if compiled_metric.definition}
            return result_metric_descriptions
        return {}

    @staticmethod
    def __add_descriptions(summary_data_df, metric_descriptions):
        """
        Append a description row to the summary dataframe.

        @param summary_data_df: summary dataframe whose column labels are looked up in `metric_descriptions`
        @param metric_descriptions: dict mapping metric name to description
        @return: the dataframe with an added description row (empty string for labels without a description), or
                 the input unchanged when every description is an empty string
        """
        if not any(description != '' for description in metric_descriptions.values()):
            return summary_data_df
        # Work on the transposed frame so the descriptions can be assigned as a regular column
        described_df = summary_data_df.transpose()
        described_df[sdf.DESCRIPTION] = ""
        for metric_name in described_df.index:
            if metric_name in metric_descriptions:
                described_df.at[metric_name, sdf.DESCRIPTION] = metric_descriptions[metric_name]
        return described_df.transpose()


class _SocketDataView(_DataView):
    """
    Socket-level summary and detail views.

    Events are grouped per socket. Metrics are computed with the socket count constants set to 1.
    """

    def __init__(self, config: ViewAttributes):
        excluded_devices = ['SYSTEM'] + config.device.exclusions
        super().__init__(config,
                         summary_group_by=[rdc.NAME, rdc.SOCKET],
                         aggregator_columns=[rdc.NAME, rdc.SOCKET, rdc.UNIT, rdc.TSC, rdc.VALUE],
                         aggregator_group_by=[rdc.NAME, rdc.SOCKET, rdc.UNIT],
                         details_group_by=[rdc.GROUP, rdc.TIMESTAMP, rdc.TSC, rdc.NAME, rdc.SOCKET],
                         details_index=[rdc.GROUP, rdc.TIMESTAMP, rdc.SOCKET],
                         device_filter=excluded_devices,
                         exclude_any_device_containing=['THREAD'],
                         device_filter_mode='exclude')

    def _override_constant_values(self, symbol_table):
        """
        Return a copy of the symbol table with the socket count constants set to 1, so that socket-level metrics
        are computed properly. The given symbol table is not modified.
        """
        socket_level_constants = symbol_table.copy()
        socket_level_constants.update({'system.socket_count': 1, 'SOCKET_COUNT': 1})
        return socket_level_constants


class _CoreDataView(_DataView):
    """
    Core-level summary and detail views.

    Events are grouped per socket and core; the module column is added between them when the view is
    configured to show modules.
    """

    def __init__(self, config: ViewAttributes):
        # Without modules the column lists stay as they were before module support (backward compatibility)
        module_columns = [rdc.MODULE] if config.show_modules else []
        topology = [rdc.SOCKET] + module_columns + [rdc.CORE]
        super().__init__(config,
                         summary_group_by=[rdc.NAME] + topology,
                         aggregator_columns=[rdc.NAME] + topology + [rdc.UNIT, rdc.TSC, rdc.VALUE],
                         aggregator_group_by=[rdc.NAME] + topology + [rdc.UNIT],
                         details_group_by=[rdc.GROUP, rdc.TIMESTAMP, rdc.TSC, rdc.NAME] + topology,
                         details_index=[rdc.GROUP, rdc.TIMESTAMP] + topology,
                         device_filter=[config.device.type_name])


class _ThreadDataView(_DataView):
    """
    Thread-level summary and detail views.

    Events are grouped per socket, core and thread; the module column is added between socket and core when
    the view is configured to show modules.
    """

    def __init__(self, config: ViewAttributes):
        # Without modules the column lists stay as they were before module support (backward compatibility)
        module_columns = [rdc.MODULE] if config.show_modules else []
        topology = [rdc.SOCKET] + module_columns + [rdc.CORE, rdc.THREAD]
        device_name = config.device.type_name
        super().__init__(config,
                         summary_group_by=[rdc.NAME, rdc.UNIT] + topology,
                         aggregator_columns=[rdc.NAME] + topology + [rdc.UNIT, rdc.TSC, rdc.VALUE],
                         aggregator_group_by=[rdc.NAME] + topology + [rdc.UNIT],
                         details_group_by=[rdc.GROUP, rdc.TIMESTAMP, rdc.TSC, rdc.NAME, rdc.UNIT],
                         details_index=[rdc.GROUP, rdc.TIMESTAMP, rdc.UNIT] + topology,
                         device_filter=[device_name, 'THREAD_' + device_name])


class _UncoreDataView(_DataView):
    """
    Generic summary and detail views for uncore units.

    Events are grouped per unit and socket. Metrics are computed with the socket count and per-socket
    constants set to 1.
    """

    def __init__(self, config: ViewAttributes):
        super().__init__(config,
                         summary_group_by=[rdc.NAME, rdc.UNIT, rdc.SOCKET],
                         aggregator_columns=[rdc.NAME, rdc.SOCKET, rdc.UNIT, rdc.TSC, rdc.VALUE],
                         aggregator_group_by=[rdc.NAME, rdc.SOCKET, rdc.UNIT],
                         details_group_by=[rdc.GROUP, rdc.TIMESTAMP, rdc.SOCKET, rdc.TSC, rdc.NAME, rdc.UNIT],
                         details_index=[rdc.GROUP, rdc.TIMESTAMP, rdc.UNIT, rdc.SOCKET],
                         device_filter=[config.device.type_name])

    def _override_constant_values(self, symbol_table):
        """
        Return a copy of the symbol table adjusted for uncore-unit-level metrics.

        The socket count constants, and every constant whose name contains both 'per_socket' and 'system.'
        (compared case-insensitively), are set to 1. The given symbol table is not modified.
        """
        unit_level_constants = symbol_table.copy()
        unit_level_constants.update({'system.socket_count': 1, 'SOCKET_COUNT': 1})
        per_socket_symbols = [symbol for symbol in unit_level_constants
                              if 'per_socket' in symbol.lower() and 'system.' in symbol.lower()]
        for symbol in per_socket_symbols:
            unit_level_constants[symbol] = 1
        return unit_level_constants


class _NullNormalizer(Normalizer):
    """
    Pass-through normalizer, used as default when normalization is not required
    """

    def __init__(self):
        """Does nothing; the base class initializer is not called."""

    def normalize(self, df: pd.DataFrame, event_axis: str = 'columns') -> pd.DataFrame:
        """
        :param df: dataframe to "normalize"
        :param event_axis: accepted but not used
        :return: `df` itself, untouched
        """
        return df


class _NullMetricComputer(MetricComputer):
    """
    Metric computer that computes nothing, used as default when metric computation is not required
    """

    def __init__(self, metric_definition_list: List[MetricDefinition], symbol_table: Dict = None):
        """Does nothing; the arguments are not stored and the base class initializer is not called."""

    def compute_metric(self, df: pd.DataFrame,
                       constant_values: Dict[str, str] = None,
                       calculate_block_level: bool = False,
                       group_index_name: str = rdc.GROUP,
                       timestamp_index_name: str = rdc.TIMESTAMP) -> pd.DataFrame:
        """
        :return: a new empty dataframe, whatever the arguments
        """
        return pd.DataFrame()

    @property
    def symbol_table(self):
        """An empty symbol table (a new dict on every access)."""
        return dict()

    @property
    def compiled_metrics(self) -> List[CompiledMetric]:
        """No compiled metrics (a new empty list on every access)."""
        return list()

    @property
    def metric_definitions(self) -> List[MetricDefinition]:
        """No metric definitions (a new empty list on every access)."""
        return list()
