Source code for pybbda.analysis.run_expectancy.markov.markov

from functools import reduce
import itertools

import attr
from collections import defaultdict
from typing import List
import numpy as np

from pybbda.analysis.simulations.components.state import base_out_state_evolve_cached
from pybbda.analysis.simulations import (
    GameState,
    GameEvent,
    BattingEvent,
    FirstBaseRunningEvent,
    SecondBaseRunningEvent,
    ThirdBaseRunningEvent,
    RunningEventProbability,
)
from pybbda.analysis.utils import check_between_zero_one
import pandas as pd
import logging
from pybbda.analysis.simulations.constants import MAX_OUTS

logger = logging.getLogger(__name__)


@attr.s(frozen=True)
class MarkovState:
    """
    A `GameState` paired with the probability of being in that state.

    :param game_state: The GameState
    :param probability: Probability for the GameState, validated by
        `check_between_zero_one`
    """

    game_state = attr.ib(type=GameState)
    probability = attr.ib(type=float, validator=check_between_zero_one)

    @property
    def lineup_slot(self):
        """The lineup slot of the wrapped `GameState`."""
        return self.game_state.lineup_slot

    def to_df(self):
        """
        Render this state as a one-row Pandas DataFrame.

        The columns are, in order: ``first_base``, ``second_base``,
        ``third_base``, ``outs``, ``score``, ``pa_count`` and ``prob``.

        :return: MarkovState as a DataFrame.

        .. code-block:: python

            markov_state = MarkovState(GameState(), 1)
            markov_state.to_df()
            first_base  second_base  third_base  outs  score  pa_count  prob
                     0            0           0     0      0         1     1
        """
        game = self.game_state
        base_out = game.base_out_state
        bases = base_out.base_state
        row = {
            "first_base": bases.first_base,
            "second_base": bases.second_base,
            "third_base": bases.third_base,
            "outs": base_out.outs,
            "score": game.score,
            "pa_count": game.pa_count,
            "prob": self.probability,
        }
        # Every column holds a single-element list, giving a one-row frame.
        return pd.DataFrame({column: [value] for column, value in row.items()})
@attr.s(frozen=True)
class MarkovEvent:
    """
    A `GameEvent` paired with the probability that the event occurs.

    :param game_event: The GameEvent
    :param probability: Probability for the GameEvent, validated by
        `check_between_zero_one`
    """

    # Field order matters: instances are built positionally as
    # ``MarkovEvent(game_event, probability)``.
    game_event = attr.ib(type=GameEvent)
    probability = attr.ib(validator=check_between_zero_one, type=float)
@attr.s(frozen=True)
class MarkovEvents:
    """
    A collection of `MarkovEvent` objects.

    :param events: List of `MarkovEvent`
    """

    events = attr.ib(type=List[MarkovEvent])

    def __iter__(self):
        yield from self.events

    @property
    def total_probability(self):
        """The total probability for the events to occur"""
        return sum(event.probability for event in self.events)

    @staticmethod
    def from_players(
        batter, first_base_runner=None, second_base_runner=None, third_base_runner=None
    ):
        """
        Placeholder for building events from player objects.

        Not implemented: the arguments are ignored and `None` is returned.
        """
        return None

    @staticmethod
    def from_probs(batting_event_probs, running_event_probs):
        """
        Constructs a `MarkovEvents` from batting and running probabilities

        Outs, walks, triples and home runs use the batting probability as is.
        Doubles are split by ``first_to_home_on_double``. Singles are split
        into four base-running combinations whose weights are all divided by
        the same normalization term, ``total_prob_on_singles``.

        :param batting_event_probs: `BattingEventProbability`
        :param running_event_probs: `RunningEventProbability`
        :return: `MarkovEvents`

        .. code-block:: python

            markov_events = (
                MarkovEvents.from_probs(
                    BattingEventProbability(0.08, 0.15, 0.05, 0.005, 0.03),
                    RunningEventProbability(0.1, 0.1, 0.1, 0.1)
                )
            )
        """
        batting = batting_event_probs
        running = running_event_probs

        first_to_third = running.first_to_third_on_single
        first_to_home = running.first_to_home_on_single
        second_to_home = running.second_to_home_on_single

        total_prob_on_singles = (
            running.first_to_second_on_single
            + (first_to_third + first_to_home) * second_to_home
        )
        normalization = 1 / total_prob_on_singles

        # Probability that the runner on first takes no extra base on a single.
        runner_on_first_holds = 1 - first_to_third - first_to_home

        def _game_event(
            batting_event,
            first=FirstBaseRunningEvent.DEFAULT,
            second=SecondBaseRunningEvent.DEFAULT,
        ):
            # The third-base running event is DEFAULT for every generated event.
            return GameEvent(
                batting_event, first, second, ThirdBaseRunningEvent.DEFAULT
            )

        weighted_events = [
            (_game_event(BattingEvent.OUT), batting.out),
            (_game_event(BattingEvent.BASE_ON_BALLS), batting.base_on_balls),
            (
                _game_event(BattingEvent.SINGLE),
                batting.single
                * runner_on_first_holds
                * (1 - second_to_home)
                * normalization,
            ),
            (
                _game_event(
                    BattingEvent.SINGLE,
                    second=SecondBaseRunningEvent.SECOND_TO_HOME,
                ),
                batting.single
                * runner_on_first_holds
                * second_to_home
                * normalization,
            ),
            (
                _game_event(
                    BattingEvent.SINGLE,
                    first=FirstBaseRunningEvent.FIRST_TO_THIRD,
                    second=SecondBaseRunningEvent.SECOND_TO_HOME,
                ),
                batting.single * first_to_third * second_to_home * normalization,
            ),
            (
                _game_event(
                    BattingEvent.SINGLE,
                    first=FirstBaseRunningEvent.FIRST_TO_HOME,
                    second=SecondBaseRunningEvent.SECOND_TO_HOME,
                ),
                batting.single * first_to_home * second_to_home * normalization,
            ),
            (
                _game_event(BattingEvent.DOUBLE),
                batting.double * (1 - running.first_to_home_on_double),
            ),
            (
                _game_event(
                    BattingEvent.DOUBLE,
                    first=FirstBaseRunningEvent.FIRST_TO_HOME,
                ),
                batting.double * running.first_to_home_on_double,
            ),
            (_game_event(BattingEvent.TRIPLE), batting.triple),
            (_game_event(BattingEvent.HOME_RUN), batting.home_run),
        ]
        return MarkovEvents(
            [
                MarkovEvent(game_event, probability)
                for game_event, probability in weighted_events
            ]
        )
@attr.s
class StateVector:
    """
    A StateVector comprises a List of `MarkovState` objects

    :param _states: List of `MarkovState`. When omitted, a fresh
        single-element list holding ``MarkovState(GameState(), 1)`` is created
        for each instance.
    """

    # A Factory builds a new list per instance. A literal list default would
    # be one object shared by every default-constructed StateVector.
    _states = attr.ib(
        type=List[MarkovState],
        default=attr.Factory(
            lambda: [MarkovState(game_state=GameState(), probability=1)]
        ),
    )

    def __iter__(self):
        yield from self.states

    @property
    def lineup_slot(self):
        """The lineup slot of the first state in the vector."""
        return self.states[0].lineup_slot

    def to_df(self):
        """
        Converts `StateVector` object to Pandas DataFrame

        The per-state frames produced by `MarkovState.to_df` are concatenated
        row-wise.

        :return: Pandas DataFrame with one row per `MarkovState`

        .. code-block:: python

            state_vector = StateVector()
            state_vector.to_df()
            first_base  second_base  third_base  outs  score  pa_count  prob
                     0            0           0     0      0         1     1
        """
        return pd.concat([state.to_df() for state in self.states], axis=0)

    @property
    def mean_score(self):
        """
        Mean score of the state vector

        :return: The probability-weighted sum of the scores of the states
        """
        return sum(s.probability * s.game_state.score for s in self)

    @property
    def std_score(self):
        """
        Standard deviation of the score of the state vector

        :return: The std. dev score of the state
        """
        mean_score = self.mean_score
        mean_score2 = sum(
            s.probability * s.game_state.score * s.game_state.score for s in self
        )
        variance = mean_score2 - mean_score * mean_score
        # Floating-point cancellation can leave a tiny negative variance, which
        # would make the square root NaN; clamp it at zero.
        return np.sqrt(max(variance, 0.0))

    @property
    def end_probability(self):
        """
        Probability for the state vector to be in an end state, i.e. a state
        whose number of outs equals `MAX_OUTS`

        :return: Probability of being in an end state
        """
        return sum(
            s.probability
            for s in self
            if s.game_state.base_out_state.outs == MAX_OUTS
        )

    @property
    def states(self):
        """The underlying list of `MarkovState` objects."""
        return self._states

    @staticmethod
    def combine_states(markov_states):
        """
        Combines an iterable of `MarkovState`. It deduplicates states and sums
        the probabilities.

        :param markov_states: Iterable of `MarkovState`
        :return: `StateVector`

        .. code-block:: python

            m1 = MarkovState(GameState(), 0.2)
            m2 = MarkovState(GameState(), 0.25)
            StateVector.combine_states((m1, m2))
        """
        # The game state is the dictionary key, so equal game states are merged.
        totals = defaultdict(float)
        for markov_state in markov_states:
            totals[markov_state.game_state] += markov_state.probability
        return StateVector(
            [
                MarkovState(game_state, probability)
                for game_state, probability in totals.items()
            ]
        )
@attr.s
class MarkovSimulation:
    """
    A class for executing a Markov simulation by applying state transitions
    to an initial StateVector until a threshold of probability for being in
    the end state is crossed, or a fixed maximum number of steps has run.
    The simulation is executed by calling the `MarkovSimulation` object.

    :param state_vector: The initial `StateVector`. When omitted, a fresh
        vector holding ``MarkovState(GameState(), 1)`` is created for each
        simulation.
    :param running_event_probabilities: The `RunningEventProbability` used
        when none is passed to the call
    :param termination_threshold: Termination threshold. Simulation will stop
        when probability to be in the end state is at least
        `1 - termination_threshold`
    """

    # A Factory creates a new StateVector per simulation. A plain default
    # instance would be one mutable object shared by all simulations.
    state_vector = attr.ib(
        type=StateVector,
        default=attr.Factory(lambda: StateVector([MarkovState(GameState(), 1)])),
    )
    # TODO: use runner specific values, not a single value
    running_event_probabilities = attr.ib(
        type=RunningEventProbability, default=RunningEventProbability()
    )
    termination_threshold = attr.ib(type=float, default=1e-6)

    def __call__(self, lineup, running_event_probabilities=None):
        """
        Executes the MarkovSimulation

        Each step asks `lineup` for the batting probabilities of the current
        lineup slot, builds the corresponding `MarkovEvents` and applies them
        to the current state vector.

        :param lineup: Object exposing ``get_batting_probs(lineup_slot)``,
            which returns the batting event probabilities for that slot
        :param running_event_probabilities: A RunningEventProbability object.
            Defaults to the simulation's `running_event_probabilities`
        :return: List of StateVector, starting with the initial state vector
            followed by one entry per executed step

        .. code-block:: python

            markov_simulation = MarkovSimulation()
            running_event_probability = RunningEventProbability(0.1, 0.1, 0.1, 0.1)
            # `lineup` is any object providing get_batting_probs(lineup_slot)
            results = markov_simulation(lineup, running_event_probability)
        """
        if running_event_probabilities is None:
            running_event_probabilities = self.running_event_probabilities

        max_calls = 100
        end_probability_target = 1 - self.termination_threshold

        state_vector = self.state_vector
        results = [state_vector]
        ncall = 0
        while (
            ncall < max_calls
            and state_vector.end_probability < end_probability_target
        ):
            batting_event_probs = lineup.get_batting_probs(state_vector.lineup_slot)
            markov_events = MarkovEvents.from_probs(
                batting_event_probs, running_event_probabilities
            )
            state_vector = self.markov_step(state_vector, markov_events)
            results.append(state_vector)
            ncall += 1

        # Warn only when the step budget ran out while the simulation is still
        # unconverged. Converging exactly on the last step is not an error.
        if ncall >= max_calls and state_vector.end_probability < end_probability_target:
            approx_error = results[-1].mean_score - results[-2].mean_score
            # TODO: compute error based on second derivative
            logger.warning(
                "ncall exceed max call. end_state probability is %.3e. "
                "approximate error is %.3e",
                state_vector.end_probability,
                approx_error,
            )
        return results

    @staticmethod
    def state_vectors_to_df(state_vectors):
        """
        Converts list of `StateVector` to Pandas DataFrame

        The frames produced by `StateVector.to_df` are concatenated row-wise.

        :param state_vectors: List of `StateVector`
        :return: Pandas DataFrame

        .. code-block:: python

            markov_simulation = MarkovSimulation()
            running_event_probability = RunningEventProbability(0.1, 0.1, 0.1, 0.1)
            # `lineup` is any object providing get_batting_probs(lineup_slot)
            results = markov_simulation(lineup, running_event_probability)
            sim_df = MarkovSimulation.state_vectors_to_df(results)
            sim_df
            # illustrative output; the values depend on the inputs
            first_base  second_base  third_base  outs  score  pa_count          prob
                     0            0           0     0      0         1  1.000000e+00
                     0            0           0     1      0         2  6.850000e-01
                     1            0           0     0      0         2  2.300000e-01
                     0            1           0     0      0         2  5.000000e-02
                     0            0           1     0      0         2  5.000000e-03
                   ...          ...         ...   ...    ...       ...           ...
        """
        frames = [state_vector.to_df() for state_vector in state_vectors]
        return pd.concat(frames, axis=0)

    @staticmethod
    def state_transition(markov_state, markov_event):
        """
        Transition from `markov_state` based on `markov_event`

        The game state is evolved with `base_out_state_evolve_cached`, and the
        new probability is the product of the state and event probabilities.

        :param markov_state: `MarkovState`
        :param markov_event: `MarkovEvent`
        :return: `MarkovState`
        """
        game_event = markov_event.game_event
        next_game_state = base_out_state_evolve_cached(
            markov_state.game_state,
            game_event.batting_event,
            game_event.first_base_running_event,
            game_event.second_base_running_event,
            game_event.third_base_running_event,
        )
        return MarkovState(
            next_game_state, markov_state.probability * markov_event.probability
        )

    @staticmethod
    def state_transition_tuple(markov_state_event):
        """
        `state_transition` taking its arguments as a single
        ``(markov_state, markov_event)`` tuple. This is a helper to be able to
        apply `map` to a set of transitions.

        :param markov_state_event: Tuple of (`MarkovState`, `MarkovEvent`)
        :return: `MarkovState`
        """
        markov_state, markov_event = markov_state_event
        return MarkovSimulation.state_transition(markov_state, markov_event)

    @staticmethod
    def markov_step(state_vector, markov_events):
        """
        A step in the Markov simulation. Applies every event in
        `markov_events` to every `MarkovState` in the `state_vector`, and then
        combines the results into a `StateVector`

        :param state_vector: `StateVector` to evolve
        :param markov_events: `MarkovEvents` to apply
        :return: `StateVector`
        """
        state_event_pairs = itertools.product(state_vector, markov_events)
        return StateVector.combine_states(
            map(MarkovSimulation.state_transition_tuple, state_event_pairs)
        )