from functools import reduce
import itertools
import attr
from collections import defaultdict
from typing import List
import numpy as np
from pybbda.analysis.simulations.components.state import base_out_state_evolve_cached
from pybbda.analysis.simulations import (
GameState,
GameEvent,
BattingEvent,
FirstBaseRunningEvent,
SecondBaseRunningEvent,
ThirdBaseRunningEvent,
RunningEventProbability,
)
from pybbda.analysis.utils import check_between_zero_one
import pandas as pd
import logging
from pybbda.analysis.simulations.constants import MAX_OUTS
logger = logging.getLogger(__name__)
@attr.s(frozen=True)
class MarkovState:
    """
    Pairs a `GameState` with the probability of being in that state.

    Instances are frozen (immutable after construction).

    :param game_state: The GameState
    :param probability: Probability for the GameState; checked by the
        `check_between_zero_one` validator on construction
    """

    game_state = attr.ib(type=GameState)
    probability = attr.ib(type=float, validator=check_between_zero_one)

    @property
    def lineup_slot(self):
        """The `lineup_slot` of the wrapped `GameState`."""
        return self.game_state.lineup_slot

    def to_df(self):
        """
        Renders the MarkovState as a single-row Pandas DataFrame.

        :return: DataFrame with the columns `first_base`, `second_base`,
            `third_base`, `outs`, `score`, `pa_count` and `prob`.

        .. code-block:: python

            markov_state = MarkovState(GameState(), 1)
            markov_state.to_df()
            first_base second_base third_base outs score pa_count prob
            0          0           0          0    0     1        1
        """
        game_state = self.game_state
        base_out_state = game_state.base_out_state
        bases = base_out_state.base_state

        # Insertion order of this mapping fixes the column order of the frame.
        row = {
            "first_base": bases.first_base,
            "second_base": bases.second_base,
            "third_base": bases.third_base,
            "outs": base_out_state.outs,
            "score": game_state.score,
            "pa_count": game_state.pa_count,
            "prob": self.probability,
        }
        # Each value is wrapped in a one-element list so the frame has one row.
        return pd.DataFrame({column: [value] for column, value in row.items()})
@attr.s(frozen=True)
class MarkovEvent:
    """
    Pairs a `GameEvent` with the probability that the event occurs.

    Instances are frozen (immutable after construction).

    :param game_event: The GameEvent
    :param probability: Probability for the GameEvent; checked by the
        `check_between_zero_one` validator on construction
    """

    game_event = attr.ib(type=GameEvent)
    probability = attr.ib(validator=check_between_zero_one, type=float)
@attr.s(frozen=True)
class MarkovEvents:
    """
    An immutable container holding a list of `MarkovEvent` objects.

    Iterating over the container yields the events in list order.

    :param events: List of `MarkovEvent`
    """

    events = attr.ib(type=List[MarkovEvent])

    def __iter__(self):
        yield from self.events

    @property
    def total_probability(self):
        """The summed probability of every contained event."""
        return sum(event.probability for event in self.events)

    @staticmethod
    def from_players(
        batter, first_base_runner=None, second_base_runner=None, third_base_runner=None
    ):
        """Placeholder that is not implemented: does nothing and returns `None`."""

    @staticmethod
    def from_probs(batting_event_probs, running_event_probs):
        """
        Builds a `MarkovEvents` from batting and running probabilities.

        Ten events are produced: one each for out, base on balls, triple and
        home run (weighted by the matching batting probability), two for a
        double (runner on first scores or not) and four for a single. The
        four single weights are rescaled by the inverse of
        ``first_to_second_on_single + (first_to_third_on_single
        + first_to_home_on_single) * second_to_home_on_single``.
        NOTE(review): if `first_to_second_on_single` equals
        ``1 - first_to_third_on_single - first_to_home_on_single`` the four
        single weights then add up to `batting_event_probs.single` - confirm
        against `RunningEventProbability`.

        :param batting_event_probs: `BattingEventProbability`
        :param running_event_probs: `RunningEventProbability`
        :return: `MarkovEvents`
        :raises ZeroDivisionError: if the rescaling denominator above is zero

        .. code-block:: python

            markov_events = (
                MarkovEvents.from_probs(
                    BattingEventProbability(0.08, 0.15, 0.05, 0.005, 0.03),
                    RunningEventProbability(0.1, 0.1, 0.1, 0.1)
                )
            )
        """
        # Running probabilities that apply when the batter singles.
        first_to_third = running_event_probs.first_to_third_on_single
        first_to_home = running_event_probs.first_to_home_on_single
        second_to_home = running_event_probs.second_to_home_on_single

        # Rescaling factor shared by the four single outcomes.
        single_norm = 1 / (
            running_event_probs.first_to_second_on_single
            + (first_to_third + first_to_home) * second_to_home
        )

        # Weight of the single outcomes that use the default first-base event.
        first_default_weight = 1 - first_to_third - first_to_home

        single = batting_event_probs.single
        double = batting_event_probs.double
        first_to_home_on_double = running_event_probs.first_to_home_on_double

        first_default = FirstBaseRunningEvent.DEFAULT
        second_default = SecondBaseRunningEvent.DEFAULT
        third_default = ThirdBaseRunningEvent.DEFAULT

        # (batting event, first-base event, second-base event,
        #  third-base event, probability) - one entry per Markov event.
        outcomes = [
            (
                BattingEvent.OUT,
                first_default,
                second_default,
                third_default,
                batting_event_probs.out,
            ),
            (
                BattingEvent.BASE_ON_BALLS,
                first_default,
                second_default,
                third_default,
                batting_event_probs.base_on_balls,
            ),
            (
                BattingEvent.SINGLE,
                first_default,
                second_default,
                third_default,
                single * first_default_weight * (1 - second_to_home) * single_norm,
            ),
            (
                BattingEvent.SINGLE,
                first_default,
                SecondBaseRunningEvent.SECOND_TO_HOME,
                third_default,
                single * first_default_weight * second_to_home * single_norm,
            ),
            (
                BattingEvent.SINGLE,
                FirstBaseRunningEvent.FIRST_TO_THIRD,
                SecondBaseRunningEvent.SECOND_TO_HOME,
                third_default,
                single * first_to_third * second_to_home * single_norm,
            ),
            (
                BattingEvent.SINGLE,
                FirstBaseRunningEvent.FIRST_TO_HOME,
                SecondBaseRunningEvent.SECOND_TO_HOME,
                third_default,
                single * first_to_home * second_to_home * single_norm,
            ),
            (
                BattingEvent.DOUBLE,
                first_default,
                second_default,
                third_default,
                double * (1 - first_to_home_on_double),
            ),
            (
                BattingEvent.DOUBLE,
                FirstBaseRunningEvent.FIRST_TO_HOME,
                second_default,
                third_default,
                double * first_to_home_on_double,
            ),
            (
                BattingEvent.TRIPLE,
                first_default,
                second_default,
                third_default,
                batting_event_probs.triple,
            ),
            (
                BattingEvent.HOME_RUN,
                first_default,
                second_default,
                third_default,
                batting_event_probs.home_run,
            ),
        ]
        return MarkovEvents(
            [
                MarkovEvent(GameEvent(batting, first, second, third), probability)
                for batting, first, second, third, probability in outcomes
            ]
        )
@attr.s
class StateVector:
    """
    A StateVector comprises a List of `MarkovState` objects.

    :param _states: List of `MarkovState`. attrs strips the leading
        underscore from the constructor argument, so it is passed
        positionally or as ``states=``. When omitted, every instance gets
        its own fresh one-element list holding a default `GameState` with
        probability 1.
    """

    # A factory is used instead of a literal list so that default-constructed
    # instances never share (and accidentally mutate) the same list object.
    _states = attr.ib(
        type=List[MarkovState],
        default=attr.Factory(
            lambda: [MarkovState(game_state=GameState(), probability=1)]
        ),
    )

    def __iter__(self):
        yield from self.states

    @property
    def lineup_slot(self):
        """
        Lineup slot of the first state in the vector.

        Only the first state is consulted. Indexing an empty list of states
        raises `IndexError`.
        """
        return self.states[0].lineup_slot

    def to_df(self):
        """
        Converts `StateVector` object to Pandas DataFrame

        :return: DataFrame with one row per `MarkovState`, built by
            concatenating each state's `to_df()` result row-wise.

        .. code-block:: python

            state_vector = StateVector()
            state_vector.to_df()
            first_base second_base third_base outs score pa_count prob
            0          0           0          0    0     1        1
        """
        return pd.concat([state.to_df() for state in self.states], axis=0)

    @property
    def mean_score(self):
        """
        Mean score of the state vector

        :return: The probability-weighted sum of the scores of all states
        """
        return sum(s.probability * s.game_state.score for s in self)

    @property
    def std_score(self):
        """
        Standard deviation of the score of the state vector

        :return: Square root of ``E[score^2] - E[score]^2`` where the
            expectations are probability-weighted sums over the states
        """
        mean_score = self.mean_score
        mean_score2 = sum(
            s.probability * s.game_state.score * s.game_state.score for s in self
        )
        variance = mean_score2 - mean_score * mean_score
        # Floating-point cancellation can leave a tiny negative value when the
        # true variance is zero; clamp it so the result is 0 rather than NaN.
        return np.sqrt(max(variance, 0.0))

    @property
    def end_probability(self):
        """
        Probability for the state vector to be in an end state,
        i.e. a state whose number of outs equals `MAX_OUTS`

        :return: Probability of being in an end state
        """
        return sum(
            s.probability
            for s in self
            if s.game_state.base_out_state.outs == MAX_OUTS
        )

    @property
    def states(self):
        """The underlying list of `MarkovState` objects."""
        return self._states

    @staticmethod
    def combine_states(markov_states):
        """
        Combines an iterable of `MarkovState`.

        States sharing the same `game_state` are merged into one state whose
        probability is the sum of the merged probabilities. The `game_state`
        is used as a dictionary key, so it must be hashable.

        :param markov_states: Iterable of `MarkovState`
        :return: `StateVector`

        .. code-block:: python

            m1 = MarkovState(GameState(), 0.2)
            m2 = MarkovState(GameState(), 0.25)
            StateVector.combine_states((m1, m2))
        """

        def _update(acc, item):
            # Accumulate probability per distinct game state.
            acc[item.game_state] += item.probability
            return acc

        totals = reduce(_update, markov_states, defaultdict(float))
        return StateVector(
            [
                MarkovState(game_state, probability)
                for game_state, probability in totals.items()
            ]
        )
@attr.s
class MarkovSimulation:
    """
    A class for executing a Markov simulation by applying state transitions to an
    initial StateVector until a threshold of probability for being in the end state
    is crossed. The simulation is executed by calling the `MarkovSimulation` object.

    :param state_vector: The initial `StateVector`
    :param running_event_probabilities: `RunningEventProbability` used when
        none is passed to the call
    :param termination_threshold: Termination threshold. Simulation will stop when
        probability to be in the end state is no longer smaller than
        `1 - termination_threshold`, or after 100 steps, whichever comes first
    """

    # Factories give every simulation its own default objects instead of
    # sharing instances that were created once when the class was defined.
    state_vector = attr.ib(
        type=StateVector,
        default=attr.Factory(lambda: StateVector([MarkovState(GameState(), 1)])),
    )
    # TODO: use runner specific values, not a single value
    running_event_probabilities = attr.ib(
        type=RunningEventProbability, default=attr.Factory(RunningEventProbability)
    )
    termination_threshold = attr.ib(type=float, default=1e-6)

    def __call__(self, lineup, running_event_probabilities=None):
        """
        Executes the MarkovSimulation

        On every step the batting probabilities are looked up with
        ``lineup.get_batting_probs(lineup_slot)``, where `lineup_slot` comes
        from the current state vector, and one `markov_step` is applied.
        A warning is logged when the step budget is used up while the
        end-state probability is still below `1 - termination_threshold`.

        :param lineup: Object providing `get_batting_probs(lineup_slot)`; the
            returned value is passed to `MarkovEvents.from_probs` as the
            batting event probabilities
        :param running_event_probabilities: A RunningEventProbability object.
            If `None`, the instance attribute of the same name is used
        :return: List of StateVector, starting with the initial state vector
            followed by one entry per executed step

        .. code-block:: python

            markov_simulation = MarkovSimulation()
            running_event_probability = RunningEventProbability(0.1, 0.1, 0.1, 0.1)
            # `lineup` is any object exposing get_batting_probs(lineup_slot)
            results = markov_simulation(lineup, running_event_probability)
        """
        if running_event_probabilities is None:
            running_event_probabilities = self.running_event_probabilities

        max_call = 100
        end_threshold = 1 - self.termination_threshold

        state_vector = self.state_vector
        results = [state_vector]
        end_probability = state_vector.end_probability
        ncall = 0
        while ncall < max_call and end_probability < end_threshold:
            batting_event_probs = lineup.get_batting_probs(state_vector.lineup_slot)
            markov_events = MarkovEvents.from_probs(
                batting_event_probs, running_event_probabilities
            )
            state_vector = self.markov_step(state_vector, markov_events)
            end_probability = state_vector.end_probability
            results.append(state_vector)
            ncall += 1

        # Warn only when the loop ran out of steps without converging. Testing
        # `ncall >= max_call` alone would also warn when the very last allowed
        # step is the one that crossed the threshold.
        if end_probability < end_threshold:
            approx_error = results[-1].mean_score - results[-2].mean_score
            # TODO: compute error based on second derivative
            logger.warning(
                "ncall exceed max call. end_state probability is %.3e. "
                "approximate error is %.3e",
                end_probability,
                approx_error,
            )
        return results

    @staticmethod
    def state_vectors_to_df(state_vectors):
        """
        Converts list of `StateVector` to Pandas DataFrame

        The frames produced by each `StateVector.to_df()` are concatenated
        row-wise in the order given.

        :param state_vectors: List of `StateVector`
        :return: Pandas DataFrame

        .. code-block:: python

            markov_simulation = MarkovSimulation()
            # `lineup` is any object exposing get_batting_probs(lineup_slot)
            results = markov_simulation(lineup)
            sim_df = MarkovSimulation.state_vectors_to_df(results)
            sim_df
            # illustrative output
            first_base second_base third_base outs score pa_count prob
            0          0           0          0    0     1        1.000000e+00
            0          0           0          1    0     2        6.850000e-01
            1          0           0          0    0     2        2.300000e-01
            0          1           0          0    0     2        5.000000e-02
            0          0           1          0    0     2        5.000000e-03
            ...        ...         ...        ...  ...   ...      ...
            0          0           0          0    18    19       8.879749e-11
        """
        return pd.concat(
            [state_vector.to_df() for state_vector in state_vectors], axis=0
        )

    @staticmethod
    def state_transition(markov_state, markov_event):
        """
        Transition from `markov_state` based on `markov_event`

        The new game state is obtained from `base_out_state_evolve_cached`
        and the new probability is the product of the state and event
        probabilities.

        :param markov_state: `MarkovState`
        :param markov_event: `MarkovEvent`
        :return: `MarkovState`
        """
        game_event = markov_event.game_event
        next_game_state = base_out_state_evolve_cached(
            markov_state.game_state,
            game_event.batting_event,
            game_event.first_base_running_event,
            game_event.second_base_running_event,
            game_event.third_base_running_event,
        )
        return MarkovState(
            next_game_state, markov_state.probability * markov_event.probability
        )

    @staticmethod
    def state_transition_tuple(markov_state_event):
        """
        Applies `state_transition` to a `(MarkovState, MarkovEvent)` pair given
        as a single tuple. This is a helper to be able to apply `map`
        to a set of transitions.

        :param markov_state_event: Tuple `(markov_state, markov_event)`
        :return: `MarkovState`
        """
        return MarkovSimulation.state_transition(*markov_state_event)

    @staticmethod
    def markov_step(state_vector, markov_events):
        """
        A step in the Markov simulation. Applies every event in `markov_events`
        to every `MarkovState` in the `state_vector`, and then combines the
        results into a `StateVector`

        :param state_vector: `StateVector` to advance
        :param markov_events: `MarkovEvents` to apply
        :return: `StateVector`
        """
        transitions = map(
            MarkovSimulation.state_transition_tuple,
            itertools.product(state_vector, markov_events),
        )
        return StateVector.combine_states(transitions)