Source code for pybbda.analysis.simulations.components.player_registry

from pybbda.data import LahmanData
from pybbda.data.tools.lahman.data import augment_lahman_batting
from pybbda.utils import Singleton

from .player import Batter, BattingEventProbability


class PlayerRegistry(Singleton):
    """
    Class for a registry of players. This class is a Singleton.

    Batters are stored in the ``registry`` dict, keyed by their
    ``player_id`` attribute.
    """

    def __init__(self):
        # NOTE(review): if `Singleton` hands back the same instance on every
        # construction, this would run again and reset the registry --
        # confirm against the `Singleton` implementation.
        self.registry = {}

    def _add_one(self, batter):
        """
        Stores a single batter under its ``player_id``. An existing entry
        with the same id is overwritten.

        :param batter: `Batter`
        :return: None
        """
        self.registry[batter.player_id] = batter

    def add(self, batters):
        """
        Adds batters to the registry

        :param batters: Iterable of `Batter`, or a single `Batter`
        :return: dict, the full registry after the addition
        """
        # A lone batter is recognised by its ``player_id`` attribute and
        # wrapped so it can share the loop below. Anything else is iterated
        # as-is, so tuples, sets and generators work as well as lists
        # (previously every non-list was treated as a single batter).
        if not isinstance(batters, list) and hasattr(batters, "player_id"):
            batters = [batters]
        for batter in batters:
            self._add_one(batter)
        return self.registry

    @property
    def len(self):
        """
        Number of batters currently held in the registry.

        :return: int
        """
        return len(self.registry)

    def _get_lahman_records(self, pa_limit):
        """
        Builds per-player-season batting event probabilities from the
        Lahman batting table.

        Rows are summed per (playerID, yearID), seasons with fewer than
        `pa_limit` PA are dropped, and each counting stat is divided by PA.
        The augmented batting table is expected to provide the columns
        PA, BB, 1B, 2B, 3B and HR.

        :param pa_limit: int, minimum PA for a player-season to be kept
        :return: dict mapping "<playerID>_<yearID>" to a dict with the keys
            base_on_balls, single, double, triple and home_run
        """
        batting_df = (
            augment_lahman_batting(LahmanData().batting)
            .groupby(["playerID", "yearID"])
            .sum()
            .query(f"PA >= {pa_limit}")
        )

        # The two tuples are parallel: probability column <- stat column.
        probability_columns = (
            "base_on_balls",
            "single",
            "double",
            "triple",
            "home_run",
        )
        stat_columns = ("BB", "1B", "2B", "3B", "HR")
        for probability_column, stat_column in zip(probability_columns, stat_columns):
            batting_df.loc[:, probability_column] = (
                batting_df.loc[:, stat_column] / batting_df.loc[:, "PA"]
            )

        return (
            (
                batting_df.reset_index()
                # One id per player-season, e.g. "<playerID>_<yearID>".
                .assign(
                    player_id=lambda row: row.playerID + "_" + row.yearID.astype("str")
                )
                .loc[:, ["player_id"] + list(probability_columns)]
            )
            .set_index("player_id")
            .to_dict(orient="index")
        )

    def load_from_lahman(self, pa_limit=180):
        """
        Loads a player registry from the Lahman batting table. The set
        of players are filtered to have at least `pa_limit` PAs

        Each qualifying player-season becomes one `Batter` whose id is
        "<playerID>_<yearID>".

        :param pa_limit: int
        :return: None
        """
        lahman_records = self._get_lahman_records(pa_limit=pa_limit)
        for player_id, record in lahman_records.items():
            self.add(
                Batter(
                    player_id=player_id,
                    # record keys match the BattingEventProbability keywords
                    batting_event_probabilities=BattingEventProbability(**record),
                )
            )