# Source code for pybbda.analysis.projections.marcels.marcels_batting

from pybbda.data.tools.processing.aggregate import aggregate_by_season
from pybbda.data.tools.lahman.data import augment_lahman_batting
from pybbda.analysis.projections.marcels.marcels_base import MarcelsProjectionsBase


class MarcelProjectionsBatting(MarcelsProjectionsBase):
    """
    Batting flavour of the Marcel projections.

    This subclass only supplies batting-specific configuration constants
    and the data hooks (loading, preprocessing and filtering). The
    constants are not read anywhere in this class; presumably they are
    consumed by ``MarcelsProjectionsBase`` -- confirm against the base class.
    """

    # Batting stat columns listed as the computed metrics.
    COMPUTED_METRICS = [
        "1B",
        "2B",
        "3B",
        "HR",
        "BB",
        "HBP",
        "SB",
        "CS",
        "SO",
        "SH",
        "SF",
    ]
    # NOTE(review): looks like the metrics where a lower value is better, so
    # the age adjustment would be applied reciprocally -- confirm in the base.
    RECIPROCAL_AGE_METRICS = ["SO", "CS"]
    # NOTE(review): the four values below look like the parameters of the
    # regression / playing-time formula -- their exact meaning is defined by
    # the base class, verify there before changing them.
    LEAGUE_AVG_PT = 100
    NUM_REGRESSION_PLAYING_TIME = 200
    METRIC_WEIGHTS = (5, 4, 3)
    PT_WEIGHTS = (0.5, 0.1, 0)
    # Columns listed as required in the input data frame.
    REQUIRED_COLUMNS = ["AB", "BB"]
    # Name of the column holding playing time (plate appearances).
    PLAYING_TIME_COLUMN = "PA"

    def __init__(self, stats_df=None, primary_pos_df=None):
        """
        Forward both data frames, unchanged, to the base-class initializer.

        :param stats_df: a data frame like Lahman batting, or None
        :param primary_pos_df: data frame of primary positions, or None
        """
        super().__init__(stats_df, primary_pos_df)

    def _load_data(self):
        """
        returns the ``batting`` attribute of ``self.ld``.

        ``self.ld`` is not assigned in this class; presumably it is set up
        by the base class -- confirm.

        :return: ``self.ld.batting``
        """
        return self.ld.batting

    def preprocess_data(self, stats_df):
        """
        preprocesses the data by passing it through
        ``augment_lahman_batting`` and then ``aggregate_by_season``.

        :param stats_df: a data frame like Lahman batting
        :return: data frame
        """
        return aggregate_by_season(augment_lahman_batting(stats_df))

    def filter_non_representative_data(self, stats_df, primary_pos_df):
        """
        filters pitchers-as-batters. primary_pos_df is a data frame
        containing playerID, yearID, and primaryPos

        The merge is a left join, so rows of ``stats_df`` with no match in
        ``primary_pos_df`` get a missing ``primaryPos``; a missing value is
        not equal to "P", so those rows are retained.

        :param stats_df: a data frame like Lahman batting
        :param primary_pos_df: data frame
        :return: data frame of the rows whose primaryPos is not "P",
            with the primaryPos column dropped again
        """
        return (
            stats_df.merge(primary_pos_df, on=["playerID", "yearID"], how="left")
            .query(r'primaryPos != "P"')
            .drop("primaryPos", axis=1)
        )