Source code for pybbda.data.sources.baseball_reference.data

import pandas as pd
import os
from . import WAR_PITCHING_URL, WAR_BATTING_URL
from pybbda import PYBBDA_DATA_ROOT
import logging

# Directory, under the pybbda data root, holding the Baseball Reference files.
BBREF_DATA_PATH = PYBBDA_DATA_ROOT / "BaseballReference"

# Table name -> file name of that table.
BASEBALL_REFERENCE_TABLES = dict(
    war_bat="war_daily_bat.txt",
    war_pitch="war_daily_pitch.txt",
)

# Table name -> URL constant imported from this package
# (presumably the location the matching file is fetched from -- confirm).
BASEBALL_REFERENCE_URLS = dict(
    war_bat=WAR_BATTING_URL,
    war_pitch=WAR_PITCHING_URL,
)

logger = logging.getLogger(__name__)


class BaseballReferenceData:
    """Lazy, attribute-style access to locally stored Baseball Reference tables.

    Every key of ``self.tables`` is exposed as an attribute. The first access
    to such an attribute reads the matching file from ``data_path`` with
    ``pandas.read_csv`` and stores the result in the instance ``__dict__``,
    so later accesses return the stored object without touching the disk.

    Parameters
    ----------
    data_path : str or os.PathLike, optional
        Directory containing the data files. Defaults to ``BBREF_DATA_PATH``.
    """

    def __init__(self, data_path=None):
        if data_path is None:
            data_path = BBREF_DATA_PATH
        self.tables = BASEBALL_REFERENCE_TABLES
        self.data_path = data_path

    def _locate_file(self, name):
        """Return the path of the file backing table *name*.

        The plain file is preferred; a ``.gz`` sibling is used as a fallback.

        Raises
        ------
        KeyError
            If *name* is not a key of ``self.tables``.
        FileNotFoundError
            If neither the plain nor the ``.gz`` file exists.
        """
        # os.path.join accepts both str and os.PathLike, so a data_path given
        # as a plain string works as well as a pathlib.Path.
        full_path = os.path.join(self.data_path, self.tables[name])
        logger.info("searching for file %s", full_path)

        for candidate in (full_path, full_path + ".gz"):
            if os.path.exists(candidate):
                return candidate
        raise FileNotFoundError(f"Cannot find file {full_path}")

    def _load(self, name):
        """Read the file for table *name* with ``pandas.read_csv``."""
        return pd.read_csv(self._locate_file(name))

    def __getattr__(self, name):
        """Load table *name* on first access and cache it on the instance.

        Only invoked when normal attribute lookup fails.

        Raises
        ------
        AttributeError
            If *name* is not a known table, or its data file cannot be found.
        """
        # Fetch ``tables`` without going through __getattr__ again: on an
        # instance where it is not set yet (created via __new__, e.g. by
        # copy/pickle), ``self.tables`` would recurse until RecursionError.
        try:
            tables = object.__getattribute__(self, "tables")
        except AttributeError:
            tables = ()

        if name not in tables:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        try:
            table = self._load(name)
        except FileNotFoundError as err:
            # Keep AttributeError so hasattr()/getattr(default) keep working,
            # but preserve the cause for debugging.
            raise AttributeError(
                f"cannot load table {name!r}: {err}"
            ) from err
        self.__dict__[name] = table
        return table