# Source code for pybbda.data.sources.lahman.data

import re
import os
import logging
from pathlib import Path

import pandas as pd

from ..lahman import _LAHMAN_TABLES
from pybbda import PYBBDA_DATA_ROOT
from pybbda.utils.singleton import Singleton

# Directory under the pybbda data root that is used as the default location
# of the Lahman table files when no explicit data path is supplied.
LAHMAN_DATA_PATH = PYBBDA_DATA_ROOT / "Lahman"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class LahmanData(Singleton):
    """
    Lazy, attribute-based access to Lahman tables stored as CSV files.

    Looking up an attribute whose munged name (see ``_munge_attr_name``) is
    listed in ``_LAHMAN_TABLES`` reads ``<data_path>/<MungedName>.csv``, or
    that same path with ``.gz`` appended, via ``pandas.read_csv`` and caches
    the result on the instance under the attribute name that was requested.
    """

    def __init__(self, data_path=None):
        """
        :param data_path: directory containing the table files; when
            ``None`` the module-level ``LAHMAN_DATA_PATH`` is used
        """
        if data_path is None:
            data_path = LAHMAN_DATA_PATH
        self.data_path = Path(data_path)
        # Create a ``None`` placeholder for the munged form of every known
        # table name. These placeholders are found by normal attribute
        # lookup, so ``__getattr__`` is not consulted for those exact names.
        for file_name in _LAHMAN_TABLES:
            setattr(self, self._munge_attr_name(file_name), None)

    @staticmethod
    def _munge_attr_name(name):
        """
        Convert an attribute name into the CamelCase form used for tables.

        A leading lowercase letter is upper-cased, every run of underscores
        followed by a letter is replaced by that letter upper-cased, and any
        underscores that remain are removed
        (e.g. ``"allstar_full"`` -> ``"AllstarFull"``).

        :param name: attribute or table name
        :return: the munged name
        """
        patterns = (r"(^[a-z]{1})", r"_+([a-zA-Z]{1})")
        for pattern in patterns:
            name = re.sub(pattern, lambda m: m.group(1).upper(), name)
        return name.replace("_", "")

    def _load(self, name):
        """
        Read the table file that corresponds to ``name``.

        The plain ``.csv`` file is preferred; if it does not exist, the same
        path with ``.gz`` appended is tried.

        :param name: attribute or table name (munged before use)
        :return: the result of ``pandas.read_csv`` on the file that was found
        :raises FileNotFoundError: if neither file exists
        """
        table_name = self._munge_attr_name(name)
        csv_path = self.data_path / f"{table_name}.csv"
        logger.info("searching for file %s", csv_path)
        candidates = (csv_path, csv_path.with_name(csv_path.name + ".gz"))
        for candidate in candidates:
            if candidate.exists():
                return pd.read_csv(candidate)
        raise FileNotFoundError(f"Cannot find file {csv_path}")

    def __getattr__(self, name):
        """
        Load a table on first access and cache it on the instance.

        Python only calls ``__getattr__`` after normal attribute lookup has
        failed, so a value cached here is returned directly on later
        accesses without going through this method again.

        :param name: the attribute being looked up
        :return: the loaded table
        :raises AttributeError: if ``name`` does not map to a known table,
            or if no file for the table can be found
        """
        if self._munge_attr_name(name) not in _LAHMAN_TABLES:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        try:
            table = self._load(name)
        except FileNotFoundError as err:
            # Still an AttributeError so ``hasattr``/``getattr`` with a
            # default keep working, but the missing path is preserved.
            raise AttributeError(
                f"cannot load Lahman table for attribute {name!r}: {err}"
            ) from err
        self.__dict__[name] = table
        return table