Skip to content

scripts

BaseEtl

Bases: AbstractEtl

This class is used to create an ETL object.

Source code in pypdtools\core\scripts.py
class BaseEtl(AbstractEtl):
    """Base class for a lazily executed extract/transform/load pipeline.

    Subclasses provide ``extract``, ``transform`` and ``load``. The three
    stages run at most once, the first time ``execute`` is called either
    explicitly or implicitly through the ``data`` property and the dunder
    methods defined below.

    Note:
        ``data`` calls ``execute`` and ``executed`` only becomes ``True``
        after ``load`` returns, so stage implementations must not read
        ``self.data``; doing so would re-enter the pipeline.
    """

    def __init__(self) -> None:
        """Initialize an ETL object whose pipeline has not run yet."""
        super().__init__()
        # ``data`` is a getter-only property on this class, so assigning to
        # it raises AttributeError. Reset the backing attribute it reads.
        self._df = None
        self.executed = False

    def extract(self):
        """Get data from the source.

        Raises:
            NotImplementedError: always; subclasses must override this stage.
        """
        raise NotImplementedError

    def transform(self):
        """Transform the extracted data.

        Raises:
            NotImplementedError: always; subclasses must override this stage.
        """
        raise NotImplementedError

    def load(self):
        """Load the transformed data.

        Raises:
            NotImplementedError: always; subclasses must override this stage.
        """
        raise NotImplementedError

    def execute(self):
        """Run extract, transform and load once, in that order.

        Returns:
            This object, so calls can be chained. Repeated calls after a
            successful run are no-ops.
        """
        if self.executed:
            return self

        self.extract()
        self.transform()
        self.load()
        # Only flag success after every stage finished without raising.
        self.executed = True
        return self

    @property
    def data(self) -> pd.DataFrame:
        """Return the underlying data, executing the ETL first if needed."""
        self.execute()
        return self._df

    def __len__(self) -> int:
        """Return the parent's length after making sure the ETL has run."""
        self.execute()
        return super().__len__()

    def __iter__(self) -> Iterator:
        """Return the parent's iterator after making sure the ETL has run."""
        self.execute()
        return super().__iter__()

    def __aiter__(self) -> Iterator:
        """Return the parent's async iterator after making sure the ETL has run."""
        self.execute()
        return super().__aiter__()

    def __enter__(self) -> pd.DataFrame:
        """Enter the parent's context after making sure the ETL has run."""
        self.execute()
        return super().__enter__()

    async def __aenter__(self) -> pd.DataFrame:
        """Enter the parent's async context after making sure the ETL has run."""
        self.execute()
        # Await the parent's awaitable so ``async with ... as x`` binds its
        # result instead of a pending coroutine.
        return await super().__aenter__()

    def __add__(self, other) -> PtDataFrame:
        """Return ``self + other`` using the parent's addition.

        ``self`` is always executed first. ``other`` is executed only when it
        exposes a callable ``execute``, so operands that are not ETL objects
        are passed to the parent unchanged.
        """
        self.execute()
        run_other = getattr(other, "execute", None)
        if callable(run_other):
            run_other()
        return super().__add__(other)

    def __radd__(self, other) -> PtDataFrame:
        """Return ``other + self`` using the parent's reflected addition.

        Python calls this when the left operand cannot handle the addition,
        e.g. the integer ``0`` that ``sum()`` starts from, so ``other`` is
        executed only when it exposes a callable ``execute``.
        """
        self.execute()
        run_other = getattr(other, "execute", None)
        if callable(run_other):
            run_other()
        return super().__radd__(other)

    def col_to_list(self, col_name, drop_duplicates=True) -> List:
        """Return a column as a list via the parent, executing the ETL first.

        Args:
            col_name: Column name forwarded to the parent implementation.
            drop_duplicates: Forwarded to the parent implementation.
        """
        self.execute()
        return super().col_to_list(col_name, drop_duplicates)

data: pd.DataFrame property

This property returns the data, executing the ETL first if it has not run yet.

__add__(other)

This method is used to add two dataframes.

Source code in pypdtools\core\scripts.py
def __add__(self, other) -> PtDataFrame:
    """Return ``self + other`` using the parent's addition.

    ``self`` is always executed first. ``other`` is executed only when it
    exposes a callable ``execute``, so operands that are not ETL objects are
    passed to the parent unchanged instead of raising AttributeError here.
    """
    self.execute()
    run_other = getattr(other, "execute", None)
    if callable(run_other):
        run_other()
    return super().__add__(other)

__aenter__() async

This method is used to async enter the context.

Source code in pypdtools\core\scripts.py
async def __aenter__(self) -> pd.DataFrame:
    """Enter the parent's async context after making sure the ETL has run."""
    self.execute()
    # Await the parent's awaitable so ``async with ... as x`` binds its
    # result instead of a pending coroutine.
    return await super().__aenter__()

__aiter__()

This method is used to async iterate over the data.

Source code in pypdtools\core\scripts.py
def __aiter__(self) -> Iterator:
    """Run the ETL if needed, then hand back the parent's async iterator."""
    self.execute()
    async_iterator = super().__aiter__()
    return async_iterator

__enter__()

This method is used to enter the context.

Source code in pypdtools\core\scripts.py
def __enter__(self) -> pd.DataFrame:
    """Run the ETL if needed, then enter the parent's context."""
    self.execute()
    entered = super().__enter__()
    return entered

__init__()

This method is used to initialize the ETL object.

Source code in pypdtools\core\scripts.py
def __init__(self) -> None:
    """Initialize an ETL object whose pipeline has not run yet."""
    super().__init__()
    # ``data`` is exposed as a getter-only property returning ``self._df``,
    # so assigning to ``self.data`` raises AttributeError. Reset the backing
    # attribute instead.
    self._df = None
    self.executed = False

__iter__()

This method is used to iterate over the data.

Source code in pypdtools\core\scripts.py
def __iter__(self) -> Iterator:
    """Run the ETL if needed, then hand back the parent's iterator."""
    self.execute()
    iterator = super().__iter__()
    return iterator

__len__()

This method is used to get the length of the data.

Source code in pypdtools\core\scripts.py
def __len__(self) -> int:
    """Run the ETL if needed, then report the parent's length."""
    self.execute()
    size = super().__len__()
    return size

__radd__(other)

This method implements reflected addition (`other + self`) of two dataframes.

Source code in pypdtools\core\scripts.py
def __radd__(self, other) -> PtDataFrame:
    """Return ``other + self`` using the parent's reflected addition.

    Python calls this when the left operand cannot handle the addition, e.g.
    the integer ``0`` that ``sum()`` starts from. ``other`` is therefore
    executed only when it exposes a callable ``execute``; anything else is
    passed to the parent unchanged instead of raising AttributeError here.
    """
    self.execute()
    run_other = getattr(other, "execute", None)
    if callable(run_other):
        run_other()
    return super().__radd__(other)

col_to_list(col_name, drop_duplicates=True)

This method is used to get a column as a list.

Source code in pypdtools\core\scripts.py
def col_to_list(self, col_name, drop_duplicates=True) -> List:
    """Run the ETL if needed, then delegate the column lookup to the parent."""
    self.execute()
    values = super().col_to_list(col_name, drop_duplicates)
    return values

execute()

This method is used to execute the etl.

Source code in pypdtools\core\scripts.py
def execute(self):
    """Run extract, transform and load once, then return this object.

    When ``executed`` is already true the stages are skipped.
    """
    if not self.executed:
        self.extract()
        self.transform()
        self.load()
        # Flag success only after every stage returned.
        self.executed = True
    return self

extract()

This method is used to get data from the source.

Source code in pypdtools\core\scripts.py
def extract(self):
    """Extraction stage placeholder; always raises NotImplementedError."""
    raise NotImplementedError()

load()

This method is used to load the data.

Source code in pypdtools\core\scripts.py
def load(self):
    """Load stage placeholder; always raises NotImplementedError."""
    raise NotImplementedError()

transform()

This method is used to transform the data.

Source code in pypdtools\core\scripts.py
def transform(self):
    """Transform stage placeholder; always raises NotImplementedError."""
    raise NotImplementedError()