Skip to content

CachedDataSource

CachedDataSource Examples

Examples of using the CachedDataSource class are in the Examples section at the bottom of this page. CachedDataSource caches the results of common DataSource methods (such as details() and health_check()), so keep in mind that cached values may lag behind changes to the underlying DataSource.

CachedDataSource: Caches the method results for SageWorks DataSources

CachedDataSource

Bases: CachedArtifactMixin, AthenaSource

CachedDataSource: Caches the method results for SageWorks DataSources

Note: Cached method values may lag underlying DataSource changes.

Common Usage
my_data = CachedDataSource(name)
my_data.details()
my_data.health_check()
my_data.sageworks_meta()
Source code in src/sageworks/cached/cached_data_source.py
class CachedDataSource(CachedArtifactMixin, AthenaSource):
    """CachedDataSource: Caches the method results for SageWorks DataSources

    An AthenaSource subclass whose summary(), details(), health_check(), and
    sageworks_meta() methods are wrapped with CachedArtifactMixin.cache_result.
    smart_sample() is not wrapped; it delegates to the parent with recompute=False.

    Note: Cached method values may lag underlying DataSource changes.

    Common Usage:
        ```python
        my_data = CachedDataSource(name)
        my_data.details()
        my_data.health_check()
        my_data.sageworks_meta()
        ```
    """

    def __init__(self, data_uuid: str, database: str = "sageworks"):
        """CachedDataSource Initialization

        Args:
            data_uuid (str): UUID of the DataSource (forwarded to AthenaSource)
            database (str): Database name forwarded to AthenaSource (default: "sageworks")
        """
        # Invoke AthenaSource's initializer explicitly, always requesting cached metadata
        AthenaSource.__init__(self, data_uuid=data_uuid, database=database, use_cached_meta=True)

    @CachedArtifactMixin.cache_result
    def summary(self, **kwargs) -> dict:
        """Retrieve the DataSource Summary.

        Args:
            **kwargs: Forwarded unchanged to the parent class summary() method

        Returns:
            dict: The summary dictionary produced by the parent class summary() method
        """
        return super().summary(**kwargs)

    @CachedArtifactMixin.cache_result
    def details(self, **kwargs) -> dict:
        """Retrieve the DataSource Details.

        Args:
            **kwargs: Forwarded unchanged to the parent class details() method

        Returns:
            dict: A dictionary of details about the DataSource
        """
        return super().details(**kwargs)

    @CachedArtifactMixin.cache_result
    def health_check(self, **kwargs) -> dict:
        """Retrieve the DataSource Health Check.

        Args:
            **kwargs: Forwarded unchanged to the parent class health_check() method

        Returns:
            dict: A dictionary of health check details for the DataSource
        """
        return super().health_check(**kwargs)

    @CachedArtifactMixin.cache_result
    def sageworks_meta(self) -> Union[dict, None]:
        """Retrieve the SageWorks Metadata for this DataSource.

        Returns:
            Union[dict, None]: Dictionary of SageWorks metadata for this Artifact
        """
        return super().sageworks_meta()

    def smart_sample(self) -> pd.DataFrame:
        """Retrieve the Smart Sample for this DataSource.

        Unlike the other methods on this class, this one is not wrapped with
        cache_result; it calls the parent implementation with recompute=False.

        Returns:
            pd.DataFrame: The Smart Sample DataFrame
        """
        # NOTE(review): recompute=False presumably reuses a previously computed
        # sample instead of rebuilding it -- confirm against AthenaSource.smart_sample
        return super().smart_sample(recompute=False)

__init__(data_uuid, database='sageworks')

CachedDataSource Initialization

Source code in src/sageworks/cached/cached_data_source.py
def __init__(self, data_uuid: str, database: str = "sageworks"):
    """CachedDataSource Initialization

    Args:
        data_uuid (str): UUID of the DataSource (forwarded to AthenaSource)
        database (str): Database name forwarded to AthenaSource (default: "sageworks")
    """
    # Invoke AthenaSource's initializer explicitly, always requesting cached metadata
    AthenaSource.__init__(self, data_uuid=data_uuid, database=database, use_cached_meta=True)

details(**kwargs)

Retrieve the DataSource Details.

Returns:

Name Type Description
dict dict

A dictionary of details about the DataSource

Source code in src/sageworks/cached/cached_data_source.py
@CachedArtifactMixin.cache_result
def details(self, **kwargs) -> dict:
    """Retrieve the DataSource Details.

    Wrapped with CachedArtifactMixin.cache_result; the work itself is done by
    the parent class details() method.

    Args:
        **kwargs: Forwarded unchanged to the parent class details() method

    Returns:
        dict: A dictionary of details about the DataSource
    """
    return super().details(**kwargs)

health_check(**kwargs)

Retrieve the DataSource Health Check.

Returns:

Name Type Description
dict dict

A dictionary of health check details for the DataSource

Source code in src/sageworks/cached/cached_data_source.py
@CachedArtifactMixin.cache_result
def health_check(self, **kwargs) -> dict:
    """Retrieve the DataSource Health Check.

    Wrapped with CachedArtifactMixin.cache_result; the work itself is done by
    the parent class health_check() method.

    Args:
        **kwargs: Forwarded unchanged to the parent class health_check() method

    Returns:
        dict: A dictionary of health check details for the DataSource
    """
    return super().health_check(**kwargs)

sageworks_meta()

Retrieve the SageWorks Metadata for this DataSource.

Returns:

Type Description
Union[dict, None]

Union[dict, None]: Dictionary of SageWorks metadata for this Artifact

Source code in src/sageworks/cached/cached_data_source.py
@CachedArtifactMixin.cache_result
def sageworks_meta(self) -> Union[dict, None]:
    """Retrieve the SageWorks Metadata for this DataSource.

    Wrapped with CachedArtifactMixin.cache_result; takes no arguments and
    delegates to the parent class sageworks_meta() method.

    Returns:
        Union[dict, None]: Dictionary of SageWorks metadata for this Artifact
    """
    return super().sageworks_meta()

smart_sample()

Retrieve the Smart Sample for this DataSource.

Returns:

Type Description
DataFrame

pd.DataFrame: The Smart Sample DataFrame

Source code in src/sageworks/cached/cached_data_source.py
def smart_sample(self) -> pd.DataFrame:
    """Retrieve the Smart Sample for this DataSource.

    This method carries no cache_result decorator; it calls the parent
    implementation with recompute=False.

    Returns:
        pd.DataFrame: The Smart Sample DataFrame
    """
    # NOTE(review): recompute=False presumably reuses a previously computed
    # sample instead of rebuilding it -- confirm against AthenaSource.smart_sample
    return super().smart_sample(recompute=False)

summary(**kwargs)

Retrieve the DataSource Summary.

Returns:

Name Type Description
dict dict

A dictionary summarizing the DataSource

Source code in src/sageworks/cached/cached_data_source.py
@CachedArtifactMixin.cache_result
def summary(self, **kwargs) -> dict:
    """Retrieve the DataSource Summary.

    Wrapped with CachedArtifactMixin.cache_result; the work itself is done by
    the parent class summary() method.

    Args:
        **kwargs: Forwarded unchanged to the parent class summary() method

    Returns:
        dict: The summary dictionary produced by the parent class summary() method
    """
    return super().summary(**kwargs)

Examples

All of the SageWorks examples are in the SageWorks repository under the examples/ directory. For a full code listing of any example, please visit our SageWorks Examples.

Pull DataSource Details

from sageworks.cached.cached_data_source import CachedDataSource

# Grab a DataSource by name (here the "abalone_data" DataSource)
ds = CachedDataSource("abalone_data")

# Show the details (returns a dict; sample output is shown below)
ds.details()

> ds.details()

{'uuid': 'abalone_data',
 'health_tags': [],
 'aws_arn': 'arn:aws:glue:x:table/sageworks/abalone_data',
 'size': 0.070272,
 'created': '2024-11-09T20:42:34.000Z',
 'modified': '2024-11-10T19:57:52.000Z',
 'input': 's3://sageworks-public-data/common/aBaLone.CSV',
 'sageworks_health_tags': '',
 'sageworks_correlations': {'length': {'diameter': 0.9868115846024996,