Skip to content

Meta

Meta Examples

Examples of using the Meta class are listed at the bottom of this page; see Examples.

Meta: A class that provides high level information and summaries of Cloud Platform Artifacts. The Meta class provides 'account' information, configuration, etc. It also provides metadata for Artifacts, such as Data Sources, Feature Sets, Models, and Endpoints.

Meta

Bases: CloudMeta

Meta: A class that provides metadata functionality for Cloud Platform Artifacts.

Common Usage
from sageworks.api import Meta
meta = Meta()

# Get the AWS Account Info
meta.account()
meta.config()

# These are 'list' methods
meta.etl_jobs()
meta.data_sources()
meta.feature_sets(details=True/False)
meta.models(details=True/False)
meta.endpoints()
meta.views()

# These are 'describe' methods
meta.data_source("abalone_data")
meta.feature_set("abalone_features")
meta.model("abalone-regression")
meta.endpoint("abalone-endpoint")
Source code in src/sageworks/api/meta.py
class Meta(CloudMeta):
    """Meta: A class that provides metadata functionality for Cloud Platform Artifacts.

    Every public method here is a thin wrapper that forwards to the method of
    the same name on the parent class (CloudMeta), in a few cases renaming the
    keyword argument along the way.

    Common Usage:
       ```python
       from sageworks.api import Meta
       meta = Meta()

       # Get the AWS Account Info
       meta.account()
       meta.config()

       # These are 'list' methods
       meta.etl_jobs()
       meta.data_sources()
       meta.feature_sets(details=True/False)
       meta.models(details=True/False)
       meta.endpoints()
       meta.views()

       # These are 'describe' methods
       meta.data_source("abalone_data")
       meta.feature_set("abalone_features")
       meta.model("abalone-regression")
       meta.endpoint("abalone-endpoint")
       ```
    """

    def account(self) -> dict:
        """Get the Cloud Platform Account Info (delegates to the parent class)

        Returns:
            dict: Cloud Platform Account Info
        """
        return super().account()

    def config(self) -> dict:
        """Return the current SageWorks Configuration (delegates to the parent class)

        Returns:
            dict: The current SageWorks Configuration
        """
        return super().config()

    def incoming_data(self) -> pd.DataFrame:
        """Get summary data about the incoming raw data (delegates to the parent class)

        Returns:
            pd.DataFrame: A summary of the incoming raw data
        """
        return super().incoming_data()

    def etl_jobs(self) -> pd.DataFrame:
        """Get summary data about Extract, Transform, Load (ETL) Jobs (delegates to the parent class)

        Returns:
            pd.DataFrame: A summary of the ETL Jobs deployed in the Cloud Platform
        """
        return super().etl_jobs()

    def data_sources(self) -> pd.DataFrame:
        """Get a summary of the Data Sources deployed in the Cloud Platform (delegates to the parent class)

        Returns:
            pd.DataFrame: A summary of the Data Sources deployed in the Cloud Platform
        """
        return super().data_sources()

    def views(self, database: str = "sageworks") -> pd.DataFrame:
        """Get a summary of all the Views, for the given database, in AWS (delegates to the parent class)

        Args:
            database (str, optional): Glue database. Defaults to 'sageworks'.

        Returns:
            pd.DataFrame: A summary of all the Views, for the given database, in AWS
        """
        return super().views(database=database)

    def feature_sets(self, details: bool = False) -> pd.DataFrame:
        """Get a summary of the Feature Sets deployed in the Cloud Platform (delegates to the parent class)

        Args:
            details (bool, optional): Include detailed information. Defaults to False.

        Returns:
            pd.DataFrame: A summary of the Feature Sets deployed in the Cloud Platform
        """
        return super().feature_sets(details=details)

    def models(self, details: bool = False) -> pd.DataFrame:
        """Get a summary of the Models deployed in the Cloud Platform (delegates to the parent class)

        Args:
            details (bool, optional): Include detailed information. Defaults to False.

        Returns:
            pd.DataFrame: A summary of the Models deployed in the Cloud Platform
        """
        return super().models(details=details)

    def endpoints(self) -> pd.DataFrame:
        """Get a summary of the Endpoints deployed in the Cloud Platform (delegates to the parent class)

        Returns:
            pd.DataFrame: A summary of the Endpoints in the Cloud Platform
        """
        return super().endpoints()

    def glue_job(self, job_name: str) -> Union[dict, None]:
        """Get the details of a specific Glue Job (delegates to the parent class)

        Args:
            job_name (str): The name of the Glue Job

        Returns:
            Union[dict, None]: The details of the Glue Job (None if not found)
        """
        return super().glue_job(job_name=job_name)

    def data_source(self, data_source_name: str, database: str = "sageworks") -> Union[dict, None]:
        """Get the details of a specific Data Source (delegates to the parent class)

        Args:
            data_source_name (str): The name of the Data Source
            database (str, optional): The Glue database. Defaults to 'sageworks'.

        Returns:
            Union[dict, None]: The details of the Data Source (None if not found)
        """
        # The parent class names this argument 'table_name'
        return super().data_source(table_name=data_source_name, database=database)

    def feature_set(self, feature_set_name: str) -> Union[dict, None]:
        """Get the details of a specific Feature Set (delegates to the parent class)

        Args:
            feature_set_name (str): The name of the Feature Set

        Returns:
            Union[dict, None]: The details of the Feature Set (None if not found)
        """
        # The parent class names this argument 'feature_group_name'
        return super().feature_set(feature_group_name=feature_set_name)

    def model(self, model_name: str) -> Union[dict, None]:
        """Get the details of a specific Model (delegates to the parent class)

        Args:
            model_name (str): The name of the Model

        Returns:
            Union[dict, None]: The details of the Model (None if not found)
        """
        # The parent class names this argument 'model_group_name'
        return super().model(model_group_name=model_name)

    def endpoint(self, endpoint_name: str) -> Union[dict, None]:
        """Get the details of a specific Endpoint (delegates to the parent class)

        Args:
            endpoint_name (str): The name of the Endpoint

        Returns:
            Union[dict, None]: The details of the Endpoint (None if not found)
        """
        return super().endpoint(endpoint_name=endpoint_name)

    def __repr__(self) -> str:
        """Return 'Meta()' followed by the parent's repr on a new, tab-indented line

        Returns:
            str: The string representation of this Meta object
        """
        return f"Meta()\n\t{super().__repr__()}"

account()

Cloud Platform Account Info

Returns:

Name Type Description
dict dict

Cloud Platform Account Info

Source code in src/sageworks/api/meta.py
def account(self) -> dict:
    """Get the Cloud Platform Account Info (delegates to the parent class)

    Returns:
        dict: Cloud Platform Account Info
    """
    return super().account()

config()

Return the current SageWorks Configuration

Returns:

Name Type Description
dict dict

The current SageWorks Configuration

Source code in src/sageworks/api/meta.py
def config(self) -> dict:
    """Return the current SageWorks Configuration (delegates to the parent class)

    Returns:
        dict: The current SageWorks Configuration
    """
    return super().config()

data_source(data_source_name, database='sageworks')

Get the details of a specific Data Source

Parameters:

Name Type Description Default
data_source_name str

The name of the Data Source

required
database str

The Glue database. Defaults to 'sageworks'.

'sageworks'

Returns:

Name Type Description
dict Union[dict, None]

The details of the Data Source (None if not found)

Source code in src/sageworks/api/meta.py
def data_source(self, data_source_name: str, database: str = "sageworks") -> Union[dict, None]:
    """Get the details of a specific Data Source (delegates to the parent class)

    Args:
        data_source_name (str): The name of the Data Source
        database (str, optional): The Glue database. Defaults to 'sageworks'.

    Returns:
        Union[dict, None]: The details of the Data Source (None if not found)
    """
    # The parent class names this argument 'table_name'
    return super().data_source(table_name=data_source_name, database=database)

data_sources()

Get a summary of the Data Sources deployed in the Cloud Platform

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of the Data Sources deployed in the Cloud Platform

Source code in src/sageworks/api/meta.py
def data_sources(self) -> pd.DataFrame:
    """Get a summary of the Data Sources deployed in the Cloud Platform (delegates to the parent class)

    Returns:
        pd.DataFrame: A summary of the Data Sources deployed in the Cloud Platform
    """
    return super().data_sources()

endpoint(endpoint_name)

Get the details of a specific Endpoint

Parameters:

Name Type Description Default
endpoint_name str

The name of the Endpoint

required

Returns:

Name Type Description
dict Union[dict, None]

The details of the Endpoint (None if not found)

Source code in src/sageworks/api/meta.py
def endpoint(self, endpoint_name: str) -> Union[dict, None]:
    """Get the details of a specific Endpoint (delegates to the parent class)

    Args:
        endpoint_name (str): The name of the Endpoint

    Returns:
        Union[dict, None]: The details of the Endpoint (None if not found)
    """
    return super().endpoint(endpoint_name=endpoint_name)

endpoints()

Get a summary of the Endpoints deployed in the Cloud Platform

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of the Endpoints in the Cloud Platform

Source code in src/sageworks/api/meta.py
def endpoints(self) -> pd.DataFrame:
    """Get a summary of the Endpoints deployed in the Cloud Platform (delegates to the parent class)

    Returns:
        pd.DataFrame: A summary of the Endpoints in the Cloud Platform
    """
    return super().endpoints()

etl_jobs()

Get summary data about Extract, Transform, Load (ETL) Jobs

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of the ETL Jobs deployed in the Cloud Platform

Source code in src/sageworks/api/meta.py
def etl_jobs(self) -> pd.DataFrame:
    """Get summary data about Extract, Transform, Load (ETL) Jobs (delegates to the parent class)

    Returns:
        pd.DataFrame: A summary of the ETL Jobs deployed in the Cloud Platform
    """
    return super().etl_jobs()

feature_set(feature_set_name)

Get the details of a specific Feature Set

Parameters:

Name Type Description Default
feature_set_name str

The name of the Feature Set

required

Returns:

Name Type Description
dict Union[dict, None]

The details of the Feature Set (None if not found)

Source code in src/sageworks/api/meta.py
def feature_set(self, feature_set_name: str) -> Union[dict, None]:
    """Get the details of a specific Feature Set (delegates to the parent class)

    Args:
        feature_set_name (str): The name of the Feature Set

    Returns:
        Union[dict, None]: The details of the Feature Set (None if not found)
    """
    # The parent class names this argument 'feature_group_name'
    return super().feature_set(feature_group_name=feature_set_name)

feature_sets(details=False)

Get a summary of the Feature Sets deployed in the Cloud Platform

Parameters:

Name Type Description Default
details bool

Include detailed information. Defaults to False.

False

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of the Feature Sets deployed in the Cloud Platform

Source code in src/sageworks/api/meta.py
def feature_sets(self, details: bool = False) -> pd.DataFrame:
    """Get a summary of the Feature Sets deployed in the Cloud Platform (delegates to the parent class)

    Args:
        details (bool, optional): Include detailed information. Defaults to False.

    Returns:
        pd.DataFrame: A summary of the Feature Sets deployed in the Cloud Platform
    """
    return super().feature_sets(details=details)

glue_job(job_name)

Get the details of a specific Glue Job

Parameters:

Name Type Description Default
job_name str

The name of the Glue Job

required

Returns:

Name Type Description
dict Union[dict, None]

The details of the Glue Job (None if not found)

Source code in src/sageworks/api/meta.py
def glue_job(self, job_name: str) -> Union[dict, None]:
    """Get the details of a specific Glue Job (delegates to the parent class)

    Args:
        job_name (str): The name of the Glue Job

    Returns:
        Union[dict, None]: The details of the Glue Job (None if not found)
    """
    return super().glue_job(job_name=job_name)

incoming_data()

Get summary data about data in the incoming raw data

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of the incoming raw data

Source code in src/sageworks/api/meta.py
def incoming_data(self) -> pd.DataFrame:
    """Get summary data about the incoming raw data (delegates to the parent class)

    Returns:
        pd.DataFrame: A summary of the incoming raw data
    """
    return super().incoming_data()

model(model_name)

Get the details of a specific Model

Parameters:

Name Type Description Default
model_name str

The name of the Model

required

Returns:

Name Type Description
dict Union[dict, None]

The details of the Model (None if not found)

Source code in src/sageworks/api/meta.py
def model(self, model_name: str) -> Union[dict, None]:
    """Get the details of a specific Model (delegates to the parent class)

    Args:
        model_name (str): The name of the Model

    Returns:
        Union[dict, None]: The details of the Model (None if not found)
    """
    # The parent class names this argument 'model_group_name'
    return super().model(model_group_name=model_name)

models(details=False)

Get a summary of the Models deployed in the Cloud Platform

Parameters:

Name Type Description Default
details bool

Include detailed information. Defaults to False.

False

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of the Models deployed in the Cloud Platform

Source code in src/sageworks/api/meta.py
def models(self, details: bool = False) -> pd.DataFrame:
    """Get a summary of the Models deployed in the Cloud Platform (delegates to the parent class)

    Args:
        details (bool, optional): Include detailed information. Defaults to False.

    Returns:
        pd.DataFrame: A summary of the Models deployed in the Cloud Platform
    """
    return super().models(details=details)

views(database='sageworks')

Get a summary of all the Views, for the given database, in AWS

Parameters:

Name Type Description Default
database str

Glue database. Defaults to 'sageworks'.

'sageworks'

Returns:

Type Description
DataFrame

pd.DataFrame: A summary of all the Views, for the given database, in AWS

Source code in src/sageworks/api/meta.py
def views(self, database: str = "sageworks") -> pd.DataFrame:
    """Get a summary of all the Views, for the given database, in AWS (delegates to the parent class)

    Args:
        database (str, optional): Glue database. Defaults to 'sageworks'.

    Returns:
        pd.DataFrame: A summary of all the Views, for the given database, in AWS
    """
    return super().views(database=database)

Examples

These examples show how to use the Meta() class to pull lists of artifacts from AWS: DataSources, FeatureSets, Models, Endpoints, and more. If you're building a web interface plugin, the Meta class is a great place to start.

SageWorks REPL

If you'd like to see exactly what data/details you get back from the Meta() class, you can spin up the SageWorks REPL, use the class and test out all the methods. Try it out! SageWorks REPL

Using SageWorks REPL
meta = Meta()
model_df = meta.models()
model_df
               Model Group   Health Owner  ...             Input     Status                Description
0      wine-classification  healthy     -  ...     wine_features  Completed  Wine Classification Model
1  abalone-regression-full  healthy     -  ...  abalone_features  Completed   Abalone Regression Model
2       abalone-regression  healthy     -  ...  abalone_features  Completed   Abalone Regression Model

[3 rows x 10 columns]

List the Models in AWS

meta_list_models.py
from pprint import pprint
from sageworks.api import Meta

# Create our Meta Class and get a list of our Models
meta = Meta()
model_df = meta.models()

print(f"Number of Models: {len(model_df)}")
print(model_df)

# Get the detailed data for each Model (pprint keeps the nested dicts readable)
model_names = model_df["Model Group"].tolist()
for name in model_names:
    pprint(meta.model(name))

Output

Number of Models: 3
               Model Group   Health Owner  ...             Input     Status                Description
0      wine-classification  healthy     -  ...     wine_features  Completed  Wine Classification Model
1  abalone-regression-full  healthy     -  ...  abalone_features  Completed   Abalone Regression Model
2       abalone-regression  healthy     -  ...  abalone_features  Completed   Abalone Regression Model

[3 rows x 10 columns]
wine-classification
abalone-regression-full
abalone-regression

Getting Model Performance Metrics

meta_model_metrics.py
from sageworks.api import Meta

# Create our Meta Class and get a list of our Models
meta = Meta()
model_df = meta.models()

print(f"Number of Models: {len(model_df)}")
print(model_df)

# Get the performance metrics for (at most) the first five Models
model_names = model_df["Model Group"].tolist()
for name in model_names[:5]:
    model_details = meta.model(name)
    print(f"\n\nModel: {name}")

    # meta.model() returns None when the Model isn't found, so guard before
    # indexing into the details (otherwise this raises a TypeError)
    if model_details is None:
        print("\tPerformance Metrics: None (model not found)")
        continue

    # Use .get() so a Model without recorded metrics prints None instead of
    # raising a KeyError
    sageworks_meta = model_details.get("sageworks_meta") or {}
    performance_metrics = sageworks_meta.get("sageworks_inference_metrics")
    print(f"\tPerformance Metrics: {performance_metrics}")

Output

wine-classification
    ARN: arn:aws:sagemaker:us-west-2:507740646243:model-package-group/wine-classification
    Description: Wine Classification Model
    Tags: wine::classification
    Performance Metrics:
        [{'wine_class': 'TypeA', 'precision': 1.0, 'recall': 1.0, 'fscore': 1.0, 'roc_auc': 1.0, 'support': 12}, {'wine_class': 'TypeB', 'precision': 1.0, 'recall': 1.0, 'fscore': 1.0, 'roc_auc': 1.0, 'support': 14}, {'wine_class': 'TypeC', 'precision': 1.0, 'recall': 1.0, 'fscore': 1.0, 'roc_auc': 1.0, 'support': 9}]

abalone-regression
    ARN: arn:aws:sagemaker:us-west-2:507740646243:model-package-group/abalone-regression
    Description: Abalone Regression Model
    Tags: abalone::regression
    Performance Metrics:
        [{'MAE': 1.64, 'RMSE': 2.246, 'R2': 0.502, 'MAPE': 16.393, 'MedAE': 1.209, 'NumRows': 834}]

List the Endpoints in AWS

meta_list_endpoints.py
from pprint import pprint
from sageworks.api import Meta

# Instantiate the Meta class and pull the summary table of Endpoints
meta = Meta()
endpoint_df = meta.endpoints()

# Report how many Endpoints came back, then show the summary itself
print(f"Number of Endpoints: {len(endpoint_df)}")
print(endpoint_df)

# Walk the 'Name' column and pretty-print the full details of each Endpoint
for endpoint_name in endpoint_df["Name"].tolist():
    endpoint_details = meta.endpoint(endpoint_name)
    pprint(endpoint_details)

Output

Number of Endpoints: 2
                      Name   Health            Instance           Created  ...     Status     Variant Capture Samp(%)
0  wine-classification-end  healthy  Serverless (2GB/5)  2024-03-23 23:09  ...  InService  AllTraffic   False       -
1   abalone-regression-end  healthy  Serverless (2GB/5)  2024-03-23 21:11  ...  InService  AllTraffic   False       -

[2 rows x 10 columns]
wine-classification-end
<lots of details about endpoints>

Not Finding some particular AWS Data?

Some of the SageWorks Meta API methods (for example, feature_sets() and models()) also take a details=True argument, so make sure to check those out.