Skip to content

Transform

API Classes

The API Classes will use Transforms internally. So model.to_endpoint() uses the ModelToEndpoint() transform. If you need more control over the Transform you can use the Core Classes directly.

The Workbench Transform class is an abstract base class that defines the API implemented by all of its child classes (DataLoaders, DataSourceToFeatureSet, ModelToEndpoint, etc).

Transform: Base Class for all transforms within Workbench. Inherited Classes must implement the abstract transform_impl() method.

Transform

Bases: ABC

Transform: Abstract Base Class for all transforms within Workbench. Inherited Classes must implement the abstract transform_impl() method

Source code in src/workbench/core/transforms/transform.py
class Transform(ABC):
    """Transform: Abstract Base Class for all transforms within Workbench.

    Inherited Classes must implement the abstract transform_impl() and post_transform()
    methods. Callers run a transform through transform(), which invokes pre_transform(),
    transform_impl(), and post_transform() in that order.
    """

    def __init__(self, input_uuid: str, output_uuid: str, catalog_db: str = "workbench"):
        """Transform Initialization

        Args:
            input_uuid (str): The UUID of the Input Artifact
            output_uuid (str): The UUID of the Output Artifact
            catalog_db (str): The AWS Data Catalog Database to use (default: "workbench")

        Raises:
            FatalConfigError: If the Workbench configuration is incomplete
        """

        self.log = logging.getLogger("workbench")
        self.input_type = None
        self.output_type = None
        self.output_tags = ""
        self.input_uuid = str(input_uuid)  # Occasionally we get a pathlib.Path object
        self.output_uuid = str(output_uuid)  # Occasionally we get a pathlib.Path object
        self.output_meta = {"workbench_input": self.input_uuid}
        self.data_catalog_db = catalog_db

        # Grab our Workbench Bucket
        cm = ConfigManager()
        if not cm.config_okay():
            self.log.error("Workbench Configuration Incomplete...")
            self.log.error("Run the 'workbench' command and follow the prompts...")
            raise FatalConfigError()
        self.workbench_bucket = cm.get_config("WORKBENCH_BUCKET")
        self.data_sources_s3_path = "s3://" + self.workbench_bucket + "/data-sources"
        self.feature_sets_s3_path = "s3://" + self.workbench_bucket + "/feature-sets"
        self.models_s3_path = "s3://" + self.workbench_bucket + "/models"
        self.endpoints_sets_s3_path = "s3://" + self.workbench_bucket + "/endpoints"

        # Grab a Workbench Role ARN, Boto3, SageMaker Session, and SageMaker Client
        self.aws_account_clamp = AWSAccountClamp()
        self.workbench_role_arn = self.aws_account_clamp.aws_session.get_workbench_execution_role_arn()
        self.boto3_session = self.aws_account_clamp.boto3_session
        self.sm_session = self.aws_account_clamp.sagemaker_session()
        self.sm_client = self.aws_account_clamp.sagemaker_client()

        # Delimiter for storing lists in AWS Tags
        self.tag_delimiter = "::"

    @abstractmethod
    def transform_impl(self, **kwargs):
        """Abstract Method: Implement the Transformation from Input to Output"""
        pass

    def pre_transform(self, **kwargs):
        """Perform any Pre-Transform operations"""
        self.log.debug("Pre-Transform...")

    @abstractmethod
    def post_transform(self, **kwargs):
        """Post-Transform ensures that the output Artifact is ready for use"""
        pass

    def set_output_tags(self, tags: Union[list, str]):
        """Set the tags that will be associated with the output object

        Args:
            tags (Union[list, str]): The list of tags or a '::' separated string of tags
        """
        if isinstance(tags, list):
            self.output_tags = self.tag_delimiter.join(tags)
        else:
            self.output_tags = tags

    def add_output_meta(self, meta: dict):
        """Add additional metadata that will be associated with the output artifact

        Args:
            meta (dict): A dictionary of metadata (keys already present are overwritten)
        """
        self.output_meta = self.output_meta | meta

    @staticmethod
    def convert_to_aws_tags(metadata: dict):
        """Convert a dictionary to the AWS tag format (list of dicts)
        [ {Key: key_name, Value: value}, {..}, ...]"""
        return [{"Key": key, "Value": value} for key, value in metadata.items()]

    def get_aws_tags(self):
        """Get the metadata/tags and convert them into AWS Tag Format"""
        # 'workbench_tags' comes first; output_meta entries follow (and win on a key clash)
        workbench_meta = {"workbench_tags": self.output_tags, **self.output_meta}
        return self.convert_to_aws_tags(workbench_meta)

    @final
    def transform(self, **kwargs):
        """Perform the Transformation from Input to Output with pre_transform() and post_transform() invocations"""
        self.pre_transform(**kwargs)
        self.transform_impl(**kwargs)
        self.post_transform(**kwargs)

    # input_type/output_type are properties (not plain methods) because __init__ and
    # subclasses assign to `self.input_type` / `self.output_type`; as plain methods they
    # were shadowed by those instance attributes and could never be called.
    # The annotations are quoted so they are not evaluated at definition time.
    @property
    def input_type(self) -> "TransformInput":
        """What Input Type does this Transform Consume (None until assigned)"""
        return self._input_type

    @input_type.setter
    def input_type(self, input_type: "TransformInput"):
        self._input_type = input_type

    @property
    def output_type(self) -> "TransformOutput":
        """What Output Type does this Transform Produce (None until assigned)"""
        return self._output_type

    @output_type.setter
    def output_type(self, output_type: "TransformOutput"):
        self._output_type = output_type

    def set_input_uuid(self, input_uuid: str):
        """Set the Input UUID (Name) for this Transform

        Args:
            input_uuid (str): The UUID of the Input Artifact
        """
        self.input_uuid = str(input_uuid)  # Same normalization as __init__ (pathlib.Path objects)
        # Keep the 'workbench_input' metadata in step with the new input
        self.output_meta = self.output_meta | {"workbench_input": self.input_uuid}

    def set_output_uuid(self, output_uuid: str):
        """Set the Output UUID (Name) for this Transform

        Args:
            output_uuid (str): The UUID of the Output Artifact
        """
        self.output_uuid = str(output_uuid)  # Same normalization as __init__ (pathlib.Path objects)

__init__(input_uuid, output_uuid, catalog_db='workbench')

Transform Initialization

Parameters:

Name Type Description Default
input_uuid str

The UUID of the Input Artifact

required
output_uuid str

The UUID of the Output Artifact

required
catalog_db str

The AWS Data Catalog Database to use (default: "workbench")

'workbench'
Source code in src/workbench/core/transforms/transform.py
def __init__(self, input_uuid: str, output_uuid: str, catalog_db: str = "workbench"):
    """Set up the state shared by every Transform

    Args:
        input_uuid (str): The UUID of the Input Artifact
        output_uuid (str): The UUID of the Output Artifact
        catalog_db (str): The AWS Data Catalog Database to use (default: "workbench")

    Raises:
        FatalConfigError: If the Workbench configuration is incomplete
    """

    # Logger plus input/output bookkeeping
    self.log = logging.getLogger("workbench")
    self.input_type = None
    self.output_type = None
    self.output_tags = ""
    # The UUIDs are stringified since callers occasionally hand us a pathlib.Path
    self.input_uuid = str(input_uuid)
    self.output_uuid = str(output_uuid)
    self.output_meta = {"workbench_input": self.input_uuid}
    self.data_catalog_db = catalog_db

    # Refuse to continue without a complete Workbench configuration
    config = ConfigManager()
    if not config.config_okay():
        for message in (
            "Workbench Configuration Incomplete...",
            "Run the 'workbench' command and follow the prompts...",
        ):
            self.log.error(message)
        raise FatalConfigError()

    # S3 locations all hang off the configured Workbench bucket
    self.workbench_bucket = config.get_config("WORKBENCH_BUCKET")
    bucket_root = "s3://" + self.workbench_bucket
    self.data_sources_s3_path = bucket_root + "/data-sources"
    self.feature_sets_s3_path = bucket_root + "/feature-sets"
    self.models_s3_path = bucket_root + "/models"
    self.endpoints_sets_s3_path = bucket_root + "/endpoints"

    # Workbench Role ARN, Boto3 Session, SageMaker Session, and SageMaker Client
    clamp = AWSAccountClamp()
    self.aws_account_clamp = clamp
    self.workbench_role_arn = clamp.aws_session.get_workbench_execution_role_arn()
    self.boto3_session = clamp.boto3_session
    self.sm_session = clamp.sagemaker_session()
    self.sm_client = clamp.sagemaker_client()

    # Lists are stored in AWS Tags as a single string joined by this delimiter
    self.tag_delimiter = "::"

add_output_meta(meta)

Add additional metadata that will be associated with the output artifact. Args: meta (dict): A dictionary of metadata.

Source code in src/workbench/core/transforms/transform.py
def add_output_meta(self, meta: dict):
    """Merge extra metadata into the metadata attached to the output artifact

    Args:
        meta (dict): A dictionary of metadata (keys already present are overwritten)
    """
    # Build a new dict rather than mutating the existing one in place
    combined = self.output_meta | meta
    self.output_meta = combined

convert_to_aws_tags(metadata) staticmethod

Convert a dictionary to the AWS tag format (list of dicts): [ {Key: key_name, Value: value}, {..}, ...]

Source code in src/workbench/core/transforms/transform.py
@staticmethod
def convert_to_aws_tags(metadata: dict):
    """Turn a metadata dictionary into the AWS tag format

    Args:
        metadata (dict): The key/value pairs to convert

    Returns:
        list: One {"Key": key, "Value": value} dict per entry, in the dictionary's order
    """
    aws_tags = []
    for tag_name, tag_value in metadata.items():
        aws_tags.append({"Key": tag_name, "Value": tag_value})
    return aws_tags

get_aws_tags()

Get the metadata/tags and convert them into AWS Tag Format

Source code in src/workbench/core/transforms/transform.py
def get_aws_tags(self):
    """Collect the output tags and metadata and return them in AWS Tag Format"""
    # Start with the tags entry, then layer the output metadata on top of it
    workbench_meta = {"workbench_tags": self.output_tags}
    workbench_meta.update(self.output_meta)
    return self.convert_to_aws_tags(workbench_meta)

input_type()

What Input Type does this Transform Consume

Source code in src/workbench/core/transforms/transform.py
def input_type(self) -> TransformInput:
    """Report the Input Type this Transform consumes"""
    # NOTE(review): __init__ also assigns an instance attribute named `input_type`, and an
    # instance attribute takes precedence over a method of the same name — confirm which
    # of the two is meant to be the public accessor.
    consumed = self.input_type
    return consumed

output_type()

What Output Type does this Transform Produce

Source code in src/workbench/core/transforms/transform.py
def output_type(self) -> TransformOutput:
    """Report the Output Type this Transform produces"""
    # NOTE(review): __init__ also assigns an instance attribute named `output_type`, and an
    # instance attribute takes precedence over a method of the same name — confirm which
    # of the two is meant to be the public accessor.
    produced = self.output_type
    return produced

post_transform(**kwargs) abstractmethod

Post-Transform ensures that the output Artifact is ready for use

Source code in src/workbench/core/transforms/transform.py
@abstractmethod
def post_transform(self, **kwargs):
    """Abstract Method: runs after transform_impl() so the output Artifact is ready for use

    Args:
        **kwargs: The keyword arguments that were passed to transform()
    """
    return None

pre_transform(**kwargs)

Perform any Pre-Transform operations

Source code in src/workbench/core/transforms/transform.py
def pre_transform(self, **kwargs):
    """Hook that runs before transform_impl(); this default only emits a debug log line

    Args:
        **kwargs: The keyword arguments that were passed to transform() (unused here)
    """
    logger = self.log
    logger.debug("Pre-Transform...")

set_input_uuid(input_uuid)

Set the Input UUID (Name) for this Transform

Source code in src/workbench/core/transforms/transform.py
def set_input_uuid(self, input_uuid: str):
    """Set the Input UUID (Name) for this Transform

    Args:
        input_uuid (str): The UUID of the Input Artifact
    """
    # Normalize exactly like __init__ does (occasionally we get a pathlib.Path object)
    self.input_uuid = str(input_uuid)
    # __init__ records the input under 'workbench_input'; keep that entry in step
    self.output_meta = self.output_meta | {"workbench_input": self.input_uuid}

set_output_tags(tags)

Set the tags that will be associated with the output object. Args: tags (Union[list, str]): The list of tags or a '::' separated string of tags.

Source code in src/workbench/core/transforms/transform.py
def set_output_tags(self, tags: Union[list, str]):
    """Record the tags to attach to the output object

    Args:
        tags (Union[list, str]): The list of tags or a '::' separated string of tags
    """
    # Anything that is not a list is stored untouched
    if not isinstance(tags, list):
        self.output_tags = tags
        return

    # Lists are collapsed into one delimiter-separated string
    self.output_tags = self.tag_delimiter.join(tags)

set_output_uuid(output_uuid)

Set the Output UUID (Name) for this Transform

Source code in src/workbench/core/transforms/transform.py
def set_output_uuid(self, output_uuid: str):
    """Set the Output UUID (Name) for this Transform

    Args:
        output_uuid (str): The UUID of the Output Artifact
    """
    # Normalize exactly like __init__ does (occasionally we get a pathlib.Path object)
    self.output_uuid = str(output_uuid)

transform(**kwargs)

Perform the Transformation from Input to Output with pre_transform() and post_transform() invocations

Source code in src/workbench/core/transforms/transform.py
@final
def transform(self, **kwargs):
    """Run the full Transformation: pre_transform(), then transform_impl(), then post_transform()

    Args:
        **kwargs: Keyword arguments forwarded unchanged to each of the three stages
    """
    # Each stage is looked up only when its turn comes, then called with the same kwargs
    for stage_name in ("pre_transform", "transform_impl", "post_transform"):
        getattr(self, stage_name)(**kwargs)

transform_impl(**kwargs) abstractmethod

Abstract Method: Implement the Transformation from Input to Output

Source code in src/workbench/core/transforms/transform.py
@abstractmethod
def transform_impl(self, **kwargs):
    """Abstract Method: subclasses implement the Transformation from Input to Output here

    Args:
        **kwargs: The keyword arguments that were passed to transform()
    """
    return None

TransformInput

Bases: Enum

Enumerated Types for Workbench Transform Inputs

Source code in src/workbench/core/transforms/transform.py
class TransformInput(Enum):
    """The kinds of input a Workbench Transform can declare that it consumes"""

    # Values come from auto(), so they follow declaration order (starting at 1)
    LOCAL_FILE = auto()
    PANDAS_DF = auto()
    SPARK_DF = auto()
    S3_OBJECT = auto()
    DATA_SOURCE = auto()
    FEATURE_SET = auto()
    MODEL = auto()

TransformOutput

Bases: Enum

Enumerated Types for Workbench Transform Outputs

Source code in src/workbench/core/transforms/transform.py
class TransformOutput(Enum):
    """The kinds of output a Workbench Transform can declare that it produces"""

    # Values come from auto(), so they follow declaration order (starting at 1)
    PANDAS_DF = auto()
    SPARK_DF = auto()
    S3_OBJECT = auto()
    DATA_SOURCE = auto()
    FEATURE_SET = auto()
    MODEL = auto()
    ENDPOINT = auto()
    ENDPOINT = auto()