API Classes
The API Classes use Transforms internally; for example, model.to_endpoint() uses the ModelToEndpoint() transform. If you need more control over a Transform, you can use the Core Classes directly.
The SageWorks Transform class is an abstract base class that defines the API implemented by all of its child classes (DataLoaders, DataSourceToFeatureSet, ModelToEndpoint, etc).
Transform: Base Class for all transforms within SageWorks
Inherited Classes must implement the abstract transform_impl() method
Bases: ABC
Transform: Abstract Base Class for all transforms within SageWorks. Inherited Classes
must implement the abstract transform_impl() method
Source code in src/sageworks/core/transforms/transform.py
class Transform(ABC):
    """Transform: Abstract Base Class for all transforms within SageWorks.

    Inherited Classes must implement the abstract transform_impl() and
    post_transform() methods. transform() runs pre_transform(),
    transform_impl(), and post_transform() in that order.
    """

    # Backing fields for the input_type/output_type properties. Storing the
    # values under private names keeps the property definitions from being
    # shadowed by the assignments made in __init__ (and in child classes).
    _input_type = None
    _output_type = None

    def __init__(self, input_uuid: str, output_uuid: str):
        """Transform Initialization

        Args:
            input_uuid (str): The UUID (Name) of the input for this Transform
            output_uuid (str): The UUID (Name) of the output for this Transform

        Raises:
            FatalConfigError: If ConfigManager reports the configuration is not okay
        """

        self.log = logging.getLogger("sageworks")
        self.input_type = None
        self.output_type = None
        self.output_tags = ""
        self.input_uuid = str(input_uuid)  # Occasionally we get a pathlib.Path object
        self.output_uuid = str(output_uuid)  # Occasionally we get a pathlib.Path object
        self.output_meta = {"sageworks_input": self.input_uuid}
        self.data_catalog_db = "sageworks"

        # Grab our SageWorks Bucket
        cm = ConfigManager()
        if not cm.config_okay():
            self.log.error("SageWorks Configuration Incomplete...")
            self.log.error("Run the 'sageworks' command and follow the prompts...")
            raise FatalConfigError()
        self.sageworks_bucket = cm.get_config("SAGEWORKS_BUCKET")
        self.data_sources_s3_path = "s3://" + self.sageworks_bucket + "/data-sources"
        self.feature_sets_s3_path = "s3://" + self.sageworks_bucket + "/feature-sets"
        self.models_s3_path = "s3://" + self.sageworks_bucket + "/models"
        self.endpoints_sets_s3_path = "s3://" + self.sageworks_bucket + "/endpoints"

        # Grab a SageWorks Role ARN, Boto3, SageMaker Session, and SageMaker Client
        self.aws_account_clamp = AWSAccountClamp()
        self.sageworks_role_arn = self.aws_account_clamp.aws_session.get_sageworks_execution_role_arn()
        self.boto3_session = self.aws_account_clamp.boto3_session
        self.sm_session = self.aws_account_clamp.sagemaker_session()
        self.sm_client = self.aws_account_clamp.sagemaker_client()

        # Delimiter for storing lists in AWS Tags
        self.tag_delimiter = "::"

    @abstractmethod
    def transform_impl(self, **kwargs):
        """Abstract Method: Implement the Transformation from Input to Output"""
        pass

    def pre_transform(self, **kwargs):
        """Perform any Pre-Transform operations"""
        self.log.debug("Pre-Transform...")

    @abstractmethod
    def post_transform(self, **kwargs):
        """Post-Transform ensures that the output Artifact is ready for use"""
        pass

    def set_output_tags(self, tags: Union[list, str]):
        """Set the tags that will be associated with the output object

        Args:
            tags (Union[list, str]): The list of tags or a '::' separated string of tags
        """
        if isinstance(tags, list):
            self.output_tags = self.tag_delimiter.join(tags)
        else:
            self.output_tags = tags

    def add_output_meta(self, meta: dict):
        """Add additional metadata that will be associated with the output artifact

        Args:
            meta (dict): A dictionary of metadata
        """
        self.output_meta = self.output_meta | meta

    @staticmethod
    def convert_to_aws_tags(metadata: dict):
        """Convert a dictionary to the AWS tag format (list of dicts)
        [ {Key: key_name, Value: value}, {..}, ...]"""
        return [{"Key": key, "Value": value} for key, value in metadata.items()]

    def get_aws_tags(self):
        """Get the metadata/tags and convert them into AWS Tag Format"""
        # The tags go in first; output_meta entries are layered on top
        sageworks_meta = {"sageworks_tags": self.output_tags}
        for key, value in self.output_meta.items():
            sageworks_meta[key] = value
        aws_tags = self.convert_to_aws_tags(sageworks_meta)
        return aws_tags

    @final
    def transform(self, **kwargs):
        """Perform the Transformation from Input to Output with pre_transform() and post_transform() invocations"""
        self.pre_transform(**kwargs)
        self.transform_impl(**kwargs)
        self.post_transform(**kwargs)

    @property
    def input_type(self) -> TransformInput:
        """What Input Type does this Transform Consume"""
        return self._input_type

    @input_type.setter
    def input_type(self, value: TransformInput):
        # Keeps `self.input_type = ...` assignments working
        self._input_type = value

    @property
    def output_type(self) -> TransformOutput:
        """What Output Type does this Transform Produce"""
        return self._output_type

    @output_type.setter
    def output_type(self, value: TransformOutput):
        # Keeps `self.output_type = ...` assignments working
        self._output_type = value

    def set_input_uuid(self, input_uuid: str):
        """Set the Input UUID (Name) for this Transform

        Args:
            input_uuid (str): The new Input UUID (stored as a string)
        """
        self.input_uuid = str(input_uuid)  # Same coercion as __init__ (pathlib.Path inputs)

    def set_output_uuid(self, output_uuid: str):
        """Set the Output UUID (Name) for this Transform

        Args:
            output_uuid (str): The new Output UUID (stored as a string)
        """
        self.output_uuid = str(output_uuid)  # Same coercion as __init__ (pathlib.Path inputs)
|
Transform Initialization
Source code in src/sageworks/core/transforms/transform.py
def __init__(self, input_uuid: str, output_uuid: str):
    """Transform Initialization

    Args:
        input_uuid (str): The UUID (Name) of the input for this Transform
        output_uuid (str): The UUID (Name) of the output for this Transform

    Raises:
        FatalConfigError: If ConfigManager reports the configuration is not okay
    """
    self.log = logging.getLogger("sageworks")

    # Types and tags start out unset
    self.input_type = None
    self.output_type = None
    self.output_tags = ""

    # Occasionally we get a pathlib.Path object, so both UUIDs are stored as strings
    self.input_uuid = str(input_uuid)
    self.output_uuid = str(output_uuid)
    self.output_meta = {"sageworks_input": self.input_uuid}
    self.data_catalog_db = "sageworks"

    # The configuration has to check out before we can look up the SageWorks Bucket
    config = ConfigManager()
    if not config.config_okay():
        for message in (
            "SageWorks Configuration Incomplete...",
            "Run the 'sageworks' command and follow the prompts...",
        ):
            self.log.error(message)
        raise FatalConfigError()
    self.sageworks_bucket = config.get_config("SAGEWORKS_BUCKET")

    # Every S3 path hangs off the same bucket prefix
    bucket_root = "s3://" + self.sageworks_bucket
    self.data_sources_s3_path = bucket_root + "/data-sources"
    self.feature_sets_s3_path = bucket_root + "/feature-sets"
    self.models_s3_path = bucket_root + "/models"
    self.endpoints_sets_s3_path = bucket_root + "/endpoints"

    # SageWorks Role ARN, Boto3 Session, SageMaker Session, and SageMaker Client
    clamp = AWSAccountClamp()
    self.aws_account_clamp = clamp
    self.sageworks_role_arn = clamp.aws_session.get_sageworks_execution_role_arn()
    self.boto3_session = clamp.boto3_session
    self.sm_session = clamp.sagemaker_session()
    self.sm_client = clamp.sagemaker_client()

    # Lists are stored in AWS Tags as a single string joined with this delimiter
    self.tag_delimiter = "::"
|
Add additional metadata that will be associated with the output artifact
Args:
meta (dict): A dictionary of metadata
Source code in src/sageworks/core/transforms/transform.py
def add_output_meta(self, meta: dict):
    """Merge extra metadata into the metadata kept for the output artifact

    Args:
        meta (dict): Metadata entries to add; a key that already exists is overwritten
    """
    # Build a fresh dict rather than mutating the current one in place
    self.output_meta = {**self.output_meta, **meta}
|
Convert a dictionary to the AWS tag format (list of dicts)
[ {Key: key_name, Value: value}, {..}, ...]
Source code in src/sageworks/core/transforms/transform.py
@staticmethod
def convert_to_aws_tags(metadata: dict):
    """Turn a dictionary into the AWS tag format (list of dicts)
    [ {Key: key_name, Value: value}, {..}, ...]

    Args:
        metadata (dict): The key/value pairs to convert

    Returns:
        list: One {"Key": ..., "Value": ...} dict per entry, in the dictionary's order
    """
    aws_tags = []
    for tag_key, tag_value in metadata.items():
        aws_tags.append({"Key": tag_key, "Value": tag_value})
    return aws_tags
|
Get the metadata/tags and convert them into AWS Tag Format
Source code in src/sageworks/core/transforms/transform.py
def get_aws_tags(self):
    """Collect the output tags and metadata and return them in AWS Tag Format

    Returns:
        The result of convert_to_aws_tags() for the combined tags + metadata
    """
    # Tags go in first; output_meta entries follow (and win on a duplicate key)
    sageworks_meta = {"sageworks_tags": self.output_tags, **self.output_meta}
    return self.convert_to_aws_tags(sageworks_meta)
|
What Input Type does this Transform Consume
Source code in src/sageworks/core/transforms/transform.py
@property
def input_type(self) -> TransformInput:
    """What Input Type does this Transform Consume

    Exposed as a property: the initializer assigns `self.input_type`, and as a
    plain method that assignment would shadow this definition on every instance.

    Returns:
        TransformInput: The stored input type (None when nothing has been set)
    """
    return getattr(self, "_input_type", None)


@input_type.setter
def input_type(self, value: TransformInput):
    # Keeps `self.input_type = ...` assignments working
    self._input_type = value
|
What Output Type does this Transform Produce
Source code in src/sageworks/core/transforms/transform.py
@property
def output_type(self) -> TransformOutput:
    """What Output Type does this Transform Produce

    Exposed as a property: the initializer assigns `self.output_type`, and as a
    plain method that assignment would shadow this definition on every instance.

    Returns:
        TransformOutput: The stored output type (None when nothing has been set)
    """
    return getattr(self, "_output_type", None)


@output_type.setter
def output_type(self, value: TransformOutput):
    # Keeps `self.output_type = ...` assignments working
    self._output_type = value
|
post_transform(**kwargs)
abstractmethod
Post-Transform ensures that the output Artifact is ready for use
Source code in src/sageworks/core/transforms/transform.py
@abstractmethod
def post_transform(self, **kwargs):
    """Post-Transform ensures that the output Artifact is ready for use

    Abstract: this base version has no behavior of its own.

    Args:
        **kwargs: The keyword arguments handed to transform()
    """
    return None
|
Perform any Pre-Transform operations
Source code in src/sageworks/core/transforms/transform.py
def pre_transform(self, **kwargs):
    """Run any Pre-Transform operations (this version only logs a debug message)

    Args:
        **kwargs: Accepted but not used by this implementation
    """
    logger = self.log
    logger.debug("Pre-Transform...")
|
Set the Input UUID (Name) for this Transform
Source code in src/sageworks/core/transforms/transform.py
def set_input_uuid(self, input_uuid: str):
    """Set the Input UUID (Name) for this Transform

    Args:
        input_uuid (str): The new Input UUID (stored as a string)
    """
    # Coerce with str() exactly as the initializer does, since a pathlib.Path
    # object occasionally shows up here instead of a string
    self.input_uuid = str(input_uuid)
|
Set the tags that will be associated with the output object
Args:
tags (Union[list, str]): The list of tags or a '::' separated string of tags
Source code in src/sageworks/core/transforms/transform.py
def set_output_tags(self, tags: Union[list, str]):
    """Store the tags that will be associated with the output object

    Args:
        tags (Union[list, str]): The list of tags or a '::' separated string of tags
    """
    # A list is joined with the tag delimiter; anything else is stored as given
    self.output_tags = self.tag_delimiter.join(tags) if isinstance(tags, list) else tags
|
Set the Output UUID (Name) for this Transform
Source code in src/sageworks/core/transforms/transform.py
def set_output_uuid(self, output_uuid: str):
    """Set the Output UUID (Name) for this Transform

    Args:
        output_uuid (str): The new Output UUID (stored as a string)
    """
    # Coerce with str() exactly as the initializer does, since a pathlib.Path
    # object occasionally shows up here instead of a string
    self.output_uuid = str(output_uuid)
|
Perform the Transformation from Input to Output with pre_transform() and post_transform() invocations
Source code in src/sageworks/core/transforms/transform.py
@final
def transform(self, **kwargs):
    """Run the full Transformation from Input to Output

    Calls pre_transform(), transform_impl(), and post_transform() in that
    order, passing the same keyword arguments to each.

    Args:
        **kwargs: Keyword arguments forwarded to all three stages
    """
    # Each stage is looked up right before it runs, one after the other
    for stage_name in ("pre_transform", "transform_impl", "post_transform"):
        getattr(self, stage_name)(**kwargs)
|
Abstract Method: Implement the Transformation from Input to Output
Source code in src/sageworks/core/transforms/transform.py
@abstractmethod
def transform_impl(self, **kwargs):
    """Abstract Method: Implement the Transformation from Input to Output

    This base version has no behavior of its own.

    Args:
        **kwargs: The keyword arguments handed to transform()
    """
    return None
|
Bases: Enum
Enumerated Types for SageWorks Transform Inputs
Source code in src/sageworks/core/transforms/transform.py
class TransformInput(Enum):
    """Enumerated Types for SageWorks Transform Inputs"""

    # Values are written out explicitly; they are the same 1..7 sequence
    # that auto() assigns in declaration order
    LOCAL_FILE = 1
    PANDAS_DF = 2
    SPARK_DF = 3
    S3_OBJECT = 4
    DATA_SOURCE = 5
    FEATURE_SET = 6
    MODEL = 7
|
Bases: Enum
Enumerated Types for SageWorks Transform Outputs
Source code in src/sageworks/core/transforms/transform.py
class TransformOutput(Enum):
    """Enumerated Types for SageWorks Transform Outputs"""

    # Values are written out explicitly; they are the same 1..7 sequence
    # that auto() assigns in declaration order
    PANDAS_DF = 1
    SPARK_DF = 2
    S3_OBJECT = 3
    DATA_SOURCE = 4
    FEATURE_SET = 5
    MODEL = 6
    ENDPOINT = 7
|