Module hela.test_suite.catalog_tests

Expand source code
from hela.errors import ValidationError
from hela import Catalog
from hela._catalog_class import is_dataset, is_catalog


def validate_dataset_variable_names(root_catalog: Catalog) -> bool:
    """Runs a validation check to make sure field (variable) names of the catalogs
    conform with the name of their datasets.

    Every attribute of ``root_catalog`` is inspected; attributes recognised by
    ``is_catalog`` are validated recursively.

    Args:
        root_catalog: The root catalog of your project

    Returns:
        True if all datasets were successfully validated

    Raises:
        ValidationError:    Whenever a discrepancy between field (variable) name and dataset
                            name was found
    """
    for field, obj in root_catalog.__dict__.items():
        dataset = is_dataset(obj)
        if dataset and field != dataset.name:
            # Report the same name that was just compared against the field name.
            raise ValidationError(f'Dataset with name "{dataset.name}" has field name "{field}"')
        catalog = is_catalog(obj)
        if catalog:
            # A mismatch inside a nested catalog propagates as ValidationError.
            validate_dataset_variable_names(catalog)
    return True


def validate_no_duplicated_columns(root_catalog: Catalog) -> bool:
    """Runs a validation check to make sure there are no duplicated columns among the datasets.

    Args:
        root_catalog: The root catalog of your project

    Returns:
        True if all columns were successfully validated

    Raises:
        ValidationError:    Whenever the same column is found in multiple datasets
                            without being referenced from a store.
    """
    columns_dict = root_catalog.get_columns_datasets()
    for col, datasets in columns_dict.items():
        # A column shared by several datasets is only accepted when it comes from a store.
        if len(datasets) > 1 and not col.from_store:
            raise ValidationError(
                f'Datasets {datasets} have overlapping column "{col.name}".'
                f' Consider adding column "{col.name}" to a column store and reference to store column instead.'
            )
    return True

Functions

def validate_dataset_variable_names(root_catalog: hela._catalog_class.Catalog) ‑> True

Runs a validation check to make sure field (variable) names of the catalogs conform with the name of their datasets.

Args

root_catalog
The root catalog of your project

Returns

True if all datasets were successfully validated

Raises

ValidationError
Whenever a discrepancy between field (variable) name and dataset name was found
Expand source code
def validate_dataset_variable_names(root_catalog: Catalog) -> bool:
    """Runs a validation check to make sure field (variable) names of the catalogs
    conform with the name of their datasets.

    Every attribute of ``root_catalog`` is inspected; attributes recognised by
    ``is_catalog`` are validated recursively.

    Args:
        root_catalog: The root catalog of your project

    Returns:
        True if all datasets were successfully validated

    Raises:
        ValidationError:    Whenever a discrepancy between field (variable) name and dataset
                            name was found
    """
    for field, obj in root_catalog.__dict__.items():
        dataset = is_dataset(obj)
        if dataset and field != dataset.name:
            # Report the same name that was just compared against the field name.
            raise ValidationError(f'Dataset with name "{dataset.name}" has field name "{field}"')
        catalog = is_catalog(obj)
        if catalog:
            # A mismatch inside a nested catalog propagates as ValidationError.
            validate_dataset_variable_names(catalog)
    return True
def validate_no_duplicated_columns(root_catalog: hela._catalog_class.Catalog) ‑> True

Runs a validation check to make sure there are no duplicated columns among the datasets.

Args

root_catalog
The root catalog of your project

Returns

True if all columns were successfully validated

Raises

ValidationError
Whenever the same column is found in multiple datasets without being referenced from a store.
Expand source code
def validate_no_duplicated_columns(root_catalog: Catalog) -> bool:
    """Runs a validation check to make sure there are no duplicated columns among the datasets.

    Args:
        root_catalog: The root catalog of your project

    Returns:
        True if all columns were successfully validated

    Raises:
        ValidationError:    Whenever the same column is found in multiple datasets
                            without being referenced from a store.
    """
    columns_dict = root_catalog.get_columns_datasets()
    for col, datasets in columns_dict.items():
        # A column shared by several datasets is only accepted when it comes from a store.
        if len(datasets) > 1 and not col.from_store:
            raise ValidationError(
                f'Datasets {datasets} have overlapping column "{col.name}".'
                f' Consider adding column "{col.name}" to a column store and reference to store column instead.'
            )
    return True