Module hela.web_page.generate
Expand source code
import gzip
import pkg_resources
from pathlib import Path
from typing import Union, Sequence
from hela import Catalog
from hela.web_page._json_generator import JsonGenerator
def generate_webpage(
    catalogs: Union[Catalog, Sequence[Catalog]],
    output_path: str,
    overwrite_existing: bool = False,
    include_samples: bool = False,
    web_app_title: str = 'Catalog'
) -> None:
    """Generates an index.html file that can be used as a data catalog website.

    Include a python script implementing this function in your CI/CD pipeline, outputting an index.html file
    that you can then use to share your data catalog (e.g. on github pages).

    Args:
        catalogs: One or multiple objects inheriting the Catalog class.
            If you have a tree of catalogs, only the root catalog is required.
        output_path: Either the folder where the index.html file should end up, or the full path of
            the target file if it ends with the `.html` suffix. Missing parent folders are created.
        overwrite_existing: Flag whether an already existing output file should be overwritten.
        include_samples: Flag whether to attempt to fetch sample datapoints from the columns in each
            dataset. Requires `hela.BaseDataset.get_samples` function implemented.
        web_app_title: Optional title of the web app.

    Raises:
        FileExistsError: If the output file already exists and overwrite_existing=False.
        ValueError: If the bundled frontend template lacks the expected data or title placeholders.

    Examples:
    >>> from my_catalog import MyCatalog
    >>> from hela import generate_webpage
    >>> generate_webpage(MyCatalog, '.', overwrite_existing=True)
    """
    if not isinstance(catalogs, Sequence):
        catalogs = [catalogs]

    jg = JsonGenerator()
    json_str = jg.generate_docs_jsons(catalogs, include_samples=include_samples)

    # Decide between "path to an html file" and "folder" by the real suffix instead of
    # a substring match, so a folder such as `site.html.d/` is not mistaken for a file.
    target = Path(output_path)
    file_path = target if target.suffix == '.html' else target / 'index.html'

    if file_path.exists() and not overwrite_existing:
        raise FileExistsError(f'File {file_path} already exists, delete or set overwrite_existing=True')

    # Build the complete page in memory before touching the filesystem, so a broken
    # template can never leave the caller without the previously generated file.
    # NOTE(review): json_str and web_app_title are inserted verbatim; content containing
    # `</script>` or HTML markup would need escaping -- confirm whether inputs are trusted.
    replacement_str = f'<script>window.treeListData = {json_str}</script>'
    match_str = '<script id="tree-list-data"></script>'
    output_file = gzip.decompress(pkg_resources.resource_string(__name__, 'index.html.gz')).decode()
    if match_str not in output_file:
        raise ValueError('Could not insert data in frontend.')
    output_file = output_file.replace(match_str, replacement_str)

    if '[[ReplaceDashboard]]' not in output_file:
        raise ValueError('Could not replace title in frontend.')
    # Replace web app title with custom title
    output_file = output_file.replace('[[ReplaceTitleDashboard]]', f'<title>{web_app_title}</title>')
    output_file = output_file.replace('[[ReplaceDashboard]]', web_app_title)

    # Create the *parent* folder of the output file (never the file path itself), then
    # write with the same encoding the template was decoded with (UTF-8).
    file_path.parent.mkdir(parents=True, exist_ok=True)
    file_path.write_text(output_file, encoding='utf-8')
Functions
def generate_webpage(catalogs: Union[hela._catalog_class.Catalog, Sequence[hela._catalog_class.Catalog]], output_path: str, overwrite_existing: bool = False, include_samples: bool = False, web_app_title: str = 'Catalog') ‑> None-
Generates an index.html file that can be used as a data catalog website.
Include a python script implementing this function in your CI/CD pipeline, outputting an index.html file that you can then use to share your data catalog (e.g. on github pages). For an example see (TODO: insert example repo link here).
Args
catalogs- One or multiple objects inheriting the Catalog class. If you have a tree of catalogs, only the root catalog is required.
output_path- The folder where index.html file should end up.
overwrite_existing- Flag whether an already existing index.html file should be overwritten.
include_samples- Flag whether to attempt to fetch sample datapoints from the columns in each dataset. Requires the BaseDataset.get_samples() function to be implemented.
web_app_title- Optional title of the web app.
Raises
FileExistsError- If the index.html file already exists under output_path and overwrite_existing=False.
Examples:
>>> from my_catalog import MyCatalog
>>> from hela import generate_webpage
>>> generate_webpage(MyCatalog, '.', overwrite_existing=True)
Expand source code
def generate_webpage(
    catalogs: Union[Catalog, Sequence[Catalog]],
    output_path: str,
    overwrite_existing: bool = False,
    include_samples: bool = False,
    web_app_title: str = 'Catalog'
) -> None:
    """Generates an index.html file that can be used as a data catalog website.

    Include a python script implementing this function in your CI/CD pipeline, outputting an index.html file
    that you can then use to share your data catalog (e.g. on github pages).

    Args:
        catalogs: One or multiple objects inheriting the Catalog class.
            If you have a tree of catalogs, only the root catalog is required.
        output_path: Either the folder where the index.html file should end up, or the full path of
            the target file if it ends with the `.html` suffix. Missing parent folders are created.
        overwrite_existing: Flag whether an already existing output file should be overwritten.
        include_samples: Flag whether to attempt to fetch sample datapoints from the columns in each
            dataset. Requires `hela.BaseDataset.get_samples` function implemented.
        web_app_title: Optional title of the web app.

    Raises:
        FileExistsError: If the output file already exists and overwrite_existing=False.
        ValueError: If the bundled frontend template lacks the expected data or title placeholders.

    Examples:
    >>> from my_catalog import MyCatalog
    >>> from hela import generate_webpage
    >>> generate_webpage(MyCatalog, '.', overwrite_existing=True)
    """
    if not isinstance(catalogs, Sequence):
        catalogs = [catalogs]

    jg = JsonGenerator()
    json_str = jg.generate_docs_jsons(catalogs, include_samples=include_samples)

    # Decide between "path to an html file" and "folder" by the real suffix instead of
    # a substring match, so a folder such as `site.html.d/` is not mistaken for a file.
    target = Path(output_path)
    file_path = target if target.suffix == '.html' else target / 'index.html'

    if file_path.exists() and not overwrite_existing:
        raise FileExistsError(f'File {file_path} already exists, delete or set overwrite_existing=True')

    # Build the complete page in memory before touching the filesystem, so a broken
    # template can never leave the caller without the previously generated file.
    # NOTE(review): json_str and web_app_title are inserted verbatim; content containing
    # `</script>` or HTML markup would need escaping -- confirm whether inputs are trusted.
    replacement_str = f'<script>window.treeListData = {json_str}</script>'
    match_str = '<script id="tree-list-data"></script>'
    output_file = gzip.decompress(pkg_resources.resource_string(__name__, 'index.html.gz')).decode()
    if match_str not in output_file:
        raise ValueError('Could not insert data in frontend.')
    output_file = output_file.replace(match_str, replacement_str)

    if '[[ReplaceDashboard]]' not in output_file:
        raise ValueError('Could not replace title in frontend.')
    # Replace web app title with custom title
    output_file = output_file.replace('[[ReplaceTitleDashboard]]', f'<title>{web_app_title}</title>')
    output_file = output_file.replace('[[ReplaceDashboard]]', web_app_title)

    # Create the *parent* folder of the output file (never the file path itself), then
    # write with the same encoding the template was decoded with (UTF-8).
    file_path.parent.mkdir(parents=True, exist_ok=True)
    file_path.write_text(output_file, encoding='utf-8')