Source code for hbp_validation_framework.utils

"""
Miscellaneous methods that help in different aspects of model validation.
Does not require explicit instantiation.

The following methods are available:

=======================================   ====================================
Action                                    Method
=======================================   ====================================
View JSON data in web browser             :meth:`view_json_tree`
Prepare test for execution                :meth:`prepare_run_test_offline`
Run the validation test                   :meth:`run_test_offline`
Register result with validation service   :meth:`upload_test_result`
Run test and register result (in steps)   :meth:`run_test`
Run test and register result (direct)     :meth:`run_test_standalone`
Generate HTML report of test results      :meth:`generate_HTML_report`
Generate PDF report of test results       :meth:`generate_PDF_report`
Obtain score matrix for test results      :meth:`generate_score_matrix`
Get Pandas DataFrame from score matrix    :meth:`get_raw_dataframe`
Display score matrix in web browser       :meth:`display_score_matrix_html`
=======================================   ====================================
"""

import os
import uuid
import json
import math
import mimetypes
import pickle
import webbrowser
import argparse
import collections
import pkg_resources
from datetime import datetime
from importlib import import_module
from pathlib import Path
from urllib.parse import urlparse

import sciunit

from . import ModelCatalog, TestLibrary
from .datastores import URI_SCHEME_MAP, CollabV2DataStore

def view_json_tree(data):
    """Displays the JSON tree structure inside the web browser

    This method can be used to view any JSON data, generated by any of
    the validation client's methods, in a tree-like representation.

    Parameters
    ----------
    data : string
        JSON object represented as a string.

    Returns
    -------
    None
        Does not return any data. JSON displayed inside web browser.

    Examples
    --------
    >>> model = model_catalog.get_model(alias="HCkt")
    >>> from hbp_validation_framework import utils
    >>> utils.view_json_tree(model)
    """
    _make_js_file(data)
    script_dir = os.path.dirname(__file__)
    rel_path = "jsonTreeViewer/index.htm"
    abs_file_path = os.path.join(script_dir, rel_path)
    webbrowser.open(abs_file_path, new=2)

def _make_js_file(data):
    """
    Creates a JavaScript file from a given JSON object; loaded by the browser.

    This eliminates cross-origin issues with loading local data files (e.g. via jQuery).
    """
    script_dir = os.path.dirname(__file__)
    rel_path = "jsonTreeViewer/data.js"
    abs_file_path = os.path.join(script_dir, rel_path)
    with open(abs_file_path, 'w') as outfile:
        outfile.write("var data = '")
        json.dump(data, outfile)
        outfile.write("'")

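# --- Usage sketch (not part of the original module): any JSON-serializable record
# returned by the client, e.g. a model entry from ModelCatalog.get_model(), can be
# browsed with view_json_tree(). The alias "HCkt" is a placeholder.
def _example_view_model_json(model_catalog):
    """Illustrative only: open a model record in the browser-based JSON tree viewer."""
    model = model_catalog.get_model(alias="HCkt")  # placeholder alias
    view_json_tree(model)
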
def prepare_run_test_offline(username="", password=None, environment="production", test_instance_id="", test_id="", test_alias="", test_version="", client_obj=None, **params):
    """Gather the info necessary for running a validation test

    This method will select the specified test and prepare a config file enabling offline
    execution of the validation test. The observation file required by the test is also
    downloaded and stored locally.

    The test can be specified in the following ways (in order of priority):

    1. specify `test_instance_id` corresponding to test instance in test library
    2. specify `test_id` and `test_version`
    3. specify `test_alias` and `test_version`

    Note: for (2) and (3) above, if `test_version` is not specified, then the latest test version is retrieved.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    test_instance_id : UUID
        System generated unique identifier associated with test instance.
    test_id : UUID
        System generated unique identifier associated with test definition.
    test_alias : string
        User-assigned unique identifier associated with test definition.
    test_version : string
        User-assigned identifier (unique for each test) associated with test instance.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).
    **params : dict
        Keyword arguments to be passed to the Test constructor.

    Note
    ----
    Should be run on a node having access to external URLs (i.e. with internet access).

    Returns
    -------
    path
        The absolute path of the generated test config file.

    Examples
    --------
    >>> test_config_file = utils.prepare_run_test_offline(username="shailesh", test_alias="CDT-5", test_version="5.0")
    """
    if client_obj:
        test_library = TestLibrary.from_existing(client_obj)
    else:
        test_library = TestLibrary(username, password, environment=environment)
    if test_instance_id == "" and test_id == "" and test_alias == "":
        raise Exception("test_instance_id or test_id or test_alias needs to be provided for finding test.")

    # Gather specified test info
    test_instance_json = test_library.get_test_instance(instance_id=test_instance_id, test_id=test_id, alias=test_alias, version=test_version)
    test_id = test_instance_json["test_id"]
    test_instance_id = test_instance_json["id"]
    test_instance_path = test_instance_json["path"]
    test_instance_parameters = test_instance_json["parameters"]

    # Download test observation to local storage
    base_folder = os.path.join(os.getcwd(), "hbp_validation_framework", test_id, datetime.now().strftime("%Y%m%d-%H%M%S"))
    test_observation_paths = test_library.get_test_definition(test_id=test_id)["data_location"]
    if len(test_observation_paths) == 0:
        raise Exception("No observation data found for test with id: {}".format(test_id))
    for test_observation_path in test_observation_paths:
        parse_result = urlparse(test_observation_path)
        datastore = URI_SCHEME_MAP[parse_result.scheme](auth=test_library.auth)
        test_observation_file = datastore.download_data([test_observation_path], local_directory=base_folder)[0]

    # Create test config required for offline execution
    test_info = {}
    test_info["test_id"] = test_id
    test_info["test_instance_id"] = test_instance_id
    test_info["test_instance_path"] = test_instance_path
    test_info["test_instance_parameters"] = test_instance_parameters
    test_info["test_observation_file"] = os.path.basename(os.path.realpath(test_observation_file))
    test_info["params"] = params

    # Save test info to config file
    test_config_file = os.path.join(base_folder, "test_config.json")
    with open(test_config_file, 'w') as file:
        file.write(json.dumps(test_info, indent=4))
    return test_config_file

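# --- Illustrative sketch (not part of the original module): the config file written by
# prepare_run_test_offline() is plain JSON, so it can be inspected before being copied,
# together with the downloaded observation file, to an offline node for run_test_offline().
def _example_inspect_test_config(test_config_file):
    """Illustrative only: print the fields stored in a generated test config file."""
    with open(test_config_file) as f:
        test_info = json.load(f)
    # Keys written by prepare_run_test_offline():
    #   test_id, test_instance_id, test_instance_path,
    #   test_instance_parameters, test_observation_file, params
    for key, value in test_info.items():
        print(key, ":", value)
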
def run_test_offline(model="", test_config_file=""):
    """Run the validation test

    This method will accept a model, located locally, run the test specified via the test
    config file (generated by :meth:`prepare_run_test_offline`), and store the results locally.

    Parameters
    ----------
    model : sciunit.Model
        A :class:`sciunit.Model` instance.
    test_config_file : string
        Absolute path of the test config file generated by :meth:`prepare_run_test_offline`.

    Note
    ----
    Can be run on node(s) having no access to external URLs (i.e. without internet access).
    Also, it is required that the test_config_file and the test_observation_file are located
    in the same directory.

    Returns
    -------
    path
        The absolute path of the generated test result file.

    Examples
    --------
    >>> test_result_file = utils.run_test_offline(model=model, test_config_file=test_config_file)
    """
    if not os.path.isfile(test_config_file):
        raise Exception("'test_config_file' should point to the file describing the test configuration.")
    base_folder = os.path.dirname(os.path.realpath(test_config_file))

    # Load the test info from config file
    with open(test_config_file) as file:
        test_info = json.load(file)

    # Identify test class path
    path_parts = test_info["test_instance_path"].split(".")
    cls_name = path_parts[-1]
    module_name = ".".join(path_parts[:-1])
    test_module = import_module(module_name)
    test_cls = getattr(test_module, cls_name)

    # Read observation data required by test
    with open(os.path.join(base_folder, test_info["test_observation_file"]), 'rb') as file:
        observation_data = file.read()
    content_type = mimetypes.guess_type(test_info["test_observation_file"])[0]
    if content_type == "application/json":
        observation_data = json.loads(observation_data)

    # Create the :class:`sciunit.Test` instance
    params = test_info["params"]
    test_instance_parameters = test_info["test_instance_parameters"]
    try:
        if isinstance(eval(test_instance_parameters), dict):
            params.update(eval(test_instance_parameters))
    except:
        pass
    test = test_cls(observation=observation_data, **params)
    test.uuid = test_info["test_instance_id"]

    print("----------------------------------------------")
    print("Test name: ", test.name)
    print("Test type: ", type(test))
    print("----------------------------------------------")

    # Check the model
    if not isinstance(model, sciunit.Model):
        raise TypeError("`model` is not a sciunit Model!")
    print("----------------------------------------------")
    print("Model name: ", model.name)
    print("Model type: ", type(model))
    print("----------------------------------------------")

    # Run the test
    t_start = datetime.utcnow()
    score = test.judge(model, deep_error=True)
    t_end = datetime.utcnow()
    score.dont_hide = ["related_data"]
    print("----------------------------------------------")
    print("Score: ", score.score)
    if "figures" in score.related_data:
        print("Output files: ")
        for item in score.related_data["figures"]:
            print(item)
    print("----------------------------------------------")
    score.runtime = str(int(math.ceil((t_end - t_start).total_seconds()))) + " s"
    score.exec_timestamp = t_end
    # score.exec_platform = str(self._get_platform())

    # Save the test result
    # Create a custom sciunit.Score object with
    # minimally required attributes to avoid pickling issues
    score_obj = sciunit.Score(score=score.score, related_data=score.related_data)
    score_obj.dont_hide = score.dont_hide
    score_obj.runtime = score.runtime
    score_obj.exec_timestamp = score.exec_timestamp
    # score_obj.exec_platform = score.exec_platform
    Path(os.path.join(base_folder, "results")).mkdir(parents=True, exist_ok=True)
    test_result_file = os.path.join(base_folder, "results", "result__" + model.name + "__" + datetime.now().strftime("%Y%m%d%H%M%S") + ".pkl")
    with open(test_result_file, 'wb') as file:
        pickle.dump(score_obj, file)
    return test_result_file

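# --- Usage sketch (not part of the original module): splitting the offline workflow across
# nodes, as described in the Notes above. The username, alias, and version strings are
# placeholders; `model` is assumed to be a sciunit.Model instance.
def _example_offline_workflow(model):
    """Illustrative only: prepare online, execute offline, upload the result online."""
    # On a node with internet access:
    config_file = prepare_run_test_offline(username="my_username", test_alias="CDT-5", test_version="5.0")
    # On a compute node without internet access (config and observation file copied over):
    result_file = run_test_offline(model=model, test_config_file=config_file)
    # Back on a node with internet access:
    return upload_test_result(username="my_username", test_result_file=result_file)
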
def upload_test_result(username="", password=None, environment="production", test_result_file="", storage_collab_id="", register_result=True, client_obj=None):
    """Register the result with the Validation Service

    This method will register the validation result specified via the test result file
    (generated by :meth:`run_test_offline`) with the validation service.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    test_result_file : string
        Absolute path of the test result file generated by :meth:`run_test_offline`.
    storage_collab_id : string
        Collab ID where output files should be stored; if empty, stored in the model's host Collab.
    register_result : boolean
        Specify whether the test results are to be registered on the validation framework. Default is set as True.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).

    Note
    ----
    Should be run on a node having access to external URLs (i.e. with internet access).

    Returns
    -------
    dict
        Data of the test result that has been created.
    int or float or bool
        Score evaluated by the test.

    Examples
    --------
    >>> result, score = utils.upload_test_result(username="shailesh", test_result_file=test_result_file)
    """
    if not os.path.isfile(test_result_file):
        raise Exception("'test_result_file' should point to the file containing the test result data.")

    # Load result info from file
    with open(test_result_file, 'rb') as file:
        score = pickle.load(file)

    if not register_result:
        return None, score

    # Register the result with the HBP validation framework
    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    model_instance_uuid = model_catalog.find_model_instance_else_add(score.model)["id"]
    model_instance_json = model_catalog.get_model_instance(instance_id=model_instance_uuid)
    model_json = model_catalog.get_model(model_id=model_instance_json["model_id"])
    model_host_collab_id = model_json["collab_id"]
    model_name = model_json["name"]

    if not storage_collab_id:
        storage_collab_id = model_host_collab_id
    score.related_data["collab_id"] = storage_collab_id

    # Check if a result with the same hash has already been uploaded for this
    # (model instance, test instance) combination; if yes, don't register result
    # result_json = {
    #     "model_instance_id": model_instance_uuid,
    #     "test_instance_id": score.test.uuid,
    #     "score": score.score,
    #     "runtime": score.runtime,
    #     "exectime": score.exec_timestamp#,
    #     # "platform": score.exec_platform
    # }
    # score.score_hash = str(hash(json.dumps(result_json, sort_keys=True, default=str)))
    test_library = TestLibrary.from_existing(model_catalog)
    # results = test_library.list_results(model_instance_id=model_instance_uuid, test_instance_id=score.test.uuid)["results"]
    # duplicate_results = [x["id"] for x in results if x["hash"] == score.score_hash]
    # if duplicate_results:
    #     raise Exception("An identical result has already been registered on the validation framework.\nExisting Result UUID = {}".format(", ".join(duplicate_results)))

    # `.replace(" ", "_")` used to avoid Collab storage path errors due to spaces
    collab_folder = "validation_results/{}/{}_{}".format(datetime.now().strftime("%Y-%m-%d"), model_name.replace(" ", "_"), datetime.now().strftime("%Y%m%d-%H%M%S"))
    collab_storage = CollabV2DataStore(collab_id=storage_collab_id, base_folder=collab_folder, auth=test_library.auth)

    response = test_library.register_result(test_result=score, data_store=collab_storage)
    return response, score.score

def run_test(username="", password=None, environment="production", model="", test_instance_id="", test_id="", test_alias="", test_version="", storage_collab_id="", register_result=True, client_obj=None, **params):
    """Run validation test and register result

    This will execute the following methods by relaying the output of one to the next:

    1. :meth:`prepare_run_test_offline`
    2. :meth:`run_test_offline`
    3. :meth:`upload_test_result`

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    model : sciunit.Model
        A :class:`sciunit.Model` instance.
    test_instance_id : UUID
        System generated unique identifier associated with test instance.
    test_id : UUID
        System generated unique identifier associated with test definition.
    test_alias : string
        User-assigned unique identifier associated with test definition.
    test_version : string
        User-assigned identifier (unique for each test) associated with test instance.
    storage_collab_id : string
        Collab ID where output files should be stored; if empty, stored in the model's host Collab.
    register_result : boolean
        Specify whether the test results are to be registered on the validation framework. Default is set as True.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).
    **params : dict
        Keyword arguments to be passed to the Test constructor.

    Note
    ----
    Should be run on a node having access to external URLs (i.e. with internet access).

    Returns
    -------
    dict
        Data of the test result that has been created.
    int or float or bool
        Score evaluated by the test.

    Examples
    --------
    >>> result, score = utils.run_test(username="HBP_USERNAME", password="HBP_PASSWORD",
    ...                                environment="production", model=cell_model,
    ...                                test_alias="basalg_msn_d1", test_version="1.0",
    ...                                storage_collab_id="8123", register_result=True)
    """
    test_config_file = prepare_run_test_offline(username=username, password=password, environment=environment, test_instance_id=test_instance_id, test_id=test_id, test_alias=test_alias, test_version=test_version, client_obj=client_obj, **params)
    test_result_file = run_test_offline(model=model, test_config_file=test_config_file)
    result, score = upload_test_result(username=username, password=password, environment=environment, test_result_file=test_result_file, storage_collab_id=storage_collab_id, register_result=register_result, client_obj=client_obj)
    return result, score

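# --- Usage sketch (not part of the original module): reusing an authenticated client via
# `client_obj` avoids repeated logins when running several tests in a loop, as noted in the
# docstrings above. The test aliases are placeholders.
def _example_run_many_tests(model, username, password):
    """Illustrative only: run several tests against one model with a single authentication."""
    test_library = TestLibrary(username, password)
    results = []
    for alias in ["test_alias_1", "test_alias_2"]:  # placeholder aliases
        result, score = run_test(model=model, test_alias=alias, register_result=True, client_obj=test_library)
        results.append((alias, score))
    return results
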
def run_test_standalone(username="", password=None, environment="production", model="", test_instance_id="", test_id="", test_alias="", test_version="", storage_collab_id="", register_result=True, client_obj=None, **params):
    """Run validation test and register result

    This method will accept a model, located locally, run the specified test on the model,
    and store the results on the validation service.

    The test can be specified in the following ways (in order of priority):

    1. specify `test_instance_id` corresponding to test instance in test library
    2. specify `test_id` and `test_version`
    3. specify `test_alias` and `test_version`

    Note: for (2) and (3) above, if `test_version` is not specified, then the latest test version is retrieved.

    Note
    ----
    :meth:`run_test_standalone()` is different from :meth:`run_test()` in that the former runs
    the entire workflow in one go, whereas the latter is a wrapper for the sub-steps:
    :meth:`prepare_run_test_offline()`, :meth:`run_test_offline()`, and :meth:`upload_test_result()`.
    Also, :meth:`run_test()` returns the score as a value (int or float or bool) while
    :meth:`run_test_standalone()` returns the :class:`sciunit.Score` object.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    model : sciunit.Model
        A :class:`sciunit.Model` instance.
    test_instance_id : UUID
        System generated unique identifier associated with test instance.
    test_id : UUID
        System generated unique identifier associated with test definition.
    test_alias : string
        User-assigned unique identifier associated with test definition.
    test_version : string
        User-assigned identifier (unique for each test) associated with test instance.
    storage_collab_id : string
        Collab ID where output files should be stored; if empty, stored in the model's host Collab.
    register_result : boolean
        Specify whether the test results are to be registered on the validation framework. Default is set as True.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).
    **params : dict
        Keyword arguments to be passed to the Test constructor.

    Note
    ----
    This is a very basic implementation that would suffice for simple use cases.
    You can customize and create your own run_test() implementations.

    Returns
    -------
    dict
        Data of the test result that has been created.
    object
        Score object evaluated by the test.

    Examples
    --------
    >>> result, score = utils.run_test_standalone(username="shailesh", model=mymodel, test_alias="CDT-5", test_version="5.0")
    """
    if client_obj:
        test_library = TestLibrary.from_existing(client_obj)
    else:
        test_library = TestLibrary(username, password, environment=environment)
    if test_instance_id == "" and test_id == "" and test_alias == "":
        raise Exception("test_instance_id or test_id or test_alias needs to be provided for finding test.")
    test = test_library.get_validation_test(instance_id=test_instance_id, test_id=test_id, alias=test_alias, version=test_version, **params)

    print("----------------------------------------------")
    print("Test name: ", test.name)
    print("Test type: ", type(test))
    print("----------------------------------------------")

    # Check the model
    if not isinstance(model, sciunit.Model):
        raise TypeError("`model` is not a sciunit Model!")
    print("----------------------------------------------")
    print("Model name: ", model.name)
    print("Model type: ", type(model))
    print("----------------------------------------------")

    # Run the test
    t_start = datetime.utcnow()
    score = test.judge(model, deep_error=True)
    t_end = datetime.utcnow()
    print("----------------------------------------------")
    print("Score: ", score.score)
    if "figures" in score.related_data:
        print("Output files: ")
        for item in score.related_data["figures"]:
            print(item)
    print("----------------------------------------------")
    score.runtime = str(int(math.ceil((t_end - t_start).total_seconds()))) + " s"
    score.exec_timestamp = t_end
    # score.exec_platform = str(self._get_platform())

    if not register_result:
        return None, score

    # Register the result with the HBP validation framework
    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    model_instance_uuid = model_catalog.find_model_instance_else_add(score.model)["id"]
    model_instance_json = model_catalog.get_model_instance(instance_id=model_instance_uuid)
    model_json = model_catalog.get_model(model_id=model_instance_json["model_id"])
    model_host_collab_id = model_json["collab_id"]
    model_name = model_json["name"]

    if not storage_collab_id:
        storage_collab_id = model_host_collab_id
    score.related_data["collab_id"] = storage_collab_id

    test_library = TestLibrary.from_existing(model_catalog)

    # `.replace(" ", "_")` used to avoid Collab storage path errors due to spaces
    collab_folder = "validation_results/{}/{}_{}".format(datetime.now().strftime("%Y-%m-%d"), model_name.replace(" ", "_"), datetime.now().strftime("%Y%m%d-%H%M%S"))
    collab_storage = CollabV2DataStore(collab_id=storage_collab_id, base_folder=collab_folder, auth=test_library.auth)

    response = test_library.register_result(test_result=score, data_store=collab_storage)
    return response, score

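# --- Usage sketch (not part of the original module): with register_result=False,
# run_test_standalone() executes the test locally and returns (None, score) without
# registering the result. The username, alias and version strings are placeholders.
def _example_standalone_dry_run(model, username):
    """Illustrative only: run a test without registering the result on the service."""
    response, score = run_test_standalone(username=username, model=model, test_alias="CDT-5", test_version="5.0", register_result=False)
    assert response is None  # nothing was registered
    return score  # a sciunit.Score object
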
def generate_HTML_report(username="", password=None, environment="production", model_list=[], model_instance_list=[], test_list=[], test_instance_list=[], result_list=[], show_links=True, client_obj=None):
    """Generates an HTML report for specified test results

    This method will generate an HTML report for the specified test results.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    model_list : list
        List of model UUIDs or aliases for which the report is to be generated.
    model_instance_list : list
        List of model instance UUIDs for which the report is to be generated.
    test_list : list
        List of test UUIDs or aliases for which the report is to be generated.
    test_instance_list : list
        List of test instance UUIDs for which the report is to be generated.
    result_list : list
        List of result UUIDs for which the report is to be generated.
    show_links : boolean, optional
        Specifies whether hyperlinks to results are to be provided. If False, these data entries will not have clickable hyperlinks.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).

    Returns
    -------
    string
        The absolute path of the generated HTML report.
    list
        List of valid UUIDs for which the HTML report was generated.

    Examples
    --------
    >>> result_list = ["a618a6b1-e92e-4ac6-955a-7b8c6859285a", "793e5852-761b-4801-84cb-53af6f6c1acf"]
    >>> report_path, valid_uuids = utils.generate_HTML_report(username="shailesh", result_list=result_list)
    """
    try:
        from jinja2 import Environment, FileSystemLoader
    except ImportError:
        print("Please install the following package: Jinja2")
        return

    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    test_library = TestLibrary.from_existing(model_catalog)

    # retrieve all model instances from specified models
    if model_list:
        for entry in model_list:
            try:
                uuid.UUID(entry, version=4)
                data = model_catalog.list_model_instances(model_id=entry)
            except ValueError:
                data = model_catalog.list_model_instances(alias=entry)
            for item in data:
                model_instance_list.append(item["id"])

    # retrieve all test instances from specified tests
    if test_list:
        for entry in test_list:
            try:
                uuid.UUID(entry, version=4)
                data = test_library.list_test_instances(test_id=entry)
            except ValueError:
                data = test_library.list_test_instances(alias=entry)
            for item in data:
                test_instance_list.append(item["id"])

    # extend results list to include all results corresponding to above
    # identified model instances and test instances
    for item in model_instance_list:
        results_json = test_library.list_results(model_instance_id=item)
        result_list.extend([r["id"] for r in results_json])
    for item in test_instance_list:
        results_json = test_library.list_results(test_instance_id=item)
        result_list.extend([r["id"] for r in results_json])

    # remove duplicate result UUIDs
    result_list = list(collections.OrderedDict.fromkeys(result_list).keys())

    # utilize each result entry
    result_summary_table = []  # list of dicts, each with 4 keys -> result_id, model_label, test_label, score
    list_results = []
    list_models = []
    list_model_instances = []
    list_tests = []
    list_test_instances = []
    valid_result_uuids = []

    for r_id in result_list:
        result = test_library.get_result(result_id=r_id)
        valid_result_uuids.append(r_id)
        model_instance = model_catalog.get_model_instance(instance_id=result["model_instance_id"])
        test_instance = test_library.get_test_instance(instance_id=result["test_instance_id"])
        model = model_catalog.get_model(model_id=model_instance["model_id"])
        test = test_library.get_test_definition(test_id=test_instance["test_id"])
        list_results.append(result)
        list_models.append(model)
        list_model_instances.append(model_instance)
        list_tests.append(test)
        list_test_instances.append(test_instance)

        model_label = (model["alias"] if model["alias"] else model["name"]) + " (" + str(model_instance["version"]) + ")"
        test_label = (test["alias"] if test["alias"] else test["name"]) + " (" + str(test_instance["version"]) + ")"
        if show_links:
            result_url = "https://model-catalog.brainsimulation.eu/#result_id.{}".format(r_id)
            model_url = "https://model-catalog.brainsimulation.eu/#model_id.{}".format(model["id"])
            test_url = "https://model-catalog.brainsimulation.eu/#test_id.{}".format(test["id"])
            result_summary_table.append({"result_id": (r_id, result_url),
                                         "model_label": (model_label, model_url),
                                         "test_label": (test_label, test_url),
                                         "score": (result["score"], result_url)})
        else:
            result_summary_table.append({"result_id": (r_id),
                                         "model_label": (model_label),
                                         "test_label": (test_label),
                                         "score": (result["score"])})

    timestamp = datetime.now()
    report_name = str("HBP_VF_Report_" + timestamp.strftime("%Y%m%d-%H%M%S") + ".html")
    template_path = pkg_resources.resource_filename("hbp_validation_framework", "templates/report_template.html")
    env = Environment(loader=FileSystemLoader(os.path.dirname(template_path)))
    template = env.get_template(os.path.basename(template_path))
    template_vars = {"report_name": report_name,
                     "created_date": timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                     "result_summary_table": result_summary_table,
                     "list_results": list_results,
                     "list_models": list_models,
                     "list_model_instances": list_model_instances,
                     "list_tests": list_tests,
                     "list_test_instances": list_test_instances}
    html_out = template.render(template_vars)
    with open(report_name, "w") as outfile:
        outfile.write(html_out)
    return os.path.abspath(report_name), valid_result_uuids

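# --- Usage sketch (not part of the original module): instead of listing individual result
# UUIDs, whole models or tests can be passed by UUID or alias and all of their results are
# collected into the report. The aliases below are placeholders.
def _example_report_for_model(username):
    """Illustrative only: build an HTML report from model and test aliases."""
    report_path, valid_uuids = generate_HTML_report(username=username,
                                                    model_list=["my_model_alias"],
                                                    test_list=["my_test_alias"])
    return report_path, valid_uuids
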
def generate_PDF_report(html_report_path=None, username="", password=None, environment="production", model_list=[], model_instance_list=[], test_list=[], test_instance_list=[], result_list=[], show_links=True, only_results=False, client_obj=None):
    """Generates a PDF report for specified test results

    This method will generate a PDF report for the specified test results.

    Parameters
    ----------
    html_report_path : string
        Path to HTML report generated via :meth:`generate_HTML_report()`. If specified, then all other parameters (except `only_results`) are irrelevant. If not specified, then this method will generate both an HTML report as well as a PDF report.
    username : string
        Your HBP Collaboratory username.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    model_list : list
        List of model UUIDs or aliases for which the report is to be generated.
    model_instance_list : list
        List of model instance UUIDs for which the report is to be generated.
    test_list : list
        List of test UUIDs or aliases for which the report is to be generated.
    test_instance_list : list
        List of test instance UUIDs for which the report is to be generated.
    result_list : list
        List of result UUIDs for which the report is to be generated.
    show_links : boolean, optional
        Specifies whether hyperlinks to results are to be provided. If False, these data entries will not have clickable hyperlinks.
    only_results : boolean, optional
        Indicates whether the output PDF should contain only result related info. Set to `False` as default, in which case the PDF will also have info on the result, model, model instance, test and test instance.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).

    Returns
    -------
    string
        The absolute path of the generated PDF report.
    list
        List of valid UUIDs for which the PDF report was generated; returns `None` if `html_report_path` is set.

    Examples
    --------
    >>> result_list = ["a618a6b1-e92e-4ac6-955a-7b8c6859285a", "793e5852-761b-4801-84cb-53af6f6c1acf"]
    >>> report_path, valid_uuids = utils.generate_PDF_report(username="shailesh", result_list=result_list)
    >>> report_path, valid_uuids = utils.generate_PDF_report(html_report_path="report.html", only_results=True)
    """
    # capture the passed arguments before any other local variables are defined
    params = locals()
    try:
        from pyppdf import save_pdf
    except ImportError:
        print("Please install the following package: pyppdf")
        return
    if only_results:
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            print("To use 'only_results=True', please install the following package: beautifulsoup4")
            return

    valid_result_uuids = None
    if not html_report_path:
        params.pop("html_report_path")
        params.pop("only_results")
        html_report_path, valid_result_uuids = generate_HTML_report(**params)

    with open(html_report_path, "r") as html_file:
        html_string = html_file.read()

    # Exchanging the order of these JS files is sufficient to change the
    # 'tabs' organization of info in the HTML file to a sequential layout
    script_jquery = "https://code.jquery.com/jquery-3.3.1.js"
    script_materialize = "https://cdnjs.cloudflare.com/ajax/libs/materialize/0.97.3/js/materialize.min.js"
    html_string = html_string.replace(script_materialize, script_jquery, 1)
    html_string = html_string.replace(script_jquery, script_materialize, 1)

    if only_results:
        # remove tabs navigation bar
        html_soup = BeautifulSoup(html_string, "html.parser")
        for item in html_soup.findAll("ul", {"class": "tabs"}):
            item.parent.decompose()
        # remove model and test tabs
        for item in html_soup.findAll('div', id=lambda x: x and x.startswith(('model_', 'test_'))):
            item.decompose()
        html_string = str(html_soup)

    filepath = os.path.splitext(os.path.abspath(html_report_path))[0] + ".pdf"
    content = save_pdf(output_file=filepath, html=html_string,
                       args_dict={"pdf": {"format": "A4", "landscape": False, "printBackground": True,
                                          "margin": {"top": '0.25in', "right": '0.25in', "bottom": '0.25in', "left": '0.25in'}}},
                       goto="temp")
    return filepath, valid_result_uuids

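# --- Usage sketch (not part of the original module): a PDF can also be produced from a
# previously generated HTML report, in which case the list of result UUIDs is returned as None.
def _example_pdf_from_existing_html(html_report_path):
    """Illustrative only: convert an existing HTML report to a results-only PDF."""
    pdf_path, valid_uuids = generate_PDF_report(html_report_path=html_report_path, only_results=True)
    return pdf_path  # valid_uuids is None when html_report_path is given
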
def generate_score_matrix(username="", password=None, environment="production", model_list=[], model_instance_list=[], test_list=[], test_instance_list=[], result_list=[], show_links=True, round_places=None, client_obj=None):
    """Generates a styled pandas DataFrame with the score matrix

    This method will generate a styled pandas DataFrame for the specified test results.
    Each row will correspond to a particular model instance, and the columns correspond
    to the test instances.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    environment : string, optional
        Used to indicate whether the client is being used for development/testing purposes. Set to `production` by default, which is appropriate for most users. When set to `dev`, it uses the `development` system. For other values, an external config file would be read (the latter is currently not implemented).
    model_list : list
        List of model UUIDs or aliases for which score matrix is to be generated.
    model_instance_list : list
        List of model instance UUIDs for which score matrix is to be generated.
    test_list : list
        List of test UUIDs or aliases for which score matrix is to be generated.
    test_instance_list : list
        List of test instance UUIDs for which score matrix is to be generated.
    result_list : list
        List of result UUIDs for which score matrix is to be generated.
    show_links : boolean, optional
        Specifies whether hyperlinks to results are to be provided. If False, these data entries will not have clickable hyperlinks.
    round_places : int, optional
        Specify to how many decimal places the scores should be rounded while displaying. No rounding done as default.
    client_obj : ModelCatalog/TestLibrary object
        An existing ModelCatalog/TestLibrary object, if available. Avoids the need for repeated authentication and improves performance. Also helps to avoid being blocked by the authentication server due to repeated authentication requests (applicable when running several tests in quick succession, e.g. in a loop).

    Note
    ----
    Only the latest score entry from the specified input for a particular model instance
    and test instance combination will be selected.
    To get the raw (unstyled) DataFrame, use :meth:`get_raw_dataframe()`.

    Returns
    -------
    pandas.io.formats.style.Styler
        A 2-dimensional matrix representation of the scores.
    list
        List of entries from the specified input that could not be resolved and were thus ignored.

    Examples
    --------
    >>> result_list = ["a618a6b1-e92e-4ac6-955a-7b8c6859285a", "793e5852-761b-4801-84cb-53af6f6c1acf"]
    >>> styled_df, excluded = utils.generate_score_matrix(username="shailesh", result_list=result_list)
    """
    try:
        import pandas as pd
    except ImportError:
        print("Please install the following package: pandas")
        return

    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    if client_obj:
        test_library = TestLibrary.from_existing(client_obj)
    else:
        test_library = TestLibrary(username, password, environment=environment)

    # retrieve all model instances from specified models
    if model_list:
        for entry in model_list:
            try:
                uuid.UUID(entry, version=4)
                data = model_catalog.list_model_instances(model_id=entry)
            except ValueError:
                data = model_catalog.list_model_instances(alias=entry)
            for item in data:
                model_instance_list.append(item["id"])

    # retrieve all test instances from specified tests
    if test_list:
        for entry in test_list:
            try:
                uuid.UUID(entry, version=4)
                data = test_library.list_test_instances(test_id=entry)
            except ValueError:
                data = test_library.list_test_instances(alias=entry)
            for item in data:
                test_instance_list.append(item["id"])

    # extend results list to include all results corresponding to above
    # identified model instances and test instances
    for item in model_instance_list:
        results_json = test_library.list_results(model_instance_id=item)["results"]
        result_list.extend([r["id"] for r in results_json])
    for item in test_instance_list:
        results_json = test_library.list_results(test_instance_id=item)["results"]
        result_list.extend([r["id"] for r in results_json])

    # remove duplicate result UUIDs
    result_list = list(collections.OrderedDict.fromkeys(result_list).keys())

    results_dict = collections.OrderedDict()
    model_instances_dict = collections.OrderedDict()
    test_instances_dict = collections.OrderedDict()
    excluded_results = []  # not the latest entry for a particular model instance and test instance combination

    for r_id in result_list:
        result = test_library.get_result(result_id=r_id)
        temp_score = round(float(result["score"]), round_places) if round_places else result["score"]
        # '#*#' is used as separator between score and result UUID (latter used for constructing hyperlink)
        if result["test_instance_id"] in results_dict.keys():
            if result["model_instance_id"] not in results_dict[result["test_instance_id"]].keys():
                results_dict[result["test_instance_id"]][result["model_instance_id"]] = [result["timestamp"], str(temp_score) + "#*#" + r_id]
            elif result["timestamp"] > results_dict[result["test_instance_id"]][result["model_instance_id"]][0]:
                excluded_results.append(results_dict[result["test_instance_id"]][result["model_instance_id"]][1].split('#*#')[1])
                results_dict[result["test_instance_id"]][result["model_instance_id"]] = [result["timestamp"], str(temp_score) + "#*#" + r_id]
            else:
                excluded_results.append(r_id)
        else:
            results_dict[result["test_instance_id"]] = {result["model_instance_id"]: [result["timestamp"], str(temp_score) + "#*#" + r_id]}
        if result["model_instance_id"] not in model_instances_dict.keys():
            model_instances_dict[result["model_instance_id"]] = None
        if result["test_instance_id"] not in test_instances_dict.keys():
            test_instances_dict[result["test_instance_id"]] = None

    # update results_dict values to contain only scores; remove timestamps
    for key_test_inst in results_dict.keys():
        for key_model_inst, value in results_dict[key_test_inst].items():
            results_dict[key_test_inst][key_model_inst] = value[1]

    # form test labels: test_name (version_name)
    for t_id in test_instances_dict.keys():
        test = test_library.get_test_instance(instance_id=t_id)
        test_version = test["version"]
        test = test_library.get_test_definition(test_id=test["test_id"])
        test_name = test["alias"] if test["alias"] else test["name"]
        test_label = test_name + " (" + str(test_version) + ")"
        test_instances_dict[t_id] = test_label

    # form model labels: model_name(version_name)
    for m_id in model_instances_dict.keys():
        model = model_catalog.get_model_instance(instance_id=m_id)
        model_version = model["version"]
        model = model_catalog.get_model(model_id=model["model_id"])
        model_name = model["alias"] if model["alias"] else model["name"]
        model_label = model_name + "(" + str(model_version) + ")"
        model_instances_dict[m_id] = model_label

    data = {}
    for t_key, t_val in test_instances_dict.items():
        score_vals = []
        for m_key in model_instances_dict.keys():
            try:
                score_vals.append(results_dict[t_key][m_key])
            except KeyError:
                score_vals.append(None)
        data[t_val] = score_vals
    df = pd.DataFrame(data, index=model_instances_dict.values())

    def make_clickable(value):
        if not value:
            return value
        score, result_uuid = value.split('#*#')
        if show_links:
            result_url = "https://model-catalog.brainsimulation.eu/#result_id.{}".format(result_uuid)
            return '<a target="_blank" href="{}">{}</a>'.format(result_url, score)
        else:
            return score

    return df.style.format(make_clickable), excluded_results

def get_raw_dataframe(styled_df):
    """Creates a DataFrame from the output of :meth:`generate_score_matrix`

    This method creates a raw DataFrame object from its styled variant as generated by
    :meth:`generate_score_matrix`. The cell values in the latter could contain additional
    data (i.e. result UUIDs) for creating hyperlinks. This is filtered out here such that
    the cell values only contain scores.

    Parameters
    ----------
    styled_df : pandas.io.formats.style.Styler
        Styled DataFrame object generated by :meth:`generate_score_matrix`

    Returns
    -------
    pandas.core.frame.DataFrame
        A 2-dimensional matrix representation of the scores without any formatting.

    Examples
    --------
    >>> df = utils.get_raw_dataframe(styled_df)
    """
    def make_raw_scores(value):
        # strip the '#*#<result UUID>' suffix used for building hyperlinks
        if value:
            return value.split('#*#')[0]
    return styled_df.data.applymap(make_raw_scores)

def display_score_matrix_html(styled_df=None, df=None):
    """Displays the score matrix generated by :meth:`generate_score_matrix` inside a web browser

    This method displays the score matrix generated by :meth:`generate_score_matrix` inside
    a web browser. Input can either be the styled DataFrame object generated by
    :meth:`generate_score_matrix` or the raw DataFrame object from :meth:`get_raw_dataframe`.

    Parameters
    ----------
    styled_df : pandas.io.formats.style.Styler
        Styled DataFrame object generated by :meth:`generate_score_matrix`
    df : pandas.core.frame.DataFrame
        DataFrame object generated by :meth:`get_raw_dataframe`

    Returns
    -------
    None
        Does not return any data. The score matrix is displayed inside the web browser.

    Examples
    --------
    >>> utils.display_score_matrix_html(styled_df)
    """
    if styled_df is None and df is None:
        raise Exception("styled_df or df needs to be provided for displaying the score matrix.")
    filename = "hbp_vf_score_dataframe_{}.html".format(datetime.now().strftime("%Y%m%d-%H%M%S"))
    if styled_df is not None:
        df = get_raw_dataframe(styled_df)
    df.to_html(filename)
    webbrowser.open(filename, new=2)

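# --- Usage sketch (not part of the original module): the three score-matrix helpers are
# designed to be chained. The result UUIDs passed in `result_list` are placeholders.
def _example_score_matrix_workflow(username, result_list):
    """Illustrative only: build, strip, and display a score matrix for given result UUIDs."""
    styled_df, excluded = generate_score_matrix(username=username, result_list=result_list, round_places=2)
    raw_df = get_raw_dataframe(styled_df)   # scores only, hyperlink data removed
    display_score_matrix_html(df=raw_df)    # opens the matrix in the default web browser
    return raw_df, excluded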