Source code for hbp_validation_framework.utils

"""
Miscellaneous methods that help in different aspects of model validation.
Does not require explicit instantiation.

The following methods are available:

=======================================   ====================================
Action                                    Method
=======================================   ====================================
View JSON data in web browser             :meth:`view_json_tree`
Prepare test for execution                :meth:`prepare_run_test_offline`
Run the validation test                   :meth:`run_test_offline`
Register result with validation service   :meth:`upload_test_result`
Run test and register result              :meth:`run_test`
Download PDF report of test results       :meth:`generate_report`
Obtain score matrix for test results      :meth:`generate_score_matrix`
Get Pandas DataFrame from score matrix    :meth:`get_raw_dataframe`
Display score matrix in web browser       :meth:`display_score_matrix_html`
=======================================   ====================================
"""

import os
import uuid
import json
import pickle
import webbrowser
import argparse
import collections
import unicodedata
try:
    raw_input
except NameError:  # Python 3
    raw_input = input
try:
    unicode
except NameError:  # Python 3: `unicode` is referenced in generate_report()
    unicode = str
import sciunit
from datetime import datetime
from . import TestLibrary, ModelCatalog
from .datastores import CollabDataStore, URI_SCHEME_MAP
try:  # Python 3
    from urllib.parse import urlparse
except ImportError:  # Python 2
    from urlparse import urlparse
from importlib import import_module
import mimetypes
import math
try:
    from pathlib import Path
except ImportError:
    from pathlib2 import Path  # Python 2 backport

def view_json_tree(data):
    """Displays the JSON tree structure inside the web browser

    This method can be used to view any JSON data, generated by any of the
    validation client's methods, in a tree-like representation.

    Parameters
    ----------
    data : string
        JSON object represented as a string.

    Returns
    -------
    None
        Does not return any data. JSON displayed inside web browser.

    Examples
    --------
    >>> model = model_catalog.get_model(alias="HCkt")
    >>> from hbp_validation_framework import utils
    >>> utils.view_json_tree(model)
    """
    _make_js_file(data)
    script_dir = os.path.dirname(__file__)
    rel_path = "jsonTreeViewer/index.htm"
    abs_file_path = os.path.join(script_dir, rel_path)
    webbrowser.open(abs_file_path, new=2)

def _make_js_file(data):
    """
    Creates a JavaScript file from the given JSON object; loaded by the browser

    This eliminates cross-origin issues with loading local data files
    (e.g. via jQuery)
    """
    script_dir = os.path.dirname(__file__)
    rel_path = "jsonTreeViewer/data.js"
    abs_file_path = os.path.join(script_dir, rel_path)
    with open(abs_file_path, 'w') as outfile:
        outfile.write("var data = '")
        json.dump(data, outfile)
        outfile.write("'")

def prepare_run_test_offline(username="", password=None, environment="production",
                             test_instance_id="", test_id="", test_alias="",
                             test_version="", client_obj=None, **params):
    """Gather info necessary for running validation test

    This method will select the specified test and prepare a config file
    enabling offline execution of the validation test. The observation file
    required by the test is also downloaded and stored locally.

    The test can be specified in the following ways (in order of priority):

    1. specify `test_instance_id` corresponding to test instance in test library
    2. specify `test_id` and `test_version`
    3. specify `test_alias` and `test_version`

    Note: for (2) and (3) above, if `test_version` is not specified,
    then the latest test version is retrieved

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether being used for development/testing purposes.
        Set as `production` as default for using the production system,
        which is appropriate for most users. When set to `dev`, it uses the
        `development` system. For other values, an external config file would
        be read (the latter is currently not implemented).
    test_instance_id : UUID
        System generated unique identifier associated with test instance.
    test_id : UUID
        System generated unique identifier associated with test definition.
    test_alias : string
        User-assigned unique identifier associated with test definition.
    test_version : string
        User-assigned identifier (unique for each test) associated with test instance.
    client_obj : ModelCatalog/TestLibrary object
        Used to easily create a new ModelCatalog/TestLibrary object if either
        exists already. Avoids need for repeated authentications; improves
        performance. Also, helps minimize being blocked out by the
        authentication server for repeated authentication requests
        (applicable when running several tests in quick succession, e.g. in a loop).
    **params : dict
        Keyword arguments to be passed to the Test constructor.

    Note
    ----
    Should be run on a node having access to external URLs
    (i.e. with internet access)

    Returns
    -------
    path
        The absolute path of the generated test config file

    Examples
    --------
    >>> test_config_file = utils.prepare_run_test_offline(username="shailesh", test_alias="CDT-5", test_version="5.0")
    """
    if client_obj:
        test_library = TestLibrary.from_existing(client_obj)
    else:
        test_library = TestLibrary(username, password, environment=environment)
    if test_instance_id == "" and test_id == "" and test_alias == "":
        raise Exception("test_instance_id or test_id or test_alias needs to be provided for finding test.")

    # Gather specified test info
    test_instance_json = test_library.get_test_instance(instance_id=test_instance_id, test_id=test_id,
                                                        alias=test_alias, version=test_version)
    test_id = test_instance_json["test_definition_id"]
    test_instance_id = test_instance_json["id"]
    test_instance_path = test_instance_json["path"]

    # Download test observation to local storage
    test_observation_path = test_library.get_test_definition(test_id=test_id)["data_location"]
    parse_result = urlparse(test_observation_path)
    datastore = URI_SCHEME_MAP[parse_result.scheme](auth=test_library.auth)
    base_folder = os.path.join(os.getcwd(), "hbp_validation_framework", test_id,
                               datetime.now().strftime("%Y%m%d-%H%M%S"))
    test_observation_file = datastore.download_data([test_observation_path], local_directory=base_folder)[0]

    # Create test config required for offline execution
    test_info = {}
    test_info["test_id"] = test_id
    test_info["test_instance_id"] = test_instance_id
    test_info["test_instance_path"] = test_instance_path
    test_info["test_observation_file"] = os.path.basename(os.path.realpath(test_observation_file))
    test_info["params"] = params

    # Save test info to config file
    test_config_file = os.path.join(base_folder, "test_config.json")
    with open(test_config_file, 'w') as file:
        file.write(json.dumps(test_info, indent=4))
    return test_config_file

def run_test_offline(model="", test_config_file=""):
    """Run the validation test

    This method will accept a model, located locally, run the test specified
    via the test config file (generated by :meth:`prepare_run_test_offline`),
    and store the results locally.

    Parameters
    ----------
    model : sciunit.Model
        A :class:`sciunit.Model` instance.
    test_config_file : string
        Absolute path of the test config file generated by :meth:`prepare_run_test_offline`

    Note
    ----
    Can be run on node(s) having no access to external URLs (i.e. without
    internet access). Also, it is required that the test_config_file and the
    test_observation_file are located in the same directory.

    Returns
    -------
    path
        The absolute path of the generated test result file

    Examples
    --------
    >>> test_result_file = utils.run_test_offline(model=model, test_config_file=test_config_file)
    """
    if not os.path.isfile(test_config_file):
        raise Exception("'test_config_file' should direct to file describing the test configuration.")
    base_folder = os.path.dirname(os.path.realpath(test_config_file))

    # Load the test info from config file
    with open(test_config_file) as file:
        test_info = json.load(file)

    # Identify test class path
    path_parts = test_info["test_instance_path"].split(".")
    cls_name = path_parts[-1]
    module_name = ".".join(path_parts[:-1])
    test_module = import_module(module_name)
    test_cls = getattr(test_module, cls_name)

    # Read observation data required by test
    with open(os.path.join(base_folder, test_info["test_observation_file"]), 'r') as file:
        observation_data = file.read()
    content_type = mimetypes.guess_type(test_info["test_observation_file"])[0]
    if content_type == "application/json":
        observation_data = json.loads(observation_data)

    # Create the :class:`sciunit.Test` instance
    params = test_info["params"]
    test = test_cls(observation=observation_data, **params)
    test.uuid = test_info["test_instance_id"]

    print("----------------------------------------------")
    print("Test name: ", test.name)
    print("Test type: ", type(test))
    print("----------------------------------------------")

    # Check the model
    if not isinstance(model, sciunit.Model):
        raise TypeError("`model` is not a sciunit Model!")
    print("----------------------------------------------")
    print("Model name: ", model.name)
    print("Model type: ", type(model))
    print("----------------------------------------------")

    # Run the test
    t_start = datetime.utcnow()
    score = test.judge(model, deep_error=True)
    t_end = datetime.utcnow()
    print("----------------------------------------------")
    print("Score: ", score.score)
    if "figures" in score.related_data:
        print("Output files: ")
        for item in score.related_data["figures"]:
            print(item)
    print("----------------------------------------------")
    score.runtime = str(int(math.ceil((t_end - t_start).total_seconds()))) + " s"
    score.exec_timestamp = t_end
    # score.exec_platform = str(self._get_platform())

    # Save result info to file
    Path(os.path.join(base_folder, "results")).mkdir(parents=True, exist_ok=True)
    test_result_file = os.path.join(base_folder, "results",
                                    "result__" + model.name + "__" + datetime.now().strftime("%Y%m%d%H%M%S") + ".pkl")
    with open(test_result_file, 'wb') as file:
        pickle.dump(score, file)
    return test_result_file

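# Illustrative sketch: the result file written above is a pickled sciunit score,
# so it can be inspected locally before (or instead of) uploading. Assumes
# `test_result_file` was returned by run_test_offline().
#
#     import pickle
#     with open(test_result_file, 'rb') as f:
#         score = pickle.load(f)
#     print(score.score, score.runtime)          # numeric score and execution time
#     print(score.related_data.get("figures"))   # any output files produced by the test
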
def upload_test_result(username="", password=None, environment="production",
                       test_result_file="", storage_collab_id="",
                       register_result=True, client_obj=None):
    """Register the result with the Validation Service

    This method will register the validation result specified via the test
    result file (generated by :meth:`run_test_offline`) with the validation service.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether being used for development/testing purposes.
        Set as `production` as default for using the production system,
        which is appropriate for most users. When set to `dev`, it uses the
        `development` system. For other values, an external config file would
        be read (the latter is currently not implemented).
    test_result_file : string
        Absolute path of the test result file generated by :meth:`run_test_offline`
    storage_collab_id : string
        Collab ID where output files should be stored; if empty, stored in model's host Collab.
    register_result : boolean
        Specify whether the test results are to be registered on the validation framework.
        Default is set as True.
    client_obj : ModelCatalog/TestLibrary object
        Used to easily create a new ModelCatalog/TestLibrary object if either
        exists already. Avoids need for repeated authentications; improves
        performance. Also, helps minimize being blocked out by the
        authentication server for repeated authentication requests
        (applicable when running several tests in quick succession, e.g. in a loop).

    Note
    ----
    Should be run on a node having access to external URLs
    (i.e. with internet access)

    Returns
    -------
    UUID
        UUID of the test result that has been created.
    object
        score object evaluated by the test.

    Examples
    --------
    >>> result_id, score = utils.upload_test_result(username="shailesh", test_result_file=test_result_file)
    """
    if not register_result:
        return None, None
    if not os.path.isfile(test_result_file):
        raise Exception("'test_result_file' should direct to file containing the test result data.")

    # Load result info from file
    with open(test_result_file, 'rb') as file:
        score = pickle.load(file)

    # Register the result with the HBP validation framework
    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    model_instance_uuid = model_catalog.find_model_instance_else_add(score.model)
    model_instance_json = model_catalog.get_model_instance(instance_id=model_instance_uuid)
    model_json = model_catalog.get_model(model_id=model_instance_json["model_id"])
    model_host_collab_id = model_json["app"]["collab_id"]
    model_name = model_json["name"]

    if not storage_collab_id:
        storage_collab_id = model_host_collab_id
    score.related_data["project"] = storage_collab_id

    # Check if result with same hash has already been uploaded for this
    # (model instance, test instance) combination; if yes, don't register result
    result_json = {
        "model_instance_id": model_instance_uuid,
        "test_code_id": score.test.uuid,
        "score": score.score,
        "runtime": score.runtime,
        "exectime": score.exec_timestamp,
        # "platform": score.exec_platform
    }
    score.score_hash = str(hash(json.dumps(result_json, sort_keys=True, default=str)))
    test_library = TestLibrary.from_existing(model_catalog)
    results = test_library.list_results(model_version_id=model_instance_uuid,
                                        test_code_id=score.test.uuid)["results"]
    duplicate_results = [x["id"] for x in results if x["hash"] == score.score_hash]
    if duplicate_results:
        raise Exception("An identical result has already been registered on the validation framework."
                        "\nExisting Result UUID = {}".format(", ".join(duplicate_results)))

    collab_folder = "validation_results/{}/{}_{}".format(datetime.now().strftime("%Y-%m-%d"),
                                                         model_name,
                                                         datetime.now().strftime("%Y%m%d-%H%M%S"))
    collab_storage = CollabDataStore(collab_id=storage_collab_id,
                                     base_folder=collab_folder,
                                     auth=test_library.auth)
    response = test_library.register_result(test_result=score, data_store=collab_storage)
    return response, score

def run_test(username="", password=None, environment="production", model="",
             test_instance_id="", test_id="", test_alias="", test_version="",
             storage_collab_id="", register_result=True, client_obj=None, **params):
    """Run validation test and register result

    This will execute the following methods by relaying the output of one to the next:

    1. :meth:`prepare_run_test_offline`
    2. :meth:`run_test_offline`
    3. :meth:`upload_test_result`

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether being used for development/testing purposes.
        Set as `production` as default for using the production system,
        which is appropriate for most users. When set to `dev`, it uses the
        `development` system. For other values, an external config file would
        be read (the latter is currently not implemented).
    model : sciunit.Model
        A :class:`sciunit.Model` instance.
    test_instance_id : UUID
        System generated unique identifier associated with test instance.
    test_id : UUID
        System generated unique identifier associated with test definition.
    test_alias : string
        User-assigned unique identifier associated with test definition.
    test_version : string
        User-assigned identifier (unique for each test) associated with test instance.
    storage_collab_id : string
        Collab ID where output files should be stored; if empty, stored in model's host Collab.
    register_result : boolean
        Specify whether the test results are to be registered on the validation framework.
        Default is set as True.
    client_obj : ModelCatalog/TestLibrary object
        Used to easily create a new ModelCatalog/TestLibrary object if either
        exists already. Avoids need for repeated authentications; improves
        performance. Also, helps minimize being blocked out by the
        authentication server for repeated authentication requests
        (applicable when running several tests in quick succession, e.g. in a loop).
    **params : dict
        Keyword arguments to be passed to the Test constructor.

    Note
    ----
    Should be run on a node having access to external URLs
    (i.e. with internet access)

    Returns
    -------
    UUID
        UUID of the test result that has been created.
    object
        score object evaluated by the test.

    Examples
    --------
    >>> result_id, score = utils.run_test(username="HBP_USERNAME", password="HBP_PASSWORD",
    ...                                   environment="production", model=cell_model,
    ...                                   test_alias="basalg_msn_d1", test_version="1.0",
    ...                                   storage_collab_id="8123", register_result=True)
    """
    test_config_file = prepare_run_test_offline(username=username, password=password, environment=environment,
                                                test_instance_id=test_instance_id, test_id=test_id,
                                                test_alias=test_alias, test_version=test_version,
                                                client_obj=client_obj, **params)
    test_result_file = run_test_offline(model=model, test_config_file=test_config_file)
    result_id, score = upload_test_result(username=username, password=password, environment=environment,
                                          test_result_file=test_result_file, storage_collab_id=storage_collab_id,
                                          register_result=register_result, client_obj=client_obj)
    return result_id, score

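# Illustrative sketch of the `client_obj` reuse described above: authenticate once,
# then run several tests in a loop without repeated logins. The model variable and
# the test aliases/versions are placeholders, not values defined in this module.
#
#     from hbp_validation_framework import TestLibrary
#     test_library = TestLibrary(username="HBP_USERNAME")
#     for alias, version in [("CDT-5", "5.0"), ("basalg_msn_d1", "1.0")]:
#         result_id, score = utils.run_test(model=cell_model, test_alias=alias,
#                                           test_version=version, client_obj=test_library)
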
def generate_report(username="", password=None, environment="production",
                    result_list=[], only_combined=True, client_obj=None):
    """Generates and downloads a PDF report of test results

    This method will generate and download a PDF report of the specified
    test results. The report will consist of all information relevant to
    that particular result, such as:

    * result info
    * model info
    * model instance info
    * test info
    * test instance info
    * output files associated with result

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether being used for development/testing purposes.
        Set as `production` as default for using the production system,
        which is appropriate for most users. When set to `dev`, it uses the
        `development` system. For other values, an external config file would
        be read (the latter is currently not implemented).
    result_list : list
        List of result UUIDs that need to be included in report.
    only_combined : boolean, optional
        Indicates whether only a single combined PDF should be saved. Set to
        `True` as default. When set to `False`, then `n+2` PDFs will be saved,
        where `n` is the number of valid result UUIDs. These would include:

        * Combined PDF report
        * Summary of call to `generate_report()`
        * One PDF for each valid result UUID
    client_obj : ModelCatalog/TestLibrary object
        Used to easily create a new ModelCatalog/TestLibrary object if either
        exists already. Avoids need for repeated authentications; improves
        performance. Also, helps minimize being blocked out by the
        authentication server for repeated authentication requests
        (applicable when running several tests in quick succession, e.g. in a loop).

    Returns
    -------
    list
        List of valid UUIDs for which the PDF report was generated
    path
        The absolute path of the generated report

    Examples
    --------
    >>> result_list = ["a618a6b1-e92e-4ac6-955a-7b8c6859285a", "793e5852-761b-4801-84cb-53af6f6c1acf"]
    >>> valid_uuids, report_path = utils.generate_report(username="shailesh", result_list=result_list)
    """
    # This method can be significantly improved in future.
    try:
        from fpdf import FPDF
    except ImportError:
        print("Please install the following package: fpdf")
        return
    try:
        from PyPDF2 import PdfFileMerger, PdfFileReader
    except ImportError:
        print("Please install the following package: PyPDF2")
        return

    class PDF(FPDF):
        def header(self):
            # Logo
            self.image('https://i.imgur.com/sHi1OSs.png', 80, 8, 50)
            # Arial bold 15
            self.set_font('Arial', 'B', 18)
            # Move to the right
            self.ln(15)
            self.cell(45)
            # Title
            self.cell(100, 10, 'Validation Framework Report', 1, 0, 'C')
            # Line break
            self.ln(20)

        # # Page footer
        # def footer(self):
        #     # Position at 1.5 cm from bottom
        #     self.set_y(-15)
        #     # Arial italic 8
        #     self.set_font('Arial', 'I', 8)
        #     # Page number
        #     self.cell(0, 10, 'Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')

    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    test_library = TestLibrary.from_existing(model_catalog)

    result_data = {}
    valid_uuids = []
    for result_id in result_list:
        result = test_library.get_result(result_id=result_id)
        if len(result["results"]) != 0:
            valid_uuids.append(result_id)
            result_data[result_id] = result["results"][0]

    def _print_param_value(pdf, param, value, fontsize):
        pdf.set_font('Arial', 'B', fontsize)
        pdf.cell(40, 10, param)
        pdf.set_font('Arial', '', fontsize)
        pdf.cell(0, 10, value)

    pdf = PDF()
    # pdf.alias_nb_pages()
    timestamp = datetime.now()
    filename = str("HBP_VF_Report_" + timestamp.strftime("%Y%m%d-%H%M%S") + ".pdf")

    # Cover Page
    pdf.add_page()
    _print_param_value(pdf, "Report Name: ", filename, 14)
    pdf.ln(10)
    _print_param_value(pdf, "Created Date: ", timestamp.strftime("%Y-%m-%d %H:%M:%S"), 14)
    pdf.ln(20)
    pdf.set_font('Arial', 'B', 14)
    pdf.cell(40, 10, "Contains data for following Result UUIDs: ")
    pdf.ln(10)
    pdf.set_font('Arial', '', 14)
    for result_id in valid_uuids:
        pdf.cell(40)
        pdf.cell(0, 10, result_id)
        pdf.ln(10)
    if len(valid_uuids) < len(result_list):
        pdf.ln(10)
        pdf.set_font('Arial', 'B', 14)
        pdf.cell(40, 10, "Following UUIDs were invalid: ")
        pdf.ln(10)
        pdf.set_font('Arial', '', 14)
        for result_id in result_list:
            if result_id not in valid_uuids:
                pdf.cell(40)
                pdf.cell(0, 10, result_id)
                pdf.ln(10)
    pdf.ln(50)
    pdf.set_font('Arial', 'B', 14)
    pdf.cell(190, 10, 'Report generated by the HBP Validation Framework', 0, 1, 'C')
    pdf.ln(10)
    pdf.set_font('Arial', 'I', 12)
    pdf.cell(90, 10, 'For more information, you may visit:')
    pdf.ln(10)
    pdf.cell(15)
    _print_param_value(pdf, "Python Client: ", "https://github.com/apdavison/hbp-validation-client/", 12)
    pdf.ln(10)
    pdf.cell(15)
    _print_param_value(pdf, "Documentation: ", "http://hbp-validation-client.readthedocs.io/", 12)
    if not os.path.exists("./report/"):
        os.makedirs("./report/")
    pdf.output(str("./report/" + filename[:-4] + "_cover.pdf"), 'F')

    result_ctr = 0
    # Result Pages
    for result_id in valid_uuids:
        pdf = PDF()
        # pdf.alias_nb_pages()
        pdf.add_page()

        # General Result Info
        model_instance_id = result_data[result_id]["model_version_id"]
        model_instance_info = model_catalog.get_model_instance(instance_id=model_instance_id)
        model_id = model_instance_info["model_id"]
        model_info = model_catalog.get_model(model_id=model_id, instances=False, images=False)
        test_instance_id = result_data[result_id]["test_code_id"]
        test_instance_info = test_library.get_test_instance(instance_id=test_instance_id)
        test_id = test_instance_info["test_definition_id"]
        test_info = test_library.get_test_definition(test_id=test_id)
        test_info.pop("codes")
        # pdf.add_page()
        _print_param_value(pdf, "Result UUID: ", result_id, 14)

        # Result Info
        pdf.ln(10)
        pdf.set_font('Arial', 'BU', 14)
        pdf.ln(10)
        pdf.cell(190, 10, 'Result Info', 0, 1, 'C')
        for key, val in result_data[result_id].items():
            _print_param_value(pdf, str(key + ": "), str(val), 12)
            pdf.ln(10)

        # Model Info
        pdf.ln(10)
        pdf.set_font('Arial', 'BU', 14)
        pdf.ln(10)
        pdf.cell(190, 10, 'Model Info', 0, 1, 'C')
        for key, val in model_info.items():
            if key == "app":
                _print_param_value(pdf, "collab_id", str(val["collab_id"]), 12)
                pdf.ln(10)
                if "id" in val:
                    _print_param_value(pdf, "app_id", str(val["id"]), 12)
            else:
                _print_param_value(pdf, str(key + ": "),
                                   unicodedata.normalize('NFKD', val).encode('ascii', 'ignore').decode('ascii')
                                   if isinstance(val, unicode) else str(val), 12)
            pdf.ln(10)

        # Model Instance Info
        pdf.ln(10)
        pdf.set_font('Arial', 'BU', 14)
        pdf.ln(10)
        pdf.cell(190, 10, 'Model Instance Info', 0, 1, 'C')
        for key, val in model_instance_info.items():
            _print_param_value(pdf, str(key + ": "), str(val), 12)
            pdf.ln(10)

        # Test Info
        pdf.ln(10)
        pdf.set_font('Arial', 'BU', 14)
        pdf.ln(10)
        pdf.cell(190, 10, 'Test Info', 0, 1, 'C')
        for key, val in test_info.items():
            _print_param_value(pdf, str(key + ": "), str(val), 12)
            pdf.ln(10)

        # Test Instance Info
        pdf.ln(10)
        pdf.set_font('Arial', 'BU', 14)
        pdf.ln(10)
        pdf.cell(190, 10, 'Test Instance Info', 0, 1, 'C')
        for key, val in test_instance_info.items():
            _print_param_value(pdf, str(key + ": "), str(val), 12)
            pdf.ln(10)

        pdf.output(str("./report/" + filename[:-4] + "_temp_" + str(result_ctr) + ".pdf"), 'F')

        # Additional Files
        if result_data[result_id]["results_storage"]:
            datastore = CollabDataStore(auth=model_catalog.auth)
            entity_uuid = datastore._translate_URL_to_UUID(result_data[result_id]["results_storage"])
            file_list = datastore.download_data_using_uuid(entity_uuid)
            merger = PdfFileMerger()
            merger.append(str("./report/" + filename[:-4] + "_temp_" + str(result_ctr) + ".pdf"))
            temp_txt_files = []
            for datafile in file_list:
                if datafile.endswith(".pdf"):
                    merger.append(PdfFileReader(open(datafile, 'rb')))
                elif datafile.endswith((".txt", ".json")):
                    txt_pdf = FPDF()
                    txt_pdf.add_page()
                    txt_pdf.set_font('Arial', 'BU', 14)
                    txt_pdf.cell(0, 10, os.path.basename(datafile), 0, 1, 'C')
                    txt_pdf.set_font('Courier', '', 8)
                    with open(datafile, 'r') as txt_file:
                        txt_content = txt_file.read().splitlines()
                    for txt_line in txt_content:
                        txt_pdf.cell(0, 0, txt_line)
                        txt_pdf.ln(5)
                    savepath = os.path.join("./report",
                                            "temp_" + os.path.splitext(os.path.basename(datafile))[0] + "_" + str(result_ctr) + ".pdf")
                    temp_txt_files.append(savepath)
                    txt_pdf.output(str(savepath), 'F')
                    merger.append(PdfFileReader(open(savepath, 'rb')))
            merger.write(str("./report/" + filename[:-4] + "_" + str(result_ctr) + ".pdf"))
            os.remove(str("./report/" + filename[:-4] + "_temp_" + str(result_ctr) + ".pdf"))
            for tempfile in temp_txt_files:
                os.remove(tempfile)
        result_ctr = result_ctr + 1

    merger = PdfFileMerger()
    merger.append(str("./report/" + filename[:-4] + "_cover.pdf"))
    if only_combined:
        os.remove(str("./report/" + filename[:-4] + "_cover.pdf"))
    for i in range(result_ctr):
        merger.append(str("./report/" + filename[:-4] + "_" + str(i) + ".pdf"))
        if only_combined:
            os.remove(str("./report/" + filename[:-4] + "_" + str(i) + ".pdf"))
    merger.write(str("./report/" + filename))
    report_path = os.path.abspath("./report/" + filename)
    print("Report generated at: {}".format(report_path))
    return valid_uuids, report_path

def generate_score_matrix(username="", password=None, environment="production",
                          model_list=[], model_instance_list=[], test_list=[],
                          test_instance_list=[], result_list=[], collab_id=None,
                          client_obj=None):
    """Generates a styled pandas DataFrame with score matrix

    This method will generate a styled pandas DataFrame for the specified
    test results. Each row corresponds to a particular model instance,
    and the columns correspond to the test instances.

    Parameters
    ----------
    username : string
        Your HBP Collaboratory username.
    password : string
        Your HBP Collaboratory password.
    environment : string, optional
        Used to indicate whether being used for development/testing purposes.
        Set as `production` as default for using the production system,
        which is appropriate for most users. When set to `dev`, it uses the
        `development` system. For other values, an external config file would
        be read (the latter is currently not implemented).
    model_list : list
        List of model UUIDs or aliases for which score matrix is to be generated.
    model_instance_list : list
        List of model instance UUIDs for which score matrix is to be generated.
    test_list : list
        List of test UUIDs or aliases for which score matrix is to be generated.
    test_instance_list : list
        List of test instance UUIDs for which score matrix is to be generated.
    result_list : list
        List of result UUIDs for which score matrix is to be generated.
    collab_id : string, optional
        Collaboratory ID where hyperlinks to results are to be redirected.
        If unspecified, the scores will not have clickable hyperlinks.
    client_obj : ModelCatalog/TestLibrary object
        Used to easily create a new ModelCatalog/TestLibrary object if either
        exists already. Avoids need for repeated authentications; improves
        performance. Also, helps minimize being blocked out by the
        authentication server for repeated authentication requests
        (applicable when running several tests in quick succession, e.g. in a loop).

    Note
    ----
    Only the latest score entry from the specified input for a particular
    model instance and test instance combination will be selected.
    To get the raw (unstyled) DataFrame, use :meth:`get_raw_dataframe`

    Returns
    -------
    pandas.io.formats.style.Styler
        A 2-dimensional matrix representation of the scores
    list
        List of entries from specified input that could not be resolved and thus ignored

    Examples
    --------
    >>> result_list = ["a618a6b1-e92e-4ac6-955a-7b8c6859285a", "793e5852-761b-4801-84cb-53af6f6c1acf"]
    >>> styled_df, excluded = utils.generate_score_matrix(username="shailesh", result_list=result_list)
    """
    try:
        import pandas as pd
    except ImportError:
        print("Please install the following package: pandas")
        return

    if client_obj:
        model_catalog = ModelCatalog.from_existing(client_obj)
    else:
        model_catalog = ModelCatalog(username, password, environment=environment)
    if client_obj:
        test_library = TestLibrary.from_existing(client_obj)
    else:
        test_library = TestLibrary(username, password, environment=environment)

    if collab_id:
        # check if app exists; if not then create
        VFapp_navID = test_library.exists_in_collab_else_create(collab_id)
        test_library.set_app_config(collab_id=collab_id, app_id=VFapp_navID, only_if_new="True")

    # retrieve all model instances from specified models
    if model_list:
        for entry in model_list:
            try:
                uuid.UUID(entry, version=4)
                data = model_catalog.list_model_instances(model_id=entry)
            except ValueError:
                data = model_catalog.list_model_instances(alias=entry)
            for item in data:
                model_instance_list.append(item["id"])

    # retrieve all test instances from specified tests
    if test_list:
        for entry in test_list:
            try:
                uuid.UUID(entry, version=4)
                data = test_library.list_test_instances(test_id=entry)
            except ValueError:
                data = test_library.list_test_instances(alias=entry)
            for item in data:
                test_instance_list.append(item["id"])

    # extend results list to include all results corresponding to above
    # identified model instances and test instances
    for item in model_instance_list:
        results_json = test_library.list_results(model_version_id=item)["results"]
        result_list.extend([r["id"] for r in results_json])
    for item in test_instance_list:
        results_json = test_library.list_results(test_code_id=item)["results"]
        result_list.extend([r["id"] for r in results_json])

    # remove duplicate result UUIDs
    result_list = list(collections.OrderedDict.fromkeys(result_list).keys())

    results_dict = collections.OrderedDict()
    model_instances_dict = collections.OrderedDict()
    test_instances_dict = collections.OrderedDict()
    excluded_results = []  # not latest entry for a particular model instance and test instance combination

    for r_id in result_list:
        result = test_library.get_result(result_id=r_id)["results"][0]
        # '#*#' is used as separator between score and result UUID (latter used for constructing hyperlink)
        if result["test_code_id"] in results_dict.keys():
            if result["model_version_id"] not in results_dict[result["test_code_id"]].keys():
                results_dict[result["test_code_id"]][result["model_version_id"]] = [result["timestamp"], str(result["score"]) + "#*#" + r_id]
            elif result["timestamp"] > results_dict[result["test_code_id"]][result["model_version_id"]][0]:
                excluded_results.append(results_dict[result["test_code_id"]][result["model_version_id"]][1].split('#*#')[1])
                results_dict[result["test_code_id"]][result["model_version_id"]] = [result["timestamp"], str(result["score"]) + "#*#" + r_id]
            else:
                excluded_results.append(r_id)
        else:
            results_dict[result["test_code_id"]] = {result["model_version_id"]: [result["timestamp"], str(result["score"]) + "#*#" + r_id]}
        if result["model_version_id"] not in model_instances_dict.keys():
            model_instances_dict[result["model_version_id"]] = None
        if result["test_code_id"] not in test_instances_dict.keys():
            test_instances_dict[result["test_code_id"]] = None

    # update results_dict values to contain only scores; remove timestamps
    for key_test_inst in results_dict.keys():
        for key_model_inst, value in results_dict[key_test_inst].items():
            results_dict[key_test_inst][key_model_inst] = value[1]

    # form test labels: test_name(version_name)
    for t_id in test_instances_dict.keys():
        test = test_library.get_test_instance(instance_id=t_id)
        test_version = test["version"]
        test = test_library.get_test_definition(test_id=test["test_definition_id"])
        test_name = test["alias"] if test["alias"] else test["name"]
        test_label = test_name + " (" + str(test_version) + ")"
        test_instances_dict[t_id] = test_label

    # form model labels: model_name(version_name)
    for m_id in model_instances_dict.keys():
        model = model_catalog.get_model_instance(instance_id=m_id)
        model_version = model["version"]
        model = model_catalog.get_model(model_id=model["model_id"])
        model_name = model["alias"] if model["alias"] else model["name"]
        model_label = model_name + "(" + str(model_version) + ")"
        model_instances_dict[m_id] = model_label

    data = {}
    for t_key, t_val in test_instances_dict.items():
        score_vals = []
        for m_key in model_instances_dict.keys():
            try:
                score_vals.append(results_dict[t_key][m_key])
            except KeyError:
                score_vals.append(None)
        data[t_val] = score_vals
    df = pd.DataFrame(data, index=model_instances_dict.values())

    def make_clickable(value):
        if not value:
            return value
        score, result_uuid = value.split('#*#')
        if collab_id:
            result_url = "https://collab.humanbrainproject.eu/#/collab/{}/nav/{}?state=result.{}".format(
                str(collab_id), str(VFapp_navID), result_uuid)
            return '<a target="_blank" href="{}">{}</a>'.format(result_url, score)
        else:
            return score

    return df.style.format(make_clickable), excluded_results

def get_raw_dataframe(styled_df):
    """Creates DataFrame from output of :meth:`generate_score_matrix`

    This method creates a raw DataFrame object from its styled variant as
    generated by :meth:`generate_score_matrix`. The cell values in the latter
    could contain additional data (i.e. result UUIDs) for creating hyperlinks.
    This is filtered out here such that the cell values only contain scores.

    Parameters
    ----------
    styled_df : pandas.io.formats.style.Styler
        Styled DataFrame object generated by :meth:`generate_score_matrix`

    Returns
    -------
    pandas.core.frame.DataFrame
        A 2-dimensional matrix representation of the scores without any formatting

    Examples
    --------
    >>> df = utils.get_raw_dataframe(styled_df)
    """
    def make_raw_scores(value):
        if value:
            return value.split('#*#')[0]

    return styled_df.data.applymap(make_raw_scores)

def display_score_matrix_html(styled_df=None, df=None):
    """Displays score matrix generated from :meth:`generate_score_matrix` inside web browser

    This method displays the scoring matrix generated by
    :meth:`generate_score_matrix` inside a web browser. Input can either be
    the styled DataFrame object generated by :meth:`generate_score_matrix`
    or the raw DataFrame object from :meth:`get_raw_dataframe`.

    Parameters
    ----------
    styled_df : pandas.io.formats.style.Styler
        Styled DataFrame object generated by :meth:`generate_score_matrix`
    df : pandas.core.frame.DataFrame
        DataFrame object generated by :meth:`get_raw_dataframe`

    Returns
    -------
    None
        Does not return any data. Score matrix displayed inside web browser.

    Examples
    --------
    >>> utils.display_score_matrix_html(styled_df)
    """
    if styled_df is None and df is None:
        raise Exception("styled_df or df needs to be provided for displaying the score matrix.")
    filename = "hbp_vf_score_dataframe_{}.html".format(datetime.now().strftime("%Y%m%d-%H%M%S"))
    if styled_df is not None:
        df = get_raw_dataframe(styled_df)
    df.to_html(filename)
    webbrowser.open(filename, new=2)

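# Illustrative sketch combining the three score-matrix helpers above; the result
# UUIDs are placeholders taken from the docstring examples.
#
#     result_list = ["a618a6b1-e92e-4ac6-955a-7b8c6859285a",
#                    "793e5852-761b-4801-84cb-53af6f6c1acf"]
#     styled_df, excluded = utils.generate_score_matrix(username="HBP_USERNAME",
#                                                       result_list=result_list)
#     df = utils.get_raw_dataframe(styled_df)       # plain DataFrame of scores
#     utils.display_score_matrix_html(styled_df)    # opens the matrix in a web browser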