Source code for pmxbiobb.pmxanalyse

#!/usr/bin/env python3

"""Module containing the PMX analyse class and the command line interface."""

import shutil
from pathlib import Path, PurePath
from typing import Optional

from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger


[docs] class Pmxanalyse(BiobbObject): """ | biobb_pmx Pmxanalyse | Wrapper class for the `PMX analyse <https://github.com/deGrootLab/pmx>`_ module. | Analyze the work values from the dgdl.xvg files of the A and B states to calculate the free energy difference between two states. Args: input_a_xvg_zip_path (str): Path the zip file containing the dgdl.xvg files of the A state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_A.zip>`_. Accepted formats: zip (edam:format_3987). input_b_xvg_zip_path (str): Path the zip file containing the dgdl.xvg files of the B state. File type: input. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/data/pmx/xvg_B.zip>`_. Accepted formats: zip (edam:format_3987). output_result_path (str): Path to the TXT results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_result.txt>`_. Accepted formats: txt (edam:format_2330). output_work_plot_path (str): Path to the PNG plot results file. File type: output. `Sample file <https://github.com/bioexcel/biobb_pmx/raw/master/biobb_pmx/test/reference/pmx/ref_plot.png>`_. Accepted formats: png (edam:format_3603). properties (dic): * **method** (*str*) - ("CGI BAR JARZ") Choose one or more estimators to use. Values: CGI (Crooks Gaussian Intersection), BAR (Bennet Acceptance Ratio), JARZ (Jarzynski's estimator). * **temperature** (*float*) - (298.15) [0~1000|0.05] Temperature in Kelvin. * **nboots** (*int*) - (0) [0~1000|1] Number of bootstrap samples to use for the bootstrap estimate of the standard errors. * **nblocks** (*int*) - (1) [0~1000|1] Number of blocks to divide the data into for an estimate of the standard error. * **integ_only** (*bool*) - (False) Whether to do integration only. * **reverseB** (*bool*) - (False) Whether to reverse the work values for the backward (B->A) transformation. * **skip** (*int*) - (1) [0~1000|1] Skip files. * **slice** (*str*) - (None) Subset of trajectories to analyze. Provide list slice, e.g. "10 50" will result in selecting dhdl_files[10:50]. * **rand** (*int*) - (None) [0~1000|1] Take a random subset of trajectories. Default is None (do not take random subset). * **index** (*str*) - (None) Zero-based index of files to analyze (e.g. "0 10 20 50 60"). It keeps the dhdl.xvg files according to their position in the list, sorted according to the filenames. * **prec** (*int*) - (2) [0~100|1] The decimal precision of the screen/file output. * **units** (*str*) - ("kJ") The units of the output. Values: kJ (Kilojoules), kcal (Kilocalories), kT (the product of the Boltzmann constant k and the temperature). * **no_ks** (*bool*) - (False) Whether to do a Kolmogorov-Smirnov test to check whether the Gaussian assumption for CGI holds. * **nbins** (*int*) - (20) [0~1000|1] Number of histograms bins for the plot. * **dpi** (*int*) - (300) [72~2048|1] Resolution of the plot. * **binary_path** (*str*) - ("pmx") Path to the PMX command line interface. * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. * **container_path** (*str*) - (None) Path to the binary executable of your container. * **container_image** (*str*) - ("gromacs/gromacs:latest") Container Image identifier. * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container. * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container. * **container_user_id** (*str*) - (None) User number id to be mapped inside the container. * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell. Examples: This is a use example of how to use the building block from Python:: from biobb_pmx.pmxbiobb.pmxanalyse import pmxanalyse prop = { 'method': 'CGI BAR JARZ', 'temperature': 298.15, 'dpi': 600 } pmxanalyse(input_a_xvg_zip_path='/path/to/myAStateFiles.zip', input_b_xvg_zip_path='/path/to/myBStateFiles.zip', output_result_path='/path/to/newResults.txt', output_work_plot_path='/path/to/newResults.png', properties=prop) Info: * wrapped_software: * name: PMX analyse * version: >=1.0.1 * license: GNU * ontology: * name: EDAM * schema: http://edamontology.org/EDAM.owl """ def __init__( self, input_a_xvg_zip_path: str, input_b_xvg_zip_path: str, output_result_path: str, output_work_plot_path: str, properties: Optional[dict] = None, **kwargs, ) -> None: properties = properties or {} # Call parent class constructor super().__init__(properties) self.locals_var_dict = locals().copy() # Input/Output files self.io_dict = { "in": {}, "out": { "output_result_path": output_result_path, "output_work_plot_path": output_work_plot_path, }, } # Should not be copied inside container self.input_a_xvg_zip_path = input_a_xvg_zip_path self.input_b_xvg_zip_path = input_b_xvg_zip_path # Properties specific for BB self.method = properties.get("method", "CGI BAR JARZ") self.temperature = properties.get("temperature", 298.15) self.nboots = properties.get("nboots", 0) self.nblocks = properties.get("nblocks", 1) self.integ_only = properties.get("integ_only", False) self.reverseB = properties.get("reverseB", False) self.skip = properties.get("skip", 1) self.slice = properties.get("slice", None) self.rand = properties.get("rand", None) self.index = properties.get("index", None) self.prec = properties.get("prec", 2) self.units = properties.get("units", "kJ") self.no_ks = properties.get("no_ks", False) self.nbins = properties.get("nbins", 20) self.dpi = properties.get("dpi", 300) # Properties common in all PMX BB self.binary_path = properties.get("binary_path", "pmx") # Check the properties self.check_properties(properties) self.check_arguments()
[docs] @launchlogger def launch(self) -> int: """Execute the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` pmx.pmxanalyse.Pmxanalyse object.""" # Setup Biobb if self.check_restart(): return 0 self.stage_files() if self.container_path: working_dir = self.container_volume_path if self.container_volume_path else "/data" else: working_dir = self.stage_io_dict.get("unique_dir", "") # Check if executable is exists if not self.container_path: if not Path(self.binary_path).is_file(): if not shutil.which(self.binary_path): raise FileNotFoundError( "Executable %s not found. Check if it is installed in your system and correctly defined in the properties" % self.binary_path ) list_a_dir = fu.create_unique_dir() list_b_dir = fu.create_unique_dir() list_a = list( filter( lambda f: Path(f).exists() and Path(f).stat().st_size > 10, fu.unzip_list(self.input_a_xvg_zip_path, list_a_dir, self.out_log), ) ) list_b = list( filter( lambda f: Path(f).exists() and Path(f).stat().st_size > 10, fu.unzip_list(self.input_b_xvg_zip_path, list_b_dir, self.out_log), ) ) # Copy extra files to sandbox: two directories containing the xvg files list_a_dir_in_sandbox = Path(self.stage_io_dict.get("unique_dir", "")).joinpath( Path(list_a_dir).name ) list_b_dir_in_sandbox = Path(self.stage_io_dict.get("unique_dir", "")).joinpath( Path(list_b_dir).name ) shutil.copytree(list_a_dir, list_a_dir_in_sandbox) shutil.copytree(list_b_dir, list_b_dir_in_sandbox) # Keep the full relative paths returned by unzip_list (including frame*/ subfolders). string_a = " ".join(list_a) string_b = " ".join(list_b) self.cmd = [ "cd", working_dir, ";", self.binary_path, "analyse", "-fA", string_a, "-fB", string_b, "-o", PurePath(self.stage_io_dict["out"]["output_result_path"]).name, "-w", PurePath(self.stage_io_dict["out"]["output_work_plot_path"]).name, ] if self.method: self.cmd.append("-m") self.cmd.append(self.method) if self.temperature: self.cmd.append("-t") self.cmd.append(str(self.temperature)) if self.nboots: self.cmd.append("-b") self.cmd.append(str(self.nboots)) if self.nblocks: self.cmd.append("-n") self.cmd.append(str(self.nblocks)) if self.integ_only: self.cmd.append("--integ_only") if self.reverseB: self.cmd.append("--reverseB") if self.skip: self.cmd.append("--skip") self.cmd.append(str(self.skip)) if self.slice: self.cmd.append("--slice") self.cmd.append(self.slice) if self.rand: self.cmd.append("--rand") if self.index: self.cmd.append("--index") self.cmd.append(self.index) if self.prec: self.cmd.append("--prec") self.cmd.append(str(self.prec)) if self.units: self.cmd.append("--units") self.cmd.append(self.units) if self.no_ks: self.cmd.append("--no_ks") if self.nbins: self.cmd.append("--nbins") self.cmd.append(str(self.nbins)) if self.dpi: self.cmd.append("--dpi") self.cmd.append(str(self.dpi)) # Run Biobb block self.run_biobb() # Copy files to host self.copy_to_host() self.tmp_files.extend([list_a_dir, list_b_dir]) self.remove_tmp_files() self.check_arguments(output_files_created=True, raise_exception=False) return self.return_code
[docs] def pmxanalyse( input_a_xvg_zip_path: str, input_b_xvg_zip_path: str, output_result_path: str, output_work_plot_path: str, properties: Optional[dict] = None, **kwargs, ) -> int: """Create the :class:`Pmxanalyse <pmx.pmxanalyse.Pmxanalyse>` class and execute the :meth:`launch() <pmx.pmxanalyse.Pmxanalyse.launch> method.""" return Pmxanalyse(**dict(locals())).launch()
pmxanalyse.__doc__ = Pmxanalyse.__doc__ main = Pmxanalyse.get_main(pmxanalyse, "Wrapper class for the PMX analyse module.") if __name__ == "__main__": main()