Skip to content

Commit

Permalink
merge: pull request #9 from brsynth/fix-knime-install
Browse files Browse the repository at this point in the history
Specify package version per Knime version
  • Loading branch information
tduigou authored Nov 28, 2022
2 parents e3d41fd + 3fe9fad commit 0d4957d
Show file tree
Hide file tree
Showing 17 changed files with 2,595 additions and 206 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
runs-on: ${{ matrix.os }}-latest
strategy:
matrix:
os: ["ubuntu", "macos", "windows"]
os: ["macos", "ubuntu", "windows"]
defaults:
run:
shell: bash -l {0}
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ __pycache__
package.json
package-lock.json
node_modules
.coverage
.coverage
build
*.egg-info
18 changes: 13 additions & 5 deletions retropath2_wrapper/Args.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,23 @@
from retropath2_wrapper._version import __version__


DEFAULT_TIMEOUT = 60 # minutes
DEFAULT_MSC_TIMEOUT = 10 # minutes
DEFAULT_KNIME_VERSION = '4.5.0'
DEFAULT_RP2_VERSION = 'r20220104'
KNIME_PACKAGE = {
'4.5.0': {
'org.knime.features.chem.types.feature.group': '4.5.0.v202107011901',
'org.knime.features.datageneration.feature.group': '4.5.0.v202107011901',
'org.knime.features.python.feature.group': '4.5.2.v202203041212',
'org.rdkit.knime.feature.feature.group': '4.5.0.v202207051536',
},
}
RETCODES = {
'OK': 0,
'NoError': 0,
# Warnings
'SrcInSink': -1,
'NoSolution': -2,
'TimeLimit': -3,
# Errors
'FileNotFound': 1,
'OSError': 2,
Expand Down Expand Up @@ -96,6 +103,7 @@ def _add_arguments(parser):
parser.add_argument(
'--kver',
type=str,
choices=list(KNIME_PACKAGE.keys()),
default=DEFAULT_KNIME_VERSION,
help='version of KNIME (mandatory if --kexec is passed).',
)
Expand All @@ -120,10 +128,10 @@ def _add_arguments(parser):
parser.add_argument('--dmax' , type=int, default=1000)
parser.add_argument('--mwmax_source' , type=int, default=1000)
parser.add_argument(
'--timeout',
'--msc_timeout',
type=int,
default=DEFAULT_TIMEOUT,
help=f'Defines the time after which the program will stop and return calculated results (default: {DEFAULT_TIMEOUT})'
default=DEFAULT_MSC_TIMEOUT,
help=f'Defines the time after which the RDKit MCS Aggregation method will stop searching for best match (default: {DEFAULT_MSC_TIMEOUT}).'
)
# parser.add_argument('--forward' , action='store_true')

Expand Down
131 changes: 72 additions & 59 deletions retropath2_wrapper/RetroPath2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
@description: Python wrapper to run RetroPath2.0 KNIME workflow
"""
import os
from os import (
mkdir as os_mkdir,
path as os_path,
Expand All @@ -22,9 +23,6 @@
rmtree
)
from sys import platform as sys_platform
from subprocess import (
TimeoutExpired
) # nosec
from brs_utils import (
download_and_extract_tar_gz,
download,
Expand Down Expand Up @@ -54,11 +52,14 @@
from csv import reader
from .Args import (
DEFAULT_KNIME_FOLDER,
DEFAULT_TIMEOUT,
DEFAULT_MSC_TIMEOUT,
DEFAULT_KNIME_VERSION,
DEFAULT_RP2_VERSION,
RETCODES
KNIME_PACKAGE,
RETCODES,
)
from retropath2_wrapper.preference import Preference


here = os_path.dirname(os_path.realpath(__file__))

Expand Down Expand Up @@ -115,6 +116,11 @@ def set_vars(
kpath = kexec[:kexec.rfind('/')]
kinstall = kpath[:kpath.rfind('/')]

workflow = os_path.join(
here, 'workflows', f'RetroPath2.0_{rp2_version}.knwf'
)


# Build a dict to store KNIME vars
return {
'kexec' : kexec,
Expand All @@ -123,11 +129,7 @@ def set_vars(
'kpath' : kpath,
'kinstall' : kinstall,
'kpkg_install' : kpkg_install,
'workflow' : os_path.join(
here,
'workflows',
f'RetroPath2.0_{rp2_version}.knwf'
)
'workflow' : workflow,
}


Expand All @@ -142,7 +144,7 @@ def retropath2(
topx: int = 100,
dmin: int = 0, dmax: int = 100,
mwmax_source: int = 1000,
timeout: int = DEFAULT_TIMEOUT,
msc_timeout: int = DEFAULT_MSC_TIMEOUT,
logger: Logger = getLogger(__name__)
) -> Tuple[str, Dict]:

Expand All @@ -161,7 +163,7 @@ def retropath2(
logger.debug(f'dmin: {dmin}')
logger.debug(f'dmax: {dmax}')
logger.debug(f'mwmax_source: {mwmax_source}')
logger.debug(f'timeout: {timeout}')
logger.debug(f'msc_timeout: {msc_timeout}')

if kvars is None:
# Store KNIME vars into a dictionary
Expand Down Expand Up @@ -231,6 +233,9 @@ def retropath2(

logger.info('{attr1}Initializing{attr2}'.format(attr1=attr('bold'), attr2=attr('reset')))

# Preferences
preference = Preference(rdkit_timeout_minutes=msc_timeout)

with TemporaryDirectory() as tempd:

# Format files for KNIME
Expand All @@ -247,13 +252,13 @@ def retropath2(

# Call KNIME
r_code = call_knime(
kvars,
files,
rp2_params,
timeout,
logger
kvars=kvars,
files=files,
params=rp2_params,
preference=preference,
logger=logger,
)
if r_code == RETCODES['TimeLimit'] or r_code == RETCODES['OSError']:
if r_code == RETCODES['OSError']:
return r_code, files

r_code = check_src_in_sink_2(
Expand All @@ -274,14 +279,14 @@ def check_input(

# Check if InChI is well-formed
inchi = check_inchi_from_file(source_file, logger)
if inchi == '':
if inchi == '' or inchi in RETCODES.values():
return RETCODES['InChI'], None

# Check if source is in sink
r_code = check_src_in_sink_1(inchi, sink_file, logger)
if r_code == -1:
if r_code == RETCODES['SrcInSink']:
return RETCODES['SrcInSink'], None
elif r_code == -2:
elif r_code == RETCODES['FileNotFound']:
return RETCODES['FileNotFound'], None

return RETCODES['OK'], inchi
Expand Down Expand Up @@ -465,7 +470,7 @@ def install_knime(

logger.info(' |--url: '+kurl)
logger.info(' |--install_dir: '+kinstall)


def gunzip_to_csv(filename: str, indir: str) -> str:
"""
Expand All @@ -491,6 +496,11 @@ def gunzip_to_csv(filename: str, indir: str) -> str:
return filename


def standardize_path(path: str) -> str:
if sys_platform == 'win32':
path = "/".join(path.split(os.sep))
return path

def format_files_for_knime(
sinkfile: str, sourcefile: str, rulesfile: str,
indir: str, outdir: str,
Expand Down Expand Up @@ -577,21 +587,18 @@ def install_knime_pkgs(
logger.debug(f' + kpkg_install: {kpkg_install}')
logger.debug(f' + kver: {kver}')

args = \
' -application org.eclipse.equinox.p2.director' \
+ ' -nosplash -consolelog' \
+ ' -r http://update.knime.org/community-contributions/trunk,' \
args = [kexec]
args += ['-application', 'org.eclipse.equinox.p2.director']
args += ['-nosplash']
args += ['-consoleLog']
args += ['-r', 'http://update.knime.org/community-contributions/trunk,' \
+ 'http://update.knime.com/community-contributions/trusted/'+kver[:3]+',' \
+ 'http://update.knime.com/analytics-platform/'+kver[:3] \
+ ' -i org.knime.features.chem.types.feature.group,' \
+ 'org.knime.features.datageneration.feature.group,' \
+ 'org.knime.features.python.feature.group,' \
+ 'org.rdkit.knime.feature.feature.group' \
+ ' -bundlepool ' + kpkg_install + ' -d ' + kpkg_install
+ 'http://update.knime.com/analytics-platform/'+kver[:3]]
args += ['-i', ','.join([x + '/' + y for x, y in KNIME_PACKAGE[kver].items()])]
args += ['-bundlepool', kpkg_install]
args += ['-d', kpkg_install]

cmd = f'{kexec} {args}'

returncode = subprocess_call(cmd, logger=logger)
returncode = subprocess_call(" ".join(args), logger=logger)
StreamHandler.terminator = "\n"
logger.info(' OK')
return returncode
Expand All @@ -600,7 +607,7 @@ def call_knime(
kvars: Dict,
files: Dict,
params: Dict,
timeout: int,
preference: Preference,
logger: Logger = getLogger(__name__)
) -> int:
"""
Expand All @@ -614,8 +621,8 @@ def call_knime(
Paths of sink, source, rules files.
params: Dict
Parameters of the workflow to process.
timeout: int
Time after which the run returns.
preference: Preference
A preference object.
logger : Logger
The logger object.
Expand All @@ -628,21 +635,32 @@ def call_knime(
StreamHandler.terminator = ""
logger.info('{attr1}Running KNIME...{attr2}'.format(attr1=attr('bold'), attr2=attr('reset')))

args = ' -nosplash -nosave -reset --launcher.suppressErrors -application org.knime.product.KNIME_BATCH_APPLICATION ' \
+ ' -workflowFile=' + kvars['workflow'] \
+ ' -workflow.variable=input.dmin,"' + str(params['dmin']) + '",int' \
+ ' -workflow.variable=input.dmax,"' + str(params['dmax']) + '",int' \
+ ' -workflow.variable=input.max-steps,"' + str(params['max_steps']) + '",int' \
+ ' -workflow.variable=input.sourcefile,"' + files['source'] + '",String' \
+ ' -workflow.variable=input.sinkfile,"' + files['sink'] + '",String' \
+ ' -workflow.variable=input.rulesfile,"' + files['rules'] + '",String' \
+ ' -workflow.variable=input.topx,"' + str(params['topx']) + '",int' \
+ ' -workflow.variable=input.mwmax-source,"' + str(params['mwmax_source']) + '",int' \
+ ' -workflow.variable=output.dir,"' + files['outdir'] + '",String' \
+ ' -workflow.variable=output.solutionfile,"' + files['results'] + '",String' \
+ ' -workflow.variable=output.sourceinsinkfile,"' + files['src-in-sk'] + '",String'

logger.debug(kvars['kexec'] + ' ' + args)
args = [kvars["kexec"]]
args += ["-nosplash"]
args += ["-nosave"]
args += ["-reset"]
args += ["-consoleLog"]
args += ["--launcher.suppressErrors"]
args += ["-application", "org.knime.product.KNIME_BATCH_APPLICATION"]
args += ["-workflowFile=%s" % (standardize_path(path=kvars['workflow']),)]

args += ['-workflow.variable=input.dmin,"%s",int' % (params['dmin'],)]
args += ['-workflow.variable=input.dmax,"%s",int' % (params['dmax'],)]
args += ['-workflow.variable=input.max-steps,"%s",int' % (params['max_steps'],)]
args += ['-workflow.variable=input.topx,"%s",int' % (params['topx'],)]
args += ['-workflow.variable=input.mwmax-source,"%s",int' % (params['mwmax_source'],)]

args += ['-workflow.variable=input.sourcefile,"%s",String' % (standardize_path(files['source']),)]
args += ['-workflow.variable=input.sinkfile,"%s",String' % (standardize_path(files['sink']),)]
args += ['-workflow.variable=input.rulesfile,"%s",String' % (standardize_path(files['rules']),)]
args += ['-workflow.variable=output.dir,"%s",String' % (standardize_path(files['outdir']),)]
args += ['-workflow.variable=output.solutionfile,"%s",String' % (standardize_path(files['results']),)]
args += ['-workflow.variable=output.sourceinsinkfile,"%s",String' % (standardize_path(files['src-in-sk']),)]
if preference and preference.is_init():
preference.to_file()
args += ["-preferences=" + standardize_path(preference.path)]

logger.debug(" ".join(args))

try:
printout = open(devnull, 'wb') if logger.level > 10 else None
Expand All @@ -654,19 +672,14 @@ def call_knime(
os_environ['CONDA_PREFIX'],
"lib"
)
returncode = subprocess_call(cmd=kvars['kexec'] + args, logger=logger)
returncode = subprocess_call(cmd=" ".join(args), logger=logger)
os_environ['LD_LIBRARY_PATH'] = ':'.join(
os_environ['LD_LIBRARY_PATH'].split(':')[:-1]
)
StreamHandler.terminator = "\n"
logger.info(' {bold}OK{reset}'.format(bold=attr('bold'), reset=attr('reset')))
return returncode

except TimeoutExpired as e:
logger.warning(' |- Time limit ({timeout} min) is reached'.format(timeout=timeout))
logger.warning(' Results collected until now are available')
return RETCODES['TimeLimit']

except OSError as e:
logger.error(e)
return RETCODES['OSError']
6 changes: 2 additions & 4 deletions retropath2_wrapper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,13 @@ def _cli():
dmax=args.dmax,
mwmax_source=args.mwmax_source,
rp2_version=args.rp2_version,
timeout=args.timeout,
msc_timeout=args.msc_timeout,
logger=logger
)

logger.info('')

if r_code == RETCODES['OK'] or r_code == RETCODES['TimeLimit']:
if r_code == RETCODES['TimeLimit']:
logger.warning('Time limit is reached.')
if r_code == RETCODES['OK']:
logger.info('{attr1}Results{attr2}'.format(attr1=attr('bold'), attr2=attr('reset')))
logger.info(' |- Checking... ')
r_code = check_results(result_files, logger)
Expand Down
26 changes: 26 additions & 0 deletions retropath2_wrapper/preference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import datetime
import tempfile


class Preference(object):
def __init__(self, *args, **kwargs) -> None:
self.path = kwargs.get("path", tempfile.NamedTemporaryFile(suffix=".epf").name)
self.rdkit_timeout_minutes = kwargs.get("rdkit_timeout_minutes")

def is_init(self) -> bool:
if self.rdkit_timeout_minutes:
return True
return False

def to_file(self) -> None:
now = datetime.datetime.now(datetime.timezone.utc)
with open(self.path, "w") as fod:
fod.write("#")
fod.write(now.strftime("%a %b %d %H:%M:%S %Z %Y"))
fod.write("\n")
fod.write("\\!/=")
fod.write("\n")
if self.rdkit_timeout_minutes:
fod.write("/instance/org.rdkit.knime.nodes/mcsAggregation.timeout=")
fod.write(str(int(self.rdkit_timeout_minutes) * 60))
fod.write("\n")
Loading

0 comments on commit 0d4957d

Please sign in to comment.