Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/composite #39

Merged
merged 15 commits into from
Jan 16, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
v1.2.1 progress bar
  • Loading branch information
marcmaxson committed Jan 7, 2020
commit 3062fa5baddb0092a885ed51e85beacd0d0d181d
11 changes: 8 additions & 3 deletions methylprep/processing/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import numpy as np
import pandas as pd
from tqdm import tqdm
from ..utils.progress_bar import * # checks environment and imports tqdm appropriately.
from collections import Counter
from pathlib import Path
# App
Expand All @@ -19,8 +19,8 @@
from .preprocess import preprocess_noob
from .raw_dataset import get_raw_datasets

__all__ = ['SampleDataContainer', 'get_manifest', 'run_pipeline', 'consolidate_values_for_sheet']

__all__ = ['SampleDataContainer', 'get_manifest', 'run_pipeline', 'consolidate_values_for_sheet']

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -170,7 +170,7 @@ def run_pipeline(data_dir, array_type=None, export=False, manifest_filepath=None

batch_data_containers = []
export_paths = set() # inform CLI user where to look
for raw_dataset in tqdm(raw_datasets):
for raw_dataset in tqdm(raw_datasets, total=len(raw_datasets), desc="Processing samples"):
data_container = SampleDataContainer(
raw_dataset=raw_dataset,
manifest=manifest,
Expand All @@ -185,6 +185,11 @@ def run_pipeline(data_dir, array_type=None, export=False, manifest_filepath=None
data_container.export(output_path)
export_paths.add(output_path)

print(f"obj container: {pympler.asizeof.basicsize(data_container)}")
print(f"obj batch container: {pympler.asizeof.basicsize(batch_data_containers)}")

print('[finished SampleDataContainer processing]')

if betas:
df = consolidate_values_for_sheet(batch_data_containers, postprocess_func_colname='beta_value')
if not batch_size:
Expand Down
6 changes: 3 additions & 3 deletions methylprep/processing/raw_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
)
from ..files import IdatDataset
from ..utils import inner_join_data

from ..utils.progress_bar import * # checks environment and imports tqdm appropriately.

__all__ = ['RawDataset', 'RawMetaDataset', 'get_raw_datasets', 'get_raw_meta_datasets', 'get_array_type']

Expand Down Expand Up @@ -57,10 +57,10 @@ def get_raw_datasets(sample_sheet, sample_name=None, from_s3=None, meta_only=Fal
elif from_s3 and not meta_only:
parser = RawDataset.from_sample_s3
zip_reader = from_s3
raw_datasets = [parser(zip_reader, sample) for sample in samples]
raw_datasets = tqdm([parser(zip_reader, sample) for sample in samples], total=len(samples), desc='Getting raw datasets')
elif not from_s3 and not meta_only:
parser = RawDataset.from_sample
raw_datasets = [parser(sample) for sample in samples]
raw_datasets = tqdm([parser(sample) for sample in samples], total=len(samples), desc='Getting raw datasets')

if not meta_only:
# ensure all idat files have same number of probes
Expand Down
10 changes: 10 additions & 0 deletions methylprep/utils/progress_bar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
def in_notebook():
    """Return True when running inside a Jupyter notebook kernel, else False.

    Used to pick the appropriate tqdm progress-bar flavor (widget vs. text).

    Note: the previous check, ``hasattr(__builtins__, '__IPYTHON__')``, was
    unreliable — in an imported module ``__builtins__`` is a dict, so the
    attribute test always failed; and ``__IPYTHON__`` is also set by the
    terminal IPython REPL, which should NOT get the notebook widget bar.
    """
    try:
        # get_ipython is injected into builtins by IPython; absent in plain Python.
        shell = get_ipython().__class__.__name__
    except NameError:
        return False  # standard Python interpreter
    # ZMQInteractiveShell == Jupyter notebook / qtconsole kernel;
    # TerminalInteractiveShell == IPython in a terminal (use the text bar).
    return shell == 'ZMQInteractiveShell'

# Select the tqdm flavor once at import time so callers can simply
# `from ..utils.progress_bar import *` and use `tqdm` transparently.
if in_notebook():
    # Jupyter kernel: use the HTML/widget-based progress bar.
    from tqdm import tqdm_notebook as tqdm
else:
    # Plain console (or terminal IPython): use the text progress bar.
    from tqdm import tqdm