Minor code formatting and bug fixes

ibcp · May 20, 2019 · 7a569ef · 7a569ef
1 parent 2be6a84
commit 7a569ef
Show file tree

Hide file tree

Showing 4 changed files with 228 additions and 118 deletions.
diff --git a/pyspectra/__init__.py b/pyspectra/__init__.py
@@ -1,6 +1,8 @@
 from .fileio import *
 from .spectra import *
+from .reshape import *
 
 # TODO: Get rid of it.
 del fileio
-del spectra
+del spectra
+del reshape
diff --git a/pyspectra/fileio.py b/pyspectra/fileio.py
@@ -1,38 +1,43 @@
-import os
+"""TODO: Docstring"""
+
 import warnings
 import numpy as np
 import pandas as pd
-from .spectra import *
+from .spectra import Spectra
+from .reshape import rbind
 
 __all__ = ["read_txt", "read_bwtek", "read_fileset"]
 
+
 def read_txt(path):
-    data = pd.read_csv(path, header=None, names=['wl', 'y'], dtype=np.float64)
+    """Read a header-less CSV text file into a Spectra object.
+
+    The file is parsed with ``pandas.read_csv`` using ``header=None``; the
+    columns are named ``wl`` and ``y`` and are read as ``float64``.
+
+    Parameters
+    ----------
+    path
+        Forwarded unchanged to ``pandas.read_csv``.
+
+    Returns
+    -------
+    Spectra
+        Built as ``Spectra(spc=data.y, wl=data.wl)``.
+    """
+    data = pd.read_csv(path, header=None, names=["wl", "y"], dtype=np.float64)
     return Spectra(spc=data.y, wl=data.wl)
 
 
 def read_bwtek(path, x="Raman Shift", y="Dark Subtracted #1"):
-    with open(path,'r') as fp:
+    """TODO: Docstring"""
+    with open(path, "r") as fp:
         line = fp.readline()
-        cnt=0
-        while line and not line.startswith('Pixel;'):
+        cnt = 0
+        while line and not line.startswith("Pixel;"):
             line = fp.readline()
             cnt += 1
-    if not line.startswith('Pixel;'):
-        raise TypeError('Incorrect BWTek file format.')
+    if not line.startswith("Pixel;"):
+        raise TypeError("Incorrect BWTek file format.")
     # Try with comma as decimal separator
-    na_values = ('',' ','  ','   ', '    ')
+    na_values = ("", " ", "  ", "   ", "    ")
     try:
         data = pd.read_csv(
             path,
             skiprows=cnt,
-            sep=';',
-            decimal=',',
+            sep=";",
+            decimal=",",
             na_values=na_values,
             usecols=[x, y],
-            dtype=np.float64
+            dtype=np.float64,
         )
-    except  Exception as e:
+    except Exception as e:
         warnings.warn(str(e))
         data = None
     # If failed try a dot as a decimal separator
@@ -41,31 +46,36 @@ def read_bwtek(path, x="Raman Shift", y="Dark Subtracted #1"):
             data = pd.read_csv(
                 path,
                 skiprows=cnt,
-                sep=';',
-                decimal='.',
+                sep=";",
+                decimal=".",
                 na_values=na_values,
                 usecols=[x, y],
-                dtype=np.float64
+                dtype=np.float64,
             )
         except Exception as e:
             warnings.warn(str(e))
     # If the data could not be read with either decimal separator
     if data is None:
-        raise TypeError(f'Cound not read bwtek file {path}. It seems to be incorrect file format.')
+        raise TypeError(
+            f"Cound not read bwtek file {path}. It seems to be incorrect file format."
+        )
     # Filter rows where wl is missing
     data = data[data[x].notnull()]
     return Spectra(spc=data[y], wl=data[x])
 
 
-def read_fileset(files, callback=read_txt, join="strict", keep_file_names=True, name_pattern=None):
+def read_fileset(files, callback=read_txt, join="strict", keep_file_names=True):
+    """Read several files and optionally bind the results together.
+
+    Parameters
+    ----------
+    files
+        Sequence of file identifiers. It is iterated once and then indexed
+        by position (``files[i]``), so it must support indexing.
+    callback
+        Called with each element of ``files``; defaults to ``read_txt``.
+        Each result must expose a ``data`` attribute supporting item
+        assignment when ``keep_file_names`` is true.
+    join
+        Passed to ``rbind`` as ``join``. A falsy value skips binding.
+    keep_file_names
+        If true, ``spec.data["filename"]`` is set to the corresponding
+        element of ``files`` on every object that was read.
+
+    Returns
+    -------
+    The value returned by ``rbind`` when binding is requested and succeeds;
+    otherwise the plain list of objects produced by ``callback``.
+
+    Warns
+    -----
+    If ``rbind`` raises, the exception text and a fallback notice are
+    emitted through ``warnings.warn`` and the list is returned instead.
+    """
+    # NOTE(review): this revision removes the ``name_pattern`` parameter that
+    # the previous signature accepted; callers still passing it will fail.
     spectra = [callback(f) for f in files]
     if keep_file_names:
         for i, spec in enumerate(spectra):
-            spec.data['filename'] = files[i] 
+            spec.data["filename"] = files[i]
     if join:
         try:
             spectra = rbind(*spectra, join=join)
         except Exception as e:
+            # Best effort: report the failure and fall back to the list.
             warnings.warn(str(e))
-            warnings.warn('Could not join spectra from files. List of spectra is returned')
+            warnings.warn(
+                "Could not join spectra from files. List of spectra is returned"
+            )
     return spectra
diff --git a/pyspectra/reshape.py b/pyspectra/reshape.py
@@ -0,0 +1,42 @@
+"""TODO: Docstring"""
+
+import numpy as np
+import pandas as pd
+from .spectra import Spectra
+
+__all__ = ["rbind"]
+
+
+def rbind(*objs, join="strict", data_join=None, spc_join=None):
+    """Concatenate several spectra objects into one ``Spectra``.
+
+    The ``spc`` and ``data`` attributes of all inputs are each combined with
+    ``pandas.concat`` (default axis, ``ignore_index`` not set) and the two
+    results are passed to ``Spectra(spc=..., data=...)``.
+
+    Parameters
+    ----------
+    *objs
+        Two or more objects exposing ``wl``, ``spc`` and ``data``.
+        NOTE(review): ``spc`` and ``data`` are presumably pandas DataFrames;
+        confirm against the ``Spectra`` class.
+    join
+        Default strategy, one of ``"strict"``, ``"outer"``, ``"inner"``.
+        It is used for ``data_join`` / ``spc_join`` when those are ``None``.
+    data_join
+        Strategy for the ``data`` parts. ``"strict"`` requires every
+        ``obj.data.columns`` to equal the first object's columns
+        element-wise (``numpy.array_equal``, so order matters).
+    spc_join
+        Strategy for the ``spc`` parts. ``"strict"`` requires every
+        ``obj.wl`` to equal the first object's ``wl`` element-wise.
+
+    Returns
+    -------
+    Spectra
+        No ``wl`` argument is passed to the constructor.
+
+    Raises
+    ------
+    ValueError
+        If a join strategy is not one of the allowed values, if fewer than
+        two objects are given (a single object is rejected as well), or if a
+        strict check fails.
+    """
+    # Fall back to the common strategy where no specific one was given.
+    if data_join is None:
+        data_join = join
+    if spc_join is None:
+        spc_join = join
+
+    allowed_joins = ("strict", "outer", "inner")
+    if (spc_join not in allowed_joins) or (data_join not in allowed_joins):
+        raise ValueError("Incorrect join strategy")
+    if len(objs) <= 1:
+        raise ValueError("No data to bind.")
+
+    if spc_join == "strict":
+        for obj in objs:
+            if not np.array_equal(obj.wl, objs[0].wl):
+                raise ValueError(
+                    "Strict join is not possible: Spectra have different wavelenghts."
+                )
+        # pandas.concat has no "strict" mode; after the check use "outer".
+        spc_join = "outer"
+
+    if data_join == "strict":
+        for obj in objs:
+            if not np.array_equal(obj.data.columns, objs[0].data.columns):
+                raise ValueError(
+                    "Strict join is not possible: Data have different columns."
+                )
+        # Same reasoning as above: the columns were verified to be identical.
+        data_join = "outer"
+
+    return Spectra(
+        spc=pd.concat([obj.spc for obj in objs], join=spc_join),
+        data=pd.concat([obj.data for obj in objs], join=data_join),
+    )