Skip to content

Commit

Permalink
mdfreader: improved unicode handling for several export methods
Browse files Browse the repository at this point in the history
  • Loading branch information
aymeric.rateau@gmail.com committed Feb 27, 2015
1 parent 01f2e80 commit 8f4c08d
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 34 deletions.
2 changes: 1 addition & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ In the case of big files and lack of memory, you can optionally:

Warning:
========
MDF 4.x specification is much complex compared to 3.x and file reading functionality is not yet fully complete (Working on composition of channels in structure or Channel Arrays), but you should be able to import simple files.
MDF 4.x specification is much more complex compared to 3.x and its implementation is young. Even though I tested it with around 150 different files, the chances of encountering a bug are higher with version 4.x compared to 3.x

For great data visualization, a dataPlugin for Veusz (from 1.16, http://home.gna.org/veusz/) also exists; please follow the instructions from the Veusz documentation and the plugin file's header.

Expand Down
83 changes: 52 additions & 31 deletions mdfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,10 +598,20 @@ def exportToCSV( self, filename = None, sampling = 0.1 ):
f = open( filename, "wt" , encoding='latin-1')
writer = csv.writer( f, dialect = csv.excel )
# writes header
writer.writerow( [name for name in list(self.keys()) if self.getChannelData(name).dtype in ('float64','float32')] ) # writes channel names
writer.writerow( [(self.getChannelUnit(name)) for name in list(self.keys()) if self.getChannelData(name).dtype in ('float64','float32')] ) # writes units
writer.writerow( [name for name in list(self.keys()) if self.getChannelData(name).dtype.kind not in ('S','U')] ) # writes channel names
if PythonVersion<3:
units=[]
for name in list(self.keys()):
if self.getChannelData(name).dtype.kind not in ('S','U'):
if self.getChannelUnit(name) is bytes:
units.append(self.getChannelUnit(name).encode('unicode', 'ignore'))
else:
units.append(self.getChannelUnit(name))
writer.writerow( units ) # writes units
else:
writer.writerow([self.getChannelUnit(name) for name in list(self.keys()) if self.getChannelData(name).dtype.kind not in ('S','U')]) # writes units
# concatenate all channels
buf = vstack( [self.getChannelData(name).transpose() for name in list(self.keys()) if self.getChannelData(name).dtype in ('float64','float32')] )
buf = vstack( [self.getChannelData(name).transpose() for name in list(self.keys()) if self.getChannelData(name).dtype.kind not in ('S','U')] )
buf = buf.transpose()
# Write all rows
r, c = buf.shape
Expand Down Expand Up @@ -637,7 +647,12 @@ def cleanName( name ):
return buf

def setAttribute(f, name, value):
    """Set a netCDF attribute on *f*, skipping empty/None values.

    netCDF does not allow empty strings as attribute values, so None
    and zero-length values are silently ignored. Dict-like metadata
    (e.g. {'name': ...}) is reduced to its 'name' entry, bytes are
    decoded to str under Python 3, and the result is sanitised with
    cleanName() before being attached.
    """
    if value is not None and len(value) > 0:  # netcdf does not allow empty strings...
        # BUG FIX: original used `value is dict` / `value is bytes`,
        # which compares against the *type object* and is always False;
        # isinstance() is the correct check.
        if isinstance(value, dict) and 'name' in value:
            value = value['name']
        if PythonVersion >= 3 and isinstance(value, bytes):
            # BUG FIX: bytes has no .encode() in Python 3 — decode to str instead.
            value = value.decode('utf-8', 'ignore')
        value = cleanName(value)
        setattr(f, name, value)
    else:
        pass
Expand All @@ -647,18 +662,13 @@ def setAttribute(f, name, value):
filename = splitext(self.fileName)[0]
filename = filename + '.nc'
f = netcdf.netcdf_file( filename, 'w' )
setAttribute( f, 'Date', (self.date))
setAttribute( f, 'Time', (self.time))
if self.author is not None:
setAttribute(f, 'Author', self.author)
if self.organisation is not None:
setAttribute( f, 'Organization', (self.organisation))
if self.project is not None:
setAttribute( f, 'ProjectName', (self.project))
if self.subject is not None:
setAttribute( f, 'Subject', (self.subject))
if self.comment is not None:
setAttribute( f, 'Comment', (self.comment))
setAttribute( f, 'Date', self.date)
setAttribute( f, 'Time', self.time)
setAttribute(f, 'Author', self.author)
setAttribute( f, 'Organization', self.organisation)
setAttribute( f, 'ProjectName', self.project)
setAttribute( f, 'Subject', self.subject)
setAttribute( f, 'Comment', self.comment)
# Create dimensions having name of all time channels
for time in list(self.masterChannelList.keys()):
f.createDimension( time, len( self.getChannelData(time) ) )
Expand Down Expand Up @@ -686,8 +696,8 @@ def setAttribute(f, name, value):
var[name] = f.createVariable( CleanedName, type, ( self[name]['master'], ) )
# Create attributes
setAttribute( var[name], 'title', CleanedName )
setAttribute( var[name], 'units', cleanName(self.getChannelUnit(name)))
setAttribute( var[name], 'Description', cleanName(self[name]['description']))
setAttribute( var[name], 'units', self.getChannelUnit(name))
setAttribute( var[name], 'Description', self[name]['description'])
if name in list(self.masterChannelList.keys()):
setAttribute( var[name], 'Type', 'Master Channel' )
setAttribute( var[name], 'datatype', 'master' )
Expand Down Expand Up @@ -725,20 +735,31 @@ def exportToHDF5( self, filename = None, sampling = None ):
except:
print( 'h5py not found' )
raise
def setAttribute(obj, name, value):
    """Attach *value* as HDF5 attribute *name* on *obj*, best effort.

    None and empty values are skipped (HDF5/h5py rejects them or they
    carry no information). Dict-like metadata (e.g. {'name': ...}) is
    reduced to its 'name' entry. Any failure to write the attribute
    (e.g. unencodable unicode) is deliberately swallowed so that export
    continues.
    """
    if value is not None and len(value) > 0:
        try:
            # BUG FIX: original used `value is dict`, which compares
            # against the type object and is always False; isinstance()
            # is the correct check.
            if isinstance(value, dict) and 'name' in value:
                value = value['name']
            obj.attrs[name] = value
        except Exception:
            # best-effort: skip attributes h5py cannot store
            pass
    else:
        pass
if sampling != None:
self.resample( sampling )
if filename == None:
filename = splitext(self.fileName)[0]
filename = filename + '.hdf'
f = h5py.File( filename, 'w' ) # create hdf5 file
filegroup=f.create_group(os.path.basename(filename)) # create group in root associated to file
filegroup.attrs['Author']=self.author
filegroup.attrs['Date']=self.date
filegroup.attrs['Time']= self.time
filegroup.attrs['Organization']=self.organisation
filegroup.attrs['ProjectName']=self.project
filegroup.attrs['Subject']=self.subject
filegroup.attrs['Comment']=self.comment
filegroup = f.create_group(os.path.basename(filename)) # create group in root associated to file
setAttribute(filegroup, 'Author', self.author)
setAttribute(filegroup, 'Date', self.date)
setAttribute(filegroup, 'Time', self.time)
setAttribute(filegroup, 'Time', self.time)
setAttribute(filegroup, 'Organization', self.organisation)
setAttribute(filegroup, 'ProjectName', self.project)
setAttribute(filegroup, 'Subject', self.subject)
setAttribute(filegroup, 'Comment', self.comment)
if len( list(self.masterChannelList.keys()) ) > 1:
# if several time groups of channels, not resampled
groups = {}
Expand All @@ -751,14 +772,14 @@ def exportToHDF5( self, filename = None, sampling = None ):
groups[self[channel]['master'] ] = ngroups
grp[ngroups] = filegroup.create_group( self[channel]['master'] )
dset = grp[groups[self[channel]['master'] ]].create_dataset( channel, data = self.getChannelData(channel) )
dset.attrs[ 'unit']=self.getChannelUnit(channel)
dset.attrs['description']=self[channel]['description']
setAttribute(dset, 'unit', self.getChannelUnit(channel))
setAttribute(dset, 'description', self[channel]['description'])
else: # resampled or only one time for all channels : no groups
for channel in list(self.keys()):
channelName=convertMatlabName(channel)
dset = filegroup.create_dataset( channelName, data = self.getChannelData(channel) )
dset.attrs[ 'unit']=self.getChannelUnit(channel)
dset.attrs['description']=self[channel]['description']
setAttribute(dset, 'unit', self.getChannelUnit(channel))
setAttribute(dset, 'description', self[channel]['description'])
f.close()

def exportToMatlab( self, filename = None ):
Expand Down Expand Up @@ -1062,7 +1083,7 @@ def convertToPandas(self, sampling=None):
self[group+'_group'].pop(group) # delete time channel, no need anymore
# clean rest of self from data and time channel information
[self[channel].pop('data') for channel in originalKeys]
[self[channel].pop('master') for channel in originalKeys]
[self[channel].pop('master') for channel in originalKeys if 'master' in self[channel]]
self.masterGroups=[] # save time groups name in list
[self.masterGroups.append(group+'_group') for group in list(self.masterChannelList.keys())]
self.masterChannelList={}
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/development.html#single-sourcing-the-version
version='0.0.3',
version='0.0.5',

description='A Measured Data Format file parser',
long_description=long_description,
Expand All @@ -35,7 +35,7 @@
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
'Development Status :: 3 - Alpha',
'Development Status :: 4 - Beta',

# Indicate who your project is intended for
'Intended Audience :: Science/Research',
Expand Down

0 comments on commit 8f4c08d

Please sign in to comment.