forked from gremau/NMEG_FluxProc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUNM_parse_mpi_eddyproc_output.m
142 lines (126 loc) · 5.18 KB
/
UNM_parse_mpi_eddyproc_output.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
function [ varargout ] = ...
    UNM_parse_mpi_eddyproc_output( sitecode, year , varargin )
% UNM_PARSE_MPI_EDDYPROC_OUTPUT - parse the output of MPI Jena's online
% gapfilling/partitioning tool into Matlab tables.
%
% Two generations of the tool's output are supported, selected by the
% required third argument:
%
%   'mpi_current'  one combined Reichstein/Lasslop file is read:
%                      DataSetafterFluxpartMRGL_<year>.txt
%   'mpi_old'      two separate files are read:
%                      DataSetafterFluxpartGL2010_<year>.txt
%                      DataSetafterFluxpart_<year>.txt
%
% All files are looked up in the 'processed_flux' subdirectory of
% get_site_directory( sitecode ).
%
% USAGE
%   pt_in_MRGL = ...
%       UNM_parse_mpi_eddyproc_output( sitecode, year, 'mpi_current' )
%   [ pt_in_GL, pt_in_MR ] = ...
%       UNM_parse_mpi_eddyproc_output( sitecode, year, 'mpi_old' )
%
% INPUTS
%   sitecode: site identifier, passed unchanged to get_site_directory
%   year: numeric year; used to build the file names and, for
%       'mpi_current', to compute the timestamp column
%   mpi_vers (varargin{1}): 'mpi_current' or 'mpi_old' (case-insensitive)
%
% OUTPUTS
%   'mpi_current':
%       pt_in_MRGL: Matlab table containing the combined partitioned data
%   'mpi_old' (note the order: GL first, MR second):
%       pt_in_GL: Matlab table containing data from
%           DataSetafterFluxpartGL2010_<year>.txt (partitioning algorithm
%           of Gita Lasslop 2010)
%       pt_in_MR: Matlab table containing data from
%           DataSetafterFluxpart_<year>.txt (partitioning algorithm of
%           Markus Reichstein 2005)
%
% ERRORS
%   UNM_parse_mpi_eddyproc_output:missingVersion   no third argument given
%   UNM_parse_mpi_eddyproc_output:unknownVersion   unrecognized mpi_vers
%   UNM_parse_mpi_eddyproc_output:parseFailed      a file could not be
%       opened or parsed; the underlying error is attached as a cause
%
% author: Gregory E. Maurer, UNM, April 2015
% Modified from: UNM_parse_gapfilled_partitioned_output_TWH and
% parse_jena_output by Timothy Hilton.

    % The version selector has no safe default, so fail with a clear
    % message instead of an index-out-of-bounds error on varargin{1}.
    if isempty( varargin )
        error( 'UNM_parse_mpi_eddyproc_output:missingVersion', ...
               [ 'a third argument is required: ', ...
                 '''mpi_current'' or ''mpi_old''' ] );
    end
    mpi_vers = varargin{1};

    if strcmpi( mpi_vers, 'mpi_current' )
        % ------------------------------------------
        % Parse the combined Reichstein/Lasslop partitioned file
        varargout{1} = read_partitioned_file( ...
            sitecode, year, mpi_vers, 'DataSetafterFluxpartMRGL_%d.txt' );

    elseif strcmpi( mpi_vers, 'mpi_old' )
        % ---------------------------------------
        % Parse the Lasslop 2010 partitioned file (GL == Gita Lasslop)
        varargout{1} = read_partitioned_file( ...
            sitecode, year, mpi_vers, 'DataSetafterFluxpartGL2010_%d.txt' );
        % ------------------------------------------
        % Parse the Reichstein 2005 partitioned file (MR == Markus Reichstein)
        varargout{2} = read_partitioned_file( ...
            sitecode, year, mpi_vers, 'DataSetafterFluxpart_%d.txt' );

    else
        % Without this branch varargout would be left unassigned and the
        % caller would see an unrelated "output not assigned" error.
        error( 'UNM_parse_mpi_eddyproc_output:unknownVersion', ...
               [ 'unrecognized mpi_vers; expected ', ...
                 '''mpi_current'' or ''mpi_old''' ] );
    end
end

function tbl = read_partitioned_file( sitecode, year, mpi_vers, name_fmt )
% READ_PARTITIONED_FILE - locate one eddyproc output file for the site and
% year, report progress on the console, and parse it into a table.
%
% name_fmt is a sprintf format containing one %d that receives the year.

    fname = fullfile( get_site_directory( sitecode ), ...
                      'processed_flux', ...
                      sprintf( name_fmt, year ) );
    [ ~, fname_short, ext ] = fileparts( fname );
    fprintf( 'reading %s%s... ', fname_short, ext );

    try
        tbl = parse_eddyproc_output( fname , year , mpi_vers );
    catch err
        % Keep the original failure visible: it is appended to the message
        % and attached as a cause rather than being thrown away.
        wrapped = MException( 'UNM_parse_mpi_eddyproc_output:parseFailed', ...
                              'error parsing %s: %s', fname, err.message );
        wrapped = addCause( wrapped, err );
        throw( wrapped );
    end

    fprintf( 'done\n' );
end

function out = parse_eddyproc_output( fname , varargin )
% PARSE_EDDYPROC_OUTPUT - parses an output file from the MPI online
% gapfilling/partitioning tool (used to be parse_jena_output).
%
% The first line of the file supplies the column names (split on
% whitespace, so leading whitespace is harmless); the second line (units)
% is read and discarded; the remaining lines are read as numeric columns.
%
% The -9999 missing-value marker is handed to replace_badvals (project
% helper; per the original notes it substitutes NaN for those values).
%
% USAGE
%   out = parse_eddyproc_output( fname, year, mpi_vers );
%
% INPUTS
%   fname: string; full path to the data file to be parsed
%   year (varargin{1}): numeric year; only used for 'mpi_current'
%   mpi_vers (varargin{2}): 'mpi_old' or 'mpi_current'; selects how the
%       timestamp column is built
% OUTPUTS
%   out: matlab table array; the data in the file plus a 'timestamp'
%       column of Matlab datenums
%
% SEE ALSO
%   table, replace_badvals

    year = varargin{1};
    mpi_vers = varargin{2};

    fid = fopen( fname, 'r' );
    if fid < 0
        % fopen signals failure with -1 rather than raising an error
        error( 'UNM_parse_mpi_eddyproc_output:fileOpenFailed', ...
               'could not open %s', fname );
    end
    % Close the file however this function exits (normal return or error)
    close_file = onCleanup( @() fclose( fid ) );

    line1 = fgetl( fid );
    if ~ischar( line1 )
        % fgetl returns -1 at end-of-file, i.e. the file has no header
        error( 'UNM_parse_mpi_eddyproc_output:emptyFile', ...
               '%s is empty', fname );
    end

    % split the header line on runs of whitespace and drop empty tokens
    vars = regexp( line1, '\s+', 'split' );
    vars = vars( ~cellfun( @isempty, vars ) );
    vars = genvarname( vars );  %make sure vars are valid matlab names
    nvars = numel( vars );

    % throw out the second header line (units)
    fgetl( fid );

    % Read data from the file: one numeric column per header name
    fmt = repmat( '%f', 1, nvars );
    arr = cell2mat( textscan( fid, fmt, 'CollectOutput', true ) );

    % Create table
    out = array2table( arr, 'VariableNames', vars );

    % Replace -9999 with matlab NaNs
    % (1e-6 is passed through to replace_badvals; presumably the matching
    % tolerance -- confirm against that helper)
    out = replace_badvals( out, [-9999], 1e-6 );

    % Add a timestamp. Switch based on MPI eddyproc version
    if strcmpi( mpi_vers, 'mpi_old' )
        % old format carries explicit calendar columns
        seconds = zeros( size( out.Year ) );
        out.timestamp = datenum( out.Year, ...
                                 out.Month, ...
                                 out.Day, ...
                                 out.Hour, ...
                                 out.Minute, ...
                                 seconds );
    elseif strcmpi( mpi_vers, 'mpi_current' )
        % current format carries day-of-year and hour only; no minute
        % column is used, so Hour is assumed to include the fractional
        % part -- TODO confirm against the tool's output.
        % datenum( [ year, 1, 0 ] ) is the day before January 1 of year.
        jday = out.DoY + out.Hour ./ 24;
        out.timestamp = jday + datenum( [ year, 1, 0 ] );
    end
end