Skip to content

Commit

Permalink
Merge pull request equinor#27 from JensGM/bazefetcher-source
Browse files Browse the repository at this point in the history
Bazefetcher source
  • Loading branch information
JensGM authored Oct 5, 2018
2 parents 06ce756 + 77ed9fc commit 7a815f1
Show file tree
Hide file tree
Showing 78 changed files with 186 additions and 0 deletions.
1 change: 1 addition & 0 deletions camille/source/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .bazefetcher import bazefetcher
70 changes: 70 additions & 0 deletions camille/source/bazefetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import re
import datetime
import json
import gzip
import pytz
import pandas as pd


def _fn_start_date(fn):
    """Parse the start date embedded in a bazefetcher file name.

    File names are of the form::

        tag_<start_date>_<end_date>.json.gz

    where both dates are written as ``YYYY-MM-DDTHH.MM.SS+HH.MM``. The tag
    may itself contain underscores, so the start date is located by
    counting fields from the right.

    Returns a timezone-aware ``datetime.datetime``.
    """
    # The last '_'-separated field is the end date (plus the extension);
    # the one before it is the start date.
    start_field = fn.rsplit('_', 2)[-2]
    # strptime's %z does not accept '.' inside the UTC offset, so every '.'
    # is dropped and the compacted form is parsed instead.
    compact = ''.join(start_field.split('.'))
    return datetime.datetime.strptime(compact, '%Y-%m-%dT%H%M%S%z')


def _safe_read(fn, **kwargs):
    """Read a JSON source into a DataFrame, falling back to an empty frame.

    ``fn`` and ``kwargs`` are passed unchanged to ``pandas.read_json``. If
    the read fails with an ordinary error, the failure is logged as a
    warning and an empty ``DataFrame`` is returned so that one bad file does
    not abort a multi-file read. ``KeyboardInterrupt`` and ``SystemExit``
    are deliberately not intercepted.

    TODO: Manually infer pandas read function
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import logging

    try:
        return pd.read_json(fn, **kwargs)
    except Exception as err:
        logging.getLogger(__name__).warning(
            'Could not read %s (%s); treating it as empty', fn, err)
        return pd.DataFrame()


def bazefetcher(root, tzinfo=pytz.utc):
    """Create a reader for bazefetcher data stored under ``root``.

    ``root`` must be a directory containing one sub-directory per tag.
    ``tzinfo`` is the timezone the returned time index is expressed in.

    Returns a function ``(tag, start_date, end_date) -> pandas.Series``.

    Raises ``ValueError`` if ``root`` is not a directory or ``tzinfo`` is
    not a ``datetime.tzinfo`` instance.
    """
    if not os.path.isdir(root):
        raise ValueError('{} is not a directory'.format(root))

    if not isinstance(tzinfo, datetime.tzinfo):
        raise ValueError('tzinfo must be instance of datetime.tzinfo')

    def bazefetcher_internal(tag, start_date, end_date):
        """Return the values of ``tag`` as a Series indexed by time.

        A file is read when the start date in its name lies within
        ``[start_date, end_date]``; rows are not trimmed any further.
        Files are read in chronological order of that start date.

        Raises ``ValueError`` if either date is naive, if ``start_date`` is
        after ``end_date``, if the tag directory does not exist, or if no
        data is found for the requested period.
        """
        if start_date.tzinfo is None or end_date.tzinfo is None:
            raise ValueError('dates must be timezone aware')

        if not start_date <= end_date:
            raise ValueError('start_date must be earlier than end_date')

        tag_root = os.path.join(root, tag)

        if not os.path.isdir(tag_root):
            raise ValueError('Tag {} not found'.format(tag))

        # os.listdir returns entries in arbitrary order; sorting by start
        # date makes the concatenated series chronological and repeatable.
        dated_names = sorted(
            (_fn_start_date(fn), fn) for fn in os.listdir(tag_root))
        files = [
            os.path.join(tag_root, fn) for file_start, fn in dated_names
            if start_date <= file_start <= end_date]

        no_data = 'No data for {} between {} and {}'.format(
            tag, str(start_date), str(end_date))

        if not files:
            raise ValueError(no_data)

        df = pd.concat((_safe_read(fn) for fn in files), sort=True)

        # _safe_read yields empty frames for unreadable files; if none of
        # the files produced the expected columns there is nothing to
        # return, so report that instead of failing on a missing column.
        if 't' not in df.columns or 'v' not in df.columns:
            raise ValueError(no_data)

        df = df.rename(columns={
            't': 'time',
            'v': 'value',
        })

        # 't' is read as milliseconds since the Unix epoch, which is a UTC
        # instant. Mark it as UTC first and only then convert to the
        # requested timezone; localizing directly to ``tzinfo`` would shift
        # every instant for any non-UTC timezone.
        df['time'] = pd.to_datetime(df['time'], unit='ms')
        df = df.set_index('time')
        df.index = df.index.tz_localize('UTC').tz_convert(tzinfo)

        return df.value

    return bazefetcher_internal
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -2,3 +2,4 @@ numpy
pandas
scipy
rainflow
pytz
Empty file added tests/source/__init__.py
Empty file.
41 changes: 41 additions & 0 deletions tests/source/test_bazefetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import datetime
import pytz
import pytest
from camille.source import bazefetcher


# Naive midnight of the first requested day; used to check that dates
# without a timezone are rejected.
start_date_no_tz = datetime.datetime(2018, 4, 9)
# Timezone-aware bounds of the query window used by the tests.
start_date = start_date_no_tz.replace(tzinfo=pytz.utc)
end_date = datetime.datetime(2018, 5, 11, tzinfo=pytz.utc)
# A date after the query window; used for the "no data" case.
future_date = datetime.datetime(2018, 6, 1, tzinfo=pytz.utc)
# Reader bound to the test data root (path is relative to the repository
# root, so the tests are expected to run from there).
bazefield = bazefetcher('tests/test_data/bazefield_data_root')


def test_bazefetcher():
    """Read two tags and check sample count, value range and time bounds."""
    series = bazefield('installation-04-tag-2', start_date, end_date)
    assert len(series) == 576
    within_bounds = (1.100000e+32 < series) & (series < 1.300000e+32)
    assert within_bounds.all()
    first, last = min(series.index), max(series.index)
    assert first >= start_date
    assert last < end_date

    # A tag name with dots, spaces, dashes and underscores must still be
    # resolved to the right files.
    odd_name = bazefield('AAA.... -.- ...0000---__aa', start_date, end_date)
    first, last = min(odd_name.index), max(odd_name.index)
    assert first >= start_date
    assert last < end_date


def test_bad_input():
    """Check that invalid requests raise ValueError with the right message.

    Each message check sits after its ``with`` block: a statement placed
    after the raising call inside the block would never execute.
    """
    # Unknown tag directory.
    with pytest.raises(ValueError) as excinfo:
        bazefield('non-existing-tag', start_date, end_date)
    assert 'Tag non-existing-tag not found' in str(excinfo.value)

    # Known tag, but no file starts inside the requested period.
    with pytest.raises(ValueError) as excinfo:
        bazefield('installation-04-tag-1', end_date, future_date)
    expected = 'No data for {} between {} and {}'.format(
        'installation-04-tag-1', end_date, future_date)
    assert expected in str(excinfo.value)

    # Naive start date.
    with pytest.raises(ValueError) as excinfo:
        bazefield('installation-04-tag-1', start_date_no_tz, end_date)
    assert 'dates must be timezone aware' in str(excinfo.value)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
73 changes: 73 additions & 0 deletions tests/test_data/synthetic_bazefield_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python
"""
Simple program to create synthetic bazefield data
"""

import os
import datetime
import random
import pytz
import pandas as pd
import numpy as np


def fmt_date(d):
    """Format ``d`` the way bazefield file names spell dates.

    The result looks like ``YYYY-MM-DDTHH.MM.SS+HH.MM``: dots separate the
    time fields and also split the UTC offset into hours and minutes.
    ``d`` must be timezone aware; a naive value has an empty ``%z`` and
    fails with ``IndexError``.
    """
    offset = d.strftime('%z')  # e.g. '+0000'
    sign, hours, minutes = offset[0], offset[1:3], offset[3:]
    stamp = d.strftime('%Y-%m-%dT%H.%M.%S')
    return '{}{}{}.{}'.format(stamp, sign, hours, minutes)


def main():
    """Write synthetic bazefield files for a fixed set of tags.

    For every tag a directory ``bazefield_data_root/<tag>`` is created if
    needed, hourly samples are drawn from a normal distribution with the
    tag's ``mean`` and ``sd``, and the samples are written as one gzipped
    JSON file per day. ``bazefield_data_root`` must already exist in the
    current working directory.
    """
    root = 'bazefield_data_root'
    start_date = datetime.datetime(2018, 4, 15, tzinfo=pytz.utc)
    end_date = datetime.datetime(2018, 5, 9, tzinfo=pytz.utc)
    # NOTE: the 'error' entries are not read anywhere in this function.
    tags = {
        'installation-04-tag-1': {
            'mean': 3.14,
            'sd': 1.17,
            'error': 0.5,
        },
        'installation-04-tag-2': {
            'mean': 1.2E32,
            'sd': 2.22,
            'error': 0.05,
        },
        'AAA.... -.- ...0000---__aa': {
            'mean': 0.0,
            'sd': 1.0,
            'error': 0.0,
        },
    }

    assert os.path.isdir(root)

    for tag, params in tags.items():
        tag_root = os.path.join(root, tag)
        if not os.path.exists(tag_root):
            os.makedirs(tag_root)

        # Hourly sample times, and the day boundaries that delimit files.
        samples = pd.date_range(start_date, end_date, freq='1H', tz=pytz.utc)
        days = pd.date_range(start_date, end_date, freq='1D', tz=pytz.utc)

        mean = params['mean']
        sd = params['sd']

        columns = {
            't': samples,
            'q': 192,
            'v': np.random.normal(mean, sd, len(samples)),
        }
        df = pd.DataFrame(data=columns)

        tag_prefix = os.path.join(tag_root, tag)

        # Consecutive day boundaries give the half-open interval
        # [day_start, day_end) covered by each file.
        for day_start, day_end in zip(days, days[1:]):
            fname = f'{tag_prefix}_{fmt_date(day_start)}_{fmt_date(day_end)}.json.gz'
            in_day = (df['t'] >= day_start) & (df['t'] < day_end)
            df[in_day].to_json(fname, compression='gzip')


if __name__ == '__main__':
    main()

0 comments on commit 7a815f1

Please sign in to comment.