-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#78 Sieve with AttrValueRecordSieve implemented/tested
- Loading branch information
Showing
13 changed files
with
995 additions
and
622 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Trivial example Sieve filter. | ||
# The input data is in input/cities.csv. | ||
# We let through (sieve) only the records whose city attr value | ||
# is "amsterdam" or "otterlo"; all other records are dropped. | ||
|
||
[etl] | ||
chains = input_csv|attr_value_sieve|output_std, | ||
input_csv|attr_value_sieve|output_file | ||
|
||
[input_csv] | ||
class = inputs.fileinput.CsvFileInput | ||
file_path = input/cities.csv | ||
output_format = record_array | ||
|
||
[attr_value_sieve] | ||
class = filters.sieve.AttrValueRecordSieve | ||
input_format = record_array | ||
output_format = record_array | ||
attr_name = city | ||
attr_values = amsterdam,otterlo | ||
|
||
[output_std] | ||
class = outputs.standardoutput.StandardOutput | ||
|
||
[output_file] | ||
class = outputs.fileoutput.FileOutput | ||
file_path = output/cities_sieved.txt | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/sh | ||
# | ||
# ETL example: sieve records from a CSV file by attribute value. | ||
# | ||
# Shortcut to call Stetl main.py with etl config. | ||
# | ||
stetl -c etl.cfg | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
city,lat,lon | ||
amsterdam,52.4,4.9 | ||
otterlo,52.101,5.773 | ||
rotterdam,51.9,4.5 | ||
eindhoven,51.44,5.47 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[{'lat': '52.4', 'city': 'amsterdam', 'lon': '4.9'}, {'lat': '52.101', 'city': 'otterlo', 'lon': '5.773'}] |
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Lets data Packets pass-through, "sieve", based on criteria in their data. | ||
# See issue: https://github.com/geopython/stetl/issues/78 | ||
# | ||
# A concrete example is AttrValueRecordSieve which sieves records matching | ||
# specific attribute values. One can also think of Sieves based on XPath expressions | ||
# (e.g. for XML, GML), or geospatial, based on for example WFS-like filters like bounding boxes. | ||
# | ||
# Author: Just van den Broecke | ||
# | ||
from stetl.component import Config | ||
from stetl.filter import Filter | ||
from stetl.util import Util | ||
from stetl.packet import FORMAT | ||
|
||
log = Util.get_log('Sieve') | ||
|
||
|
||
class Sieve(Filter):
    """
    Base class for Sieves: Filters that let Packets pass through ("sieve")
    depending on criteria found in the Packet data.
    """

    def __init__(self, configdict, section, consumes, produces):
        Filter.__init__(self, configdict, section, consumes, produces)

    def invoke(self, packet):
        # Only Packets that carry data are sieved; a Packet without data
        # is handed on untouched.
        if packet.data is not None:
            return self.sieve(packet)
        return packet

    def sieve(self, packet):
        """
        Hook for subclasses; this default returns the Packet unchanged.

        :param packet: the Packet to sieve
        :return: the (possibly modified) Packet
        """
        return packet
|
||
|
||
class AttrValueRecordSieve(Sieve):
    """
    Sieves by attr/value(s) in Record Packets.

    Records that have the attribute ``attr_name`` with a value listed in
    ``attr_values`` are passed through; all other records are removed
    from the Packet data.
    """

    @Config(ptype=str, required=True)
    def attr_name(self):
        """
        Name of attribute whose value(s) are to be sieved.
        """
        pass

    @Config(ptype=list, default=list(), required=False)
    def attr_values(self):
        """
        Value(s) for attribute to be sieved. If empty any value is passed through (existence
        of attr_name is criterion).
        """
        pass

    def __init__(self, configdict, section):
        Sieve.__init__(self, configdict, section,
                       consumes=[FORMAT.record_array, FORMAT.record],
                       produces=[FORMAT.record_array, FORMAT.record])

    def sieve(self, packet):
        """
        Filter out records that are not matching designated attr value(s).

        For list data, ``packet.data`` becomes a new list holding only the
        matching records (possibly empty). For a single record (dict),
        ``packet.data`` is kept when it matches and set to None otherwise.
        Data of any other type results in ``packet.data`` being None.

        :param packet: Packet whose data is a list of records or a single record
        :return: the same Packet with its data replaced by the sieved result
        """

        # Start with empty result: fill with matching records
        record_data = packet.data
        packet.data = None

        # Data can be list or single record. isinstance() is used rather than
        # an exact type comparison so that subclasses (e.g. an OrderedDict
        # record) are sieved as well instead of being silently dropped.
        if isinstance(record_data, list):
            packet.data = [record for record in record_data if self.matches_attr(record)]
        elif isinstance(record_data, dict):
            if self.matches_attr(record_data):
                packet.data = record_data

        return packet

    def matches_attr(self, record):
        """
        Tell whether a single record satisfies the sieve criteria.

        :param record: mapping of attribute names to values
        :return: True if the record has attr_name and, when attr_values is
                 non-empty, its value is one of attr_values; False otherwise
        """
        # Attr not even in record: no use going on
        if self.attr_name not in record:
            return False

        # Match if no value
        if not self.attr_values:
            return True

        return record[self.attr_name] in self.attr_values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = "1.2-dev" | ||
__version__ = "1.2-rc3" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
city,lat,lon | ||
amsterdam,52.4,4.9 | ||
otterlo,52.101,5.773 | ||
rotterdam,51.9,4.5 | ||
eindhoven,51.44,5.47 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Config file for unit testing Sieve Filter. | ||
|
||
[etl] | ||
chains = input_csv_file|attr_value_sieve|packet_buffer|output_std | ||
|
||
[input_csv_file] | ||
class = inputs.fileinput.CsvFileInput | ||
file_path = tests/data/cities.csv | ||
|
||
[attr_value_sieve] | ||
class = filters.sieve.AttrValueRecordSieve | ||
input_format = record_array | ||
output_format = record_array | ||
attr_name = city | ||
attr_values = amsterdam,otterlo | ||
|
||
[packet_buffer] | ||
class = filters.packetbuffer.PacketBuffer | ||
|
||
[output_std] | ||
class = outputs.standardoutput.StandardOutput |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import os | ||
|
||
from stetl.etl import ETL | ||
from stetl.filters.packetbuffer import PacketBuffer | ||
from stetl.filters.sieve import AttrValueRecordSieve | ||
from tests.stetl_test_case import StetlTestCase | ||
|
||
class SieveTest(StetlTestCase):
    """Unit tests for Sieve"""

    def setUp(self):
        super(SieveTest, self).setUp()

        # Initialize Stetl with the config file located relative to this test module
        test_dir = os.path.dirname(os.path.realpath(__file__))
        config_path = os.path.join(test_dir, 'configs/sieve.cfg')
        self.etl = ETL({'config_file': config_path})

    def test_class(self):
        # The component at chain index 1 must be configured as the sieve class
        chain = StetlTestCase.get_chain(self.etl)
        sieve_section = StetlTestCase.get_section(chain, 1)
        configured_class = self.etl.configdict.get(sieve_section, 'class')

        self.assertEqual('filters.sieve.AttrValueRecordSieve', configured_class)

    def test_instance(self):
        chain = StetlTestCase.get_chain(self.etl)
        sieve_component = chain.get_by_index(1)

        self.assertTrue(isinstance(sieve_component, AttrValueRecordSieve))

    def test_execute(self):
        chain = StetlTestCase.get_chain(self.etl)
        chain.run()

        # The PacketBuffer in the chain captures the Packets that passed the sieve
        captured_packets = chain.get_by_class(PacketBuffer).packet_list
        sieved_records = captured_packets[0].data

        # Two city records passed through the sieve
        self.assertEqual(len(sieved_records), 2)
        self.assertEqual(str(sieved_records[0]['city']), "amsterdam")
        self.assertEqual(str(sieved_records[1]['city']), "otterlo")