Skip to content

Commit

Permalink
Added in FilteredIterable (#231)
Browse files Browse the repository at this point in the history
* Added in FilteredIterable

* Adds in a new FilteredIterable that keeps a Set of elements to filter from iterations. Useful for when Iterable will be iterated over multiple times, but some elements should be skipped for efficiency.

* Checkstyle issues addressed
  • Loading branch information
adahn6 authored and matthieun committed Oct 1, 2018
1 parent ecdfe47 commit 6809924
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package org.openstreetmap.atlas.utilities.collections;

import java.util.Iterator;
import java.util.Set;
import java.util.function.Function;

/**
* Takes an iterable and adds in a set of elements to skip over. Useful for when an iterable will be
* iterated over multiple times, but some elements can be skipped for efficiency.
*
* @author samuelgass
* @param <Type>
* the type of the {@link Iterable}
* @param <IdentifierType>
* the type for the identifier used by the elements of Type for the {@link Iterable}
*/
public class FilteredIterable<Type, IdentifierType> implements Iterable<Type>
{
private final Iterable<Type> source;
private final Set<IdentifierType> filterSet;
private final Function<Type, IdentifierType> identifier;

/**
* Constructor for FilteredIterable.
*
* @param source
* A source iterable to translate to FilteredIterable
* @param filterSet
* A set of identifiers for elements to skip (can be empty or have members)
* @param identifier
* A function that takes an element of Type for the {@link Iterable} and returns the
* identifier for that element
*/
public FilteredIterable(final Iterable<Type> source, final Set<IdentifierType> filterSet,
final Function<Type, IdentifierType> identifier)
{
this.filterSet = filterSet;
this.source = source;
this.identifier = identifier;
}

/**
* Takes an element and uses the identifier function to add its identifier to the filter set.
*
* @param type
* The element to add to the filter set
* @return True if an element was added to the filter set, false if it wasn't (likely in the
* case it was already present in the set)
*/
public boolean addToFilteredSet(final Type type)
{
return this.filterSet.add(this.identifier.apply(type));
}

@Override
public Iterator<Type> iterator()
{
return new Iterator<Type>()
{
private final Iterator<Type> sourceIterator = FilteredIterable.this.source.iterator();
private Type next = null;
private Type current = this.next();

@Override
public boolean hasNext()
{
return this.next != null && !FilteredIterable.this.filterSet
.contains(FilteredIterable.this.identifier.apply(this.next));
}

@Override
public Type next()
{
this.current = this.next;
this.next = null;
while (this.sourceIterator.hasNext())
{
final Type nextCandidate = this.sourceIterator.next();
if (FilteredIterable.this.filterSet
.contains(FilteredIterable.this.identifier.apply(nextCandidate)))
{
continue;
}
else
{
this.next = nextCandidate;
break;
}
}
return this.current;
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,29 @@ public static <Type> Iterable<Type> filter(final Iterable<Type> input,
return filterTranslate(input, item -> item, matcher);
}

/**
* Translate an {@link Iterable} of items into a {@link FilteredIterable}
*
* @param types
* The {@link Iterable} to translate
* @param filterSet
* A set of identifiers for elements to skip (can be empty or have members)
* @param identifier
* A function that takes an element of Type for the {@link Iterable} and returns the
* identifier for that element
* @param <Type>
* The type of the {@link Iterable}
* @param <IdentifierType>
* The type of the Identifier object for the elements in the {@link Iterable}
* @return The translated {@link Iterable}
*/
public static <Type, IdentifierType> FilteredIterable<Type, IdentifierType> filter(
final Iterable<Type> types, final Set<IdentifierType> filterSet,
final Function<Type, IdentifierType> identifier)
{
return new FilteredIterable<Type, IdentifierType>(types, filterSet, identifier);
}

/**
* Translate an {@link Iterable} of type TypeIn to an {@link Iterable} of type TypeOut.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,24 @@ public StreamIterable<T> filter(final Predicate<T> filter)
return new StreamIterable<>(Iterables.filter(this.source, filter));
}

/**
* Filter an {@link Iterable} using a set of known elements to filter and an idenfitier function
*
* @param filterSet
* The set of IdentifierTypes to filter
* @param identifier
* The function mapping an element of type T to its identifier of type IdentifierType
* @param <IdentifierType>
* The type for the object identifier for elements of the {@link Iterable}
* @return The filtered {@link Iterable} as a {@link StreamIterable}
*/
public <IdentifierType> StreamIterable<T> filter(final Set<IdentifierType> filterSet,
final Function<T, IdentifierType> identifier)
{
return new StreamIterable<>(
new FilteredIterable<T, IdentifierType>(this.source, filterSet, identifier));
}

/**
* Flat Map an {@link Iterable}. This means each input item can return one to many results.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
package org.openstreetmap.atlas.utilities.collections;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.function.Function;

import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @author Sam Gass
*/
public class FilteredIterableTest
{
private static final Logger logger = LoggerFactory.getLogger(FilteredIterableTest.class);

@Test
public void testComplexIdentifier() throws MalformedURLException
{
final Function<URL, String> identifier = (final URL urlToIdentify) ->
{
return urlToIdentify.getProtocol();
};
final List<URL> urls = new ArrayList<>();
urls.add(new URL("https://github.com"));
urls.add(new URL("http://github.com"));
urls.add(new URL("https://test.com"));
urls.add(new URL("http://test.com"));
final FilteredIterable<URL, String> filteredIterable = Iterables
.filter(Iterables.asIterable(urls), new HashSet<String>(), identifier);

// Non-destructive streaming filter
final Iterable<URL> filtered1 = Iterables.stream(Iterables.asIterable(urls))
.filter(Iterables.asSet(Iterables.from("http")), identifier).collect();
logger.info("{}", Iterables.asList(filtered1));
Assert.assertEquals(2, Iterables.count(filtered1, i -> 1L));

// No filtering
final List<URL> unfiltered = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", unfiltered);
Assert.assertEquals(4, unfiltered.size());

// filter HTTP -- note that this is the result of choosing such a generic identifier method,
// even though we invoke addToFilteredSet() on the much more specific "http://github.com"
filteredIterable.addToFilteredSet(new URL("http://github.com"));
final List<URL> filtered2 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered2);
Assert.assertEquals(2, filtered2.size());

// filter HTTPS -- note that this is the result of choosing such a generic identifier
// method, even though we invoke addToFilteredSet() on the much more specific
// "https://github.com"
filteredIterable.addToFilteredSet(new URL("https://github.com"));
final List<URL> filtered3 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered3);
Assert.assertEquals(0, filtered3.size());
}

@Test
public void testFilteringSimple()
{
final List<Integer> values = new ArrayList<>();
final Function<Integer, Integer> identifier = (final Integer integer) ->
{
return integer;
};
for (int index = 0; index < 10; index++)
{
values.add(index);
}

// No filtering
FilteredIterable<Integer, Integer> filteredIterable = new FilteredIterable<>(
Iterables.asIterable(values), new HashSet<Integer>(), identifier);
final List<Integer> unfiltered = Iterables.stream(filteredIterable).collectToList();

logger.info("{}", unfiltered);
Assert.assertEquals(10, unfiltered.size());
Assert.assertEquals(Iterables.asList(Iterables.from(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
unfiltered);

// Filter 5 and 6
filteredIterable.addToFilteredSet(new Integer(5));
filteredIterable.addToFilteredSet(new Integer(6));
final List<Integer> filtered1 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered1);
Assert.assertEquals(8, filtered1.size());
Assert.assertEquals(Iterables.asList(Iterables.from(0, 1, 2, 3, 4, 7, 8, 9)), filtered1);

// Filter duplicate
filteredIterable.addToFilteredSet(new Integer(5));
filteredIterable.addToFilteredSet(new Integer(5));
final List<Integer> filtered2 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered2);
Assert.assertEquals(8, filtered2.size());
Assert.assertEquals(Iterables.asList(Iterables.from(0, 1, 2, 3, 4, 7, 8, 9)), filtered2);

// Filter non-existent entries
filteredIterable.addToFilteredSet(new Integer(11));
filteredIterable.addToFilteredSet(new Integer(12));
final List<Integer> filtered3 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered3);
Assert.assertEquals(8, filtered3.size());
Assert.assertEquals(Iterables.asList(Iterables.from(0, 1, 2, 3, 4, 7, 8, 9)), filtered3);

// Filter down to one
filteredIterable.addToFilteredSet(new Integer(0));
filteredIterable.addToFilteredSet(new Integer(1));
filteredIterable.addToFilteredSet(new Integer(2));
filteredIterable.addToFilteredSet(new Integer(3));
filteredIterable.addToFilteredSet(new Integer(4));
filteredIterable.addToFilteredSet(new Integer(7));
filteredIterable.addToFilteredSet(new Integer(9));
final List<Integer> filtered4 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered4);
Assert.assertEquals(1, filtered4.size());
Assert.assertEquals(Iterables.asList(Iterables.from(8)), filtered4);

// Filter all
filteredIterable.addToFilteredSet(new Integer(8));
final List<Integer> filtered5 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered5);
Assert.assertEquals(0, filtered5.size());
Assert.assertEquals(Iterables.asList(Iterables.emptyIterable(Integer.class)), filtered5);

// New Iterable, testing filter of the first value (edge case)
filteredIterable = new FilteredIterable<Integer, Integer>(Iterables.asIterable(values),
new HashSet<Integer>(), identifier);
filteredIterable.addToFilteredSet(new Integer(0));
final List<Integer> filtered6 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered6);
Assert.assertEquals(9, filtered6.size());
Assert.assertEquals(Iterables.asList(Iterables.from(1, 2, 3, 4, 5, 6, 7, 8, 9)), filtered6);

// New Iterable, testing filter of the last value (edge case)
filteredIterable = new FilteredIterable<Integer, Integer>(Iterables.asIterable(values),
new HashSet<Integer>(), identifier);
filteredIterable.addToFilteredSet(new Integer(9));
final List<Integer> filtered7 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered7);
Assert.assertEquals(9, filtered7.size());
Assert.assertEquals(Iterables.asList(Iterables.from(0, 1, 2, 3, 4, 5, 6, 7, 8)), filtered7);

// Empty Iterable (edge case)
// New Iterable, testing filter of the first value (edge case)
filteredIterable = new FilteredIterable<Integer, Integer>(Iterables.from(0),
new HashSet<Integer>(), identifier);
filteredIterable.addToFilteredSet(new Integer(0));
final List<Integer> filtered8 = Iterables.stream(filteredIterable).collectToList();
logger.info("{}", filtered8);
Assert.assertEquals(0, filtered8.size());
Assert.assertEquals(Iterables.asList(Iterables.emptyIterable(Integer.class)), filtered8);
}
}

0 comments on commit 6809924

Please sign in to comment.