forked from osmlab/atlas
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added generic Resource caching capabilities (osmlab#183)
* Working on prototype of abstract resource caching * Slight refactor * More refactoring * Totally new design * Added tests * Added even more testing * Checkstyle * Refactored comments and tests * Added forced defaults to the LocalFileInMemoryCache * Refactored local file cache again. * Changed test names * Added a URI string shortcut to ResourceCache * Removed unnecessary ResourceFetchFunction interface and changed some log levels. * Addressed more PR comments * Refactored to make threadsafe. Need to add tests. * Added a Cache interface * Added javadoc * Fixed dumb compiler and checkstyle errors * More test updates * Added a large test to attempt to see if multithreaded is working * go away checkstyle * Added README * Updated caching test * Added an invalidate to the CachingStrategy interface * Changed a document word * Addressed PR comments.
- Loading branch information
1 parent
c3d93b9
commit a2764a0
Showing
10 changed files
with
788 additions
and
0 deletions.
There are no files selected for viewing
87 changes: 87 additions & 0 deletions
87
src/main/java/org/openstreetmap/atlas/utilities/caching/ConcurrentResourceCache.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package org.openstreetmap.atlas.utilities.caching; | ||
|
||
import java.net.URI; | ||
import java.util.Optional; | ||
import java.util.function.Function; | ||
|
||
import org.openstreetmap.atlas.streaming.resource.Resource; | ||
import org.openstreetmap.atlas.utilities.caching.strategies.CachingStrategy; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* The threadsafe {@link Resource} cache implementation. The cache needs a specified | ||
* {@link CachingStrategy} and default fetching {@link Function} at creation time. The cache then | ||
* loads a resource using a given {@link URI}. Since using raw URIs can often be cumbersome, users | ||
* of this class are encouraged to extend it and overload the {@link ConcurrentResourceCache#get} | ||
* method to take more convenient parameters. | ||
* | ||
* @author lcram | ||
*/ | ||
public class ConcurrentResourceCache | ||
{ | ||
private static final Logger logger = LoggerFactory.getLogger(ConcurrentResourceCache.class); | ||
|
||
private final CachingStrategy cachingStrategy; | ||
private final Function<URI, Resource> fetcher; | ||
|
||
/** | ||
* Create a new {@link ConcurrentResourceCache} with the given fetcher and strategy. | ||
* | ||
* @param cachingStrategy | ||
* the caching strategy | ||
* @param fetcher | ||
* the default fetcher | ||
*/ | ||
public ConcurrentResourceCache(final CachingStrategy cachingStrategy, | ||
final Function<URI, Resource> fetcher) | ||
{ | ||
this.cachingStrategy = cachingStrategy; | ||
this.fetcher = fetcher; | ||
} | ||
|
||
/** | ||
* Attempt to get the resource specified by the given string URI. | ||
* | ||
* @param resourceURIString | ||
* the resource {@link URI} as a {@link String} | ||
* @return an {@link Optional} wrapping the {@link Resource} | ||
*/ | ||
public Optional<Resource> get(final String resourceURIString) | ||
{ | ||
return this.get(URI.create(resourceURIString)); | ||
} | ||
|
||
/** | ||
* Attempt to get the resource specified by the given URI. | ||
* | ||
* @param resourceURI | ||
* the resource {@link URI} | ||
* @return an {@link Optional} wrapping the {@link Resource} | ||
*/ | ||
public Optional<Resource> get(final URI resourceURI) | ||
{ | ||
Optional<Resource> cachedResource; | ||
|
||
// We must synchronize the application of the caching strategy since we cannot guarantee | ||
// that the strategy does not utilize internal global state. | ||
synchronized (this) | ||
{ | ||
cachedResource = this.cachingStrategy.attemptFetch(resourceURI, this.fetcher); | ||
} | ||
|
||
if (!cachedResource.isPresent()) | ||
{ | ||
logger.warn("Cache fetch failed, falling back to default fetcher..."); | ||
|
||
// We must also synchronize the application of the fetcher, since it may rely on state | ||
// shared by the calling threads. | ||
synchronized (this) | ||
{ | ||
cachedResource = Optional.ofNullable(this.fetcher.apply(resourceURI)); | ||
} | ||
} | ||
|
||
return cachedResource; | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
src/main/java/org/openstreetmap/atlas/utilities/caching/LocalFileInMemoryCache.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.openstreetmap.atlas.utilities.caching; | ||
|
||
import java.nio.file.Paths; | ||
import java.util.Optional; | ||
|
||
import org.openstreetmap.atlas.streaming.resource.File; | ||
import org.openstreetmap.atlas.streaming.resource.Resource; | ||
import org.openstreetmap.atlas.utilities.caching.strategies.ByteArrayCachingStrategy; | ||
|
||
/** | ||
* An example of how to extend the {@link ConcurrentResourceCache} to enhance functionality. This | ||
* class caches local files in memory, and abstracts the the messiness of file URIs behind a cleaner | ||
* interface. The {@link LocalFileInMemoryCache} forces the caching strategy to be | ||
* {@link ByteArrayCachingStrategy}, and forces the default fetcher to simply load a local file. | ||
* | ||
* @author lcram | ||
*/ | ||
public class LocalFileInMemoryCache extends ConcurrentResourceCache | ||
{ | ||
public LocalFileInMemoryCache() | ||
{ | ||
super(new ByteArrayCachingStrategy(), uri -> new File(uri.getPath())); | ||
} | ||
|
||
/** | ||
* Attempt to get the resource specified by the given path. | ||
* | ||
* @param path | ||
* the path to the desired resource | ||
* @return an {@link Optional} wrapping the {@link Resource} | ||
*/ | ||
@Override | ||
public Optional<Resource> get(final String path) | ||
{ | ||
return this.get(Paths.get(path).toAbsolutePath().toUri()); | ||
} | ||
} |
34 changes: 34 additions & 0 deletions
34
src/main/java/org/openstreetmap/atlas/utilities/caching/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# utilities.caching package | ||
|
||
--- | ||
|
||
This package adds extensible `Resource` caching capabilities to `Atlas`. | ||
|
||
The basic idea is that users can create a cache with a default population mechanism (fetcher), choose a caching strategy, and then fetch desired resources (keyed by `URI`s) using the strategy. A very simple use case might look like: | ||
|
||
```java | ||
// Let's read a resource at an arbitrary URI into a Resource. | ||
// Notice the fetcher function provided to constructor conforms to the | ||
// Function<URI, Resource> functional interface. | ||
|
||
final URI LOCAL_TEST_FILE_URI = URI.create("file:///path/to/some/file.txt"); | ||
|
||
// cache file contents into memory (a byte array) | ||
final ConcurrentResourceCache resourceCache = | ||
new ConcurrentResourceCache(new ByteArrayCachingStrategy(), uri -> new File(uri.getPath())); | ||
|
||
// this will cache miss the first time and populate the cache using the provided fetcher | ||
Resource r1 = resourceCache.get(LOCAL_TEST_FILE_URI).get(); | ||
|
||
// this time we hit the cache, and read the bytes from memory instead of with the fetcher (from disk) | ||
Resource r2 = resourceCache.get(LOCAL_TEST_FILE_URI).get(); | ||
|
||
|
||
// Manually setting fetchers and CachingStrategies can be a pain. You can abstract | ||
// all this away by creating case-specific subclasses. This subclass cache uses | ||
// ByteArrayCachingStrategy by default and abstracts the get URI parameter by just | ||
// taking a path string as a parameter instead. | ||
final LocalFileInMemoryCache fileCache = new LocalFileInMemoryCache(); | ||
Resource r3 = fileCache.get("/path/to/another/file.txt").get(); | ||
``` | ||
See the `CachingTests` class for more usage examples, and the `LocalFileInMemoryCache` class for an example of how to extend `ConcurrentResourceCache`. |
66 changes: 66 additions & 0 deletions
66
...in/java/org/openstreetmap/atlas/utilities/caching/strategies/AbstractCachingStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package org.openstreetmap.atlas.utilities.caching.strategies; | ||
|
||
import java.net.URI; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.UUID; | ||
import java.util.function.Function; | ||
|
||
import org.openstreetmap.atlas.streaming.resource.Resource; | ||
|
||
/** | ||
* Base implementation of the {@link CachingStrategy} interface. Provides some additional | ||
* functionality for subclasses to leverage. | ||
* | ||
* @author lcram | ||
*/ | ||
public abstract class AbstractCachingStrategy implements CachingStrategy | ||
{ | ||
/* | ||
* Cache the UUIDs for each URI so we only have to compute them once. Caching them in a | ||
* comparatively small map is significantly faster than recomputing them every time. Subclasses | ||
* may want to use this cache to associate a UUID with a given URI. | ||
*/ | ||
private final Map<String, UUID> uriStringToUUIDCache; | ||
|
||
public AbstractCachingStrategy() | ||
{ | ||
this.uriStringToUUIDCache = new HashMap<>(); | ||
} | ||
|
||
@Override | ||
public abstract Optional<Resource> attemptFetch(URI resourceURI, | ||
Function<URI, Resource> defaultFetcher); | ||
|
||
@Override | ||
public abstract String getName(); | ||
|
||
/** | ||
* Given a URI, get a universally unique identifier ({@link UUID}) for that URI. This method | ||
* uses the {@link String} representation of a URI to compute the UUID. It will also cache | ||
* computed UUIDs, so subsequent fetches will not incur a re-computation performance penalty. | ||
* | ||
* @param resourceURI | ||
* the {@link URI} | ||
* @return the {@link UUID} of the given {@link URI} | ||
*/ | ||
protected UUID getUUIDForResourceURI(final URI resourceURI) | ||
{ | ||
final String uriString = resourceURI.toString(); | ||
|
||
if (!this.uriStringToUUIDCache.containsKey(uriString)) | ||
{ | ||
// As of Java 8, this method computes the MD5 sum of the URI string. | ||
// This can be relatively slow (10 digests per 1 ms), so we will cache the result in | ||
// memory for subsequent requests. | ||
final UUID newUUID = UUID.nameUUIDFromBytes(uriString.getBytes()); | ||
this.uriStringToUUIDCache.put(uriString, newUUID); | ||
return newUUID; | ||
} | ||
else | ||
{ | ||
return this.uriStringToUUIDCache.get(uriString); | ||
} | ||
} | ||
} |
107 changes: 107 additions & 0 deletions
107
...n/java/org/openstreetmap/atlas/utilities/caching/strategies/ByteArrayCachingStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package org.openstreetmap.atlas.utilities.caching.strategies; | ||
|
||
import java.net.URI; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.UUID; | ||
import java.util.function.Function; | ||
|
||
import org.openstreetmap.atlas.streaming.resource.ByteArrayResource; | ||
import org.openstreetmap.atlas.streaming.resource.Resource; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* Caching strategy that attempts to cache a {@link Resource} in a byte array, in memory. | ||
* | ||
* @author lcram | ||
*/ | ||
public class ByteArrayCachingStrategy extends AbstractCachingStrategy | ||
{ | ||
/* | ||
* Default size is arbitrarily set to 2 MiB | ||
*/ | ||
private static final long DEFAULT_BYTE_ARRAY_SIZE = 1024 * 1024 * 2; | ||
private static final Logger logger = LoggerFactory.getLogger(ByteArrayCachingStrategy.class); | ||
|
||
private final Map<UUID, ByteArrayResource> resourceCache; | ||
private long initialArraySize; | ||
private boolean useExactResourceSize; | ||
|
||
public ByteArrayCachingStrategy() | ||
{ | ||
this.resourceCache = new HashMap<>(); | ||
this.initialArraySize = DEFAULT_BYTE_ARRAY_SIZE; | ||
this.useExactResourceSize = false; | ||
} | ||
|
||
@Override | ||
public Optional<Resource> attemptFetch(final URI resourceURI, | ||
final Function<URI, Resource> defaultFetcher) | ||
{ | ||
final UUID resourceUUID = this.getUUIDForResourceURI(resourceURI); | ||
|
||
if (!this.resourceCache.containsKey(resourceUUID)) | ||
{ | ||
logger.info("Attempting to cache resource {} in byte array keyed on UUID {}", | ||
resourceURI, resourceUUID.toString()); | ||
final Resource resource = defaultFetcher.apply(resourceURI); | ||
final ByteArrayResource resourceBytes; | ||
if (this.useExactResourceSize) | ||
{ | ||
final long resourceLength = resource.length(); | ||
logger.info("Using extact resource length {}", resourceLength); | ||
resourceBytes = new ByteArrayResource(resourceLength); | ||
} | ||
else | ||
{ | ||
logger.info("Using initial array size {}", this.initialArraySize); | ||
resourceBytes = new ByteArrayResource(this.initialArraySize); | ||
} | ||
resourceBytes.writeAndClose(resource.readBytesAndClose()); | ||
this.resourceCache.put(resourceUUID, resourceBytes); | ||
} | ||
|
||
logger.info("Returning cached resource {} from byte array keyed on UUID {}", resourceURI, | ||
resourceUUID.toString()); | ||
return Optional.of(this.resourceCache.get(resourceUUID)); | ||
} | ||
|
||
@Override | ||
public String getName() | ||
{ | ||
return "ByteArrayCachingStrategy"; | ||
} | ||
|
||
@Override | ||
public void invalidate() | ||
{ | ||
this.resourceCache.clear(); | ||
} | ||
|
||
/** | ||
* Use the exact resource size of the byte arrays of the cache. This may cause performance | ||
* degradation on cache misses, since some resources do not store their length as metadata. | ||
* | ||
* @return the configured {@link ByteArrayCachingStrategy} | ||
*/ | ||
public ByteArrayCachingStrategy useExactResourceSize() | ||
{ | ||
this.useExactResourceSize = true; | ||
return this; | ||
} | ||
|
||
/** | ||
* Set an initial array size for the byte arrays of the cache. | ||
* | ||
* @param initialSize | ||
* the initial size | ||
* @return the configured {@link ByteArrayCachingStrategy} | ||
*/ | ||
public ByteArrayCachingStrategy withInitialArraySize(final long initialSize) | ||
{ | ||
this.initialArraySize = initialSize; | ||
return this; | ||
} | ||
} |
42 changes: 42 additions & 0 deletions
42
src/main/java/org/openstreetmap/atlas/utilities/caching/strategies/CachingStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package org.openstreetmap.atlas.utilities.caching.strategies; | ||
|
||
import java.net.URI; | ||
import java.util.Optional; | ||
import java.util.function.Function; | ||
|
||
import org.openstreetmap.atlas.streaming.resource.Resource; | ||
|
||
/** | ||
* Interface definition for a caching strategy. A caching strategy must provide a method for | ||
* obtaining a resource based on a {@link URI}. | ||
* | ||
* @author lcram | ||
*/ | ||
public interface CachingStrategy | ||
{ | ||
/** | ||
* Attempt to fetch the resource located at the given URI. | ||
* | ||
* @param resourceURI | ||
* the {@link URI} if the desired {@link Resource} | ||
* @param defaultFetcher | ||
* the initial {@link Function} used to populate the cache | ||
* @return the {@link Resource} wrapped in an {@link Optional} | ||
*/ | ||
Optional<Resource> attemptFetch(URI resourceURI, Function<URI, Resource> defaultFetcher); | ||
|
||
/** | ||
* Get a strategy name for logging purposes. | ||
* | ||
* @return the strategy name | ||
*/ | ||
String getName(); | ||
|
||
/** | ||
* Invalidate the contents of this strategy. The contract of this method is the following: a | ||
* {@link URI} that produces a cache hit on an {@link CachingStrategy#attemptFetch} before an | ||
* {@link CachingStrategy#invalidate} call must produce a cache miss on the first | ||
* {@link CachingStrategy#attemptFetch} after an {@link CachingStrategy#invalidate} call. | ||
*/ | ||
void invalidate(); | ||
} |
Oops, something went wrong.