Skip to content

Commit

Permalink
Added generic Resource caching capabilities (osmlab#183)
Browse files Browse the repository at this point in the history
* Working on prototype of abstract resource caching

* Slight refactor

* More refactoring

* Totally new design

* Added tests

* Added even more testing

* Checkstyle

* Refactored comments and tests

* Added forced defaults to the LocalFileInMemoryCache

* Refactored local file cache again.

* Changed test names

* Added a URI string shortcut to ResourceCache

* Removed unnecessary ResourceFetchFunction interface and changed some log levels.

* Addressed more PR comments

* Refactored to make threadsafe. Need to add tests.

* Added a Cache interface

* Added javadoc

* Fixed dumb compiler and checkstyle errors

* More test updates

* Added a large test to attempt to see if multithreaded is working

* go away checkstyle

* Added README

* Updated caching test

* Added an invalidate to the CachingStrategy interface

* Changed a document word

* Addressed PR comments.
  • Loading branch information
lucaspcram authored and MikeGost committed Aug 13, 2018
1 parent c3d93b9 commit a2764a0
Show file tree
Hide file tree
Showing 10 changed files with 788 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package org.openstreetmap.atlas.utilities.caching;

import java.net.URI;
import java.util.Optional;
import java.util.function.Function;

import org.openstreetmap.atlas.streaming.resource.Resource;
import org.openstreetmap.atlas.utilities.caching.strategies.CachingStrategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* The threadsafe {@link Resource} cache implementation. The cache needs a specified
* {@link CachingStrategy} and default fetching {@link Function} at creation time. The cache then
* loads a resource using a given {@link URI}. Since using raw URIs can often be cumbersome, users
* of this class are encouraged to extend it and overload the {@link ConcurrentResourceCache#get}
* method to take more convenient parameters.
*
* @author lcram
*/
public class ConcurrentResourceCache
{
private static final Logger logger = LoggerFactory.getLogger(ConcurrentResourceCache.class);

private final CachingStrategy cachingStrategy;
private final Function<URI, Resource> fetcher;

/**
* Create a new {@link ConcurrentResourceCache} with the given fetcher and strategy.
*
* @param cachingStrategy
* the caching strategy
* @param fetcher
* the default fetcher
*/
public ConcurrentResourceCache(final CachingStrategy cachingStrategy,
final Function<URI, Resource> fetcher)
{
this.cachingStrategy = cachingStrategy;
this.fetcher = fetcher;
}

/**
* Attempt to get the resource specified by the given string URI.
*
* @param resourceURIString
* the resource {@link URI} as a {@link String}
* @return an {@link Optional} wrapping the {@link Resource}
*/
public Optional<Resource> get(final String resourceURIString)
{
return this.get(URI.create(resourceURIString));
}

/**
* Attempt to get the resource specified by the given URI.
*
* @param resourceURI
* the resource {@link URI}
* @return an {@link Optional} wrapping the {@link Resource}
*/
public Optional<Resource> get(final URI resourceURI)
{
Optional<Resource> cachedResource;

// We must synchronize the application of the caching strategy since we cannot guarantee
// that the strategy does not utilize internal global state.
synchronized (this)
{
cachedResource = this.cachingStrategy.attemptFetch(resourceURI, this.fetcher);
}

if (!cachedResource.isPresent())
{
logger.warn("Cache fetch failed, falling back to default fetcher...");

// We must also synchronize the application of the fetcher, since it may rely on state
// shared by the calling threads.
synchronized (this)
{
cachedResource = Optional.ofNullable(this.fetcher.apply(resourceURI));
}
}

return cachedResource;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package org.openstreetmap.atlas.utilities.caching;

import java.nio.file.Paths;
import java.util.Optional;

import org.openstreetmap.atlas.streaming.resource.File;
import org.openstreetmap.atlas.streaming.resource.Resource;
import org.openstreetmap.atlas.utilities.caching.strategies.ByteArrayCachingStrategy;

/**
* An example of how to extend the {@link ConcurrentResourceCache} to enhance functionality. This
* class caches local files in memory, and abstracts the the messiness of file URIs behind a cleaner
* interface. The {@link LocalFileInMemoryCache} forces the caching strategy to be
* {@link ByteArrayCachingStrategy}, and forces the default fetcher to simply load a local file.
*
* @author lcram
*/
public class LocalFileInMemoryCache extends ConcurrentResourceCache
{
public LocalFileInMemoryCache()
{
super(new ByteArrayCachingStrategy(), uri -> new File(uri.getPath()));
}

/**
* Attempt to get the resource specified by the given path.
*
* @param path
* the path to the desired resource
* @return an {@link Optional} wrapping the {@link Resource}
*/
@Override
public Optional<Resource> get(final String path)
{
return this.get(Paths.get(path).toAbsolutePath().toUri());
}
}
34 changes: 34 additions & 0 deletions src/main/java/org/openstreetmap/atlas/utilities/caching/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# utilities.caching package

---

This package adds extensible `Resource` caching capabilities to `Atlas`.

The basic idea is that users can create a cache with a default population mechanism (fetcher), choose a caching strategy, and then fetch desired resources (keyed by `URI`s) using the strategy. A very simple use case might look like:

```java
// Let's read a resource at an arbitrary URI into a Resource.
// Notice the fetcher function provided to constructor conforms to the
// Function<URI, Resource> functional interface.

final URI LOCAL_TEST_FILE_URI = URI.create("file:///path/to/some/file.txt");

// cache file contents into memory (a byte array)
final ConcurrentResourceCache resourceCache =
new ConcurrentResourceCache(new ByteArrayCachingStrategy(), uri -> new File(uri.getPath()));

// this will cache miss the first time and populate the cache using the provided fetcher
Resource r1 = resourceCache.get(LOCAL_TEST_FILE_URI).get();

// this time we hit the cache, and read the bytes from memory instead of with the fetcher (from disk)
Resource r2 = resourceCache.get(LOCAL_TEST_FILE_URI).get();


// Manually setting fetchers and CachingStrategies can be a pain. You can abstract
// all this away by creating case-specific subclasses. This subclass cache uses
// ByteArrayCachingStrategy by default and abstracts the get URI parameter by just
// taking a path string as a parameter instead.
final LocalFileInMemoryCache fileCache = new LocalFileInMemoryCache();
Resource r3 = fileCache.get("/path/to/another/file.txt").get();
```
See the `CachingTests` class for more usage examples, and the `LocalFileInMemoryCache` class for an example of how to extend `ConcurrentResourceCache`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package org.openstreetmap.atlas.utilities.caching.strategies;

import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Function;

import org.openstreetmap.atlas.streaming.resource.Resource;

/**
* Base implementation of the {@link CachingStrategy} interface. Provides some additional
* functionality for subclasses to leverage.
*
* @author lcram
*/
public abstract class AbstractCachingStrategy implements CachingStrategy
{
/*
* Cache the UUIDs for each URI so we only have to compute them once. Caching them in a
* comparatively small map is significantly faster than recomputing them every time. Subclasses
* may want to use this cache to associate a UUID with a given URI.
*/
private final Map<String, UUID> uriStringToUUIDCache;

public AbstractCachingStrategy()
{
this.uriStringToUUIDCache = new HashMap<>();
}

@Override
public abstract Optional<Resource> attemptFetch(URI resourceURI,
Function<URI, Resource> defaultFetcher);

@Override
public abstract String getName();

/**
* Given a URI, get a universally unique identifier ({@link UUID}) for that URI. This method
* uses the {@link String} representation of a URI to compute the UUID. It will also cache
* computed UUIDs, so subsequent fetches will not incur a re-computation performance penalty.
*
* @param resourceURI
* the {@link URI}
* @return the {@link UUID} of the given {@link URI}
*/
protected UUID getUUIDForResourceURI(final URI resourceURI)
{
final String uriString = resourceURI.toString();

if (!this.uriStringToUUIDCache.containsKey(uriString))
{
// As of Java 8, this method computes the MD5 sum of the URI string.
// This can be relatively slow (10 digests per 1 ms), so we will cache the result in
// memory for subsequent requests.
final UUID newUUID = UUID.nameUUIDFromBytes(uriString.getBytes());
this.uriStringToUUIDCache.put(uriString, newUUID);
return newUUID;
}
else
{
return this.uriStringToUUIDCache.get(uriString);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package org.openstreetmap.atlas.utilities.caching.strategies;

import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Function;

import org.openstreetmap.atlas.streaming.resource.ByteArrayResource;
import org.openstreetmap.atlas.streaming.resource.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Caching strategy that attempts to cache a {@link Resource} in a byte array, in memory.
*
* @author lcram
*/
public class ByteArrayCachingStrategy extends AbstractCachingStrategy
{
/*
* Default size is arbitrarily set to 2 MiB
*/
private static final long DEFAULT_BYTE_ARRAY_SIZE = 1024 * 1024 * 2;
private static final Logger logger = LoggerFactory.getLogger(ByteArrayCachingStrategy.class);

private final Map<UUID, ByteArrayResource> resourceCache;
private long initialArraySize;
private boolean useExactResourceSize;

public ByteArrayCachingStrategy()
{
this.resourceCache = new HashMap<>();
this.initialArraySize = DEFAULT_BYTE_ARRAY_SIZE;
this.useExactResourceSize = false;
}

@Override
public Optional<Resource> attemptFetch(final URI resourceURI,
final Function<URI, Resource> defaultFetcher)
{
final UUID resourceUUID = this.getUUIDForResourceURI(resourceURI);

if (!this.resourceCache.containsKey(resourceUUID))
{
logger.info("Attempting to cache resource {} in byte array keyed on UUID {}",
resourceURI, resourceUUID.toString());
final Resource resource = defaultFetcher.apply(resourceURI);
final ByteArrayResource resourceBytes;
if (this.useExactResourceSize)
{
final long resourceLength = resource.length();
logger.info("Using extact resource length {}", resourceLength);
resourceBytes = new ByteArrayResource(resourceLength);
}
else
{
logger.info("Using initial array size {}", this.initialArraySize);
resourceBytes = new ByteArrayResource(this.initialArraySize);
}
resourceBytes.writeAndClose(resource.readBytesAndClose());
this.resourceCache.put(resourceUUID, resourceBytes);
}

logger.info("Returning cached resource {} from byte array keyed on UUID {}", resourceURI,
resourceUUID.toString());
return Optional.of(this.resourceCache.get(resourceUUID));
}

@Override
public String getName()
{
return "ByteArrayCachingStrategy";
}

@Override
public void invalidate()
{
this.resourceCache.clear();
}

/**
* Use the exact resource size of the byte arrays of the cache. This may cause performance
* degradation on cache misses, since some resources do not store their length as metadata.
*
* @return the configured {@link ByteArrayCachingStrategy}
*/
public ByteArrayCachingStrategy useExactResourceSize()
{
this.useExactResourceSize = true;
return this;
}

/**
* Set an initial array size for the byte arrays of the cache.
*
* @param initialSize
* the initial size
* @return the configured {@link ByteArrayCachingStrategy}
*/
public ByteArrayCachingStrategy withInitialArraySize(final long initialSize)
{
this.initialArraySize = initialSize;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.openstreetmap.atlas.utilities.caching.strategies;

import java.net.URI;
import java.util.Optional;
import java.util.function.Function;

import org.openstreetmap.atlas.streaming.resource.Resource;

/**
* Interface definition for a caching strategy. A caching strategy must provide a method for
* obtaining a resource based on a {@link URI}.
*
* @author lcram
*/
public interface CachingStrategy
{
/**
* Attempt to fetch the resource located at the given URI.
*
* @param resourceURI
* the {@link URI} if the desired {@link Resource}
* @param defaultFetcher
* the initial {@link Function} used to populate the cache
* @return the {@link Resource} wrapped in an {@link Optional}
*/
Optional<Resource> attemptFetch(URI resourceURI, Function<URI, Resource> defaultFetcher);

/**
* Get a strategy name for logging purposes.
*
* @return the strategy name
*/
String getName();

/**
* Invalidate the contents of this strategy. The contract of this method is the following: a
* {@link URI} that produces a cache hit on an {@link CachingStrategy#attemptFetch} before an
* {@link CachingStrategy#invalidate} call must produce a cache miss on the first
* {@link CachingStrategy#attemptFetch} after an {@link CachingStrategy#invalidate} call.
*/
void invalidate();
}
Loading

0 comments on commit a2764a0

Please sign in to comment.