Skip to content

Commit

Permalink
fix: IORef to skip concurrent writes in Store.set (jaspervdj#945)
Browse files Browse the repository at this point in the history
Fixes jaspervdj#903.

* fix: IORef to skip concurrent writes in Store.set

See jaspervdj#903

* Add note about cacheInsert

* Use a writeAhead Map

* Address comment from Minoru
  • Loading branch information
jaspervdj authored Aug 15, 2022
1 parent dccb5a9 commit ebb6bf1
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 46 deletions.
115 changes: 79 additions & 36 deletions lib/Hakyll/Core/Store.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@ module Hakyll.Core.Store


--------------------------------------------------------------------------------
import qualified Data.Hashable as DH
import Control.Concurrent (forkIO, threadDelay)
import Control.Monad (void, when)
import Data.Binary (Binary, decode, encodeFile)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Cache.LRU.IO as Lru
import qualified Data.Hashable as DH
import qualified Data.IORef as IORef
import Data.List (intercalate)
import qualified Data.Map as Map
import Data.Maybe (isJust)
import Data.Typeable (TypeRep, Typeable, cast, typeOf)
import System.Directory (createDirectoryIfMissing, doesFileExist, removeFile)
import System.Directory (createDirectoryIfMissing, doesFileExist,
removeFile)
import System.FilePath ((</>))
import System.IO (IOMode (..), hClose, openFile)
import System.IO.Error (catchIOError, ioeSetFileName,
Expand All @@ -38,9 +43,11 @@ data Box = forall a. Typeable a => Box a
--------------------------------------------------------------------------------
data Store = Store
{ -- | All items are stored on the filesystem
storeDirectory :: FilePath
, -- | Optionally, items are also kept in-memory
storeMap :: Maybe (Lru.AtomicLRU FilePath Box)
storeDirectory :: FilePath
, -- | See 'set'
storeWriteAhead :: IORef.IORef (Map.Map String Box)
-- | Optionally, items are also kept in-memory
, storeMap :: Maybe (Lru.AtomicLRU FilePath Box)
}


Expand Down Expand Up @@ -72,10 +79,12 @@ new :: Bool -- ^ Use in-memory caching
-> IO Store -- ^ Store
new inMemory directory = do
createDirectoryIfMissing True directory
writeAhead <- IORef.newIORef Map.empty
ref <- if inMemory then Just <$> Lru.newAtomicLRU csize else return Nothing
return Store
{ storeDirectory = directory
, storeMap = ref
{ storeDirectory = directory
, storeWriteAhead = writeAhead
, storeMap = ref
}
where
csize = Just 500
Expand All @@ -92,16 +101,16 @@ withStore store loc run identifier = modifyIOError handle $ run key path
--------------------------------------------------------------------------------
-- | Auxiliary: add an item to the in-memory cache
cacheInsert :: Typeable a => Store -> String -> a -> IO ()
cacheInsert (Store _ Nothing) _ _ = return ()
cacheInsert (Store _ (Just lru)) key x =
cacheInsert (Store _ _ Nothing) _ _ = return ()
cacheInsert (Store _ _ (Just lru)) key x =
Lru.insert key (Box x) lru


--------------------------------------------------------------------------------
-- | Auxiliary: get an item from the in-memory cache
cacheLookup :: forall a. Typeable a => Store -> String -> IO (Result a)
cacheLookup (Store _ Nothing) _ = return NotFound
cacheLookup (Store _ (Just lru)) key = do
cacheLookup (Store _ _ Nothing) _ = return NotFound
cacheLookup (Store _ _ (Just lru)) key = do
res <- Lru.lookup key lru
return $ case res of
Nothing -> NotFound
Expand All @@ -112,15 +121,15 @@ cacheLookup (Store _ (Just lru)) key = do

--------------------------------------------------------------------------------
cacheIsMember :: Store -> String -> IO Bool
cacheIsMember (Store _ Nothing) _ = return False
cacheIsMember (Store _ (Just lru)) key = isJust <$> Lru.lookup key lru
cacheIsMember (Store _ _ Nothing) _ = return False
cacheIsMember (Store _ _ (Just lru)) key = isJust <$> Lru.lookup key lru


--------------------------------------------------------------------------------
-- | Auxiliary: delete an item from the in-memory cache
cacheDelete :: Store -> String -> IO ()
cacheDelete (Store _ Nothing) _ = return ()
cacheDelete (Store _ (Just lru)) key = do
cacheDelete (Store _ _ Nothing) _ = return ()
cacheDelete (Store _ _ (Just lru)) key = do
_ <- Lru.delete key lru
return ()

Expand All @@ -129,31 +138,61 @@ cacheDelete (Store _ (Just lru)) key = do
-- | Store an item
set :: (Binary a, Typeable a) => Store -> [String] -> a -> IO ()
set store identifier value = withStore store "set" (\key path -> do
encodeFile path value
-- We need to avoid concurrent writes to the filesystem. Imagine the
-- follow scenario:
--
-- * We compile multiple posts
-- * All of these fetch some common metadata
-- * This metadata is missing; we fetch it and then store it.
--
-- To solve this, we skip duplicate writes by tracking their status
-- in 'storeWriteAhead'. Since this set will usually be small, the
-- required locking should be fast. Additionally the actual IO operation
-- still happens outside of the locking.
first <- IORef.atomicModifyIORef' (storeWriteAhead store) $
\wa -> case Map.lookup key wa of
Nothing -> (Map.insert key (Box value) wa, True)
Just _ -> (wa, False)

cacheInsert store key value

-- Only the thread that stored the writeAhead should actually write this
-- file. That way, only one thread at a time will try to write this.
-- Release the writeAhead value once we're done.
when first $ do
encodeFile path value
IORef.atomicModifyIORef' (storeWriteAhead store) $
\wa -> (Map.delete key wa, ())
) identifier


--------------------------------------------------------------------------------
-- | Load an item
get :: (Binary a, Typeable a) => Store -> [String] -> IO (Result a)
get :: forall a. (Binary a, Typeable a) => Store -> [String] -> IO (Result a)
get store = withStore store "get" $ \key path -> do
-- First check the in-memory map
ref <- cacheLookup store key
case ref of
-- Not found in the map, try the filesystem
NotFound -> do
exists <- doesFileExist path
if not exists
-- Not found in the filesystem either
then return NotFound
-- Found in the filesystem
else do
v <- decodeClose path
cacheInsert store key v
return $ Found v
-- Found in the in-memory map (or wrong type), just return
s -> return s
-- Check the writeAhead value
writeAhead <- IORef.readIORef $ storeWriteAhead store
case Map.lookup key writeAhead of
Just (Box x) -> case cast x of
Just x' -> pure $ Found x'
Nothing -> pure $ WrongType (typeOf (undefined :: a)) (typeOf x)
Nothing -> do
-- Check the in-memory map
ref <- cacheLookup store key
case ref of
-- Not found in the map, try the filesystem
NotFound -> do
exists <- doesFileExist path
if not exists
-- Not found in the filesystem either
then return NotFound
-- Found in the filesystem
else do
v <- decodeClose path
cacheInsert store key v
return $ Found v
-- Found in the in-memory map (or wrong type), just return
s -> return s
where
-- 'decodeFile' from Data.Binary which closes the file ASAP
decodeClose path = do
Expand All @@ -167,8 +206,12 @@ get store = withStore store "get" $ \key path -> do
-- | Strict function
isMember :: Store -> [String] -> IO Bool
isMember store = withStore store "isMember" $ \key path -> do
inCache <- cacheIsMember store key
if inCache then return True else doesFileExist path
writeAhead <- IORef.readIORef $ storeWriteAhead store
if Map.member key writeAhead
then pure True
else do
inCache <- cacheIsMember store key
if inCache then return True else doesFileExist path


--------------------------------------------------------------------------------
Expand All @@ -188,4 +231,4 @@ deleteFile = (`catchIOError` \_ -> return ()) . removeFile
--------------------------------------------------------------------------------
-- | Mostly meant for internal usage
hash :: [String] -> String
hash = show . DH.hash . intercalate "/"
hash = show . DH.hash . intercalate "/"
7 changes: 1 addition & 6 deletions stack.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
resolver: lts-18.16
resolver: 'lts-19.17'
save-hackage-creds: false
system-ghc: true
skip-ghc-check: true
Expand All @@ -20,9 +20,4 @@ nix:
- rsync # for deployment
- zlib

build:
haddock: true
haddock-hyperlink-source: true
haddock-deps: false

extra-deps:
8 changes: 4 additions & 4 deletions stack.yaml.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
packages: []
snapshots:
- completed:
size: 586286
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/16.yaml
sha256: cdead65fca0323144b346c94286186f4969bf85594d649c49c7557295675d8a5
original: lts-18.16
size: 619161
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/19/17.yaml
sha256: 7f47507fd037228a8d23cf830f5844e1f006221acebdd7cb49f2f5fb561e0546
original: lts-19.17

0 comments on commit ebb6bf1

Please sign in to comment.