diff --git a/README.md b/README.md
index 6a01024..ebcf1a3 100644
--- a/README.md
+++ b/README.md
@@ -39,3 +39,16 @@ With more tracing
 ```bash
 stack test --pedantic --file-watch --trace .
 ```
+
+
+## Running
+
+```bash
+stack exec neural-net-exe -- --help
+```
+
+A simple logistic regression example:
+
+```bash
+stack exec neural-net-exe -- -c -l 0.005 -i 1000 examples/simple-csv/train.csv examples/simple-csv/test.csv
+```
diff --git a/app/Main.hs b/app/Main.hs
index 33c67b6..e3972d5 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -1,5 +1,58 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# OPTIONS_GHC -fno-cse #-}
 module Main where
 
+import NeuralNet.Problem
+import NeuralNet.Net
+import NeuralNet.Activation
+import NeuralNet.Example
+import NeuralNet.External
+import System.IO ()
+import System.Random
+import Data.List
+import System.Console.CmdArgs
+
+
+data RunOptions = RunOptions {trainPath :: FilePath
+                             ,testPath :: FilePath
+                             ,constInitWeights :: Bool
+                             ,numIterations :: Int
+                             ,learningRate :: Double}
+  deriving (Eq, Show, Data, Typeable)
+
+optionsDef :: RunOptions
+optionsDef = RunOptions
+  {trainPath = def &= typ "TRAINFILE" &= argPos 0
+  ,testPath = def &= typ "TESTFILE" &= argPos 1
+  ,constInitWeights = def &= name "c" &= name "const-init-weights" &= help "Use 0 for initial weights"
+  ,numIterations = 1000 &= name "i" &= name "num-iterations" &= help "Number of training iterations"
+  ,learningRate = 0.005 &= name "l" &= name "learning-rate" &= help "Learning rate (alpha)"} &=
+  help "Simple Neural Net trainer" &=
+  summary "NeuralNet v0.0.0, (C) Daniel Holmes"
 
 main :: IO ()
-main = putStrLn "Hello"
+main = do
+  options <- cmdArgs optionsDef
+  problem <- loadProblem options
+  gen <- createWeightInit options
+  let (nn, steps) = runProblem gen problem (\yh y -> ((round yh) :: Int) == ((round y) :: Int))
+  putStrLn (intercalate "\n" (map formatStepLine steps))
+  print nn
+
+formatStepLine :: RunStep -> String
+formatStepLine s = show (runStepIteration s) ++ ") " ++ show (runStepCost s) ++ " " ++ show accuracy ++ "%"
+  where accuracy = (round (100.0 * runStepAccuracy s)) :: Int
+
+createWeightInit :: RunOptions -> IO WeightInitialiser
+createWeightInit options = case constInitWeights options of
+  True -> return (Const 0)
+  False -> do
+    stdGen <- getStdGen
+    return (Random stdGen)
+
+loadProblem :: RunOptions -> IO Problem
+loadProblem (RunOptions {trainPath = train, testPath = test, numIterations = n, learningRate = a}) = do
+  trainSet <- loadExampleSet train
+  testSet <- loadExampleSet test
+  let nnDef = createLogRegDefinition (exampleSetN testSet) Sigmoid
+  return (createProblem nnDef trainSet testSet a n)
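For reference, the accuracy check handed to `runProblem` in `Main.hs` counts a prediction as correct when it rounds to the same integer as its label. A minimal stand-alone sketch of that predicate (the sample values are illustrative):

```haskell
-- Mirrors the lambda passed to runProblem in Main.hs: a prediction
-- is correct when it rounds to the same integer as the label.
predictionMatches :: Double -> Double -> Bool
predictionMatches yh y = ((round yh) :: Int) == ((round y) :: Int)

main :: IO ()
main = do
  print (predictionMatches 0.9 1.0) -- True: both round to 1
  print (predictionMatches 0.2 1.0) -- False: 0 /= 1
```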
diff --git a/examples/simple-csv/test.csv b/examples/simple-csv/test.csv
new file mode 100644
index 0000000..908c119
--- /dev/null
+++ b/examples/simple-csv/test.csv
@@ -0,0 +1,12 @@
+1,2,1
+0,4,0
+1,3,1
+1,0,1
+0,1,0
+1,5,1
+1,58,1
+0,72,0
+0,11,0
+1,6,1
+1,100,1
+1,7000,1
\ No newline at end of file
diff --git a/examples/simple-csv/train.csv b/examples/simple-csv/train.csv
new file mode 100644
index 0000000..f3fb6e0
--- /dev/null
+++ b/examples/simple-csv/train.csv
@@ -0,0 +1,17 @@
+1,2,1
+1,3,1
+0,2,0
+1,5,1
+0,7,0
+1,5,1
+0,6,0
+1,88,1
+0,99,0
+0,11,0
+1,23,1
+0,2,0
+1,1,1
+0,1,0
+1,100,1
+1,6,1
+0,7,0
\ No newline at end of file
diff --git a/package.yaml b/package.yaml
index b2bb0af..0beb06d 100644
--- a/package.yaml
+++ b/package.yaml
@@ -24,6 +24,10 @@ dependencies:
 - matrix >= 0.3.5.0 && < 1
 - vector >= 0.11.0.0 && < 1
 - random >= 1.1 && < 2
+- csv >= 0.1.2 && < 1
+- cmdargs >= 0.10.20 && < 1
+- filepath >= 1.4.1 && < 2
+- text >= 1.2.3.0 && < 2
 
 library:
   source-dirs: src
diff --git a/src/NeuralNet/Example.hs b/src/NeuralNet/Example.hs
index cdb647e..e894da3 100644
--- a/src/NeuralNet/Example.hs
+++ b/src/NeuralNet/Example.hs
@@ -15,16 +15,18 @@ import Data.List (nub)
 type Example = ([Double], Double)
 
 data ExampleSet = ExampleSet [Example] (Matrix Double) (Matrix Double)
+  deriving (Show, Eq)
 
 createExampleSet :: [Example] -> ExampleSet
 createExampleSet e
-  | numUniqueExampleSizes > 1 = error "All examples must have same size"
+  | numUniqueExampleSizes > 1 = error ("All examples must have same size (given " ++ show uniqueExampleSizes ++ ")")
   | numUniqueExampleSizes == 0 = error "No examples provided"
   | n == 0 = error "Empty example provided"
   | otherwise = ExampleSet e x y
   where
     exampleSizes = map (length . fst) e
-    numUniqueExampleSizes = length (nub exampleSizes)
+    uniqueExampleSizes = nub exampleSizes
+    numUniqueExampleSizes = length uniqueExampleSizes
     m = length e
     n = head exampleSizes
     x = matrix n m (\(r, c) -> (fst (e!!(c - 1)))!!(r - 1))
diff --git a/src/NeuralNet/External.hs b/src/NeuralNet/External.hs
new file mode 100644
index 0000000..98b5de3
--- /dev/null
+++ b/src/NeuralNet/External.hs
@@ -0,0 +1,24 @@
+module NeuralNet.External (loadExampleSet) where
+
+import NeuralNet.Example
+import Text.CSV
+import System.FilePath.Posix
+import Data.Char
+
+
+loadExampleSet :: FilePath -> IO ExampleSet
+loadExampleSet p =
+  case map toLower (takeExtension p) of
+    ".csv" -> loadCsvExampleSet p
+    _ -> error ("Don't know how to load example set from " ++ show p)
+
+loadCsvExampleSet :: FilePath -> IO ExampleSet
+loadCsvExampleSet p = do
+  result <- parseCSVFromFile p
+  case result of
+    Right c -> return (createExampleSet (csvToExamples c))
+    _ -> error ("Error reading " ++ p)
+
+csvToExamples :: CSV -> [Example]
+csvToExamples records = map (\r -> (init r, last r)) doubleRecords
+  where doubleRecords = map (map read) records
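`csvToExamples` treats every column except the last as a feature and the final column as the label, which is why the example CSVs above have the 0/1 label in the third column. A minimal sketch of that row-to-example mapping, using a hypothetical `rowToExample` helper over a single parsed record:

```haskell
-- Hypothetical stand-alone version of the per-row logic in
-- NeuralNet.External.csvToExamples: all fields but the last are
-- features, the last field is the label.
rowToExample :: [String] -> ([Double], Double)
rowToExample r = (init ds, last ds)
  where ds = map read r :: [Double]

main :: IO ()
main = print (rowToExample ["1", "2", "1"]) -- ([1.0,2.0],1.0)
```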
diff --git a/src/NeuralNet/Net.hs b/src/NeuralNet/Net.hs
index 8bdb3f8..4018312 100644
--- a/src/NeuralNet/Net.hs
+++ b/src/NeuralNet/Net.hs
@@ -1,6 +1,7 @@
 module NeuralNet.Net (
   NeuralNetDefinition,
   LayerDefinition (LayerDefinition),
+  WeightInitialiser (Random, Const),
   NeuralNet,
   initNN,
   buildNNFromList,
@@ -10,7 +11,8 @@ module NeuralNet.Net (
   nnForwardSet,
   isExampleSetCompatibleWithNN,
   isExampleSetCompatibleWithNNDef,
-  updateNNLayers
+  updateNNLayers,
+  createLogRegDefinition
 ) where
 
 import System.Random
@@ -25,25 +27,36 @@ type NumInputs = Int
 
 type NeuralNetDefinition = (NumInputs, [LayerDefinition])
 
+data WeightInitialiser = Random StdGen | Const Double
+  deriving (Show)
+
 data LayerDefinition = LayerDefinition Activation NumNeurons
   deriving (Show, Eq)
 
 data NeuralNet = NeuralNet [NeuronLayer]
   deriving (Show, Eq)
 
-initNN :: StdGen -> NeuralNetDefinition -> NeuralNet
+nextInitValue :: WeightInitialiser -> (Double, WeightInitialiser)
+nextInitValue (Random g) = (v, Random nextG)
+  where (v, nextG) = randomR (0.0, 1.0) g
+nextInitValue c@(Const v) = (v, c)
+
+createLogRegDefinition :: NumInputs -> Activation -> NeuralNetDefinition
+createLogRegDefinition n a = (n, [LayerDefinition a 1])
+
+initNN :: WeightInitialiser -> NeuralNetDefinition -> NeuralNet
 initNN _ (_, []) = error "No layer definitions provided"
 initNN g (numInputs, layerDefs)
   | numInputs > 0 = NeuralNet (fst (initNeuronLayers g numInputs layerDefs))
   | otherwise = error "Need positive num inputs"
 
-initNeuronLayers :: StdGen -> Int -> [LayerDefinition] -> ([NeuronLayer], StdGen)
+initNeuronLayers :: WeightInitialiser -> Int -> [LayerDefinition] -> ([NeuronLayer], WeightInitialiser)
 initNeuronLayers g _ [] = ([], g)
 initNeuronLayers g numInputs (LayerDefinition a numNeurons : ds) = (NeuronLayer a w (fromList numNeurons 1 (replicate numNeurons 0)) : ls, newG2)
   where
-    foldStep :: Int -> ([Double], StdGen) -> ([Double], StdGen)
+    foldStep :: Int -> ([Double], WeightInitialiser) -> ([Double], WeightInitialiser)
     foldStep _ (accu, foldG) = (d:accu, newFoldG)
-      where (d, newFoldG) = randomR (0.0, 1.0) foldG
+      where (d, newFoldG) = nextInitValue foldG
 
     -- TODO: Look into replicateM instead of this
     seeds = [1..(numInputs * numNeurons)]
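`WeightInitialiser` lets the same initialisation code run deterministically (`Const`) or with pseudo-random draws (`Random`), threading the initialiser through successive draws the way a `StdGen` would be. A small self-contained sketch of that threading (the `takeWeights` helper is hypothetical):

```haskell
import System.Random (StdGen, mkStdGen, randomR)

-- Same shape as the WeightInitialiser in NeuralNet.Net: Const always
-- yields the same value, Random threads a StdGen between draws.
data WeightInitialiser = Random StdGen | Const Double

nextInitValue :: WeightInitialiser -> (Double, WeightInitialiser)
nextInitValue (Random g) = let (v, g') = randomR (0.0, 1.0) g in (v, Random g')
nextInitValue c@(Const v) = (v, c)

-- Hypothetical helper: draw n weights, threading the initialiser.
takeWeights :: Int -> WeightInitialiser -> [Double]
takeWeights n w
  | n <= 0 = []
  | otherwise = let (v, w') = nextInitValue w in v : takeWeights (n - 1) w'

main :: IO ()
main = do
  print (takeWeights 3 (Const 0))              -- [0.0,0.0,0.0]
  print (takeWeights 3 (Random (mkStdGen 42))) -- three values in [0,1]
```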
diff --git a/src/NeuralNet/Problem.hs b/src/NeuralNet/Problem.hs
index 4a0c532..57c8d0d 100644
--- a/src/NeuralNet/Problem.hs
+++ b/src/NeuralNet/Problem.hs
@@ -1,62 +1,86 @@
-module NeuralNet.Problem (Problem (), createProblem, runProblem) where
+module NeuralNet.Problem (
+  Problem (),
+  RunStep (),
+  createProblem,
+  runProblem,
+  runStepIteration,
+  runStepAccuracy,
+  runStepCost,
+  problemTestSet
+) where
 
-import System.Random
 import NeuralNet.Example
 import NeuralNet.Net
 import NeuralNet.Layer
 import NeuralNet.Cost
 import NeuralNet.Train
+import Data.Matrix
 
 
 type NumIterations = Int
 type LearningRate = Double
 
-data Problem = Problem NeuralNetDefinition ExampleSet LearningRate NumIterations
+data Problem = Problem NeuralNetDefinition ExampleSet ExampleSet LearningRate NumIterations
+  deriving (Show, Eq)
 
 type IterationNum = Int
-
 type Cost = Double
+type Accuracy = Double
 
+data RunStep = RunStep IterationNum Cost Accuracy
+  deriving (Show, Eq)
+
+runStepIteration :: RunStep -> IterationNum
+runStepIteration (RunStep i _ _) = i
 
-data RunStep = RunStep IterationNum Cost
+runStepCost :: RunStep -> Cost
+runStepCost (RunStep _ c _) = c
 
+runStepAccuracy :: RunStep -> Accuracy
+runStepAccuracy (RunStep _ _ a) = a
 
 problemNNDef :: Problem -> NeuralNetDefinition
-problemNNDef (Problem d _ _ _) = d
+problemNNDef (Problem d _ _ _ _) = d
+
+problemTrainSet :: Problem -> ExampleSet
+problemTrainSet (Problem _ t _ _ _) = t
 
-problemExampleSet :: Problem -> ExampleSet
-problemExampleSet (Problem _ e _ _) = e
+problemTestSet :: Problem -> ExampleSet
+problemTestSet (Problem _ _ t _ _) = t
 
 problemLearningRate :: Problem -> LearningRate
-problemLearningRate (Problem _ _ l _) = l
+problemLearningRate (Problem _ _ _ l _) = l
 
 problemNumIterations :: Problem -> NumIterations
-problemNumIterations (Problem _ _ _ i) = i
+problemNumIterations (Problem _ _ _ _ i) = i
 
-createProblem :: NeuralNetDefinition -> ExampleSet -> LearningRate -> NumIterations -> Problem
-createProblem def examples learningRate numIterations
-  | not (isExampleSetCompatibleWithNNDef examples def) = error "Examples not compatible with nn"
+createProblem :: NeuralNetDefinition -> ExampleSet -> ExampleSet -> LearningRate -> NumIterations -> Problem
+createProblem def trainSet testSet learningRate numIterations
+  | not (isExampleSetCompatibleWithNNDef trainSet def) = error "trainSet not compatible with nn"
+  | not (isExampleSetCompatibleWithNNDef testSet def) = error "testSet not compatible with nn"
   | numIterations <= 0 = error "Must provide positive numIterations"
   | learningRate <= 0 = error "Must provide positive learningRate"
-  | otherwise = Problem def examples learningRate numIterations
+  | otherwise = Problem def trainSet testSet learningRate numIterations
 
-runProblem :: StdGen -> Problem -> (NeuralNet, [RunStep])
-runProblem g p = (resultNN, allSteps)
+runProblem :: WeightInitialiser -> Problem -> (Double -> Double -> Bool) -> (NeuralNet, [RunStep])
+runProblem g p accuracyCheck = (resultNN, tail allSteps)
   where
     startNN = initNN g (problemNNDef p)
-    startStep = RunStep 0 1
+    startStep = RunStep 0 1 0
     iterations = [1..(problemNumIterations p)]
-    allNNAndSteps = reverse (foldl (\steps@((nn,_):_) i -> runProblemStep p i nn : steps) [(startNN, startStep)] iterations)
+    allNNAndSteps = reverse (foldl (\steps@((nn,_):_) i -> runProblemStep p i nn accuracyCheck : steps) [(startNN, startStep)] iterations)
    allSteps = map snd allNNAndSteps
    resultNN = fst (last allNNAndSteps)
 
-runProblemStep :: Problem -> IterationNum -> NeuralNet -> (NeuralNet, RunStep)
-runProblemStep p i nn = (newNN, RunStep i cost)
+runProblemStep :: Problem -> IterationNum -> NeuralNet -> (Double -> Double -> Bool) -> (NeuralNet, RunStep)
+runProblemStep p i nn accuracyCheck = (newNN, RunStep i cost accuracy)
   where
-    exampleSet = problemExampleSet p
+    exampleSet = problemTrainSet p
     forwardSteps = nnForwardSet nn exampleSet
     al = forwardPropA (last forwardSteps)
-    cost = computeCost al (exampleSetY exampleSet)
+    y = exampleSetY exampleSet
+    cost = computeCost al y
     grads = nnBackward nn forwardSteps exampleSet
     newNN = updateNNParams nn grads (problemLearningRate p)
+    accuracy = fromIntegral (length (filter (uncurry accuracyCheck) (zip (toList al) (toList y)))) / fromIntegral (ncols y)
diff --git a/test/NeuralNet/NetSpec.hs b/test/NeuralNet/NetSpec.hs
index 59afab9..ed3809a 100644
--- a/test/NeuralNet/NetSpec.hs
+++ b/test/NeuralNet/NetSpec.hs
@@ -26,7 +26,8 @@ createdLayer a s l = activationEq && wSizeEq && bCorrect && allUnique m
   wSizeEq = matrixSize m == s
 
 netSpec :: StdGen -> SpecWith ()
-netSpec g =
+netSpec stdGen = do
+  let g = Random stdGen
   describe "NeuralNet.Net" $ do
     describe "initNN" $ do
       it "returns correctly for small definition" $
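Putting the pieces together, the training run the README invokes through the CLI can also be driven from code. A minimal sketch, assuming the project modules above are on the import path (paths, hyperparameters, and the `Const 0` initialiser mirror the README's `-c -l 0.005 -i 1000` example):

```haskell
import NeuralNet.Activation
import NeuralNet.Example
import NeuralNet.External
import NeuralNet.Net
import NeuralNet.Problem

main :: IO ()
main = do
  trainSet <- loadExampleSet "examples/simple-csv/train.csv"
  testSet <- loadExampleSet "examples/simple-csv/test.csv"
  -- Const 0 mirrors the -c flag: deterministic zero initial weights.
  let nnDef = createLogRegDefinition (exampleSetN testSet) Sigmoid
      problem = createProblem nnDef trainSet testSet 0.005 1000
      (nn, steps) = runProblem (Const 0) problem (\yh y -> ((round yh) :: Int) == round y)
  print (runStepCost (last steps)) -- cost after the final iteration
  print nn
```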