Onnxruntime fuzzing (microsoft#4341)

* Added the protobuf mutator library as a git submodule
* Added files and instructions to build the protobuf mutator library in CMake
* Added a fuzzing flag to the build system and added the fuzzing dependency library. To run the fuzzing test, use the flags --fuzz_testing --build_shared_lib --use_full_protobuf --cmake_generator 'Visual Studio 16 2019'
* Added source files and build instructions for the main fuzzing engine
* Removed the random number generation test from inside the engine
* Added license headers to files
* Removed all PEP 8 violations introduced by this change, as well as other E501 violations
seungillee · Jul 6, 2020 · 632b289 · 632b289
1 parent ec35a1b
commit 632b289
Show file tree

Hide file tree

Showing 13 changed files with 1,699 additions and 24 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -62,3 +62,6 @@
 [submodule "cmake/external/onnx-tensorrt"]
 	path = cmake/external/onnx-tensorrt
 	url = https://github.com/stevenlix/onnx-tensorrt.git
+[submodule "cmake/external/libprotobuf-mutator"]
+	path = cmake/external/libprotobuf-mutator
+	url = https://github.com/google/libprotobuf-mutator.git
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -113,6 +113,18 @@ option(onnxruntime_ENABLE_TRAINING_E2E_TESTS "Enable training end-to-end tests."
 option(onnxruntime_USE_HOROVOD "Build with HOROVOD support" OFF)
 option(onnxruntime_USE_NCCL "Build with NCCL support" ON)
 
+# Options for security fuzzing.
+# The build configuration for fuzz testing lives in onnxruntime_fuzz_test.cmake.
+option(onnxruntime_FUZZ_TEST "Enable Fuzz testing" OFF)
+
+# Fuzz testing has only been tested with the BUILD_SHARED_LIB option,
+# using the MSVC compiler on Windows. Its dependency, protobuf-mutator,
+# also needs the onnx messages to be compiled against the full
+# ("non-lite") protobuf runtime.
+# onnxruntime_FUZZ_ENABLED is only set when every prerequisite holds.
+if(onnxruntime_FUZZ_TEST)
+  if(MSVC AND WIN32 AND onnxruntime_BUILD_SHARED_LIB AND onnxruntime_USE_FULL_PROTOBUF)
+    set(onnxruntime_FUZZ_ENABLED ON)
+  else()
+    # Tell the user why no fuzzing targets will be generated instead of
+    # silently skipping them.
+    message(WARNING
+      "onnxruntime_FUZZ_TEST is ON but fuzz testing is disabled: it requires MSVC on Windows "
+      "with onnxruntime_BUILD_SHARED_LIB=ON and onnxruntime_USE_FULL_PROTOBUF=ON.")
+  endif()
+endif()
+
 if (onnxruntime_ENABLE_NVTX_PROFILE)
   add_definitions(-DENABLE_NVTX_PROFILE=1)
 endif()

diff --git a/cmake/external/libprotobuf-mutator b/cmake/external/libprotobuf-mutator
diff --git a/cmake/onnxruntime_fuzz_test.cmake b/cmake/onnxruntime_fuzz_test.cmake
@@ -0,0 +1,64 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# Builds the security fuzzing engine (onnxruntime_security_fuzz) together with
+# its protobuf-mutator dependency.
+# Everything below is skipped unless onnxruntime_FUZZ_ENABLED is set, i.e.
+# unless the options required by the fuzzing project are all satisfied.
+if (onnxruntime_FUZZ_ENABLED)
+  message(STATUS "Building dependency protobuf-mutator and libfuzzer")
+
+  # Set the options used to control the protobuf-mutator build.
+  set(PROTOBUF_LIBRARIES "$<TARGET_FILE:libprotobuf>")
+  set(PROTOBUF_INCLUDE_DIRS "$<TARGET_PROPERTY:libprotobuf,INCLUDE_DIRECTORIES>")
+  set(LIB_PROTO_MUTATOR_TESTING OFF)
+
+  # Include the protobuf-mutator src/CMakeLists.txt rather than the project's
+  # top-level CMakeLists.txt to avoid target clashes with google test.
+  add_subdirectory("external/libprotobuf-mutator/src")
+
+  # Add the include directories and compilation flags needed by the
+  # protobuf-mutator and protobuf-mutator-libfuzzer targets.
+  # The path is spelled out in full so it does not depend on how relative
+  # include directories are resolved.
+  set(PROTOBUF_MUT_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/external/libprotobuf-mutator")
+  target_include_directories(protobuf-mutator
+    PRIVATE ${PROTOBUF_INCLUDE_DIRS} ${PROTOBUF_MUT_INCLUDE_DIRS})
+  target_include_directories(protobuf-mutator-libfuzzer
+    PRIVATE ${PROTOBUF_INCLUDE_DIRS} ${PROTOBUF_MUT_INCLUDE_DIRS})
+  target_compile_options(protobuf-mutator
+    PRIVATE "/wd4244" "/wd4245" "/wd4267" "/wd4100" "/wd4456")
+  target_compile_options(protobuf-mutator-libfuzzer
+    PRIVATE "/wd4146" "/wd4267")
+
+  # Fuzzing engine build configuration.
+  message(STATUS "Building Fuzzing engine")
+
+  # Root directory of the fuzzing sources.
+  set(SEC_FUZZ_ROOT ${TEST_SRC_DIR}/fuzzing)
+
+  # Source files of the security fuzzing engine. Every entry carries its
+  # extension: extension-less source names are rejected once policy CMP0115
+  # is NEW.
+  set(SEC_FUZ_SRC
+    "${SEC_FUZZ_ROOT}/src/BetaDistribution.cpp"
+    "${SEC_FUZZ_ROOT}/src/OnnxPrediction.cpp"
+    "${SEC_FUZZ_ROOT}/src/testlog.cpp"
+    "${SEC_FUZZ_ROOT}/src/test.cpp")
+
+  # Compile the executable.
+  add_executable(onnxruntime_security_fuzz ${SEC_FUZ_SRC})
+
+  # Compile with C++17. PRIVATE because nothing links against an executable.
+  target_compile_features(onnxruntime_security_fuzz PRIVATE cxx_std_17)
+
+  # Pick up the include directories of the libraries the engine uses.
+  onnxruntime_add_include_to_target(onnxruntime_security_fuzz libprotobuf onnx onnxruntime)
+
+  # Collect the engine's own headers and the protobuf-mutator include
+  # directories in one variable and add them to the fuzzing engine.
+  set(SEC_FUZ_INC "${SEC_FUZZ_ROOT}/include")
+  set(INCLUDE_FILES ${SEC_FUZ_INC} "$<TARGET_PROPERTY:protobuf-mutator,INCLUDE_DIRECTORIES>")
+  target_include_directories(onnxruntime_security_fuzz PRIVATE ${INCLUDE_FILES})
+
+  # Link the engine. Linking against these targets also makes them build
+  # before the engine, so no separate add_dependencies() call is needed.
+  target_link_libraries(onnxruntime_security_fuzz
+    PRIVATE libprotobuf onnx_proto onnxruntime protobuf-mutator)
+
+  # Copy the onnxruntime and libprotobuf binaries next to the executable.
+  add_custom_command(TARGET onnxruntime_security_fuzz POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "$<TARGET_FILE:onnxruntime>" "$<TARGET_FILE_DIR:onnxruntime_security_fuzz>"
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            "$<TARGET_FILE:libprotobuf>" "$<TARGET_FILE_DIR:onnxruntime_security_fuzz>"
+    VERBATIM)
+endif()
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -882,3 +882,5 @@ if (onnxruntime_BUILD_JAVA)
     endif()
     set_property(TEST onnxruntime4j_test APPEND PROPERTY DEPENDS onnxruntime4j_jni)
 endif()
+
+# Pull in the security fuzzing build script. Its whole body is guarded by
+# onnxruntime_FUZZ_ENABLED, so this adds nothing when that variable is unset.
+include(onnxruntime_fuzz_test.cmake)
diff --git a/onnxruntime/test/fuzzing/include/BetaDistribution.h b/onnxruntime/test/fuzzing/include/BetaDistribution.h
@@ -0,0 +1,271 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifndef __BETADISTRIBUTION_H__
+#define __BETADISTRIBUTION_H__
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <cstddef>
+#include <limits>
+#include <map>
+#include <random>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+// Generates values that follow a Beta(alpha, beta) distribution stretched
+// over the range [beginRange, endRange].
+//
+// A default constructed instance uses alpha = 0.5 and beta = 0.5 over
+// the range [0, 1], i.e. a standard Beta distribution.
+//
+// result_type - arithmetic type of the generated values.
+// generator   - random engine type. It must be callable with no arguments
+//               and expose static min() and max() members, as the
+//               standard <random> engines do.
+//
+template< typename result_type,
+  typename generator = std::default_random_engine>
+class BetaDistribution
+{
+public:
+    // The type used for internal calculations.
+    // Should be big enough to avoid overflows
+    // internally.
+    //
+    using calc_type = long double;
+
+    // The type used to store the parameters of
+    // the distribution, keyed by "alpha" and "beta".
+    //
+    using param_type = std::map<std::string, float>;
+
+    // Create a BetaDistribution with all the necessary parameters.
+    // Throws std::runtime_error when endRange < beginRange.
+    //
+    BetaDistribution(float alpha, float beta,
+        result_type beginRange, result_type endRange)
+    : m_alpha(alpha),
+    m_beta(beta),
+    m_beginRange(beginRange),
+    m_endRange(endRange)
+    {
+        init();
+    }
+
+    // Create a BetaDistribution specifying only the range,
+    // with default parameters alpha = 0.5 and beta = 0.5.
+    // Throws std::runtime_error when endRange < beginRange.
+    //
+    BetaDistribution(result_type beginRange, result_type endRange)
+    : m_beginRange(beginRange),
+    m_endRange(endRange)
+    {
+        init();
+    }
+
+    // Create a default standard BetaDistribution over [0, 1] with a
+    // shape defined by alpha = 0.5 and beta = 0.5.
+    //
+    explicit BetaDistribution(){}
+
+    // Get the parameters of the distribution
+    //
+    param_type param() const
+    {
+        return param_value;
+    }
+
+
+    // Get the lowest value that can be generated
+    //
+    result_type min() const
+    {
+        return m_beginRange;
+    }
+
+    // Get the highest value that can be generated
+    //
+    result_type max() const
+    {
+        return m_endRange;
+    }
+
+    // Generate a number from beginRange to endRange.
+    //
+    // Draws sample_size candidates from the generator and returns the
+    // candidate with the highest Beta density.
+    // Throws std::runtime_error when the range is empty or does not fit
+    // in calc_type, and std::invalid_argument when the density cannot be
+    // evaluated for the configured alpha/beta.
+    //
+    result_type operator()(generator& gen)
+    {
+        const calc_type lowest_value = static_cast<calc_type>(min());
+        const calc_type highest_value = static_cast<calc_type>(max());
+
+        // Start from a value inside the range so that the result stays
+        // valid even if no candidate ever has a positive density.
+        //
+        calc_type highest_probability = 0.0;
+        calc_type likely_number = lowest_value;
+        for(int i=0; i < sample_size; i++)
+        {
+            const calc_type sample = convert_to_fixed_range(gen);
+            const calc_type probability = distribution(sample);
+
+            // A new sample number with a higher probability has been found
+            //
+            if (probability > highest_probability)
+            {
+                highest_probability = probability;
+                likely_number = sample;
+            }
+        }
+
+        // The boundaries are returned as-is: converting them through
+        // calc_type may round them past what result_type can represent.
+        //
+        if (likely_number <= lowest_value)
+        {
+            return min();
+        }
+        if (likely_number >= highest_value)
+        {
+            return max();
+        }
+
+        return static_cast<result_type>(likely_number);
+    }
+
+private:
+    // Internal configuration shared across all constructors:
+    // validates the range and records alpha/beta in param_value.
+    //
+    void init()
+    {
+        if (m_endRange < m_beginRange)
+        {
+            throw std::runtime_error("endRange Must be greater than begin range");
+        }
+        param_value["alpha"] = m_alpha;
+        param_value["beta"] = m_beta;
+    }
+
+    // Calculates the value of the gamma function at x.
+    // Throws std::invalid_argument when x is not strictly positive.
+    //
+    template<typename gamma_input_type>
+    double calculate_gamma(gamma_input_type x)
+    {
+        static_assert(std::is_arithmetic<gamma_input_type>::value,
+                        "Input to calculate_gamma must be arithmetic");
+        if (x < 0)
+        {
+            throw std::invalid_argument("No implementation for gamma less than 0");
+        }
+        if (x == 0)
+        {
+            throw std::invalid_argument("Gamma is not defined at 0");
+        }
+
+        // std::tgamma covers the special values (0.5, 1), the integer
+        // (factorial) case and every other positive input.
+        //
+        return std::tgamma(static_cast<double>(x));
+    }
+
+    // Generate the probability of having this number, i.e. the Beta
+    // density at randVar. Returns 0 outside [min(), max()].
+    //
+    inline calc_type distribution(calc_type randVar)
+    {
+        if (randVar > max() || randVar < min())
+        {
+            return 0;
+        }
+        const calc_type range = static_cast<calc_type>(max()) - static_cast<calc_type>(min());
+        const calc_type scale = 1.0 / range;
+
+        // Normalisation constant gamma(a + b) / (gamma(a) * gamma(b)).
+        //
+        const calc_type gamma_sum = calculate_gamma(m_alpha + m_beta);
+        const calc_type gamma_alpha = calculate_gamma(m_alpha);
+        const calc_type gamma_beta = calculate_gamma(m_beta);
+        const calc_type normalisation = gamma_sum / (gamma_alpha * gamma_beta);
+
+        const calc_type lower_term = std::pow((randVar - min()) / range, m_alpha - 1);
+        const calc_type upper_term = std::pow((max() - randVar) / range, m_beta - 1);
+
+        return scale * normalisation * lower_term * upper_term;
+    }
+
+    // Used to convert the number that generator produces
+    // to the range specified.
+    // For example, the default BetaDistribution
+    // generates number between 0..1. Hence this function
+    // will convert 0..N produced by the generator to 0..1.
+    //
+    calc_type convert_to_fixed_range(generator& gen)
+    {
+        // Find a number in the generator space
+        //
+        const calc_type x = static_cast<calc_type>(gen());
+
+        // Position of x inside the span the generator can actually
+        // produce. generator::min()/max() are used rather than the limits
+        // of generator::result_type because an engine is not required to
+        // cover its whole result type.
+        //
+        const calc_type generator_min = static_cast<calc_type>(generator::min());
+        const calc_type generator_max = static_cast<calc_type>(generator::max());
+        const calc_type ratio = (x - generator_min) / (generator_max - generator_min);
+
+        // Convert the number to the range [beginRange, endRange]
+        //
+        const calc_type new_range = static_cast<calc_type>(max()) - static_cast<calc_type>(min());
+        if (new_range <= 0 || !std::isfinite(new_range))
+        {
+        throw std::runtime_error(
+            "Overflow error: The range of the Beta distribution is to big to fit into the result_type.\n"\
+            "Consider using the standard and then scaling to the desired range.");
+        }
+
+        return (ratio * new_range) + min();
+    }
+
+private:
+  // Number of candidates drawn for every generated value.
+  static constexpr int sample_size = 2;
+  float m_alpha = 0.5;
+  float m_beta = 0.5;
+  result_type m_beginRange = 0;
+  result_type m_endRange = 1;
+  // Declared after m_alpha and m_beta so that it is initialized from
+  // their final values.
+  param_type param_value =
+  {
+    std::pair<std::string, float>{"alpha", m_alpha},
+    std::pair<std::string, float>{"beta", m_beta}
+  };
+};
+
+// Test to visualize the distribution
+//
+void unittestBetaDistribution();
+
+// Test to generate Random data
+// and verify its distribution.
+//
+void unittestGenerateRandomData();
+
+// Builds a vector of numElementsToGenerate values drawn from a
+// BetaDistribution with its default shape parameters.
+//
+// initialValue - value the vector is filled with before every element
+//                is overwritten by a generated value
+// numElementsToGenerate - Number of elements to generate
+// seed - seed of the random engine (converted to unsigned int)
+//
+template<typename ONNX_ELEMENT_VALUE_TYPE>
+std::vector<ONNX_ELEMENT_VALUE_TYPE>
+GenerateRandomData(ONNX_ELEMENT_VALUE_TYPE initialValue, size_t numElementsToGenerate, size_t seed)
+{
+  using value_limits = std::numeric_limits<ONNX_ELEMENT_VALUE_TYPE>;
+
+  // The distribution spans numeric_limits::min() to numeric_limits::max()
+  // of the element type. With the default shape the generated values are
+  // likely to sit at the extremes of that range.
+  //
+  BetaDistribution<ONNX_ELEMENT_VALUE_TYPE> fullRangeDistribution(
+    value_limits::min(), value_limits::max());
+
+  std::default_random_engine engine(static_cast<unsigned int>(seed));
+
+  // Pre-size the output, then replace every slot with a generated value.
+  //
+  std::vector<ONNX_ELEMENT_VALUE_TYPE> generated(numElementsToGenerate, initialValue);
+  for(auto slot = generated.begin(); slot != generated.end(); ++slot)
+  {
+    *slot = fullRangeDistribution(engine);
+  }
+
+  return generated;
+}
+#endif
-Original file line number
+Diff line change
@@ Expand Up / @@ -882,3 +882,5 @@ if (onnxruntime_BUILD_JAVA) @@
         endif()
         set_property(TEST onnxruntime4j_test APPEND PROPERTY DEPENDS onnxruntime4j_jni)
     endif()
+    # Pull in the security fuzzing build script. Its whole body is guarded by
+    # onnxruntime_FUZZ_ENABLED, so this adds nothing when that variable is unset.
+    include(onnxruntime_fuzz_test.cmake)