C#: Add CreateFromMemory to FixedBufferOnnxValue to allow binding user buffers and passing custom binary-compatible types (#5886)

Add CreateFromMemory to FixedBufferOnnxValue so users can bind their own custom binary-compatible buffers to feed/fetch data.
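For orientation, a minimal conceptual sketch of the new API follows (the session, tensor names, and shapes below are illustrative assumptions, not part of this commit):

// Hypothetical sketch: feed/fetch float data through user-owned buffers.
// Assumes an open InferenceSession "session" with one float input "data"
// and one float output "softmaxout_1"; adjust names and shapes to your model.
float[] inputData = new float[1 * 3 * 224 * 224];
long[] inputShape = { 1, 3, 224, 224 };
float[] outputData = new float[1 * 1000];
long[] outputShape = { 1, 1000 };

var memInfo = OrtMemoryInfo.DefaultInstance; // CPU

using (var inputValue = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo,
    inputData, TensorElementType.Float, inputShape, inputData.Length * sizeof(float)))
using (var outputValue = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo,
    outputData, TensorElementType.Float, outputShape, outputData.Length * sizeof(float)))
{
    session.Run(new[] { "data" }, new[] { inputValue },
        new[] { "softmaxout_1" }, new[] { outputValue });
    // Results are now in outputData; no copy out of the session is needed.
}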
yuslepukhin authored Nov 24, 2020
1 parent 705d093 commit c2d6100
Showing 8 changed files with 173 additions and 34 deletions.
90 changes: 87 additions & 3 deletions csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
@@ -1,11 +1,14 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Buffers;

namespace Microsoft.ML.OnnxRuntime
{
/// <summary>
/// Represents an Onnx Value with its underlying buffer pinned
/// Represents an OrtValue with its underlying buffer pinned
/// </summary>
public class FixedBufferOnnxValue : IDisposable
{
@@ -28,11 +31,14 @@ private FixedBufferOnnxValue(MemoryHandle pinnedMemory, OrtValue ortValue, OnnxV
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="value"></param>
/// <returns></returns>
/// <returns>a disposable instance of FixedBufferOnnxValue</returns>
public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
{
MemoryHandle? memHandle;
var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
// memHandle will have a value when CreateFromTensorObject() pins managed memory, and it will have to be
// disposed of (unpinned) when all is said and done. This is the case for blittable types, but does not
// happen for the string type, where each element has its own allocation.
if (memHandle.HasValue)
{
return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
@@ -43,6 +49,84 @@ public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
}
}

/// <summary>
/// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
/// on top of a buffer. Internally, it will pin the managed buffer and create
/// an OrtValue containing a tensor that does not own the memory.
/// Such an instance of FixedBufferOnnxValue can be used as both input and output in the InferenceSession.Run()
/// overloads. As compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
/// that are blittable as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types
/// i.e. those that have the same binary representation as the corresponding native type. This includes all
/// currently supported types, but may also allow custom types for Float16 and BFloat16, provided they have the
/// same layout and size. The resulting instance must be disposed of to release the pinned memory and deallocate
/// the native OrtValue. See the example below.
/// </summary>
/// <typeparam name="T">Blittable data type, compatible with supported types</typeparam>
/// <param name="memoryInfo">memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance</param>
/// <param name="memory"></param>
/// <param name="elementType">TensorElementType</param>
/// <param name="shape">shape of the tensor to be created</param>
/// <param name="bytesSize">size of the allocation in bytes</param>
/// <returns>a disposable instance of FixedBufferOnnxValue</returns>
/// <example>
/// Here is an example of using a 3rd party library class for processing float16/bfloat16.
/// Currently, to pass tensor data and create a tensor, one must copy the data into Float16/BFloat16 structures
/// so DenseTensor can recognize it.
///
/// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory
/// representation matches the native one and its size is 16 bits, you can use the following conceptual example
/// to feed/fetch data for inference using a Half array. This allows you to avoid copying data from your Half[] to Float16[].
///
/// \code{.cs}
/// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
/// Half[] input = new Half[] { 5646, 12345 };
/// var input_shape = new long[] {input.Length};
/// Half[] output = new Half[40]; // The output length/shape must match what the model produces
/// var output_shape = new long[] {output.Length};
///
/// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
///
/// using (var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
///     input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
/// using (var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
///     output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
/// {
///     FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[] { fixedBufferInput };
///     FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[] { fixedBufferOutput };
///     session.Run(inputNames, inputValues, outputNames, outputValues);
///     // Output is now in output[]
/// }
/// \endcode
/// </example>
public static FixedBufferOnnxValue CreateFromMemory<T>(OrtMemoryInfo memoryInfo, Memory<T> memory,
TensorElementType elementType, long[] shape, long bytesSize)
{
if (elementType == TensorElementType.String)
{
throw new ArgumentException("String data type is not supported");
}

var memHandle = memory.Pin();
try
{
IntPtr memPtr;
unsafe
{
memPtr = (IntPtr)memHandle.Pointer;
}
var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
elementType,
shape,
memPtr, bytesSize);
return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
}
catch (Exception)
{
// Unpin the buffer on failure and rethrow, preserving the original stack trace.
memHandle.Dispose();
throw;
}
}

#region IDisposable Support

/// <summary>
@@ -51,7 +135,7 @@ public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
/// <param name="disposing">true if invoked from Dispose()</param>
protected virtual void Dispose(bool disposing)
{
if(_disposed)
if (_disposed)
{
return;
}
6 changes: 3 additions & 3 deletions csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -455,7 +455,7 @@ public void Run(
/// Create OrtIoBinding instance to bind pre-allocated buffers
/// to input/output
/// </summary>
/// <returns></returns>
/// <returns>A new instance of OrtIoBinding</returns>
public OrtIoBinding CreateIoBinding()
{
return new OrtIoBinding(this);
@@ -469,8 +469,8 @@ public OrtIoBinding CreateIoBinding()
/// the expense of fetching them and pairing with names.
/// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues()
/// </summary>
/// <param name="runOptions"></param>
/// <param name="ioBinding"></param>
/// <param name="runOptions">runOptions</param>
/// <param name="ioBinding">ioBinding instance to use</param>
public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
1 change: 0 additions & 1 deletion csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs
@@ -2,7 +2,6 @@
// Licensed under the MIT License.

using System;
using System.Runtime.InteropServices;

namespace Microsoft.ML.OnnxRuntime
{
15 changes: 13 additions & 2 deletions csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
@@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
/// <summary>
/// This class enables binding inputs and outputs to pre-allocated
/// memory. This opens up interesting scenarios. For example, if your input
/// already resides in some pre-allocated memory even if on a device you bind
/// already resides in some pre-allocated memory like GPU, you can bind
/// that piece of memory to an input name and shape and onnxruntime will use that as input.
/// Other traditional inputs can also be bound that already exists as Tensors
/// Other traditional inputs can also be bound that already exists as Tensors.
///
/// Note that this arrangement is designed to minimize data copies, and to that effect
/// your memory allocations must match what is expected by the model, whether you run on
/// CPU or GPU. A data copy will still be made if your pre-allocated memory location does not
/// match the one expected by the model. However, copies with OrtIoBinding are made only once,
/// at the time of binding, not at run time. This means that if your input data required a copy,
/// further modifications to it would not be seen by onnxruntime unless you rebind it, even if it is
/// the same buffer. If your scenario requires the data to be copied on every run, OrtIoBinding may
/// not be the best match for your use case.
///
/// The fact that no data copy is made at run time also has positive performance implications.
/// </summary>
public class OrtIoBinding : SafeHandle
{
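To make the bind-once copy semantics described above concrete, here is a minimal conceptual sketch (the session, buffers, names, and shapes are assumed for illustration; error handling omitted):

// Hypothetical sketch: bind pre-allocated buffers once, then run repeatedly
// without per-run copies. inputData/outputData and the shapes are assumed to exist.
using (var runOptions = new RunOptions())
using (var ioBinding = session.CreateIoBinding())
using (var inputValue = FixedBufferOnnxValue.CreateFromMemory<float>(OrtMemoryInfo.DefaultInstance,
    inputData, TensorElementType.Float, inputShape, inputData.Length * sizeof(float)))
using (var outputValue = FixedBufferOnnxValue.CreateFromMemory<float>(OrtMemoryInfo.DefaultInstance,
    outputData, TensorElementType.Float, outputShape, outputData.Length * sizeof(float)))
{
    ioBinding.BindInput("data", inputValue);        // any required copy happens here, at bind time
    ioBinding.BindOutput("softmaxout_1", outputValue);
    session.RunWithBinding(runOptions, ioBinding);  // no copies at run time
    // If binding required a copy and inputData is mutated afterwards,
    // rebind the input before the next run so onnxruntime sees the change.
}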
6 changes: 3 additions & 3 deletions csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
@@ -90,14 +90,14 @@ internal IntPtr Disown()
public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
long[] shape,
IntPtr dataBuffer,
uint bufferLength)
long bufferLength)
{
Type type;
int width;
TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
if(width == 0)
if(width < 1)
{
throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
}

var shapeSize = ArrayUtilities.GetSizeForShape(shape);
2 changes: 0 additions & 2 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -4,8 +4,6 @@
using System;
using System.Runtime.InteropServices;
using System.Text;
using System.Runtime.InteropServices;
using System.IO;

namespace Microsoft.ML.OnnxRuntime
{
73 changes: 58 additions & 15 deletions csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -229,16 +229,26 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
{
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

// Set the graph optimization level for this session.
SessionOptions options = new SessionOptions();
options.GraphOptimizationLevel = graphOptimizationLevel;
if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;

using (var session = new InferenceSession(modelPath, options))
using (var cleanUp = new DisposableList<IDisposable>())
{
// Set the graph optimization level for this session.
SessionOptions options = new SessionOptions();
options.GraphOptimizationLevel = graphOptimizationLevel;
if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
cleanUp.Add(options);

var session = new InferenceSession(modelPath, options);
cleanUp.Add(session);

var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var container = new List<NamedOnnxValue>();

float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
ReadOnlySpan<int> expectedOutputDimensions = expectedDimensions;
string[] expectedOutputNames = new string[] { "softmaxout_1" };

float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model

foreach (var name in inputMeta.Keys)
Expand All @@ -249,8 +259,6 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
}

ReadOnlySpan<int> expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
string[] expectedOutputNames = new string[] { "softmaxout_1" };

// Run inference with named inputs and outputs created within Run()
using (var results = session.Run(container)) // results is an IReadOnlyList<NamedOnnxValue> container
@@ -291,9 +299,40 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
}
}

// Run inference with inputs and outputs pinned from user-supplied buffers
using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
using (var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
{
var memInfo = OrtMemoryInfo.DefaultInstance; // CPU

// Create inputs
Assert.Single(inputMeta.Keys);
var inputNames = inputMeta.Keys.ToArray();
var inputName = inputNames[0];
Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
Assert.True(inputMeta[inputName].IsTensor);
var longShape = Array.ConvertAll<int, long>(inputMeta[inputName].Dimensions, d => d);
var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
TensorElementType.Float, longShape, byteSize));


// Prepare output buffer
Assert.Single(outputMeta.Keys);
var outputNames = outputMeta.Keys.ToArray();
var outputName = outputNames[0];
Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
Assert.True(outputMeta[outputName].IsTensor);
longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
float[] outputBuffer = new float[expectedOutput.Length];
pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputBuffer,
TensorElementType.Float, longShape, byteSize));

session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
}

float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
// Run inference with named inputs and named outputs
{
// correct pre-allocated outputs
@@ -1954,6 +1993,10 @@ private void TestIOBinding()
var inputTensor = tuple.Item3;
var outputData = tuple.Item4;
dispList.Add(session);
var runOptions = new RunOptions();
dispList.Add(runOptions);

var inputMeta = session.InputMetadata;
var outputMeta = session.OutputMetadata;
var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);

@@ -1967,8 +2010,8 @@
{
var cyrName = "несуществующийВыход";
var longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, i => i);
ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
string[] outputs = ioBinding.GetOutputNames();
Assert.Equal(2, outputs.Length);
Assert.Equal(outputName, outputs[0]);
@@ -1982,7 +2025,7 @@
{
ioBinding.BindInput(inputName, fixeInputBuffer);
ioBinding.BindOutput(outputName, fixedOutputBuffer);
using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.First();
@@ -2000,7 +2043,7 @@
ioBinding.BindInput(inputName, fixedInputBuffer);
ioBinding.BindOutputToDevice(outputName, allocator.Info);

using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
{
Assert.Equal(1, outputs.Count);
var output = outputs.First();
@@ -2040,7 +2083,7 @@ private void TestWeightSharingBetweenSessions()
}
var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);

SessionOptions options = new SessionOptions();
options.AddInitializer("W", sharedInitializer);