Skip to content

Commit

Permalink
Use the new top-k implementation in Accelerate (#21)
Browse files Browse the repository at this point in the history
* - added new topk implementation
- added accuracy to testing method
- added tests

* one more assert

* - added deallocate array descriptors
- try! instead of try? when applying topk function
  • Loading branch information
jkrukowski authored Nov 25, 2023
1 parent d08c506 commit a55706b
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 27 deletions.
13 changes: 7 additions & 6 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,19 @@ let package = Package(
.package(url: "https://github.com/apple/swift-argument-parser.git", exact: "1.2.0")
],
targets: [
.target(name: "Hub", resources: [.process("FallbackConfigs")]),
.target(name: "Tokenizers", dependencies: ["Hub"]),
.target(name: "TensorUtils"),
.target(name: "Generation", dependencies: ["Tokenizers", "TensorUtils"]),
.target(name: "Models", dependencies: ["Tokenizers", "Generation", "TensorUtils"]),
.executableTarget(
name: "TransformersCLI",
dependencies: [
"Models", "Generation", "Tokenizers",
.product(name: "ArgumentParser", package: "swift-argument-parser")]),
.target(name: "Hub", resources: [.process("FallbackConfigs")]),
.target(name: "Tokenizers", dependencies: ["Hub"]),
.target(name: "TensorUtils"),
.target(name: "Generation", dependencies: ["Tokenizers", "TensorUtils"]),
.target(name: "Models", dependencies: ["Tokenizers", "Generation", "TensorUtils"]),
.testTarget(name: "TokenizersTests", dependencies: ["Tokenizers", "Models"], resources: [.process("Resources"), .process("Vocabs")]),
.testTarget(name: "HubTests", dependencies: ["Hub"]),
.testTarget(name: "PreTokenizerTests", dependencies: ["Tokenizers", "Hub"])
.testTarget(name: "PreTokenizerTests", dependencies: ["Tokenizers", "Hub"]),
.testTarget(name: "TensorUtilsTests", dependencies: ["TensorUtils"])
]
)
61 changes: 40 additions & 21 deletions Sources/TensorUtils/Math.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ public struct Math {
return (Int(maxIndex), maxValue)
}


/// MLMultiArray helper.
/// Works in our specific use case.
public static func argmax(_ multiArray: MLMultiArray) -> (Int, Double) {
Expand All @@ -76,27 +75,47 @@ public struct Math {
/// and their softmaxed probabilities.
///
/// - Parameters:
///   - arr: Scores to select from.
///   - k: Number of entries to keep. Values larger than `arr.count` are clamped;
///        values `<= 0` (or an empty `arr`) yield an empty result.
/// - Returns: The selected positions in `arr` (as reported by `BNNS.applyTopK`)
///   and the softmax of the corresponding values, in the same order.
public static func topK(arr: [Float], k: Int) -> (indexes: [Int], probs: [Float]) {
    // Clamp first, then reject every degenerate request in one place: an empty
    // input makes the clamped k zero, and a non-positive k must never reach
    // BNNS, where it would produce an invalid (empty/negative) vector shape.
    let k = min(k, arr.count)
    guard k > 0 else {
        return (indexes: [], probs: [])
    }

    let arrDescriptor = BNNSNDArrayDescriptor.allocate(
        initializingFrom: arr,
        shape: .vector(arr.count)
    )
    defer {
        arrDescriptor.deallocate()
    }

    // Output buffers: BNNS writes the k selected values and their Int32 indices.
    let bestIndices = BNNSNDArrayDescriptor.allocateUninitialized(
        scalarType: Int32.self,
        shape: .vector(k)
    )
    defer {
        bestIndices.deallocate()
    }
    let bestValues = BNNSNDArrayDescriptor.allocateUninitialized(
        scalarType: Float.self,
        shape: .vector(k)
    )
    defer {
        bestValues.deallocate()
    }

    // The inputs were validated above, so a thrown error here is treated as a
    // programming error rather than a recoverable condition (the function is
    // non-throwing by contract).
    try! Accelerate.BNNS.applyTopK(
        k: k,
        input: arrDescriptor,
        bestValues: bestValues,
        bestIndices: bestIndices,
        axis: 0,
        batchSize: 1,
        filterParameters: nil
    )

    guard let valuesBase = bestValues.data, let indicesBase = bestIndices.data else {
        preconditionFailure("BNNS top-k output descriptors have no backing storage")
    }

    // Copy the results out before the deferred deallocations run.
    let distances = valuesBase.withMemoryRebound(to: Float.self, capacity: k) { ptr in
        Array(UnsafeBufferPointer(start: ptr, count: k))
    }
    let indices = indicesBase.withMemoryRebound(to: Int32.self, capacity: k) { ptr in
        Array(UnsafeBufferPointer(start: ptr, count: k))
    }
    return (indexes: indices.map { Int($0) }, probs: softmax(distances))
}

/// Multinomial sampling from an array of probs. Works well with topK
Expand Down
100 changes: 100 additions & 0 deletions Tests/TensorUtilsTests/TensorUtilsTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
//
// TensorUtilsTests.swift
//
// Created by Jan Krukowski on 25/11/2023.
//

import XCTest
import CoreML
@testable import TensorUtils

/// Unit tests for the `Math` helpers (`topK`, `argmax` variants, `softmax`).
class TensorUtilsTests: XCTestCase {
    /// Absolute tolerance applied to every floating-point comparison below.
    private let tolerance: Float = 0.00001

    /// Checks `Math.topK` on empty input, on `k` larger than the input,
    /// on `k` equal to the input size, and on a strict subset.
    func testTopK() {
        // Empty input, nothing requested.
        do {
            let (indexes, probs) = Math.topK(arr: [], k: 0)
            XCTAssertEqual(indexes, [])
            XCTAssertEqual(probs, [])
        }

        // Empty input, but a positive k.
        do {
            let (indexes, probs) = Math.topK(arr: [], k: 3)
            XCTAssertEqual(indexes, [])
            XCTAssertEqual(probs, [])
        }

        // k exceeds the number of available elements.
        do {
            let (indexes, probs) = Math.topK(arr: [2.0, 1.0], k: 3)
            XCTAssertEqual(indexes, [0, 1])
            XCTAssertEqual(probs, [0.7310586, 0.26894143], accuracy: tolerance)
            XCTAssertEqual(probs.reduce(0, +), 1.0, accuracy: tolerance)
        }

        // k equals the number of elements.
        do {
            let (indexes, probs) = Math.topK(arr: [2.0, 1.0, 3.0], k: 3)
            XCTAssertEqual(indexes, [2, 0, 1])
            XCTAssertEqual(probs, [0.6652409, 0.24472845, 0.090030566], accuracy: tolerance)
            XCTAssertEqual(probs.reduce(0, +), 1.0, accuracy: tolerance)
        }

        // k selects a strict subset; one value dominates the softmax.
        do {
            let (indexes, probs) = Math.topK(arr: [2.0, 1.0, 3.0, -1.0, 123.0, 0.0], k: 4)
            XCTAssertEqual(indexes, [4, 2, 0, 1])
            XCTAssertEqual(probs, [1.0, 0.0, 0.0, 0.0], accuracy: tolerance)
            XCTAssertEqual(probs.reduce(0, +), 1.0, accuracy: tolerance)
        }
    }

    /// Checks that every `argmax` entry point reports index 1 / value 4.0
    /// for the same four-element input.
    func testArgMax() throws {
        let floats = [3.0, 4.0, 1.0, 2.0] as [Float]
        let doubles = [3.0, 4.0, 1.0, 2.0] as [Double]

        // Float input with an explicit count.
        do {
            let (index, value) = Math.argmax(floats, count: 4)
            XCTAssertEqual(index, 1)
            XCTAssertEqual(value, 4.0)
        }

        // 32-bit variant with an explicit count.
        do {
            let (index, value) = Math.argmax32([3.0, 4.0, 1.0, 2.0], count: 4)
            XCTAssertEqual(index, 1)
            XCTAssertEqual(value, 4.0)
        }

        // Double input with an explicit count.
        do {
            let (index, value) = Math.argmax(doubles, count: 4)
            XCTAssertEqual(index, 1)
            XCTAssertEqual(value, 4.0)
        }

        // MLMultiArray built from Float values, 32-bit variant.
        do {
            let multiArray = try MLMultiArray(floats)
            let (index, value) = Math.argmax32(multiArray)
            XCTAssertEqual(index, 1)
            XCTAssertEqual(value, 4.0)
        }

        // MLMultiArray built from Double values.
        do {
            let multiArray = try MLMultiArray(doubles)
            let (index, value) = Math.argmax(multiArray)
            XCTAssertEqual(index, 1)
            XCTAssertEqual(value, 4.0)
        }

        // MLShapedArray input.
        do {
            let shaped = MLShapedArray(scalars: floats, shape: [4])
            let (index, value) = Math.argmax(shaped)
            XCTAssertEqual(index, 1)
            XCTAssertEqual(value, 4.0)
        }
    }

    /// Checks `Math.softmax` on empty input and on a small vector whose
    /// outputs must sum to one.
    func testSoftmax() {
        XCTAssertEqual(Math.softmax([]), [])

        let probabilities = Math.softmax([3.0, 4.0, 1.0, 2.0])
        let expected: [Float] = [0.23688284, 0.6439143, 0.032058604, 0.08714432]
        XCTAssertEqual(probabilities, expected, accuracy: tolerance)
        XCTAssertEqual(probabilities.reduce(0, +), 1.0, accuracy: tolerance)
    }
}

/// Asserts that two arrays of floating-point values have the same length and
/// are element-wise equal within `accuracy`.
///
/// - Parameters:
///   - expression1: Produces the actual values.
///   - expression2: Produces the expected values.
///   - accuracy: Maximum allowed absolute difference per element.
///   - message: Optional description appended to every failure this helper
///     reports. Evaluated only when a failure message is actually built.
///   - file: File reported for failures; defaults to the call site.
///   - line: Line reported for failures; defaults to the call site.
///
/// An error thrown while evaluating either expression is reported via `XCTFail`.
func XCTAssertEqual<T: FloatingPoint>(
    _ expression1: @autoclosure () throws -> [T],
    _ expression2: @autoclosure () throws -> [T],
    accuracy: T,
    _ message: @autoclosure () -> String = "",
    file: StaticString = #filePath,
    line: UInt = #line
) {
    do {
        let lhsEvaluated = try expression1()
        let rhsEvaluated = try expression2()
        // A length mismatch is reported on its own; the overlapping prefix is
        // still compared below so element differences are visible too.
        XCTAssertEqual(
            lhsEvaluated.count,
            rhsEvaluated.count,
            "Array lengths differ. \(message())",
            file: file,
            line: line
        )
        for (index, (lhs, rhs)) in zip(lhsEvaluated, rhsEvaluated).enumerated() {
            // Include the index so a failure identifies the offending element.
            XCTAssertEqual(
                lhs,
                rhs,
                accuracy: accuracy,
                "Mismatch at index \(index). \(message())",
                file: file,
                line: line
            )
        }
    } catch {
        XCTFail("Unexpected error: \(error)", file: file, line: line)
    }
}

0 comments on commit a55706b

Please sign in to comment.