Skip to content

Commit

Permalink
Code polishing
Browse files Browse the repository at this point in the history
  • Loading branch information
Ihor Makhnyk committed Nov 20, 2023
1 parent ca0a7e9 commit 9bcd1fa
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 37 deletions.
2 changes: 2 additions & 0 deletions Demo/Demo.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@
CODE_SIGN_ENTITLEMENTS = App/Demo.entitlements;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = YX43KN96E7;
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = "Demo-Info.plist";
Expand Down Expand Up @@ -339,6 +340,7 @@
CODE_SIGN_ENTITLEMENTS = App/Demo.entitlements;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = YX43KN96E7;
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = "Demo-Info.plist";
Expand Down
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/MiscStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public final class MiscStore: ObservableObject {
let audioObject = AudioObject(prompt: input,
audioPlayer: player,
originResponse: response,
format: query.response_format.rawValue)
format: query.responseFormat.rawValue)
audioObjects.append(audioObject)
} catch {
NSLog("\(error)")
Expand Down
15 changes: 9 additions & 6 deletions Demo/DemoChat/Sources/UI/TextToSpeechView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ public struct TextToSpeechView: View {
@State private var speed: Double = 1
@State private var responseFormat: AudioSpeechQuery.AudioSpeechResponseFormat = .mp3

private let formats: [AudioSpeechQuery.AudioSpeechResponseFormat] = [.mp3, .aac, .flac, .opus]
private let voices: [AudioSpeechQuery.AudioSpeechVoice] = [.alloy, .echo, .fable, .onyx, .nova, .shimmer]

public init(store: MiscStore) {
self.store = store
}
Expand Down Expand Up @@ -50,7 +47,8 @@ public struct TextToSpeechView: View {
}
HStack {
Picker("Voice", selection: $voice) {
ForEach(voices, id: \.self) { voice in
let allVoices = AudioSpeechQuery.AudioSpeechVoice.allCases
ForEach(allVoices, id: \.self) { voice in
Text("\(voice.rawValue.capitalized)")
}
}
Expand All @@ -67,7 +65,8 @@ public struct TextToSpeechView: View {
}
HStack {
Picker("Format", selection: $responseFormat) {
ForEach(formats, id: \.self) { format in
let allFormats = AudioSpeechQuery.AudioSpeechResponseFormat.allCases
ForEach(allFormats, id: \.self) { format in
Text(".\(format.rawValue)")
}
}
Expand All @@ -83,7 +82,7 @@ public struct TextToSpeechView: View {
let query = AudioSpeechQuery(model: .tts_1,
input: prompt,
voice: voice,
response_format: responseFormat,
responseFormat: responseFormat,
speed: speed)
Task {
await store.createSpeech(query)
Expand Down Expand Up @@ -133,6 +132,10 @@ public struct TextToSpeechView: View {
.navigationTitle("Create Speech")
}

}

extension TextToSpeechView {

func saveAudioDataToFile(audioData: Data, fileName: String) {
if let fileURL = try? FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true) {
let saveURL = fileURL.appendingPathComponent(fileName)
Expand Down
81 changes: 52 additions & 29 deletions Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ public struct AudioSpeechQuery: Codable, Equatable {
///
/// To get acquainted with each of the voices and listen to the samples, visit:
/// [OpenAI Text-to-Speech – Voice Options](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
public enum AudioSpeechVoice: String, Codable {
case alloy,
echo,
fable,
onyx,
nova,
shimmer
public enum AudioSpeechVoice: String, Codable, CaseIterable {
    // Raw values are the exact voice identifiers the speech endpoint accepts.
    case alloy, echo, fable, onyx, nova, shimmer
}

/// Encapsulates the response formats available for audio data.
Expand All @@ -30,11 +30,11 @@ public struct AudioSpeechQuery: Codable, Equatable {
/// - opus
/// - aac
/// - flac
public enum AudioSpeechResponseFormat: String, Codable {
case mp3,
opus,
aac,
flac
public enum AudioSpeechResponseFormat: String, Codable, CaseIterable {
    // Raw values are the exact strings the API accepts for `response_format`.
    case mp3, opus, aac, flac
}
/// One of the available TTS models: tts-1 or tts-1-hd
public let model: Model
Expand All @@ -43,36 +43,59 @@ public struct AudioSpeechQuery: Codable, Equatable {
/// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
public let voice: AudioSpeechVoice
/// The format to return the audio in. Supported formats are mp3, opus, aac, and flac.
public let response_format: AudioSpeechResponseFormat
public let responseFormat: AudioSpeechResponseFormat
/// The speed of the generated audio. Enter a value between **0.25** and **4.0**. Default: **1.0**
public let speed: String?

// Coding keys for `AudioSpeechQuery`: camelCase Swift names, API wire names as raw values.
public enum CodingKeys: String, CodingKey {
    case model
    case input
    case voice
    // Maps the Swift-style property name onto the API's snake_case field.
    case responseFormat = "response_format"
    case speed
}

/// Creates a text-to-speech query.
/// - Parameters:
///   - model: A TTS model (`tts-1` or `tts-1-hd`); `nil` or any non-TTS model falls back to `tts-1`.
///   - input: The text to synthesize.
///   - voice: The voice to generate the audio with.
///   - responseFormat: The audio container to return. Defaults to `.mp3`.
///   - speed: Playback speed between 0.25 and 4.0; `nil` defaults to 1.0. Out-of-range values are clamped.
public init(model: Model?,
            input: String,
            voice: AudioSpeechVoice,
            responseFormat: AudioSpeechResponseFormat = .mp3,
            speed: Double?) {
    // Delegate validation to the static helpers so the rules live in one place.
    self.model = AudioSpeechQuery.validateSpeechModel(model)
    self.input = input
    self.voice = voice
    self.speed = AudioSpeechQuery.validateSpeechSpeed(speed)
    self.responseFormat = responseFormat
}

}

// MARK: - Input validation helpers
extension AudioSpeechQuery {

    /// Validates that `inputModel` is one of the two TTS models.
    /// Returns `.tts_1` (with a log message) when the model is `nil` or not a TTS model.
    private static func validateSpeechModel(_ inputModel: Model?) -> Model {
        guard let inputModel else { return .tts_1 }
        let isModelOfIncorrectFormat = inputModel != .tts_1 && inputModel != .tts_1_hd
        guard !isModelOfIncorrectFormat else {
            NSLog("[AudioSpeech] 'AudioSpeechQuery' must have a valid Text-To-Speech model, 'tts-1' or 'tts-1-hd'. Setting model to 'tts-1'.")
            return .tts_1
        }
        return inputModel
    }

    /// Converts `inputSpeed` to the string the API expects, clamping it into the
    /// accepted 0.25...4.0 range. A `nil` speed becomes the default "1.0".
    private static func validateSpeechSpeed(_ inputSpeed: Double?) -> String {
        guard let inputSpeed else { return "1.0" }
        let validRange = 0.25...4.0
        // BUG FIX: the previous check used `&&` with inverted comparisons
        // (speed >= 4.0 && speed <= 0.25), which no value can satisfy, so
        // out-of-range speeds were silently passed through unclamped.
        guard validRange.contains(inputSpeed) else {
            NSLog("[AudioSpeech] Speed value must be between 0.25 and 4.0. Setting value to closest valid.")
            // Clamp to the *nearest* bound, as the log message promises
            // (previously a too-small speed was reset to "1.0", not "0.25").
            return inputSpeed < validRange.lowerBound ? "0.25" : "4.0"
        }
        return "\(inputSpeed)"
    }

}
2 changes: 1 addition & 1 deletion Sources/OpenAI/Public/Models/AudioSpeechResult.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public struct AudioSpeechResult {
public func saveAs(_ name: String, format: AudioSpeechQuery.AudioSpeechResponseFormat, to path: URL) throws {
guard let data = audioData else {
throw NSError(
domain: Bundle.main.bundleIdentifier!,
domain: Bundle.main.bundleIdentifier ?? "",
code: 1,
userInfo: [NSLocalizedDescriptionKey: "No audio data"]
)
Expand Down

0 comments on commit 9bcd1fa

Please sign in to comment.