Introducing our Dockerized solution! Seamlessly merge Bolna with Whisper ASR and Melo TTS. For the telephony provider we use Twilio, and for tunneling we use ngrok. This Docker Compose setup lets you host the Bolna server, Whisper ASR, and Melo TTS together in the cloud: just clone this repo and follow these simple steps to deploy. Before you start, make sure that you have Docker and Docker Compose installed, create a .env file (refer to .env-sample), and put your ngrok auth token in the ngrok-config.yml file.
# build latest `twilio-app` and `bolna-app`
docker compose build twilio-app bolna-app
# run containers
docker compose up -d
The output should look something like this:
Note: make sure that all of your services are running.
To create an agent, execute the command below.
Agent create API
curl --location 'http://0.0.0.0:5001/agent' \
--header 'Content-Type: application/json' \
--data '{
"agent_config": {
"agent_name": "Bolna Agent",
"agent_welcome_message": "Hey how are you!",
"tasks": [
{
"tools_config": {
"output": {
"format": "wav",
"provider": "twilio"
},
"input": {
"format": "wav",
"provider": "twilio"
},
"synthesizer": {
"provider": "melotts",
"provider_config": {
"voice": "Alex",
"sample_rate": 8000,
"sdp_ratio" : 0.2,
"noise_scale" : 0.6,
"noise_scale_w" : 0.8,
"speed" : 1.0
},
"stream": true,
"buffer_size": 123,
"audio_format": "pcm"
},
"llm_agent": {
"model": "deepinfra/meta-llama/Meta-Llama-3-70B-Instruct",
"max_tokens": 123,
"agent_flow_type": "streaming",
"use_fallback": true,
"family": "llama",
"temperature": 0.1,
"request_json": true,
"provider": "deepinfra"
},
"transcriber": {
"encoding": "linear16",
"language": "en",
"model": "whisper",
"stream": true,
"modeltype":"distil-large-v3",
"keywords":"ansh,joseph,hola",
"task": "transcribe",
"provider":"whisper"
},
"api_tools": null
},
"task_config": {
"ambient_noise_track": "office-ambience",
"hangup_after_LLMCall": false,
"hangup_after_silence": 10.0,
"ambient_noise": false,
"interruption_backoff_period": 0.0,
"backchanneling": false,
"backchanneling_start_delay": 5.0,
"optimize_latency": true,
"incremental_delay": 100.0,
"call_cancellation_prompt": null,
"number_of_words_for_interruption": 3.0,
"backchanneling_message_gap": 5.0,
"use_fillers": false
},
"task_type": "conversation",
"toolchain": {
"execution": "parallel",
"pipelines": [
[
"transcriber",
"llm",
"synthesizer"
]
]
}
}
],
"agent_type": "Lead Qualification"
},
"agent_prompts": {
"task_1": {
"system_prompt": "Ask if they are coming for party tonight"
}
}
}'
Copy the agent_id from the output;
you will need it in the next step when invoking the call.
Invoke call API
curl --location 'http://0.0.0.0:8001/call' \
--header 'Content-Type: application/json' \
--data '{
"agent_id": "bf2a9e9c-6038-4104-85c4-b71a0d1478c9",
"recipient_phone_number": "+1XXXXXXXXXX"
}'
You should hear your phone ringing now.
docker compose down
By default we restrict Melo to EN, but there are 5 voice options, as mentioned below.
To select a voice, you just have to change the following section of the agent config:
{
"synthesizer": {
"provider": "melotts",
"provider_config": {
"voice": "<put your selected voice here>",
"sample_rate": 8000,
"sdp_ratio": 0.2,
"noise_scale": 0.6,
"noise_scale_w": 0.8,
"speed": 1.0
},
"stream": true,
"buffer_size": 123,
"audio_format": "pcm"
}
}
This demo uses the prompt below for the LLM:
"task_1": {
"system_prompt": "You are assistant at Dr. Sharma clinic you have to book an appointment"
}
You can provide a prompt that fits your own use case.