🚀 Feat: Streamline File Strategies & GPT-4-Vision Settings (danny-avila#1535)

* chore: fix `endpoint` typescript issues and typo in console info message

* feat(api): files GET endpoint and save only file_id references to messages

* refactor(client): `useGetFiles` query hook, update file types, optimistic update of filesQuery on file upload

* refactor(buildTree): update to use params object and accept fileMap

* feat: map files to messages; refactor(ChatView): messages only available after files are fetched

* fix: fetch files only when authenticated

* feat(api): AppService
- rename app.locals.configs to app.locals.paths
- load custom config and use `fileStrategy` from the YAML config in app.locals

* refactor: separate Firebase and Local strategies, call based on config

* refactor: modularize file strategies and employ them for DALL-E (see the sketch after this list)

* refactor(librechat.yaml): add fileStrategy field

* feat: add source to MongoFile schema, as well as BatchFile, and ExtendedFile types

* feat: employ file strategies for upload/delete files

* refactor(deleteFirebaseFile): add user id validation for firebase file deletion

* chore(deleteFirebaseFile): update jsdocs

* feat: employ strategies for vision requests

* fix(client): handle messages with deleted files

* fix(client): ensure `filesToDelete` always saves/sends `file.source`

* feat(openAI): configurable `resendImages` and `imageDetail`

* refactor(getTokenCountForMessage): recursively process only arrays of objects, counting only their values (not keys) and skipping `image_url` types

* feat(OpenAIClient): calculateImageTokenCost

* chore: remove comment

* refactor(uploadAvatar): employ fileStrategy for avatars, from social logins or user upload

* docs: update docs on how to configure fileStrategy

* fix(ci): mock winston and winston related modules, update DALLE3.spec.js with changes made

* refactor(redis): change terminal message to reflect current development state

* fix(DALL-E-2): pass fileStrategy to dall-e
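The heart of this commit is a strategy pattern for file handling: a single `fileStrategy` value (set in librechat.yaml and loaded into `app.locals` by the new AppService) selects between the Firebase and Local implementations. A minimal sketch of how such a selector could look — the module paths and handler names below are illustrative assumptions, not the commit's exact layout:

// Hypothetical sketch: resolve file handlers from the configured strategy.
// Handler names and require() paths are assumed for illustration; the real
// handler sets live in the Firebase/Local strategy modules this commit adds.
const { saveLocalFile, deleteLocalFile } = require('./strategies/Local');
const { saveFirebaseFile, deleteFirebaseFile } = require('./strategies/Firebase');

const strategies = {
  local: { handleFileUpload: saveLocalFile, handleFileDelete: deleteLocalFile },
  firebase: { handleFileUpload: saveFirebaseFile, handleFileDelete: deleteFirebaseFile },
};

/** Returns the upload/delete handlers for the configured strategy. */
function getStrategyFunctions(fileStrategy = 'local') {
  const strategy = strategies[fileStrategy];
  if (!strategy) {
    throw new Error(`Invalid file strategy: ${fileStrategy}`);
  }
  return strategy;
}

// Callers would pick handlers off the app config, e.g.:
// const { handleFileUpload } = getStrategyFunctions(req.app.locals.fileStrategy);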
danny-avila authored Jan 11, 2024
1 parent f15d9ea commit 6246be2
Showing 81 changed files with 1,727 additions and 853 deletions.
36 changes: 27 additions & 9 deletions api/app/clients/BaseClient.js
@@ -46,6 +46,10 @@ class BaseClient {
     logger.debug('`[BaseClient] recordTokenUsage` not implemented.', response);
   }
 
+  async addPreviousAttachments(messages) {
+    return messages;
+  }
+
   async recordTokenUsage({ promptTokens, completionTokens }) {
     logger.debug('`[BaseClient] recordTokenUsage` not implemented.', {
       promptTokens,
@@ -484,20 +488,22 @@ class BaseClient {
       mapMethod = this.getMessageMapMethod();
     }
 
-    const orderedMessages = this.constructor.getMessagesForConversation({
+    let _messages = this.constructor.getMessagesForConversation({
       messages,
       parentMessageId,
       mapMethod,
     });
 
+    _messages = await this.addPreviousAttachments(_messages);
+
     if (!this.shouldSummarize) {
-      return orderedMessages;
+      return _messages;
     }
 
    // Find the latest message with a 'summary' property
-    for (let i = orderedMessages.length - 1; i >= 0; i--) {
-      if (orderedMessages[i]?.summary) {
-        this.previous_summary = orderedMessages[i];
+    for (let i = _messages.length - 1; i >= 0; i--) {
+      if (_messages[i]?.summary) {
+        this.previous_summary = _messages[i];
         break;
       }
     }
@@ -512,7 +518,7 @@
       });
     }
 
-    return orderedMessages;
+    return _messages;
   }
 
   async saveMessageToDatabase(message, endpointOptions, user = null) {
@@ -618,6 +624,11 @@
    * An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
    * In our implementation, this is accounted for in the getMessagesWithinTokenLimit method.
    *
+   * The content parts example was adapted from the following example:
+   * https://github.com/openai/openai-cookbook/pull/881/files
+   *
+   * Note: image token calculation is to be done elsewhere where we have access to the image metadata
+   *
    * @param {Object} message
    */
  getTokenCountForMessage(message) {
@@ -631,11 +642,18 @@
     }
 
     const processValue = (value) => {
-      if (typeof value === 'object' && value !== null) {
-        for (let [nestedKey, nestedValue] of Object.entries(value)) {
-          if (nestedKey === 'image_url' || nestedValue === 'image_url') {
+      if (Array.isArray(value)) {
+        for (let item of value) {
+          if (!item || !item.type || item.type === 'image_url') {
             continue;
           }
+
+          const nestedValue = item[item.type];
+
+          if (!nestedValue) {
+            continue;
+          }
+
           processValue(nestedValue);
         }
       } else {
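The rewritten `processValue` above walks OpenAI-style content-part arrays — [{ type: 'text', text }, { type: 'image_url', image_url }] — counting only each part's payload and skipping `image_url` parts, whose cost is priced separately from image metadata. A self-contained sketch of that walk, with the tokenizer stubbed (the real method counts via tiktoken through `getTokenCount`):

// Standalone sketch; countTokens is a stand-in for the class's
// tiktoken-based getTokenCount, not the real tokenizer.
const countTokens = (text) => Math.ceil(String(text).length / 4);

function getTokenCountForMessage(message) {
  let tokens = 0;
  const processValue = (value) => {
    if (Array.isArray(value)) {
      for (let item of value) {
        // Skip malformed parts and image parts; images are priced from
        // width/height metadata elsewhere, not from the URL string.
        if (!item || !item.type || item.type === 'image_url') {
          continue;
        }
        const nestedValue = item[item.type]; // e.g. item.text for { type: 'text' }
        if (!nestedValue) {
          continue;
        }
        processValue(nestedValue);
      }
    } else {
      tokens += countTokens(value);
    }
  };
  for (const value of Object.values(message)) {
    processValue(value);
  }
  return tokens;
}

// Only the text part (and the role string) contributes to the count:
console.log(
  getTokenCountForMessage({
    role: 'user',
    content: [
      { type: 'text', text: 'describe what is in this image?' },
      { type: 'image_url', image_url: { url: 'https://example.com/img.png', detail: 'high' } },
    ],
  }),
);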
169 changes: 149 additions & 20 deletions api/app/clients/OpenAIClient.js
@@ -1,15 +1,16 @@
 const OpenAI = require('openai');
 const { HttpsProxyAgent } = require('https-proxy-agent');
-const { getResponseSender } = require('librechat-data-provider');
+const { getResponseSender, ImageDetailCost, ImageDetail } = require('librechat-data-provider');
 const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const { encodeAndFormat, validateVisionModel } = require('~/server/services/Files/images');
 const { getModelMaxTokens, genAzureChatCompletion, extractBaseURL } = require('~/utils');
 const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
 const { handleOpenAIErrors } = require('./tools/util');
 const spendTokens = require('~/models/spendTokens');
 const { createLLM, RunManager } = require('./llm');
-const { isEnabled } = require('~/server/utils');
 const ChatGPTClient = require('./ChatGPTClient');
+const { isEnabled } = require('~/server/utils');
+const { getFiles } = require('~/models/File');
 const { summaryBuffer } = require('./memory');
 const { runTitleChain } = require('./chains');
 const { tokenSplit } = require('./document');
@@ -76,16 +77,7 @@ class OpenAIClient extends BaseClient {
       };
     }
 
-    this.isVisionModel = validateVisionModel(this.modelOptions.model);
-
-    if (this.options.attachments && !this.isVisionModel) {
-      this.modelOptions.model = 'gpt-4-vision-preview';
-      this.isVisionModel = true;
-    }
-
-    if (this.isVisionModel) {
-      delete this.modelOptions.stop;
-    }
+    this.checkVisionRequest(this.options.attachments);
 
     const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
     if (OPENROUTER_API_KEY && !this.azure) {
@@ -204,6 +196,27 @@
     return this;
   }
 
+  /**
+   *
+   * Checks if the model is a vision model based on request attachments and sets the appropriate options:
+   * - Sets `this.modelOptions.model` to `gpt-4-vision-preview` if the request is a vision request.
+   * - Sets `this.isVisionModel` to `true` if vision request.
+   * - Deletes `this.modelOptions.stop` if vision request.
+   * @param {Array<Promise<MongoFile[]> | MongoFile[]> | Record<string, MongoFile[]>} attachments
+   */
+  checkVisionRequest(attachments) {
+    this.isVisionModel = validateVisionModel(this.modelOptions.model);
+
+    if (attachments && !this.isVisionModel) {
+      this.modelOptions.model = 'gpt-4-vision-preview';
+      this.isVisionModel = true;
+    }
+
+    if (this.isVisionModel) {
+      delete this.modelOptions.stop;
+    }
+  }
+
   setupTokens() {
     if (this.isChatCompletion) {
       this.startToken = '||>';
@@ -288,7 +301,11 @@
     tokenizerCallsCount++;
   }
 
-  // Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
+  /**
+   * Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
+   * @param {string} text - The text to get the token count for.
+   * @returns {number} The token count of the given text.
+   */
   getTokenCount(text) {
     this.resetTokenizersIfNecessary();
     try {
@@ -301,10 +318,33 @@
     }
   }
 
+  /**
+   * Calculate the token cost for an image based on its dimensions and detail level.
+   *
+   * @param {Object} image - The image object.
+   * @param {number} image.width - The width of the image.
+   * @param {number} image.height - The height of the image.
+   * @param {'low'|'high'|string|undefined} [image.detail] - The detail level ('low', 'high', or other).
+   * @returns {number} The calculated token cost.
+   */
+  calculateImageTokenCost({ width, height, detail }) {
+    if (detail === 'low') {
+      return ImageDetailCost.LOW;
+    }
+
+    // Calculate the number of 512px squares
+    const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);
+
+    // Default to high detail cost calculation
+    return numSquares * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL;
+  }
+
   getSaveOptions() {
     return {
       chatGptLabel: this.options.chatGptLabel,
       promptPrefix: this.options.promptPrefix,
+      resendImages: this.options.resendImages,
+      imageDetail: this.options.imageDetail,
       ...this.modelOptions,
     };
   }
@@ -317,6 +357,69 @@
     };
   }
 
+  /**
+   *
+   * @param {TMessage[]} _messages
+   * @returns {TMessage[]}
+   */
+  async addPreviousAttachments(_messages) {
+    if (!this.options.resendImages) {
+      return _messages;
+    }
+
+    /**
+     *
+     * @param {TMessage} message
+     */
+    const processMessage = async (message) => {
+      if (!this.message_file_map) {
+        /** @type {Record<string, MongoFile[]> */
+        this.message_file_map = {};
+      }
+
+      const fileIds = message.files.map((file) => file.file_id);
+      const files = await getFiles({
+        file_id: { $in: fileIds },
+      });
+
+      await this.addImageURLs(message, files);
+
+      this.message_file_map[message.messageId] = files;
+      return message;
+    };
+
+    const promises = [];
+
+    for (const message of _messages) {
+      if (!message.files) {
+        promises.push(message);
+        continue;
+      }
+
+      promises.push(processMessage(message));
+    }
+
+    const messages = await Promise.all(promises);
+
+    this.checkVisionRequest(this.message_file_map);
+    return messages;
+  }
+
+  /**
+   *
+   * Adds image URLs to the message object and returns the files
+   *
+   * @param {TMessage[]} messages
+   * @param {MongoFile[]} files
+   * @returns {Promise<MongoFile[]>}
+   */
+  async addImageURLs(message, attachments) {
+    const { files, image_urls } = await encodeAndFormat(this.options.req, attachments);
+
+    message.image_urls = image_urls;
+    return files;
+  }
+
   async buildMessages(
     messages,
     parentMessageId,
@@ -355,13 +458,23 @@
     }
 
     if (this.options.attachments) {
-      const attachments = await this.options.attachments;
-      const { files, image_urls } = await encodeAndFormat(
-        this.options.req,
-        attachments.filter((file) => file.type.includes('image')),
+      const attachments = (await this.options.attachments).filter((file) =>
+        file.type.includes('image'),
       );
 
+      if (this.message_file_map) {
+        this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments;
+      } else {
+        this.message_file_map = {
+          [orderedMessages[orderedMessages.length - 1].messageId]: attachments,
+        };
+      }
+
+      const files = await this.addImageURLs(
+        orderedMessages[orderedMessages.length - 1],
+        attachments,
+      );
 
-      orderedMessages[orderedMessages.length - 1].image_urls = image_urls;
       this.options.attachments = files;
     }

@@ -372,10 +485,25 @@
         assistantName: this.options?.chatGptLabel,
       });
 
-      if (this.contextStrategy && !orderedMessages[i].tokenCount) {
+      const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
+
+      /* If tokens were never counted, or, is a Vision request and the message has files, count again */
+      if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
         orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
       }
 
+      /* If message has files, calculate image token cost */
+      if (this.message_file_map && this.message_file_map[message.messageId]) {
+        const attachments = this.message_file_map[message.messageId];
+        for (const file of attachments) {
+          orderedMessages[i].tokenCount += this.calculateImageTokenCost({
+            width: file.width,
+            height: file.height,
+            detail: this.options.imageDetail ?? ImageDetail.auto,
+          });
+        }
+      }
+
       return formattedMessage;
     });

@@ -780,7 +908,6 @@ ${convo}
     if (this.isChatCompletion) {
       modelOptions.messages = payload;
     } else {
-      // TODO: unreachable code. Need to implement completions call for non-chat models
       modelOptions.prompt = payload;
     }

@@ -916,6 +1043,8 @@ ${convo}
         clientOptions.addMetadata({ finish_reason });
       }
 
+      logger.debug('[OpenAIClient] chatCompletion response', chatCompletion);
+
       return message.content;
     } catch (err) {
       if (
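To make the new image pricing concrete, the calculation can be run standalone. The `ImageDetailCost` constants are not shown in this diff, so the values below are an assumption based on OpenAI's published vision pricing (a flat 85 tokens for low detail; 170 per 512px tile plus an 85-token base for high detail):

// Assumed constants mirroring OpenAI's published vision pricing; the real
// values live in librechat-data-provider's ImageDetailCost.
const ImageDetailCost = { LOW: 85, HIGH: 170, ADDITIONAL: 85 };

function calculateImageTokenCost({ width, height, detail }) {
  if (detail === 'low') {
    return ImageDetailCost.LOW; // flat cost, independent of dimensions
  }
  // Number of 512px tiles the image is divided into
  const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);
  return numSquares * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL;
}

// A 1024x1024 high-detail image spans 2x2 tiles: 4 * 170 + 85 = 765 tokens.
console.log(calculateImageTokenCost({ width: 1024, height: 1024, detail: 'high' })); // 765
console.log(calculateImageTokenCost({ width: 1024, height: 1024, detail: 'low' })); // 85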
2 changes: 1 addition & 1 deletion api/app/clients/PluginsClient.js
@@ -112,7 +112,7 @@ class PluginsClient extends OpenAIClient {
         signal: this.abortController.signal,
         openAIApiKey: this.openAIApiKey,
         conversationId: this.conversationId,
-        debug: this.options?.debug,
+        fileStrategy: this.options.req.app.locals.fileStrategy,
         message,
       },
     });
33 changes: 33 additions & 0 deletions api/app/clients/specs/OpenAIClient.test.js
@@ -546,6 +546,39 @@ describe('OpenAIClient', () => {
       expect(totalTokens).toBe(testCase.expected);
     });
   });
+
+  const vision_request = [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'describe what is in this image?',
+        },
+        {
+          type: 'image_url',
+          image_url: {
+            url: 'https://venturebeat.com/wp-content/uploads/2019/03/openai-1.png',
+            detail: 'high',
+          },
+        },
+      ],
+    },
+  ];
+
+  const expectedTokens = 14;
+  const visionModel = 'gpt-4-vision-preview';
+
+  it(`should return ${expectedTokens} tokens for model ${visionModel} (Vision Request)`, () => {
+    client.modelOptions.model = visionModel;
+    client.selectTokenizer();
+    // 3 tokens for assistant label
+    let totalTokens = 3;
+    for (let message of vision_request) {
+      totalTokens += client.getTokenCountForMessage(message);
+    }
+    expect(totalTokens).toBe(expectedTokens);
+  });
 });
 
 describe('sendMessage/getCompletion/chatCompletion', () => {