Skip to content

Commit

Permalink
feat: Datasource custom_id index
Browse files Browse the repository at this point in the history
  • Loading branch information
gmpetrov committed Jun 20, 2023
1 parent 6be3a2c commit 8a30a93
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 14 deletions.
12 changes: 10 additions & 2 deletions pages/api/external/datastores/file-upload/[id].ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const FileSchema = z.object({
export const upload = async (req: AppNextApiRequest, res: NextApiResponse) => {
const file = (req as any).file as z.infer<typeof FileSchema>;
const fileName = (req as any)?.body?.fileName as string;
const custom_id = (req as any)?.body?.custom_id as string;

try {
await FileSchema.parseAsync(file);
Expand Down Expand Up @@ -104,12 +105,19 @@ export const upload = async (req: AppNextApiRequest, res: NextApiResponse) => {
plan,
});

const name =
fileName ||
file?.originalname ||
`${generateFunId()}.${mime.extension(file.mimetype)}`;

const datasource = await prisma.appDatasource.create({
data: {
name,
type: DatasourceType.file,
name: fileName || generateFunId(),
config: {
type: file.mimetype,
source: name,
custom_id,
},
status: DatasourceStatus.pending,
owner: {
Expand All @@ -131,7 +139,7 @@ export const upload = async (req: AppNextApiRequest, res: NextApiResponse) => {

const params = {
Bucket: process.env.NEXT_PUBLIC_S3_BUCKET_NAME!,
Key: `datastores/${datastore.id}/${s3FileName}`,
Key: `datastores/${datastore.id}/${datasource.id}/${s3FileName}`,
Body: file.buffer,
ContentType: file.mimetype,
ACL: 'public-read',
Expand Down
2 changes: 1 addition & 1 deletion pages/api/external/datastores/query/[id].ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export const queryURL = async (
const results = await store.search({
query: data.query,
topK: topK as number,
tags: [],
filters: data.filters,
});

return results || [];
Expand Down
3 changes: 2 additions & 1 deletion types/dtos.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ export type TaskRemoveDatastoreSchema = z.infer<
// Zod schema for a datastore search request: a required `query` string plus
// optional `topK`, `tags`, and document-metadata filter fields.
// NOTE(review): both `filter` and `filters` appear below; this listing looks
// like a diff rendered without +/- markers, so presumably `filter` is the
// removed line and `filters` its replacement — confirm against the repository.
export const SearchRequestSchema = z.object({
query: z.string(),
// NOTE(review): with `.optional()` applied after `.default(3)`, an omitted
// `topK` may parse to `undefined` rather than 3 — confirm against zod's
// behavior and how callers fall back when `topK` is missing.
topK: z.number().default(3).optional(),
filter: DocumentMetadataSchema.optional(),
tags: z.array(z.string()).optional(),
filters: DocumentMetadataSchema.optional(),
});

export type SearchRequestSchema = z.infer<typeof SearchRequestSchema>;
Expand Down
1 change: 1 addition & 0 deletions types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export enum MetadataFields {
chunk_hash = 'chunk_hash',
datasource_hash = 'datasource_hash',
chunk_offset = 'chunk_offset',
custom_id = 'custom_id',
}

export type DocumentMetadata = {
Expand Down
1 change: 1 addition & 0 deletions types/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export const DocumentMetadataSchema = z.object({
author: z.string().optional(),
start_date: z.string().optional(),
end_date: z.string().optional(),
custom_id: z.string().optional(),
});

export const DocumentSchema = z.object({
Expand Down
8 changes: 2 additions & 6 deletions utils/datastores/base.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { Datastore } from '@prisma/client';
import { Document as LangchainDocument } from 'langchain/document';

import { Chunk, DocumentMetadata } from '@app/types';
import { Chunk, DocumentMetadata, SearchRequestSchema } from '@app/types';

export const INDEX_NAME = 'databerry';

Expand All @@ -23,11 +23,7 @@ export abstract class ClientManager<T extends Datastore> {
abstract upload(documents: Document[]): Promise<Chunk[]>;
abstract remove(datasourceId: string): Promise<any>;
abstract delete(): Promise<any>;
abstract search(props: {
query: string;
tags: string[];
topK: number;
}): Promise<
abstract search(props: SearchRequestSchema): Promise<
{
text: string;
source: string;
Expand Down
4 changes: 2 additions & 2 deletions utils/datastores/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Datastore, DatastoreType } from '@prisma/client';
import { blake3, createBLAKE3 } from 'hash-wasm';

import { Chunk } from '@app/types';
import { Chunk, SearchRequestSchema } from '@app/types';
import type { Document } from '@app/utils/datastores/base';

import uuidv4 from '../uuid';
Expand Down Expand Up @@ -31,7 +31,7 @@ export class DatastoreManager {
return this.manager.upload(chunks);
}

/**
 * Forwards the search request unchanged to the underlying manager's
 * `search` method and returns its result.
 *
 * NOTE(review): two signature lines appear below; this looks like a diff
 * rendered without +/- markers, so presumably the inline object type is the
 * old signature and `SearchRequestSchema` the new one — confirm.
 */
search(props: { query: string; tags: string[]; topK: number }) {
search(props: SearchRequestSchema) {
return this.manager.search(props);
}

Expand Down
18 changes: 16 additions & 2 deletions utils/datastores/qdrant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { Embeddings } from 'langchain/embeddings';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { z } from 'zod';

import { Chunk, MetadataFields } from '@app/types';
import { Chunk, MetadataFields, SearchRequestSchema } from '@app/types';
import { QdrantConfigSchema } from '@app/types/models';

import uuidv4 from '../uuid';
Expand Down Expand Up @@ -71,6 +71,11 @@ export class QdrantManager extends ClientManager<DatastoreType> {
field_name: MetadataFields.tags,
field_schema: 'keyword',
});

await this.client.put(`/collections/text-embedding-ada-002/index`, {
field_name: MetadataFields.custom_id,
field_schema: 'keyword',
});
}

private async addDocuments(
Expand Down Expand Up @@ -104,6 +109,7 @@ export class QdrantManager extends ClientManager<DatastoreType> {
chunk_offset: documents[idx].metadata.chunk_offset,
datasource_hash: documents[idx].metadata.datasource_hash,
datasource_id: documents[idx].metadata.datasource_id,
custom_id: documents[idx].metadata.custom_id,
},
vector,
} as Point)
Expand Down Expand Up @@ -177,7 +183,7 @@ export class QdrantManager extends ClientManager<DatastoreType> {
return documents;
}

async search(props: any) {
async search(props: SearchRequestSchema) {
const vectors = await this.embeddings.embedDocuments([props.query]);

const results = await this.client.post(
Expand All @@ -193,6 +199,14 @@ export class QdrantManager extends ClientManager<DatastoreType> {
key: MetadataFields.datastore_id,
match: { value: this.datastore.id },
},
...(props.filters?.custom_id
? [
{
key: MetadataFields.custom_id,
match: { value: props.filters.custom_id },
},
]
: []),
],
},
}
Expand Down
1 change: 1 addition & 0 deletions utils/loaders/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export class FileLoader extends DatasourceLoaderBase {
source_type: this.datasource.type,
source: (this.datasource?.config as any)?.source,
file_type: (this.datasource?.config as any)?.type,
custom_id: (this.datasource?.config as any)?.custom_id,
tags: [],
},
});
Expand Down

1 comment on commit 8a30a93

@vercel
Copy link

@vercel vercel bot commented on 8a30a93 Jun 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.