Skip to content

Commit

Permalink
Merge pull request #18 from gmpetrov/refactor/datasource-loader
Browse files Browse the repository at this point in the history
refactor: Use workers queue to handle datasource loading
  • Loading branch information
gmpetrov authored Apr 14, 2023
2 parents c9e5fc7 + d516b98 commit eb07da1
Show file tree
Hide file tree
Showing 35 changed files with 588 additions and 406 deletions.
13 changes: 12 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
FROM node:18-alpine AS base

ARG NEXT_PUBLIC_S3_BUCKET_NAME

# Install dependencies only when needed
FROM base AS deps

ARG NEXT_PUBLIC_S3_BUCKET_NAME

# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.
RUN apk add --no-cache libc6-compat
Expand Down Expand Up @@ -55,6 +56,16 @@ COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/server ./server
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

# TODO: Improve this. Next.js output file tracing is removing modules needed for workers,
# so the dependencies are installed again in this image as a workaround.
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
# Install with the package manager whose lockfile is present; fail the build if none exists.
# Every branch installs strictly from the lockfile (pnpm now uses --frozen-lockfile, matching
# `yarn --frozen-lockfile` and `npm ci`) so the image cannot drift from the committed versions.
RUN \
  if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
  elif [ -f package-lock.json ]; then npm ci; \
  elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i --frozen-lockfile; \
  else echo "Lockfile not found." && exit 1; \
  fi
# Remove the pnpm-installed canvas@2.11.0 package directory from the image.
# NOTE(review): the reason is not visible here — presumably the native module is unused at
# runtime or breaks on alpine; confirm before changing the pinned version in this path.
RUN rm -rf node_modules/.pnpm/canvas@2.11.0

USER nextjs

EXPOSE 3000
Expand Down
70 changes: 61 additions & 9 deletions components/DatasourceForms/Base.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@ import {
Prisma,
} from '@prisma/client';
import axios from 'axios';
import React, { useEffect } from 'react';
import mime from 'mime-types';
import React, { useEffect, useState } from 'react';
import { FormProvider, useForm, useFormContext } from 'react-hook-form';
import useSWR from 'swr';
import useSWRMutation from 'swr/mutation';
import { z } from 'zod';

import Input from '@app/components/Input';
import { upsertDatasource } from '@app/pages/api/datasources';
import { GenerateUploadLinkRequest } from '@app/pages/api/datastores/[id]/generate-upload-link';
import { UpsertDatasourceSchema } from '@app/types/models';
import cuid from '@app/utils/cuid';
import { fetcher, postFetcher } from '@app/utils/swr-fetcher';

import type { DatasourceFormProps } from './types';
Expand Down Expand Up @@ -61,6 +64,7 @@ const DatasourceText = (props: {
};

export default function BaseForm(props: Props) {
const [isLoading, setIsLoading] = useState(false);
const methods = useForm<UpsertDatasourceSchema>({
resolver: zodResolver(props.schema),
defaultValues: {
Expand All @@ -82,27 +86,75 @@ export default function BaseForm(props: Props) {

const onSubmit = async (values: UpsertDatasourceSchema) => {
try {
setIsLoading(true);
const datasourceText = !dirtyFields['datasourceText']
? undefined
: values.datasourceText;

const payload = {
id: cuid(),
...values,
datasourceText,
isUpdateText: !!datasourceText,
file: undefined,
} as UpsertDatasourceSchema;

const check = await axios.post('/api/datasources/check', payload);

if (!check?.data?.valid) {
alert(check?.data?.message);
return;
if (
datasourceText ||
payload.type === DatasourceType.text ||
payload.type === DatasourceType.file
) {
let type = '';
let fileName = '';
let file: File;

if (datasourceText || payload.type === DatasourceType.text) {
type = 'text/plain';
fileName = `${payload.id}.txt`;
file = new File([datasourceText!], fileName, { type });

// Treat text as file
payload['type'] = DatasourceType.file;
payload['config'] = {
...values.config,
fileSize: file.size,
type,
};
} else {
type = (values as any).file.type as string;
fileName = `${payload.id}.${mime.extension(type)}`;
file = (values as any)?.file as File;
}

// upload text from file to AWS
const uploadLinkRes = await axios.post(
`/api/datastores/${props.defaultValues?.datastoreId}/generate-upload-link`,
{
fileName,
type,
} as GenerateUploadLinkRequest
);

await axios.put(uploadLinkRes.data, file, {
headers: {
'Content-Type': type,
},
});
}

// const check = await axios.post('/api/datasources/check', payload);

// if (!check?.data?.valid) {
// alert(check?.data?.message);
// return;
// }

const datasource = await upsertDatasourceMutation.trigger(payload as any);

props?.onSubmitSuccess?.(datasource!);
} catch (err) {
console.log('error', err);
} finally {
setIsLoading(false);
}
};

Expand Down Expand Up @@ -135,14 +187,14 @@ export default function BaseForm(props: Props) {

{props?.customSubmitButton ? (
React.createElement(props.customSubmitButton, {
isLoading: upsertDatasourceMutation.isMutating,
isLoading: isLoading || upsertDatasourceMutation.isMutating,
})
) : (
<Button
type="submit"
variant="soft"
color="primary"
loading={upsertDatasourceMutation.isMutating}
loading={isLoading || upsertDatasourceMutation.isMutating}
disabled={!isDirty}
{...props.submitButtonProps}
>
Expand Down
68 changes: 25 additions & 43 deletions components/DatasourceForms/FileForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,27 @@ import AttachFileIcon from '@mui/icons-material/AttachFile';
import CloseRoundedIcon from '@mui/icons-material/CloseRounded';
import { Alert, Button, Card, Chip, IconButton, Typography } from '@mui/joy';
import { DatasourceType } from '@prisma/client';
import axios from 'axios';
import { useSession } from 'next-auth/react';
import React, { useRef } from 'react';
import React, { useEffect, useRef } from 'react';
import { useFormContext } from 'react-hook-form';
import { z } from 'zod';

import useStateReducer from '@app/hooks/useStateReducer';
import { UpsertDatasourceSchema } from '@app/types/models';
import excelToText from '@app/utils/excel-to-text';
import pdfToText from '@app/utils/pdf-to-text';
import pptxToText from '@app/utils/pptx-to-text';
import wordToText from '@app/utils/word-to-text';

import Base from './Base';
import type { DatasourceFormProps } from './types';

type Props = DatasourceFormProps & {};

/**
 * Shape of the `config` object for a file datasource: `source` is a required
 * string, while `type`, `fileSize` and `fileUploadPath` are optional.
 */
const fileDatasourceConfig = z.object({
  source: z.string(),
  type: z.string().optional(),
  fileSize: z.number().optional(),
  fileUploadPath: z.string().optional(),
});

/**
 * Validation schema for the file datasource form. Extends
 * `UpsertDatasourceSchema` with a `file` field that accepts any value and
 * the file-specific `config` object.
 */
export const FileForm = UpsertDatasourceSchema.extend({
  file: z.any(),
  config: fileDatasourceConfig,
});

Expand All @@ -37,7 +38,7 @@ const acceptedFileTypes = [

function Nested() {
const { data: session, status } = useSession();
const { control, register, setValue } =
const { control, register, setValue, reset, watch } =
useFormContext<z.infer<typeof FileForm>>();
const fileInputRef = useRef();

Expand All @@ -47,47 +48,16 @@ function Nested() {
isProcessing: false,
});

const datasourceText = watch('datasourceText');

const handleSetFile = async (file: File) => {
if (!file) {
return;
}

setState({ isProcessing: true });

let text = '';

switch (file.type) {
case 'text/csv':
case 'text/plain':
case 'text/markdown':
text = await file.text();
break;
case 'application/pdf':
text = await pdfToText(file);
break;
case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
text = await pptxToText(file);
break;
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
text = await wordToText(file);
break;
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
text = await excelToText(file);
break;
default:
break;
}

setState({ isProcessing: false });

if (!text) {
alert('No text extracted from file. Please try another file.');
return;
}

if (!session?.user?.isPremium && new Blob([text]).size / 1000000 > 1.1) {
if (file.size / 1000000 > 1.1) {
alert(
'File upload is limited to 5MB on the free plan. Contact support@databerry.ai to upgrade your account'
'File upload is limited to 1MB on the free plan. To subscribe: Click your profile picture > Upgrade Account'
);
return;
}
Expand All @@ -97,8 +67,10 @@ function Nested() {
});

setValue('name', file?.name);
setValue('file', file);
setValue('config.source', file?.name);
setValue('datasourceText', text, { shouldDirty: true });
setValue('config.type', file?.type);
setValue('config.fileSize', file?.size);
};

const handleFileDrop = (event: any) => {
Expand All @@ -122,6 +94,14 @@ function Nested() {
setValue('datasourceText', '', { shouldDirty: false });
};

useEffect(() => {
setValue('file', {});
}, [datasourceText]);

if (datasourceText) {
return null;
}

return (
<>
<input
Expand Down Expand Up @@ -189,13 +169,15 @@ function Nested() {
export default function WebPageForm(props: Props) {
const { defaultValues, ...rest } = props;

console.log('defaultValues?.datasourceText', defaultValues?.datasourceText);

return (
<Base
schema={FileForm}
{...rest}
defaultValues={{
...props.defaultValues!,
type: DatasourceType.text,
type: DatasourceType.file,
}}
>
<Nested />
Expand Down
10 changes: 5 additions & 5 deletions fly/fly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ kill_signal = "SIGINT"
kill_timeout = 5
primary_region = "iad"

# [processes]
# web = "node server.js"
# worker = "node server/worker-datasource-loader.js"
[processes]
web = "node server.js"
worker = "node server/datasource-loader.js"

[build]
[build.args]
NEXT_PUBLIC_S3_BUCKET_NAME = "databerry"

[[services]]
processes = ["app"]
# processes = ["web"]
# processes = ["app"]
processes = ["web"]
protocol = "tcp"
internal_port = 3000

Expand Down
2 changes: 1 addition & 1 deletion next.config.base.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ const nextConfig = {
const entries = await oldEntry(...args);
return {
...entries,
'worker-datasource-loader': path.resolve(
'datasource-loader': path.resolve(
process.cwd(),
'workers/datasource-loader.ts'
),
Expand Down
9 changes: 6 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,21 @@
"@types/nodemailer": "^6.4.7",
"@types/react": "18.0.29",
"@types/react-dom": "18.0.11",
"@xmldom/xmldom": "^0.8.7",
"aws-sdk": "^2.1343.0",
"axios": "^1.3.4",
"bee-queue": "^1.5.0",
"bull": "^4.10.4",
"cheerio": "1.0.0-rc.12",
"clsx": "^1.2.1",
"crisp-api": "^7.4.1",
"cuid": "^3.0.0",
"eslint": "8.36.0",
"eslint-config-next": "13.2.4",
"hash-wasm": "^4.9.0",
"ioredis": "^5.3.1",
"jszip": "^3.10.1",
"langchain": "^0.0.48",
"langchain": "^0.0.53",
"mammoth": "^1.5.1",
"mime-types": "^2.1.35",
"nanoid": "^4.0.2",
"next": "13.2.4",
"next-auth": "^4.20.1",
Expand All @@ -74,6 +75,8 @@
"zod": "3.19.1"
},
"devDependencies": {
"@types/bull": "^4.10.0",
"@types/mime-types": "^2.1.1",
"@types/nprogress": "^0.2.0",
"@types/uuid": "^9.0.1",
"autoprefixer": "^10.4.14",
Expand Down
Loading

1 comment on commit eb07da1

@vercel
Copy link

@vercel vercel bot commented on eb07da1 Apr 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.