-
Notifications
You must be signed in to change notification settings - Fork 426
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
421 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import React, { useEffect } from 'react'; | ||
import { useFormContext } from 'react-hook-form'; | ||
import { z } from 'zod'; | ||
|
||
import Input from '@app/components/Input'; | ||
|
||
import { | ||
DatasourceSchema, | ||
DatasourceYoutube, | ||
} from '@chaindesk/lib/types/models'; | ||
import YoutubeApi from '@chaindesk/lib/youtube-api'; | ||
import { DatasourceType } from '@chaindesk/prisma'; | ||
|
||
import Base from './Base'; | ||
import type { DatasourceFormProps } from './types'; | ||
|
||
/** Props for the Youtube datasource form: the shared datasource form props specialised to `DatasourceYoutube`. */
type Props = DatasourceFormProps<DatasourceYoutube> & {}; | ||
|
||
/**
 * Infers which Youtube datasource type a URL refers to.
 *
 * The URL is parsed instead of substring-matched, so a video whose ID (or any
 * other query value) happens to contain "list", "watch" or "@" is no longer
 * misclassified.
 *
 * @param url - Raw user input; may be empty, partially typed, or missing its
 *   scheme.
 * @returns `DatasourceType.youtube_bulk` for channel-handle paths (`/@name`)
 *   and for any URL carrying a `list` query parameter,
 *   `DatasourceType.youtube_video` for `/watch` URLs, or `null` when the input
 *   is not parseable or matches neither shape.
 */
function getDatasourceType(url: string) {
  const trimmed = url.trim();
  if (!trimmed) {
    return null;
  }

  let parsed: URL;
  try {
    // Accept scheme-less input such as "youtube.com/watch?v=...".
    const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(trimmed);
    parsed = new URL(hasScheme ? trimmed : `https://${trimmed}`);
  } catch {
    // Not (yet) a parseable URL, e.g. the user is still typing.
    return null;
  }

  const { pathname, searchParams } = parsed;

  // Bulk takes precedence, mirroring the original check order: a `/watch` URL
  // that also carries a `list` parameter is still treated as a playlist.
  if (pathname.startsWith('/@') || searchParams.has('list')) {
    return DatasourceType.youtube_bulk;
  }

  if (pathname === '/watch' || pathname.startsWith('/watch/')) {
    return DatasourceType.youtube_video;
  }

  return null;
}
|
||
/**
 * Form body rendered inside `Base`: a single Youtube URL input.
 *
 * Watches `config.source_url` and, whenever `getDatasourceType` recognises the
 * URL, writes the detected type into the form's `type` value.
 */
function Nested() {
  const { control, register, setValue, watch } =
    useFormContext<DatasourceYoutube>();

  const url = watch('config.source_url');

  useEffect(() => {
    const type = getDatasourceType(url || '');
    // Leave the current `type` untouched while the URL is not recognised.
    if (type) {
      setValue('type', type);
    }
    // `setValue` is listed so the effect satisfies the exhaustive-deps rule.
  }, [url, setValue]);

  return (
    <Input
      label="Youtube URL (video, playlist or channel)"
      helperText="e.g.: https://www.youtube.com/watch?v=Jq_XKf5slVc"
      control={control as any}
      {...register('config.source_url')}
    />
  );
}
|
||
/**
 * Youtube datasource form.
 *
 * Wraps the Youtube-specific fields (`Nested`) in the shared `Base` form,
 * validated with `DatasourceSchema`. All props except `defaultValues` are
 * forwarded to `Base` unchanged; `defaultValues` is passed as a shallow copy.
 */
export default function YoutubeForm(props: Props) {
  // Separate `defaultValues` from the props that are forwarded verbatim.
  const { defaultValues, ...forwarded } = props;
  const initialValues = { ...defaultValues! };

  return (
    <Base schema={DatasourceSchema} {...forwarded} defaultValues={initialValues}>
      <Nested />
    </Base>
  );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import axios from 'axios'; | ||
import cuid from 'cuid'; | ||
import { google } from 'googleapis'; | ||
import { YoutubeTranscript } from 'youtube-transcript'; | ||
|
||
import generateFunId from '@chaindesk/lib/generate-fun-id'; | ||
import { AppDocument } from '@chaindesk/lib/types/document'; | ||
import { DatasourceSchema } from '@chaindesk/lib/types/models'; | ||
import { DatasourceStatus, DatasourceType } from '@chaindesk/prisma'; | ||
import { prisma } from '@chaindesk/prisma/client'; | ||
|
||
import triggerTaskLoadDatasource from '../trigger-task-load-datasource'; | ||
import YoutubeApi from '../youtube-api'; | ||
|
||
import { DatasourceLoaderBase } from './base'; | ||
|
||
type BulkYoutubeDatasource = Extract<
  DatasourceSchema,
  { type: 'youtube_bulk' }
>;

/**
 * Loader for a "bulk" Youtube datasource (a channel or a playlist).
 *
 * It produces no documents itself. It lists the videos behind the URL, creates
 * one `youtube_video` child datasource per video (grouped under this
 * datasource), marks this datasource as synched, and queues a load task for
 * every child.
 */
export class BulkYoutubesLoader extends DatasourceLoaderBase<BulkYoutubeDatasource> {
  isGroup = true;

  /** Always reports a size of 0; the `text` argument is ignored. */
  async getSize(text: string) {
    return 0;
  }

  /**
   * Expands the channel/playlist into child video datasources.
   *
   * @returns An empty document list.
   * @throws Error when `source_url` is missing or when
   *   `YoutubeApi.getYoutubeLinkType` reports the URL as `'unknown'`.
   */
  async load() {
    const url = this.datasource.config['source_url'];

    if (!url) {
      throw new Error('Fatal: missing or invalid url');
    }

    const type = YoutubeApi.getYoutubeLinkType(url);

    let videos: { id: string; title: string }[] = [];
    const Youtube = new YoutubeApi();

    // NOTE(review): any link type other than the three below falls through
    // with `videos` left empty — confirm that is intended.
    switch (type) {
      case 'channel':
        videos = await Youtube.getVideosForChannel(url);
        break;
      case 'playlist':
        videos = await Youtube.getVideosForPlaylist(url);
        break;
      case 'unknown':
        throw new Error('Invalid youtube Url');
    }

    // Pre-generate the child ids so the same ids can be used for the rows and
    // for the load tasks queued afterwards.
    const ids: string[] = videos.map(() => cuid());

    await prisma.$transaction(async (tx) => {
      // NOTE(review): with `skipDuplicates`, some rows may be skipped while a
      // task is still queued for their freshly generated id — verify.
      await tx.appDatasource.createMany({
        data: videos.map((video, index) => ({
          id: ids[index],
          type: DatasourceType.youtube_video,
          name: video?.title || `${generateFunId()}`,
          config: {
            source_url: `https://www.youtube.com/watch?v=${video?.id}`,
          },
          organizationId: this.datasource?.organizationId!,
          datastoreId: this.datasource?.datastoreId,
          groupId: this.datasource?.id,
          serviceProviderId: this.datasource?.serviceProviderId,
        })),
        skipDuplicates: true,
      });

      await tx.appDatasource.update({
        where: {
          id: this.datasource.id,
        },
        data: {
          status: DatasourceStatus.synched,
        },
      });
    });

    // Queue the child loads only after the transaction has committed, so a
    // task can never reference a row that is not yet visible or was rolled
    // back, and the transaction is not held open during the dispatch.
    await triggerTaskLoadDatasource(
      ids.map((datasourceId) => ({
        organizationId: this.datasource?.organizationId!,
        datasourceId,
        priority: 10,
      }))
    );

    return [] as AppDocument[];
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import axios from 'axios'; | ||
import { YoutubeTranscript } from 'youtube-transcript'; | ||
|
||
import { AppDocument } from '@chaindesk/lib/types/document'; | ||
import { DatasourceSchema } from '@chaindesk/lib/types/models'; | ||
|
||
import { ApiError, ApiErrorType } from '../api-error'; | ||
import cleanTextForEmbeddings from '../clean-text-for-embeddings'; | ||
|
||
import { DatasourceLoaderBase } from './base'; | ||
|
||
type DatasourceYoutubeVideo = Extract<
  DatasourceSchema,
  { type: 'youtube_video' }
>;

/** Shape of one transcript entry as consumed by this loader. */
type YoutubeTranscriptType = { text: string; duration: number; offset: number };

/**
 * Loader for a single Youtube video: turns each transcript entry into a
 * document whose `source_url` points at that moment in the video.
 */
export class YoutubeVideoLoader extends DatasourceLoaderBase<DatasourceYoutubeVideo> {
  /** Always reports a size of 0; the `text` argument is ignored. */
  async getSize(text: string) {
    return 0;
  }

  /**
   * Fetches the video transcript and maps it to documents.
   *
   * If the transcript cannot be fetched for any reason, a single placeholder
   * document is returned instead of failing the load (best-effort behaviour
   * kept from the original implementation).
   *
   * @returns One document per transcript entry (or the placeholder), each
   *   enriched with this datasource's identifying metadata.
   * @throws Error when `source_url` is missing from the datasource config.
   */
  async load() {
    const url = this.datasource.config['source_url'];

    if (!url) {
      throw new Error('Fatal: missing youtube url.');
    }

    // Use `?` when the URL has no query string yet, so the timestamp parameter
    // never produces a malformed link such as "https://youtu.be/ID&t=12".
    const timestampSeparator = url.includes('?') ? '&' : '?';

    let docs: AppDocument<any>[];
    try {
      const transcripts: YoutubeTranscriptType[] =
        await YoutubeTranscript.fetchTranscript(url);
      docs = transcripts.map(({ text, offset }) => {
        return new AppDocument<any>({
          pageContent: text,
          metadata: {
            // presumably `offset` is in milliseconds and `t` in seconds — TODO confirm
            source_url: `${url}${timestampSeparator}t=${Math.ceil(
              offset / 1000
            )}`,
          },
        });
      });
    } catch (err) {
      // Surface the real cause: the placeholder text below is emitted for every
      // failure, not only for disabled captions.
      console.error(`Failed to fetch Youtube transcript for ${url}`, err);
      docs = [
        new AppDocument<any>({
          pageContent: 'FAILED: Captions Are Disabled on this Video.',
          metadata: {
            source_url: url,
          },
        }),
      ];
    }

    return docs.map(({ pageContent, metadata }) => {
      return {
        pageContent,
        metadata: {
          ...metadata,
          datastore_id: this.datasource.datastoreId!,
          datasource_id: this.datasource.id,
          datasource_name: this.datasource.name,
          datasource_type: this.datasource.type,
          custom_id: this.datasource?.config?.custom_id,
          tags: this.datasource?.config?.tags || [],
        },
      };
    });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
abe3e67
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
dashboard – ./
dashboard-nu-tawny.vercel.app
www.chaindesk.ai
*.chaindesk.ai
databerry.ai
www.resolveai.io
chaindesk.ai
www.chatbotgpt.ai
*.chaindesk.chat
*.databerry.ai
databerry.chat
resolveai.io
dashboard-databerry.vercel.app
dashboard-git-main-databerry.vercel.app
bestaichatbot.org
www.databerry.ai
www.bestaichatbot.org
chaindesk.chat