Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
garronej committed Jan 21, 2025
1 parent e255fe5 commit 66fd5dd
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/** Exhaustive list of the file type identifiers that are supported. */
const supportedFileTypes = ["parquet", "csv", "json"] as const;

/** Union of the literal entries of `supportedFileTypes`. */
export type SupportedFileType = (typeof supportedFileTypes)[number];

/**
 * Type guard telling whether `ext` is one of the supported file types.
 *
 * The comparison is an exact string match: it is case-sensitive and the
 * value must not carry a leading dot.
 *
 * @param ext Candidate file type / extension string.
 * @returns `true` (narrowing `ext` to `SupportedFileType`) when `ext` is
 * strictly equal to one of the entries of `supportedFileTypes`.
 */
export function getIsSupportedFileType(ext: string): ext is SupportedFileType {
    // Widen the tuple to a plain readonly string list so that `includes`
    // accepts an arbitrary string argument.
    const knownTypes: readonly string[] = supportedFileTypes;

    return knownTypes.includes(ext);
}
6 changes: 0 additions & 6 deletions web/src/core/usecases/dataExplorer/decoupledLogic/fileType.ts

This file was deleted.

6 changes: 0 additions & 6 deletions web/src/core/usecases/dataExplorer/decoupledLogic/index.ts

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { isValidFileType } from "./fileType";
import { type SupportedFileType, getIsSupportedFileType } from "./SupportedFileType";

export function detectFileTypeFromSourceUrlExtension(sourceUrl: string) {
function inferFileType_fromExtension(sourceUrl: string): SupportedFileType | undefined {
let pathname: string;

try {
Expand All @@ -16,12 +16,14 @@ export function detectFileTypeFromSourceUrlExtension(sourceUrl: string) {

const [, extension] = match;

return isValidFileType(extension) ? extension : undefined;
}
if (!getIsSupportedFileType(extension)) {
return undefined;
}

function detectFileTypeFromContentType(contentType: string | null) {
if (!contentType) return undefined;
return extension;
}

function inferFileType_fromContentType(contentType: string) {
const contentTypeToExtension = [
{
keyword: "application/parquet" as const,
Expand All @@ -47,7 +49,7 @@ function detectFileTypeFromContentType(contentType: string | null) {
return match ? match.extension : undefined;
}

async function detectFileTypeFromBytes(response: Response) {
async function inferFileType_fromBytes(firstBytes: ArrayBuffer) {
const fileSignatures = [
{
condition: (bytes: Uint8Array) =>
Expand All @@ -72,39 +74,63 @@ async function detectFileTypeFromBytes(response: Response) {
}
];

const arrayBuffer = await response.arrayBuffer();
const bytes = new Uint8Array(arrayBuffer);
const firstBytes_uint8Array = new Uint8Array(firstBytes);

const match = fileSignatures.find(({ condition }) => condition(bytes));
const match = fileSignatures.find(({ condition }) =>
condition(firstBytes_uint8Array)
);

return match ? match.extension : undefined;
}

export async function detectFileTypeFromFileDownload(fileDownloadUrl: string) {
const response = await fetch(fileDownloadUrl, {
method: "GET",
headers: { Range: "bytes=0-15" } // Fetch the first 16 bytes
});
export async function inferFileType(params: {
sourceUrl: string;
getContentType: () => Promise<string | null>;
getFirstBytes: () => Promise<ArrayBuffer | undefined>;
}): Promise<SupportedFileType | undefined> {
const { sourceUrl, getContentType, getFirstBytes } = params;

if (!response.ok) {
return { fileType: undefined, redirectedUrl: undefined };
}
file_extension: {
const fileType = inferFileType_fromExtension(sourceUrl);

const redirectedUrl = response.url !== fileDownloadUrl ? response.url : undefined;
if (fileType === undefined) {
break file_extension;
}

if (redirectedUrl) {
console.log(`The url you provided is being redirected to ${redirectedUrl}`);
return fileType;
}

const contentType = response.headers.get("Content-Type");
content_type: {
const contentType = await getContentType();

if (contentType === null) {
break content_type;
}

const detectedFileTypeFromContentType = detectFileTypeFromContentType(contentType);
const fileType = inferFileType_fromContentType(contentType);

if (detectedFileTypeFromContentType) {
return { fileType: detectedFileTypeFromContentType, redirectedUrl };
if (fileType === undefined) {
break content_type;
}

return fileType;
}

const detectedFileTypeFromBytes = await detectFileTypeFromBytes(response);
from_bytes: {
const firstBytes = await getFirstBytes();

if (firstBytes === undefined) {
break from_bytes;
}

const fileType = await inferFileType_fromBytes(firstBytes);

if (fileType === undefined) {
break from_bytes;
}

return fileType;
}

return { fileType: detectedFileTypeFromBytes, redirectedUrl };
return undefined;
}
6 changes: 3 additions & 3 deletions web/src/core/usecases/dataExplorer/state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createUsecaseActions } from "clean-architecture";
import type { Column } from "core/ports/SqlOlap";
import { assert } from "tsafe/assert";
import { id } from "tsafe/id";
import { type ValidFileType } from "./decoupledLogic";
import type { SupportedFileType } from "./decoupledLogic/SupportedFileType";

export const name = "dataExplorer";

Expand All @@ -28,7 +28,7 @@ export type State = {
columns: Column[];
rowCount: number | undefined;
fileDownloadUrl: string;
fileType: ValidFileType;
fileType: SupportedFileType;
}
| undefined;
};
Expand Down Expand Up @@ -109,7 +109,7 @@ export const { actions, reducer } = createUsecaseActions({
columns: Column[];
rowCount: number | undefined;
fileDownloadUrl: string;
fileType: ValidFileType;
fileType: SupportedFileType;
};
}
) => {
Expand Down
53 changes: 38 additions & 15 deletions web/src/core/usecases/dataExplorer/thunks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ import { createUsecaseContextApi } from "clean-architecture";
import { waitForDebounceFactory } from "core/tools/waitForDebounce";
import { assert } from "tsafe/assert";
import * as s3ConfigManagement from "core/usecases/s3ConfigManagement";
import {
detectFileTypeFromFileDownload,
detectFileTypeFromSourceUrlExtension
} from "./decoupledLogic";
import { inferFileType } from "./decoupledLogic/inferFileType";
import memoize from "memoizee";

const privateThunks = {
getFileDownloadUrl:
Expand Down Expand Up @@ -181,22 +179,47 @@ const privateThunks = {
const { sourceUrl } = params;
const [dispatch] = args;

const fileTypeFromExtension = detectFileTypeFromSourceUrlExtension(sourceUrl);
const partialFetch = memoize(
async () => {
const fileDownloadUrl = await dispatch(
privateThunks.getFileDownloadUrl({ sourceUrl })
);

const response = await fetch(fileDownloadUrl, {
method: "GET",
headers: { Range: "bytes=0-15" } // Fetch the first 16 bytes
});

return {
response,
fileDownloadUrl_direct: response.url
};
},
{ promise: true }
);

if (fileTypeFromExtension) {
return { fileType: fileTypeFromExtension, fileDownloadUrl: undefined };
}
const fileType = await inferFileType({
sourceUrl,
getContentType: async () => {
const { response } = await partialFetch();
return response.headers.get("Content-Type");
},
getFirstBytes: async () => {
const { response } = await partialFetch();

if (!response.ok) {
return undefined;
}

const fileDownloadUrl = await dispatch(
privateThunks.getFileDownloadUrl({ sourceUrl })
);
return response.arrayBuffer();
}
});

const fileTypeAndRedirectedUrl =
await detectFileTypeFromFileDownload(fileDownloadUrl);
const { fileDownloadUrl_direct } = await partialFetch();

return {
fileType: fileTypeAndRedirectedUrl.fileType,
fileDownloadUrl: fileTypeAndRedirectedUrl.redirectedUrl ?? fileDownloadUrl
fileType,
fileDownloadUrl: fileDownloadUrl_direct
};
},
updateDataSource:
Expand Down

0 comments on commit 66fd5dd

Please sign in to comment.