Skip to content

Commit

Permalink
feat: improve download anonymized repository
Browse files Browse the repository at this point in the history
  • Loading branch information
tdurieux committed May 6, 2024
1 parent 93606a5 commit dcf483e
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 40 deletions.
25 changes: 13 additions & 12 deletions src/core/AnonymizedFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -289,14 +289,9 @@ export default class AnonymizedFile {
this.sha(),
this.repository.getToken(),
]);
// const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
// const ipHost = await this.cacheableLookup.lookupAsync(hostName);
got
const resStream = got
.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
// lookup: this.cacheableLookup.lookup,
// host: ipHost.address,
// dnsCache: this.cacheableLookup,
json: {
sha,
token,
Expand All @@ -308,20 +303,26 @@ export default class AnonymizedFile {
anonymizerOptions: anonymizer.opt,
},
})
.on("error", () => {
.on("error", (err) => {
span.recordException(err);
handleError(
new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
}),
res
);
})
.pipe(res)
.on("close", () => {
span.end();
resolve();
});
resStream.pipe(res);
res.on("close", () => {
span.end();
resolve();
});
res.on("error", (err) => {
reject(err);
span.recordException(err);
span.end();
});
return;
}

Expand Down
8 changes: 4 additions & 4 deletions src/core/source/GitHubDownload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export default class GitHubDownload extends GitHubBase {
super(data);
}

private async _getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
public async getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
const oct = octokit(await this.data.getToken());
return oct.rest.repos.downloadZipballArchive({
owner: this.data.organization,
Expand All @@ -32,11 +32,11 @@ export default class GitHubDownload extends GitHubBase {
try {
let response: OctokitResponse<unknown, number>;
try {
response = await this._getZipUrl();
response = await this.getZipUrl();
} catch (error) {
span.recordException(error as Error);
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
throw new AnonymousError("repo_not_found", {
httpStatus: (error as any).status || 404,
object: this.data,
cause: error as Error,
});
Expand Down
3 changes: 2 additions & 1 deletion src/core/source/GitHubStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ export default class GitHubStream extends GitHubBase {
});
output.push(...this.tree2Tree(data.tree, parentPath));
} catch (error) {
if ((error as any).status == 404) {
console.log(error);
if ((error as any).status == 409 || (error as any).status == 404) {
// empty repo
data = { tree: [] };
} else {
Expand Down
72 changes: 50 additions & 22 deletions src/server/routes/repository-public.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { promisify } from "util";
import * as express from "express";
import * as stream from "stream";
import config from "../../config";
import got from "got";
import { join } from "path";

import { getRepo, getUser, handleError } from "./route-utils";
import AnonymousError from "../../core/AnonymousError";
Expand All @@ -26,17 +28,15 @@ router.get(
const repo = await getRepo(req, res);
if (!repo) return;

let user: User | undefined = undefined;
try {
user = await getUser(req);
} catch (_) {}

let download = false;
const conference = await repo.conference();
if (conference) {
download =
conference.quota.size > -1 &&
!!config.ENABLE_DOWNLOAD &&
repo.source.type == "GitHubDownload";
}
if (
repo.size.storage < config.FREE_DOWNLOAD_REPO_SIZE * 1024 &&
repo.source.type == "GitHubDownload"
(!!config.ENABLE_DOWNLOAD && !!config.STREAMER_ENTRYPOINT) ||
user?.isAdmin === true
) {
download = true;
}
Expand All @@ -48,6 +48,44 @@ router.get(
});
}

await repo.countView();

if (config.STREAMER_ENTRYPOINT) {
  // Delegate the archive generation to the streamer service and relay its
  // response body to the client as a zip attachment.
  const token = await repo.getToken();
  const anonymizer = repo.generateAnonymizeTransformer("");
  res.attachment(`${repo.repoId}.zip`);
  // NOTE(review): path.join normalizes the "//" that follows the URL scheme;
  // presumably the HTTP client still parses the result correctly — confirm.
  const reqStream = got
    .stream(join(config.STREAMER_ENTRYPOINT, "api/download"), {
      method: "POST",
      json: {
        token,
        repoFullName: repo.model.source.repositoryName,
        commit: repo.model.source.commit,
        branch: repo.model.source.branch,
        repoId: repo.repoId,
        anonymizerOptions: anonymizer.opt,
      },
    })
    .on("error", () => {
      if (res.headersSent) {
        // Part of the archive has already been sent: an error payload can no
        // longer be written, so terminate the connection instead of leaving
        // the client waiting on a response that will never complete.
        res.destroy();
        return;
      }
      handleError(
        new AnonymousError("file_not_found", {
          // Report the repository being downloaded; `this` is not the
          // repository inside a route handler.
          object: repo,
          httpStatus: 404,
        }),
        res
      );
    });
  reqStream.pipe(res);
  // Stop pulling data from the streamer as soon as the client goes away.
  res.on("close", () => {
    reqStream.destroy();
  });
  res.on("error", () => {
    reqStream.destroy();
  });
  return;
}

res.attachment(`${repo.repoId}.zip`);

// cache the file for 6 hours
Expand Down Expand Up @@ -125,7 +163,7 @@ router.get(
throw new AnonymousError(
repo.model.statusMessage
? repo.model.statusMessage
: "repository_not_available",
: "repository_not_accessible",
{
object: repo,
httpStatus: 500,
Expand All @@ -142,17 +180,7 @@ router.get(
}

let download = false;
const conference = await repo.conference();
if (conference) {
download =
conference.quota.size > -1 &&
!!config.ENABLE_DOWNLOAD &&
repo.source.type == "GitHubDownload";
}
if (
repo.size.storage < config.FREE_DOWNLOAD_REPO_SIZE * 1024 &&
repo.source.type == "GitHubDownload"
) {
if (!!config.ENABLE_DOWNLOAD && !!config.STREAMER_ENTRYPOINT) {
download = true;
}

Expand All @@ -162,7 +190,7 @@ router.get(
} catch (_) {}
res.json({
url: redirectURL,
download,
download: download || user?.isAdmin === true,
lastUpdateDate: repo.model.source.commitDate
? repo.model.source.commitDate
: repo.model.anonymizeDate,
Expand Down
77 changes: 76 additions & 1 deletion src/streamer/route.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,86 @@
import { promisify } from "util";
import * as stream from "stream";
import * as express from "express";
import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import {
anonymizePath,
AnonymizeTransformer,
isTextFile,
} from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
import { lookup } from "mime-types";
import GitHubDownload from "../core/source/GitHubDownload";
import got from "got";
import { Parse } from "unzip-stream";
import archiver = require("archiver");

export const router = express.Router();

/**
 * POST /download
 *
 * Streams an anonymized zip archive of a GitHub repository to the client.
 *
 * Reads `token`, `repoFullName` ("owner/name"), `repoId`, `branch`, `commit`
 * and `anonymizerOptions` from the JSON body, resolves the GitHub zipball URL,
 * then re-packs the downloaded archive entry by entry: each file path is
 * anonymized and each file content is piped through an AnonymizeTransformer
 * before being appended to the outgoing zip.
 *
 * Any failure that happens before the first byte is sent is reported through
 * `handleError`; a failure after that point terminates the connection.
 */
router.post(
  "/download",
  async (req: express.Request, res: express.Response) => {
    try {
      // Parse the body inside the try block so that a missing or malformed
      // payload is reported through handleError instead of rejecting the
      // async handler.
      const body = req.body || {};
      const token: string = body.token;
      const repoFullName = body.repoFullName.split("/");
      const repoId = body.repoId;
      const branch = body.branch;
      const commit = body.commit;
      const anonymizerOptions = body.anonymizerOptions;

      const source = new GitHubDownload({
        repoId,
        organization: repoFullName[0],
        repoName: repoFullName[1],
        commit: commit,
        getToken: () => token,
      });
      const response = await source.getZipUrl();
      const downloadStream = got.stream(response.url);
      const archive = archiver("zip", {});

      // Single failure path for the download and archive streams; guarded so
      // that a cascade of errors only answers the client once.
      let failed = false;
      const fail = (error: unknown) => {
        if (failed) return;
        failed = true;
        console.error(error);
        downloadStream.destroy();
        // Detach the archive first so that aborting it does not end `res`
        // before an error response can be written.
        archive.unpipe(res);
        archive.abort();
        if (res.headersSent) {
          res.destroy();
        } else {
          handleError(error, res);
        }
      };

      // Without these listeners a failed download or archiving error would be
      // an unhandled "error" event and the response would never complete.
      downloadStream.on("error", fail);
      archive.on("error", fail);

      res.on("error", (error) => {
        console.error(error);
        downloadStream.destroy();
      });

      // Stop downloading as soon as the client disconnects.
      res.on("close", () => {
        downloadStream.destroy();
      });

      downloadStream
        .pipe(Parse())
        .on("entry", (entry) => {
          if (entry.type === "File") {
            try {
              // Drop the first path segment (everything up to the first "/")
              // before anonymizing the remaining path.
              const fileName = anonymizePath(
                entry.path.substring(entry.path.indexOf("/") + 1),
                anonymizerOptions.terms || []
              );
              const anonymizer = new AnonymizeTransformer(anonymizerOptions);
              anonymizer.opt.filePath = fileName;
              const st = entry.pipe(anonymizer);
              archive.append(st, { name: fileName });
            } catch (error) {
              // Skip the entry but keep consuming the zip stream.
              entry.autodrain();
              console.error(error);
            }
          } else {
            // Entries that are not files are not re-packed.
            entry.autodrain();
          }
        })
        .on("error", (error) => {
          // Best effort: close the archive with what has been collected.
          console.error(error);
          archive.finalize();
        })
        .on("finish", () => {
          archive.finalize();
        });
      archive.pipe(res);
    } catch (error) {
      handleError(error, res);
    }
  }
);
router.post("/", async (req: express.Request, res: express.Response) => {
req.body = req.body || {};
const token: string = req.body.token;
Expand Down

0 comments on commit dcf483e

Please sign in to comment.