Skip to content

Commit e5d5d5a

Browse files
authored
Adds support for downloading attachments as a .tar (#1495)
Currently only Zip is supported for downloading attachments. In the future, we want to be able to support re-uploading attachments. Zip will make this difficult, as the structure of the zip file is stored at the end of file, requiring the entire file to be uploaded before it can be parsed. This commit adds downloading attachments as a .tar, which uses inline headers for each file it contains. This means it can be easily parsed and processed through node's streams, minimizing server disk and memory usage. ## Related issues #1021
1 parent 5d497cb commit e5d5d5a

File tree

9 files changed

+227
-73
lines changed

9 files changed

+227
-73
lines changed

app/gen-server/lib/DocApiForwarder.ts

+5-4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ export class DocApiForwarder {
5959
app.use('/api/docs/:docId/create-fork', withDoc);
6060
app.use('/api/docs/:docId/apply', withDoc);
6161
app.use('/api/docs/:docId/attachments', withDoc);
62+
app.use('/api/docs/:docId/attachments/download', withDoc);
63+
app.use('/api/docs/:docId/attachments/transferStatus', withDoc);
64+
app.use('/api/docs/:docId/attachments/transferAll', withDoc);
65+
app.use('/api/docs/:docId/attachments/store', withDoc);
66+
app.use('/api/docs/:docId/attachments/stores', withDoc);
6267
app.use('/api/docs/:docId/snapshots', withDoc);
6368
app.use('/api/docs/:docId/usersForViewAs', withDoc);
6469
app.use('/api/docs/:docId/replace', withDoc);
@@ -74,10 +79,6 @@ export class DocApiForwarder {
7479
app.use('/api/docs/:docId/timing/start', withDoc);
7580
app.use('/api/docs/:docId/timing/stop', withDoc);
7681
app.use('/api/docs/:docId/forms/:vsId', withDoc);
77-
app.use('/api/docs/:docId/attachments/transferStatus', withDoc);
78-
app.use('/api/docs/:docId/attachments/transferAll', withDoc);
79-
app.use('/api/docs/:docId/attachments/store', withDoc);
80-
app.use('/api/docs/:docId/attachments/stores', withDoc);
8182

8283
app.use('^/api/docs$', withoutDoc);
8384
}

app/server/lib/ActiveDoc.ts

+17-3
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,12 @@ import {Share} from 'app/gen-server/entity/Share';
9090
import {RecordWithStringId} from 'app/plugin/DocApiTypes';
9191
import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI';
9292
import {AccessTokenOptions, AccessTokenResult, GristDocAPI, UIRowId} from 'app/plugin/GristAPI';
93-
import {Archive, ArchiveEntry, create_zip_archive} from 'app/server/lib/Archive';
93+
import {
94+
Archive,
95+
ArchiveEntry, CreatableArchiveFormats,
96+
create_tar_archive,
97+
create_zip_archive
98+
} from 'app/server/lib/Archive';
9499
import {AssistanceSchemaPromptV1Context} from 'app/server/lib/Assistance';
95100
import {AssistanceContext} from 'app/common/AssistancePrompts';
96101
import {AuditEventAction} from 'app/server/lib/AuditEvent';
@@ -960,7 +965,8 @@ export class ActiveDoc extends EventEmitter {
960965
return data;
961966
}
962967

963-
public async getAttachmentsArchive(docSession: OptDocSession): Promise<Archive> {
968+
public async getAttachmentsArchive(docSession: OptDocSession,
969+
format: CreatableArchiveFormats = 'zip'): Promise<Archive> {
964970
if (
965971
!await this._granularAccess.canReadEverything(docSession) &&
966972
!await this.canDownload(docSession)
@@ -990,12 +996,20 @@ export class ActiveDoc extends EventEmitter {
990996
filesAdded.add(name);
991997
yield({
992998
name,
999+
size: file.metadata.size,
9931000
data: file.contentStream,
9941001
});
9951002
}
9961003
}
9971004

998-
return create_zip_archive({ store: true }, fileGenerator());
1005+
if (format == 'tar') {
1006+
return create_tar_archive(fileGenerator());
1007+
}
1008+
if (format == 'zip') {
1009+
return create_zip_archive({ store: true }, fileGenerator());
1010+
}
1011+
// Generally this won't happen, as long as the above is exhaustive over the type of `format`
1012+
throw new ApiError(`Unsupported archive format ${format}`, 400);
9991013
}
10001014

10011015
@ActiveDoc.keepDocOpen

app/server/lib/Archive.ts

+46
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,25 @@
1+
import {StringUnion} from 'app/common/StringUnion';
12
import {ZipArchiveEntry} from 'compress-commons';
23
import stream from 'node:stream';
4+
import * as tar from 'tar-stream';
35
import ZipStream, {ZipStreamOptions} from 'zip-stream';
46

57
export interface ArchiveEntry {
68
name: string;
9+
size: number;
710
data: stream.Readable | Buffer;
811
}
912

1013
export interface Archive {
14+
mimeType: string;
15+
fileExtension: string;
1116
dataStream: stream.Readable;
1217
completed: Promise<void>;
1318
}
1419

20+
export const CreatableArchiveFormats = StringUnion('zip', 'tar');
21+
export type CreatableArchiveFormats = typeof CreatableArchiveFormats.type;
22+
1523
/**
1624
*
1725
* Creates a streamable zip archive, reading files on-demand from the entries iterator.
@@ -27,6 +35,8 @@ export async function create_zip_archive(
2735
const archive = new ZipStream(options);
2836

2937
return {
38+
mimeType: "application/zip",
39+
fileExtension: "zip",
3040
dataStream: archive,
3141
// Caller is responsible for error handling/awaiting on this promise.
3242
completed: (async () => {
@@ -57,3 +67,39 @@ function addEntryToZipArchive(archive: ZipStream, file: ArchiveEntry): Promise<Z
5767
});
5868
});
5969
}
70+
71+
/**
72+
*
73+
* Creates a streamable tar archive, reading files on-demand from the entries iterator.
74+
* Entries are provided as an async iterable, to ensure the archive is constructed
75+
* correctly. A generator can be used for convenience.
76+
* @param {AsyncIterable<ArchiveEntry>} entries - Entries to add.
77+
* @returns {Archive}
78+
*/
79+
export async function create_tar_archive(
80+
entries: AsyncIterable<ArchiveEntry>
81+
): Promise<Archive> {
82+
const archive = tar.pack();
83+
84+
return {
85+
mimeType: "application/x-tar",
86+
fileExtension: "tar",
87+
dataStream: archive,
88+
// Caller is responsible for error handling/awaiting on this promise.
89+
completed: (async () => {
90+
try {
91+
for await (const entry of entries) {
92+
const entryStream = archive.entry({ name: entry.name, size: entry.size });
93+
await stream.promises.pipeline(entry.data, entryStream);
94+
}
95+
archive.finalize();
96+
} catch (error) {
97+
archive.destroy(error);
98+
} finally {
99+
// If the stream was destroyed with an error, this will re-throw the error we caught above.
100+
// Without this, node will see the stream as having an uncaught error, and complain.
101+
await stream.promises.finished(archive);
102+
}
103+
})()
104+
};
105+
}

app/server/lib/DocApi.ts

+11-3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import {
4545
} from 'app/plugin/TableOperationsImpl';
4646
import {ActiveDoc, colIdToRef as colIdToReference, getRealTableId, tableIdToRef} from "app/server/lib/ActiveDoc";
4747
import {appSettings} from "app/server/lib/AppSettings";
48+
import {CreatableArchiveFormats} from 'app/server/lib/Archive';
4849
import {sendForCompletion} from 'app/server/lib/Assistance';
4950
import {getDocPoolIdFromDocInfo} from 'app/server/lib/AttachmentStore';
5051
import {
@@ -575,12 +576,19 @@ export class DocWorkerApi {
575576

576577
// Responds with an archive of all attachment contents, with suitable Content-Type and Content-Disposition.
577578
this._app.get('/api/docs/:docId/attachments/download', canView, withDoc(async (activeDoc, req, res) => {
578-
const archive = await activeDoc.getAttachmentsArchive(docSessionFromRequest(req));
579+
const archiveFormatStr = optStringParam(req.query.format, 'format', {
580+
allowed: CreatableArchiveFormats.values,
581+
allowEmpty: true,
582+
});
583+
584+
const archiveFormat = CreatableArchiveFormats.parse(archiveFormatStr) || 'zip';
585+
const archive = await activeDoc.getAttachmentsArchive(docSessionFromRequest(req), archiveFormat);
579586
const docName = await this._getDownloadFilename(req, "Attachments", activeDoc.doc);
580587
res.status(200)
581-
.type("application/zip")
588+
.type(archive.mimeType)
582589
// Construct a content-disposition header of the form 'attachment; filename="NAME"'
583-
.set('Content-Disposition', contentDisposition(`${docName}.zip`, {type: 'attachment'}))
590+
.set('Content-Disposition',
591+
contentDisposition(`${docName}.${archive.fileExtension}`, {type: 'attachment'}))
584592
// Avoid storing because this could be huge.
585593
.set('Cache-Control', 'no-store');
586594

package.json

+2
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
"@gristlabs/pidusage": "2.0.17",
129129
"@gristlabs/sqlite3": "5.1.4-grist.8",
130130
"@popperjs/core": "2.11.8",
131+
"@types/tar-stream": "^3.1.3",
131132
"@types/zip-stream": "^7.0.0",
132133
"accept-language-parser": "1.5.0",
133134
"ace-builds": "1.23.3",
@@ -205,6 +206,7 @@
205206
"short-uuid": "5.2.0",
206207
"slugify": "1.6.6",
207208
"swagger-ui-dist": "5.11.0",
209+
"tar-stream": "^3.1.7",
208210
"tmp": "0.0.33",
209211
"tmp-promise": "1.0.5",
210212
"ts-interface-checker": "1.0.2",

test/server/lib/ActiveDoc.ts

+15-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import * as gristTypes from 'app/common/gristTypes';
66
import {GristObjCode} from 'app/plugin/GristData';
77
import {TableData} from 'app/common/TableData';
88
import {ActiveDoc} from 'app/server/lib/ActiveDoc';
9+
import {CreatableArchiveFormats} from 'app/server/lib/Archive';
910
import {AttachmentStoreProvider} from 'app/server/lib/AttachmentStoreProvider';
1011
import {DummyAuthorizer} from 'app/server/lib/Authorizer';
1112
import {Client} from 'app/server/lib/Client';
@@ -1193,15 +1194,19 @@ describe('ActiveDoc', async function() {
11931194
await doc.addAttachments(fakeTransferSession, uploadId);
11941195
}
11951196

1196-
async function assertArchiveContents(archive: string | Buffer, expectedFiles: { name: string; contents?: string }[])
1197-
{
1197+
async function assertArchiveContents(
1198+
archive: string | Buffer,
1199+
archiveType: string,
1200+
expectedFiles: { name: string; contents?: string }[],
1201+
) {
11981202
const getFileName = (filePath: string) => filePath.substring(filePath.indexOf("_") + 1);
11991203
const files = await decompress(archive);
12001204
for (const expectedFile of expectedFiles) {
12011205
const file = files.find((file) => getFileName(file.path) === expectedFile.name);
12021206
assert(file, "file not found in archive");
12031207
if (expectedFile.contents) {
1204-
assert.equal(file?.data.toString(), expectedFile.contents, "file contents in archive don't match");
1208+
assert.equal(
1209+
file?.data.toString(), expectedFile.contents, `file contents in ${archiveType} archive don't match`);
12051210
}
12061211
}
12071212
}
@@ -1212,11 +1217,14 @@ describe('ActiveDoc', async function() {
12121217

12131218
await uploadAttachments(activeDoc1, testAttachments);
12141219

1215-
const archive = await activeDoc1.getAttachmentsArchive(fakeTransferSession);
1216-
const archiveMemoryStream = new MemoryWritableStream();
1217-
await stream.promises.pipeline(archive.dataStream, archiveMemoryStream);
1220+
for (const archiveType of CreatableArchiveFormats.values) {
1221+
const archive = await activeDoc1.getAttachmentsArchive(fakeTransferSession, archiveType);
1222+
const archiveMemoryStream = new MemoryWritableStream();
1223+
await stream.promises.pipeline(archive.dataStream, archiveMemoryStream);
1224+
1225+
await assertArchiveContents(archiveMemoryStream.getBuffer(), archiveType, testAttachments);
1226+
}
12181227

1219-
await assertArchiveContents(archiveMemoryStream.getBuffer(), testAttachments);
12201228
});
12211229

12221230
/*

test/server/lib/Archive.ts

+47-31
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
import {Archive, create_zip_archive} from 'app/server/lib/Archive';
1+
import {
2+
Archive,
3+
ArchiveEntry,
4+
create_tar_archive,
5+
create_zip_archive
6+
} from 'app/server/lib/Archive';
27
import {MemoryWritableStream} from 'app/server/utils/MemoryWritableStream';
38
import decompress from 'decompress';
49
import {assert} from 'chai';
@@ -16,24 +21,24 @@ const testFiles = [
1621
];
1722

1823
async function* generateTestArchiveContents() {
19-
yield {
20-
name: testFiles[0].name,
21-
data: stream.Readable.from(Buffer.from(testFiles[0].contents))
22-
};
23-
yield {
24-
name: testFiles[1].name,
25-
data: Buffer.from(testFiles[1].contents)
26-
};
24+
for (const file of testFiles) {
25+
const buffer = Buffer.from(file.contents);
26+
yield {
27+
name: file.name,
28+
size: buffer.length,
29+
data: stream.Readable.from(buffer),
30+
};
31+
}
2732
}
2833

2934
async function assertArchiveCompletedSuccessfully(archive: Archive) {
3035
await assert.isFulfilled(archive.completed);
31-
assert.isTrue(archive.dataStream.closed, "archive data stream was not closed");
36+
assert.isTrue(archive.dataStream.destroyed, "archive data stream was not finished");
3237
}
3338

3439
async function assertArchiveFailedWithError(archive: Archive) {
3540
await assert.isRejected(archive.completed);
36-
assert.isTrue(archive.dataStream.closed, "archive data stream was not closed");
41+
assert.isTrue(archive.dataStream.destroyed, "archive data stream was not finished");
3742
}
3843

3944
async function assertArchiveContainsTestFiles(archiveData: Buffer) {
@@ -46,30 +51,41 @@ async function assertArchiveContainsTestFiles(archiveData: Buffer) {
4651
assert.deepEqual(zippedFile?.data.toString(), testFile.contents);
4752
}
4853
}
49-
describe('Archive', function () {
50-
describe('create_zip_archive', function () {
51-
it('should create a zip archive', async function () {
52-
const archive = await create_zip_archive({store: true}, generateTestArchiveContents());
53-
const output = new MemoryWritableStream();
54-
archive.dataStream.pipe(output);
55-
await assertArchiveCompletedSuccessfully(archive);
5654

57-
const archiveData = output.getBuffer();
55+
type ArchiveCreator = (entries: AsyncIterable<ArchiveEntry>) => Promise<Archive>;
5856

59-
await assertArchiveContainsTestFiles(archiveData);
60-
});
57+
function testArchive(type: string, makeArchive: ArchiveCreator) {
58+
it(`should create a ${type} archive`, async function () {
59+
const archive = await makeArchive(generateTestArchiveContents());
60+
const output = new MemoryWritableStream();
61+
archive.dataStream.pipe(output);
62+
await assertArchiveCompletedSuccessfully(archive);
6163

62-
it('errors in archive.completed if the generator errors', async function () {
63-
async function* throwErrorGenerator() {
64-
throw new Error("Test error");
65-
yield {name: testFiles[0].name, data: Buffer.from(testFiles[0].contents)};
66-
}
64+
const archiveData = output.getBuffer();
6765

66+
await assertArchiveContainsTestFiles(archiveData);
67+
});
68+
69+
it('errors in archive.completed if the generator errors', async function () {
70+
async function* throwErrorGenerator() {
71+
throw new Error("Test error");
72+
yield {name: 'Test', size: 0, data: Buffer.from([])};
73+
}
6874

69-
// Shouldn't error here - as this just starts the packing.
70-
const archive = await create_zip_archive({store: true}, throwErrorGenerator());
71-
await assert.isRejected(archive.completed, "Test error");
72-
await assertArchiveFailedWithError(archive);
73-
});
75+
// Shouldn't error here - as this just starts the packing.
76+
const archive = await makeArchive(throwErrorGenerator());
77+
await assert.isRejected(archive.completed, "Test error");
78+
await assertArchiveFailedWithError(archive);
79+
});
80+
}
81+
82+
describe('Archive', function () {
83+
describe('create_zip_archive', function () {
84+
const creator: ArchiveCreator = (entries) => create_zip_archive({store: true}, entries);
85+
testArchive('zip', creator);
86+
});
87+
describe('create_tar_archive', function () {
88+
const creator: ArchiveCreator = (entries) => create_tar_archive(entries);
89+
testArchive('tar', creator);
7490
});
7591
});

0 commit comments

Comments
 (0)