Skip to content

Adds zip downloads for all of a document's attachments #1471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b3416ee
Makes downloadStream fetch metadata
Spoffy Feb 17, 2025
e45b093
Refactors attachment downloads to be streaming, and include metadata
Spoffy Feb 18, 2025
001004b
Adds Zip archive downloads for attachments
Spoffy Feb 20, 2025
ca8f3b2
Fixes test types for attachments
Spoffy Feb 25, 2025
f7dd1cd
Attempts a fix for content-length causing tests to fail
Spoffy Feb 25, 2025
306261a
Adds TODO
Spoffy Feb 25, 2025
30f9556
Fixes test error in ExternalStorageAttachmentStore
Spoffy Feb 25, 2025
dafb2fb
Adds archive tests and fixes
Spoffy Feb 25, 2025
dc29771
Changes zip-stream version to 6.0.1
Spoffy Feb 25, 2025
742cac3
Adds permissions check to getAttachmentsArchive
Spoffy Feb 25, 2025
99513f1
Adds DocApi tests
Spoffy Feb 27, 2025
336a678
Fixes bad test expectation for DocApi
Spoffy Feb 27, 2025
6668797
Fixes import order in Archive.ts
Spoffy Feb 27, 2025
72f3e1e
Merge branch 'main' into spoffy/attachment-downloads
Spoffy Feb 27, 2025
d95b2d9
Refreshes yarn.lock
Spoffy Feb 27, 2025
3a825f3
Addresses formatting code review feedback
Spoffy Feb 27, 2025
b76655f
Changes file naming convention for attachments
Spoffy Feb 27, 2025
6f707bb
Shuts down attachment archiving if document shuts down
Spoffy Feb 27, 2025
efe15f7
Adds ActiveDoc archive tests
Spoffy Feb 28, 2025
e67c259
Cleans archive assertion in DocAPi
Spoffy Feb 28, 2025
1f845fe
Swaps TODO for doc comment
Spoffy Feb 28, 2025
2038992
Re-adds accidentally removed import
Spoffy Feb 28, 2025
c460ab6
Prevents name collisions due to multiple identical attachments
Spoffy Feb 28, 2025
a6daa51
Changes attachment file name to match doc name
Spoffy Feb 28, 2025
bfc9312
Changes 'no access' to 'insufficient access'
Spoffy Feb 28, 2025
d4eaadc
Fixes over-length line
Spoffy Feb 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions app/server/lib/ActiveDoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ import {Share} from 'app/gen-server/entity/Share';
import {RecordWithStringId} from 'app/plugin/DocApiTypes';
import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI';
import {AccessTokenOptions, AccessTokenResult, GristDocAPI, UIRowId} from 'app/plugin/GristAPI';
import {Archive, ArchiveEntry, create_zip_archive} from 'app/server/lib/Archive';
import {AssistanceSchemaPromptV1Context} from 'app/server/lib/Assistance';
import {AssistanceContext} from 'app/common/AssistancePrompts';
import {AuditEventAction} from 'app/server/lib/AuditEvent';
Expand Down Expand Up @@ -959,6 +960,44 @@ export class ActiveDoc extends EventEmitter {
return data;
}

public async getAttachmentsArchive(docSession: OptDocSession): Promise<Archive> {
if (
!await this._granularAccess.canReadEverything(docSession) &&
!await this.canDownload(docSession)
) {
throw new ApiError("Insufficient access to download attachments", 403);
}
if (!this.docData) {
throw new Error("No doc data");
}
const attachments = this.docData.getMetaTable('_grist_Attachments').getRecords();
const attachmentFileManager = this._attachmentFileManager;
const doc = this;

async function* fileGenerator(): AsyncGenerator<ArchiveEntry> {
const filesAdded = new Set<string>();
for (const attachment of attachments) {
if (doc._shuttingDown) {
throw new ApiError("Document is shutting down, archiving aborted", 500);
}
const file = await attachmentFileManager.getFile(attachment.fileIdent);
const fileHash = attachment.fileIdent.split(".")[0];
const name = `${fileHash}_${attachment.fileName}`;
// This should only happen if a file has identical name *and* content hash.
if (filesAdded.has(name)) {
continue;
}
filesAdded.add(name);
yield({
name,
data: file.contentStream,
});
}
}

return create_zip_archive({ store: true }, fileGenerator());
}

@ActiveDoc.keepDocOpen
public async startTransferringAllAttachmentsToDefaultStore() {
const attachmentStoreId = this._getDocumentSettings().attachmentStoreId;
Expand Down
59 changes: 59 additions & 0 deletions app/server/lib/Archive.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import {ZipArchiveEntry} from 'compress-commons';
import stream from 'node:stream';
import ZipStream, {ZipStreamOptions} from 'zip-stream';

export interface ArchiveEntry {
name: string;
data: stream.Readable | Buffer;
}

export interface Archive {
dataStream: stream.Readable;
completed: Promise<void>;
}

/**
*
* Creates a streamable zip archive, reading files on-demand from the entries iterator.
* Entries are provided as an async iterable, to ensure the archive is constructed
* correctly. A generator can be used for convenience.
* @param {ZipStreamOptions} options - Settings for the zip archive
* @param {AsyncIterable<ArchiveEntry>} entries - Entries to add.
* @returns {Archive}
*/
export async function create_zip_archive(
options: ZipStreamOptions, entries: AsyncIterable<ArchiveEntry>
): Promise<Archive> {
const archive = new ZipStream(options);

return {
dataStream: archive,
// Caller is responsible for error handling/awaiting on this promise.
completed: (async () => {
try {
for await (const entry of entries) {
// ZipStream will break if multiple entries try to be added at the same time.
await addEntryToZipArchive(archive, entry);
}
archive.finish();
} catch (error) {
archive.destroy(error);
} finally {
// If the stream was destroyed with an error, this will re-throw the error we caught above.
// Without this, node will see the stream as having an uncaught error, and complain.
await stream.promises.finished(archive);
}
})()
};
}

function addEntryToZipArchive(archive: ZipStream, file: ArchiveEntry): Promise<ZipArchiveEntry | undefined> {
return new Promise((resolve, reject) => {
archive.entry(file.data, { name: file.name }, function(err, entry) {
if (err) {
return reject(err);
}
return resolve(entry);
});
});
}
46 changes: 30 additions & 16 deletions app/server/lib/AttachmentFileManager.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import {AttachmentTransferStatus, DocAttachmentsLocation} from 'app/common/UserAPI';
import {
AttachmentFile,
AttachmentStoreDocInfo,
DocPoolId,
getDocPoolIdFromDocInfo,
IAttachmentStore
IAttachmentStore, loadAttachmentFileIntoMemory
} from 'app/server/lib/AttachmentStore';
import {AttachmentStoreId, IAttachmentStoreProvider} from 'app/server/lib/AttachmentStoreProvider';
import {checksumFileStream} from 'app/server/lib/checksumFile';
import {DocStorage} from 'app/server/lib/DocStorage';
import log from 'app/server/lib/log';
import {LogMethods} from 'app/server/lib/LogMethods';
import {MemoryWritableStream} from 'app/server/utils/MemoryWritableStream';
import {EventEmitter} from 'events';
import {Readable} from 'node:stream';
import {AbortController} from 'node-abort-controller';
Expand Down Expand Up @@ -141,7 +141,12 @@ export class AttachmentFileManager extends EventEmitter {


public async getFileData(fileIdent: string): Promise<Buffer> {
return (await this._getFile(fileIdent)).data;
const file = await this.getFile(fileIdent);
return (await loadAttachmentFileIntoMemory(file)).contents;
}

public async getFile(fileIdent: string): Promise<AttachmentFile> {
return (await this._getFileInfo(fileIdent)).file;
}

public async locationSummary(): Promise<DocAttachmentsLocation> {
Expand Down Expand Up @@ -211,12 +216,17 @@ export class AttachmentFileManager extends EventEmitter {
// re-check.
// Streaming isn't an option here, as SQLite only supports buffers (meaning we need to keep at
// least 1 full copy of the file in memory during transfers).
const file = await this._getFile(fileIdent);
if (!await validateFileChecksum(fileIdent, file.data)) {
throw new AttachmentRetrievalError(file.storageId, file.ident, "checksum verification failed for retrieved file");
const fileInfo = await this._getFileInfo(fileIdent);
// Cache this to avoid undefined warnings everywhere we use `dataInMemory`.
const fileInMemory = await loadAttachmentFileIntoMemory(fileInfo.file);

if (!await validateFileChecksum(fileIdent, fileInMemory.contents)) {
throw new AttachmentRetrievalError(
fileInfo.storageId, fileInfo.ident, "checksum verification failed for retrieved file"
);
}
if (!newStoreId) {
await this._storeFileInLocalStorage(fileIdent, file.data);
await this._storeFileInLocalStorage(fileIdent, fileInMemory.contents);
return;
}
const newStore = await this._getStore(newStoreId);
Expand All @@ -228,7 +238,7 @@ export class AttachmentFileManager extends EventEmitter {
throw new StoreNotAvailableError(newStoreId);
}
// Store should error if the upload fails in any way.
await this._storeFileInAttachmentStore(newStore, fileIdent, file.data);
await this._storeFileInAttachmentStore(newStore, fileIdent, fileInMemory.contents);

// Don't remove the file from the previous store, in case we need to roll back to an earlier
// snapshot.
Expand Down Expand Up @@ -422,7 +432,7 @@ export class AttachmentFileManager extends EventEmitter {
return this._addFileToExternalStorage(destStoreId, fileIdent, fileData);
}

private async _getFile(fileIdent: string): Promise<AttachmentFileInfo> {
private async _getFileInfo(fileIdent: string): Promise<AttachmentFileInfo> {
const fileInfo = await this._docStorage.getFileInfo(fileIdent);
if (!fileInfo) {
this._log.error({ fileIdent }, "cannot find file metadata in document");
Expand All @@ -436,7 +446,13 @@ export class AttachmentFileManager extends EventEmitter {
return {
ident: fileIdent,
storageId: null,
data: fileInfo.data,
file: {
metadata: {
size: fileInfo.data.length,
},
contentStream: Readable.from(fileInfo.data),
contents: fileInfo.data,
}
};
}
const store = await this._getStore(fileInfo.storageId);
Expand All @@ -447,7 +463,7 @@ export class AttachmentFileManager extends EventEmitter {
return {
ident: fileIdent,
storageId: store.id,
data: await this._getFileDataFromAttachmentStore(store, fileIdent),
file: await this._getFileDataFromAttachmentStore(store, fileIdent),
};
}

Expand Down Expand Up @@ -494,11 +510,9 @@ export class AttachmentFileManager extends EventEmitter {
await this._docStorage.attachOrUpdateFile(fileIdent, undefined, store.id);
}

private async _getFileDataFromAttachmentStore(store: IAttachmentStore, fileIdent: string): Promise<Buffer> {
private async _getFileDataFromAttachmentStore(store: IAttachmentStore, fileIdent: string): Promise<AttachmentFile> {
try {
const outputStream = new MemoryWritableStream();
await store.download(this._getDocPoolId(), fileIdent, outputStream);
return outputStream.getBuffer();
return await store.download(this._getDocPoolId(), fileIdent);
} catch(e) {
throw new AttachmentRetrievalError(store.id, fileIdent, e);
}
Expand All @@ -525,7 +539,7 @@ interface AttachmentFileManagerLogInfo {
interface AttachmentFileInfo {
ident: string;
storageId: string | null;
data: Buffer;
file: AttachmentFile;
}

type TransferJob = Promise<void>
59 changes: 48 additions & 11 deletions app/server/lib/AttachmentStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
ExternalStorage,
joinKeySegments,
} from 'app/server/lib/ExternalStorage';
import {MemoryWritableStream} from 'app/server/utils/MemoryWritableStream';
import * as fse from 'fs-extra';
import * as stream from 'node:stream';
import * as path from 'path';
Expand All @@ -20,6 +21,40 @@ export interface AttachmentStoreDocInfo {
trunkId: string | null | undefined;
}

interface FileMetadata {
size: number;
}

export interface AttachmentFile {
metadata: FileMetadata,
contentStream: stream.Readable,
// Used to optimise certain scenarios where the data *must* be in memory (e.g. SQLite read/writes)
contents?: Buffer
}

export interface AttachmentFileInMemory extends AttachmentFile {
contents: Buffer;
}

export function isAttachmentFileInMemory(file: AttachmentFile): file is AttachmentFileInMemory {
return file.contents !== undefined;
}

export async function loadAttachmentFileIntoMemory(file: AttachmentFile): Promise<AttachmentFileInMemory> {
if (isAttachmentFileInMemory(file)) {
return file;
}
const memoryStream = new MemoryWritableStream();
await stream.promises.pipeline(file.contentStream, memoryStream);
const buffer = memoryStream.getBuffer();

// Use Object.assign because it gives type safety, without having to us `as` or copy the object.
return Object.assign(file, {
contents: buffer,
contentStream: stream.Readable.from(buffer),
});
}

/**
* Gets the correct pool id for a given document, given the document's id and trunk id.
*
Expand Down Expand Up @@ -74,10 +109,8 @@ export interface IAttachmentStore {
// Upload attachment to the store.
upload(docPoolId: DocPoolId, fileId: FileId, fileData: stream.Readable): Promise<void>;

// Download attachment to an in-memory buffer.
// It's preferable to accept an output stream as a parameter, as it simplifies attachment store
// implementation and gives them control over local buffering.
download(docPoolId: DocPoolId, fileId: FileId, outputStream: stream.Writable): Promise<void>;
// Fetch the attachment from the store, including a readable stream for the attachment's contents.
download(docPoolId: DocPoolId, fileId: FileId): Promise<AttachmentFile>;

// Remove attachment from the store
delete(docPoolId: DocPoolId, fileId: FileId): Promise<void>;
Expand Down Expand Up @@ -127,8 +160,8 @@ export class ExternalStorageAttachmentStore implements IAttachmentStore {
await this._storage.uploadStream(this._getKey(docPoolId, fileId), fileData);
}

public async download(docPoolId: string, fileId: string, outputStream: stream.Writable): Promise<void> {
await this._storage.downloadStream(this._getKey(docPoolId, fileId), outputStream);
public async download(docPoolId: string, fileId: string): Promise<AttachmentFile> {
return await this._storage.downloadStream(this._getKey(docPoolId, fileId));
}

public async delete(docPoolId: string, fileId: string): Promise<void> {
Expand Down Expand Up @@ -171,11 +204,15 @@ export class FilesystemAttachmentStore implements IAttachmentStore {
);
}

public async download(docPoolId: DocPoolId, fileId: FileId, output: stream.Writable): Promise<void> {
await stream.promises.pipeline(
fse.createReadStream(this._createPath(docPoolId, fileId)),
output,
);
public async download(docPoolId: DocPoolId, fileId: FileId): Promise<AttachmentFile> {
const filePath = this._createPath(docPoolId, fileId);
const stat = await fse.stat(filePath);
return {
metadata: {
size: stat.size,
},
contentStream: fse.createReadStream(filePath)
};
}

public async delete(docPoolId: string, fileId: string): Promise<void> {
Expand Down
14 changes: 14 additions & 0 deletions app/server/lib/DocApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,20 @@ export class DocWorkerApi {
})
);

// Responds with an archive of all attachment contents, with suitable Content-Type and Content-Disposition.
this._app.get('/api/docs/:docId/attachments/download', canView, withDoc(async (activeDoc, req, res) => {
const archive = await activeDoc.getAttachmentsArchive(docSessionFromRequest(req));
const docName = await this._getDownloadFilename(req, "Attachments", activeDoc.doc);
res.status(200)
.type("application/zip")
// Construct a content-disposition header of the form 'attachment; filename="NAME"'
.set('Content-Disposition', contentDisposition(`${docName}.zip`, {type: 'attachment'}))
// Avoid storing because this could be huge.
.set('Cache-Control', 'no-store');

archive.dataStream.pipe(res);
}));

// Returns cleaned metadata for a given attachment ID (i.e. a rowId in _grist_Attachments table).
this._app.get('/api/docs/:docId/attachments/:attId', canView, withDoc(async (activeDoc, req, res) => {
const attId = integerParam(req.params.attId, 'attId');
Expand Down
14 changes: 12 additions & 2 deletions app/server/lib/ExternalStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ import stream from 'node:stream';
// checksum is expected otherwise.
export const DELETED_TOKEN = '*DELETED*';

export interface FileMetadata {
size: number;
snapshotId: string;
}

export interface StreamDownloadResult {
metadata: FileMetadata,
contentStream: stream.Readable,
}

/**
* An external store for the content of files. The store may be either consistent
* or eventually consistent. Specifically, the `exists`, `download`, and `versions`
Expand Down Expand Up @@ -59,7 +69,7 @@ export interface ExternalStorage {
close(): Promise<void>;

uploadStream?(key: string, inStream: stream.Readable, metadata?: ObjMetadata): Promise<string|null|typeof Unchanged>;
downloadStream?(key: string, outStream: stream.Writable, snapshotId?: string ): Promise<string>;
downloadStream?(key: string, snapshotId?: string ): Promise<StreamDownloadResult>;
}

/**
Expand All @@ -81,7 +91,7 @@ export class KeyMappedExternalStorage implements ExternalStorage {
if (_ext.downloadStream !== undefined) {
const extDownloadStream = _ext.downloadStream;
this.downloadStream =
(key, outStream, snapshotId) => extDownloadStream.call(_ext, this._map(key), outStream, snapshotId);
(key, snapshotId) => extDownloadStream.call(_ext, this._map(key), snapshotId);
}
if (_ext.removeAllWithPrefix !== undefined) {
const extRemoveAllWithPrefix = _ext.removeAllWithPrefix;
Expand Down
Loading