-
-
Notifications
You must be signed in to change notification settings - Fork 405
Adds zip downloads for all of a document's attachments #1471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
b3416ee
e45b093
001004b
ca8f3b2
f7dd1cd
306261a
30f9556
dafb2fb
dc29771
742cac3
99513f1
336a678
6668797
72f3e1e
d95b2d9
3a825f3
b76655f
6f707bb
efe15f7
e67c259
1f845fe
2038992
c460ab6
a6daa51
bfc9312
d4eaadc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,6 +90,7 @@ import {Share} from 'app/gen-server/entity/Share'; | |
import {RecordWithStringId} from 'app/plugin/DocApiTypes'; | ||
import {ParseFileResult, ParseOptions} from 'app/plugin/FileParserAPI'; | ||
import {AccessTokenOptions, AccessTokenResult, GristDocAPI, UIRowId} from 'app/plugin/GristAPI'; | ||
import {Archive, ArchiveEntry, create_zip_archive} from 'app/server/lib/Archive'; | ||
import {AssistanceSchemaPromptV1Context} from 'app/server/lib/Assistance'; | ||
import {AssistanceContext} from 'app/common/AssistancePrompts'; | ||
import {AuditEventAction} from 'app/server/lib/AuditEvent'; | ||
|
@@ -959,6 +960,32 @@ export class ActiveDoc extends EventEmitter { | |
return data; | ||
} | ||
|
||
// Builds a zip archive from the records of the document's _grist_Attachments table. | ||
// Throws Error("No doc data") if this.docData is not set. | ||
// Resolves to the Archive returned by create_zip_archive. | ||
public async getAttachmentsArchive(): Promise<Archive> { | ||
if (!this.docData) { | ||
throw new Error("No doc data"); | ||
} | ||
const attachments = this.docData.getMetaTable('_grist_Attachments').getRecords(); | ||
// Captured in a local so the nested generator function below can use it without `this`. | ||
const attachmentFileManager = this._attachmentFileManager; | ||
|
||
// Yields one archive entry per distinct fileIdent. Each file is only fetched when the | ||
// consumer asks for the next entry, since getFile is awaited inside the loop. | ||
async function* fileGenerator(): AsyncGenerator<ArchiveEntry> { | ||
const filesSeen = new Set<string>(); | ||
for (const attachment of attachments) { | ||
// Records sharing an already-seen fileIdent are skipped, so only the first record's | ||
// fileName is used for that content. | ||
if (filesSeen.has(attachment.fileIdent)) { | ||
continue; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we've agreed to not dedupe attachments that happen to have the same hash (but may have different names) |
||
} | ||
filesSeen.add(attachment.fileIdent); | ||
const file = await attachmentFileManager.getFile(attachment.fileIdent); | ||
yield({ | ||
name: attachment.fileName, | ||
data: file.contentStream, | ||
}); | ||
// TODO - Abort this on shutdown by throwing an error | ||
There's a TODO here. |
||
} | ||
} | ||
|
||
// NOTE(review): `store: true` presumably asks zip-stream to store entries uncompressed - | ||
// confirm against the zip-stream options documentation. | ||
return create_zip_archive({ store: true }, fileGenerator()); | ||
} | ||
|
||
@ActiveDoc.keepDocOpen | ||
public async startTransferringAllAttachmentsToDefaultStore() { | ||
const attachmentStoreId = this._getDocumentSettings().attachmentStoreId; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import {ZipArchiveEntry} from 'compress-commons'; | ||
import stream from 'node:stream'; | ||
import ZipStream, {ZipStreamOptions} from 'zip-stream'; | ||
|
||
// A single file to add to an archive. | ||
export interface ArchiveEntry { | ||
// Name given to the entry when it is added to the archive. | ||
name: string; | ||
// The entry's contents, either as a readable stream or as an in-memory buffer. | ||
data: stream.Readable | Buffer; | ||
} | ||
|
||
// Handle to an archive that is being produced. | ||
export interface Archive { | ||
// Readable stream that carries the archive's bytes. | ||
dataStream: stream.Readable, | ||
// Promise tracking production of the archive (void result). | ||
completed: Promise<void> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you format the end of lines of your interfaces consistently? |
||
} | ||
|
||
/** | ||
* Creates a streamable zip archive, reading files on-demand from the entries iterator. | ||
* Entries are provided as an async iterable, to ensure the archive is constructed | ||
* correctly. A generator can be used for convenience. | ||
* | ||
* Entries are added strictly one at a time: each one is awaited before the next is requested. | ||
* Adding entries starts as soon as this function returns, not when `completed` is first awaited. | ||
* @param options - Settings for the zip archive | ||
* @param entries - Entries to add. | ||
* @returns A promise for the archive. `dataStream` is the zip output stream; `completed` | ||
*   resolves once every entry is added and the stream has finished, and rejects if reading | ||
*   `entries` or adding an entry throws. | ||
*/ | ||
export async function create_zip_archive( | ||
options: ZipStreamOptions, entries: AsyncIterable<ArchiveEntry> | ||
): Promise<Archive> { | ||
// Dynamic import needed because zip-stream is only an ESM module, and we use module: CommonJS | ||
// However, typescript dynamic imports are broken. So we need to dynamic eval to fix it. :( | ||
// eslint-disable-next-line @typescript-eslint/no-implied-eval | ||
const ZipStreamModule = await (Function('return import("zip-stream")')() as Promise<typeof import('zip-stream')>); | ||
|
||
const _archive = new ZipStreamModule.default(options); | ||
|
||
return { | ||
dataStream: _archive, | ||
// TODO - Should we add a default 'catch' here that logs errors? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a TODO here, I don't quite understand it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right now, this promise swallows errors if there isn't a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment claims that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perfect - thanks for clarifying! |
||
// The async function below is invoked immediately; `completed` is the promise it returns. | ||
// NOTE(review): if the loop throws, `_archive.finish()` is never reached and nothing in this | ||
// function destroys `_archive`; readers of `dataStream` may be left waiting - confirm. | ||
completed: (async () => { | ||
for await (const entry of entries) { | ||
// ZipStream will break if multiple entries try to be added at the same time. | ||
// TODO - test what happens to the stream if an error is thrown here. | ||
// Do we need to manually destroy the archive? | ||
await addEntryToZipArchive(_archive, entry); | ||
} | ||
_archive.finish(); | ||
await stream.promises.finished(_archive); | ||
})() | ||
}; | ||
} | ||
|
||
/**
 * Promise wrapper around the callback-based `archive.entry(...)` call.
 *
 * @param archive - The zip stream to append to.
 * @param file - The entry to add; its `name` is passed as the entry's name option and its
 *   `data` as the entry source.
 * @returns A promise that resolves with whatever entry the callback reports, or rejects
 *   with the error the callback reports.
 */
function addEntryToZipArchive(archive: ZipStream, file: ArchiveEntry): Promise<ZipArchiveEntry | undefined> {
  const {data, name} = file;
  return new Promise((resolve, reject) => {
    archive.entry(data, {name}, (err, entry) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(entry);
    });
  });
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,9 +2,12 @@ import { | |
ExternalStorage, | ||
joinKeySegments, | ||
} from 'app/server/lib/ExternalStorage'; | ||
import {MemoryWritableStream} from 'app/server/utils/MemoryWritableStream'; | ||
import * as fse from 'fs-extra'; | ||
import * as stream from 'node:stream'; | ||
import * as path from 'path'; | ||
import {Readable} from 'node:stream'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. import order There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh man, I'm really disappointed with this one - I did a review of imports before posting the PR for review. |
||
import {pipeline} from 'node:stream/promises'; | ||
|
||
export type DocPoolId = string; | ||
type FileId = string; | ||
|
@@ -20,6 +23,40 @@ export interface AttachmentStoreDocInfo { | |
trunkId: string | null | undefined; | ||
} | ||
|
||
/** Metadata known about a stored attachment file. */
interface FileMetadata {
  /** Size of the file, as reported by the store that produced it. */
  size: number;
}

/** An attachment fetched from a store: its metadata plus a stream of its contents. */
export interface AttachmentFile {
  metadata: FileMetadata;
  contentStream: Readable;
  // Used to optimise certain scenarios where the data *must* be in memory (e.g. SQLite read/writes)
  contents?: Buffer;
}

/** An AttachmentFile whose contents are guaranteed to be available as a Buffer. */
export interface AttachmentFileInMemory extends AttachmentFile {
  contents: Buffer;
}
|
||
/**
 * Type guard: reports whether the file already carries its contents as a buffer,
 * i.e. whether `contents` is anything other than `undefined`.
 */
export function isAttachmentFileInMemory(file: AttachmentFile): file is AttachmentFileInMemory {
  const {contents} = file;
  return typeof contents !== 'undefined';
}
|
||
/**
 * Ensures an attachment's contents are held in memory.
 *
 * A file that already has `contents` is returned untouched. Otherwise its `contentStream`
 * is drained into a buffer, and the same file object is updated in place with that buffer
 * and a new readable stream created from it.
 *
 * @param file - The attachment to load. It is mutated when loading is needed.
 * @returns The same object, now typed as having in-memory contents.
 */
export async function loadAttachmentFileIntoMemory(file: AttachmentFile): Promise<AttachmentFileInMemory> {
  if (!isAttachmentFileInMemory(file)) {
    const sink = new MemoryWritableStream();
    await pipeline(file.contentStream, sink);
    const contents = sink.getBuffer();

    // Object.assign keeps this type-safe without needing an `as` cast or a copy of the object.
    return Object.assign(file, {
      contents,
      contentStream: Readable.from(contents),
    });
  }
  return file;
}
|
||
/** | ||
* Gets the correct pool id for a given document, given the document's id and trunk id. | ||
* | ||
|
@@ -74,10 +111,8 @@ export interface IAttachmentStore { | |
// Upload attachment to the store. | ||
upload(docPoolId: DocPoolId, fileId: FileId, fileData: stream.Readable): Promise<void>; | ||
|
||
// Download attachment to an in-memory buffer. | ||
// It's preferable to accept an output stream as a parameter, as it simplifies attachment store | ||
// implementation and gives them control over local buffering. | ||
download(docPoolId: DocPoolId, fileId: FileId, outputStream: stream.Writable): Promise<void>; | ||
// Fetch the attachment from the store, including a readable stream for the attachment's contents. | ||
download(docPoolId: DocPoolId, fileId: FileId): Promise<AttachmentFile>; | ||
|
||
// Remove attachment from the store | ||
delete(docPoolId: DocPoolId, fileId: FileId): Promise<void>; | ||
|
@@ -127,8 +162,8 @@ export class ExternalStorageAttachmentStore implements IAttachmentStore { | |
await this._storage.uploadStream(this._getKey(docPoolId, fileId), fileData); | ||
} | ||
|
||
public async download(docPoolId: string, fileId: string, outputStream: stream.Writable): Promise<void> { | ||
await this._storage.downloadStream(this._getKey(docPoolId, fileId), outputStream); | ||
public async download(docPoolId: string, fileId: string): Promise<AttachmentFile> { | ||
return await this._storage.downloadStream(this._getKey(docPoolId, fileId)); | ||
} | ||
|
||
public async delete(docPoolId: string, fileId: string): Promise<void> { | ||
|
@@ -171,11 +206,15 @@ export class FilesystemAttachmentStore implements IAttachmentStore { | |
); | ||
} | ||
|
||
public async download(docPoolId: DocPoolId, fileId: FileId, output: stream.Writable): Promise<void> { | ||
await stream.promises.pipeline( | ||
fse.createReadStream(this._createPath(docPoolId, fileId)), | ||
output, | ||
); | ||
public async download(docPoolId: DocPoolId, fileId: FileId): Promise<AttachmentFile> { | ||
const filePath = this._createPath(docPoolId, fileId); | ||
const stat = await fse.stat(filePath); | ||
return { | ||
metadata: { | ||
size: stat.size, | ||
}, | ||
contentStream: fse.createReadStream(filePath) | ||
}; | ||
} | ||
|
||
public async delete(docPoolId: string, fileId: string): Promise<void> { | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -573,6 +573,19 @@ export class DocWorkerApi { | |||||||||||||||||||||||
}) | ||||||||||||||||||||||||
); | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
// Responds with a zip archive of the document's attachments, with suitable Content-Type and Content-Disposition. | ||||||||||||||||||||||||
this._app.get('/api/docs/:docId/attachments/download', canView, withDoc(async (activeDoc, req, res) => { | ||||||||||||||||||||||||
const archive = await activeDoc.getAttachmentsArchive(); | ||||||||||||||||||||||||
res.status(200) | ||||||||||||||||||||||||
.type("application/zip") | ||||||||||||||||||||||||
// Construct a content-disposition header of the form 'attachment; filename="NAME"' | ||||||||||||||||||||||||
.set('Content-Disposition', contentDisposition(`${activeDoc.docName}.zip`, {type: 'attachment'})) | ||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gave this a try. Everything worked well. The name was a bit jarring, compared to other downloads associated with a document. What do you think about a more human friendly name, that rhymes with what other grist-core/app/server/lib/DocApi.ts Lines 1834 to 1844 in 14b9147
Something a bit weird seems to happen if I download with multiple distinct files that have the same name, but we've talked about changing the name structure so no need to poke at that... |
||||||||||||||||||||||||
// Avoid storing because this could be huge. | ||||||||||||||||||||||||
.set('Cache-Control', 'no-store'); | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
archive.dataStream.pipe(res); | ||||||||||||||||||||||||
})); | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
// Returns cleaned metadata for a given attachment ID (i.e. a rowId in _grist_Attachments table). | ||||||||||||||||||||||||
this._app.get('/api/docs/:docId/attachments/:attId', canView, withDoc(async (activeDoc, req, res) => { | ||||||||||||||||||||||||
const attId = integerParam(req.params.attId, 'attId'); | ||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no access control here. Enough ActiveDoc methods take responsibility for access control that I think you might want to add a comment that you're not doing access control (or take an OptDocSession and do access control). Specifically, so far the caller just has a canView, which isn't a very high bar. The easy path would be to use ActiveDoc.canDownload. More advanced would be to check view permissions on every attachment.