Skip to content

Commit 3da0e3d

Browse files
feat: Add webpage translation functionality
1 parent 54250f2 commit 3da0e3d

File tree

4 files changed

+125
-9
lines changed

4 files changed

+125
-9
lines changed

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,30 @@ directly:
230230
- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
231231
- `downloadDocument()`
232232

233+
#### Translating webpages
234+
235+
Webpages can also be translated by calling `translateWebpage()`. It has the same signature as `translateDocument()` except for the
236+
first parameter, which is the URL for the webpage you would like translated.
237+
238+
```javascript
239+
// Translate the English Wikipedia article about DeepL into German:
240+
await translator.translateWebpage(
241+
'https://en.wikipedia.org/wiki/DeepL_Translator',
242+
'DeepLWiki.html',
243+
'en',
244+
'de'
245+
);
246+
```
247+
248+
Like `translateDocument()`, `translateWebpage()` wraps multiple steps: fetching the webpage HTML, uploading it, polling status until
249+
the translation is complete, and downloading. If your application needs to
250+
execute these steps individually, you can instead use the following functions
251+
directly:
252+
253+
- `uploadWebpage()`,
254+
- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
255+
- `downloadDocument()`
256+
233257
#### Document translation options
234258

235259
- `formality`: same as in [Text translation options](#text-translation-options).

src/client.ts

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,16 @@ export class HttpClient {
106106
url: string,
107107
timeoutMs: number,
108108
responseAsStream: boolean,
109+
isDeepL: boolean,
109110
options: SendRequestOptions,
110111
): AxiosRequestConfig {
111112
const headers = Object.assign({}, this.headers, options.headers);
113+
logDebug(`isDeepL: ${isDeepL}`);
112114

113115
const axiosRequestConfig: AxiosRequestConfig = {
114116
url,
115117
method,
116-
baseURL: this.serverUrl,
118+
baseURL: isDeepL ? this.serverUrl : undefined,
117119
headers,
118120
responseType: responseAsStream ? 'stream' : 'text',
119121
timeout: timeoutMs,
@@ -147,19 +149,26 @@ export class HttpClient {
147149
/**
148150
* Makes API request retrying if necessary, and returns (as Promise) response.
149151
* @param method HTTP method, for example 'GET'
150-
* @param url Path to endpoint, excluding base server URL.
152+
* @param url Path to endpoint excluding the base server URL for DeepL API requests, or the absolute URL when requesting a webpage.
151153
* @param options Additional options controlling request.
152154
* @param responseAsStream Set to true if the return type is IncomingMessage.
153-
* @return Fulfills with status code and response (as text or stream).
155+
* @return Fulfills with status code, content type, and response (as text or stream).
154156
*/
155157
async sendRequestWithBackoff<TContent extends string | IncomingMessage>(
156158
method: HttpMethod,
157159
url: string,
158160
options?: SendRequestOptions,
159161
responseAsStream = false,
160-
): Promise<{ statusCode: number; content: TContent }> {
162+
): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
163+
let isDeepLUrl: boolean;
164+
try {
165+
isDeepLUrl = !!new URL(url);
166+
} catch {
167+
isDeepLUrl = true;
168+
}
169+
161170
options = options === undefined ? {} : options;
162-
logInfo(`Request to DeepL API ${method} ${url}`);
171+
logInfo(`${isDeepLUrl ? 'Request to DeepL API' : 'Request to webpage'} ${method} ${url}`);
163172
logDebug(`Request details: ${options.data}`);
164173
const backoff = new BackoffTimer();
165174
let response, error;
@@ -170,8 +179,14 @@ export class HttpClient {
170179
url,
171180
timeoutMs,
172181
responseAsStream,
182+
isDeepLUrl,
173183
options,
174184
);
185+
186+
if (!isDeepLUrl && axiosRequestConfig.headers) {
187+
delete axiosRequestConfig.headers.Authorization;
188+
}
189+
175190
try {
176191
response = await HttpClient.sendAxiosRequest<TContent>(axiosRequestConfig);
177192
error = undefined;
@@ -199,8 +214,12 @@ export class HttpClient {
199214
}
200215

201216
if (response !== undefined) {
202-
const { statusCode, content } = response;
203-
logInfo(`DeepL API response ${method} ${url} ${statusCode}`);
217+
const { statusCode, content, contentType } = response;
218+
logInfo(
219+
`${
220+
isDeepLUrl ? 'DeepL API response' : 'Webpage response'
221+
} ${method} ${url} ${statusCode}${!isDeepLUrl ? ` ${contentType}` : ''}`,
222+
);
204223
if (!responseAsStream) {
205224
logDebug('Response details:', { content: content });
206225
}
@@ -217,7 +236,7 @@ export class HttpClient {
217236
*/
218237
private static async sendAxiosRequest<TContent extends string | IncomingMessage>(
219238
axiosRequestConfig: AxiosRequestConfig,
220-
): Promise<{ statusCode: number; content: TContent }> {
239+
): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
221240
try {
222241
const response = await axios.request(axiosRequestConfig);
223242

@@ -227,7 +246,12 @@ export class HttpClient {
227246
response.data = JSON.stringify(response.data);
228247
}
229248
}
230-
return { statusCode: response.status, content: response.data };
249+
250+
return {
251+
statusCode: response.status,
252+
content: response.data,
253+
contentType: response.headers['content-type'],
254+
};
231255
} catch (axios_error_raw) {
232256
const axiosError = axios_error_raw as AxiosError;
233257
const message: string = axiosError.message || '';

src/errors.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,5 @@ export class DocumentTranslationError extends DeepLError {
4141
export class GlossaryNotFoundError extends DeepLError {}
4242

4343
export class DocumentNotReadyError extends DeepLError {}
44+
45+
/**
 * Error thrown when the content fetched from a webpage URL for translation
 * cannot be used, for example because the response is not HTML.
 */
export class WebsiteDownloadError extends DeepLError {}

src/index.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
GlossaryNotFoundError,
1212
QuotaExceededError,
1313
TooManyRequestsError,
14+
WebsiteDownloadError,
1415
} from './errors';
1516
import { GlossaryEntries } from './glossaryEntries';
1617
import {
@@ -645,6 +646,37 @@ export class Translator {
645646
}
646647
}
647648

649+
/**
 * Fetches the HTML of the specified webpage, uploads it to DeepL to translate into the given
 * target language, waits for translation to complete, then downloads the translated webpage
 * to the specified output path.
 * @param webpageUrl String or URL containing the URL of the webpage to be translated.
 * @param outputFile String containing file path to create translated document, or Stream or
 *     FileHandle to write translated document content.
 * @param sourceLang Language code of input document, or null to use auto-detection.
 * @param targetLang Language code of language to translate into.
 * @param options Optional DocumentTranslateOptions object containing additional options
 *     controlling translation. If no `filename` is provided, 'webpage.html' is used.
 * @return Fulfills with a DocumentStatus object for the completed translation. You can use the
 *     billedCharacters property to check how many characters were billed for the document.
 * @throws {WebsiteDownloadError} If the webpage URL does not return HTML.
 * @throws {DocumentTranslationError} If any error occurs during document upload, translation or
 *     download. The `documentHandle` property of the error may be used to recover the document.
 */
async translateWebpage(
    webpageUrl: string | URL,
    outputFile: string | fs.WriteStream | fs.promises.FileHandle,
    sourceLang: SourceLanguageCode | null,
    targetLang: TargetLanguageCode,
    options?: DocumentTranslateOptions,
): Promise<DocumentStatus> {
    // Fetch the page first so that download failures surface before any document is uploaded.
    const html = await this.getContentFromWebpage(webpageUrl);

    // Apply the default filename last: an explicit `filename: undefined` in the caller's
    // options must not erase it, because buffer input carries no name of its own.
    const documentOptions: DocumentTranslateOptions = {
        ...options,
        filename: options?.filename ?? 'webpage.html',
    };

    return this.translateDocument(
        Buffer.from(html),
        outputFile,
        sourceLang,
        targetLang,
        documentOptions,
    );
}
679+
648680
/**
649681
* Uploads specified document to DeepL to translate into target language, and returns handle associated with the document.
650682
* @param inputFile String containing file path, stream containing file data, or FileHandle.
@@ -709,6 +741,28 @@ export class Translator {
709741
}
710742
}
711743

744+
/**
 * Fetches the HTML of the specified webpage and uploads it to DeepL to translate into the
 * target language, returning the handle associated with the document.
 * @param webpageUrl String or URL containing the URL of the webpage to be translated.
 * @param sourceLang Language code of input document, or null to use auto-detection.
 * @param targetLang Language code of language to translate into.
 * @param options Optional DocumentTranslateOptions object containing additional options
 *     controlling translation. If no `filename` is provided, 'webpage.html' is used.
 * @return Fulfills with DocumentHandle associated with the in-progress translation.
 * @throws {WebsiteDownloadError} If the webpage URL does not return HTML.
 */
async uploadWebpage(
    webpageUrl: string | URL,
    sourceLang: SourceLanguageCode | null,
    targetLang: TargetLanguageCode,
    options?: DocumentTranslateOptions,
): Promise<DocumentHandle> {
    const html = await this.getContentFromWebpage(webpageUrl);

    // Apply the default filename last: an explicit `filename: undefined` in the caller's
    // options must not erase it, because buffer input carries no name of its own.
    const documentOptions: DocumentTranslateOptions = {
        ...options,
        filename: options?.filename ?? 'webpage.html',
    };

    return this.uploadDocument(Buffer.from(html), sourceLang, targetLang, documentOptions);
}
765+
712766
/**
713767
* Retrieves the status of the document translation associated with the given document handle.
714768
* @param handle Document handle associated with document.
@@ -1003,6 +1057,18 @@ export class Translator {
10031057
return libraryInfoString;
10041058
}
10051059

1060+
/**
 * Fetches the given webpage with a GET request and returns its body as text.
 * @param webpageUrl String or URL of the webpage to fetch. Must be an absolute http(s) URL.
 * @return Fulfills with the response body of the webpage.
 * @throws {WebsiteDownloadError} If the URL is not an absolute http(s) URL, or if the response
 *     content type is not HTML.
 */
private async getContentFromWebpage(webpageUrl: string | URL): Promise<string> {
    const target = webpageUrl.toString();

    // Reject anything that is not an absolute http(s) URL before issuing the request: the HTTP
    // client treats a string that fails URL parsing as a DeepL API path, so a malformed
    // webpage URL would otherwise be sent to the DeepL server instead of failing clearly.
    let parsed: URL;
    try {
        parsed = new URL(target);
    } catch {
        throw new WebsiteDownloadError(`Webpage URL must be an absolute URL: ${target}`);
    }
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        throw new WebsiteDownloadError(`Webpage URL must use http or https: ${target}`);
    }

    const { statusCode, content, contentType } =
        await this.httpClient.sendRequestWithBackoff<string>('GET', target);
    await checkStatusCode(statusCode, content);

    // Media types are case-insensitive, so normalize before matching.
    if (!contentType?.toLowerCase().includes('text/html')) {
        throw new WebsiteDownloadError('URL to translate must return HTML');
    }

    return content;
}
1071+
10061072
/**
10071073
* HttpClient implements all HTTP requests and retries.
10081074
* @private

0 commit comments

Comments
 (0)