-
Notifications
You must be signed in to change notification settings - Fork 27
Add webpage translation functionality #25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,14 +106,16 @@ export class HttpClient { | |
url: string, | ||
timeoutMs: number, | ||
responseAsStream: boolean, | ||
isDeepL: boolean, | ||
options: SendRequestOptions, | ||
): AxiosRequestConfig { | ||
const headers = Object.assign({}, this.headers, options.headers); | ||
logDebug(`isDeepL: ${isDeepL}`); | ||
|
||
const axiosRequestConfig: AxiosRequestConfig = { | ||
SleeplessOne1917 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
url, | ||
method, | ||
baseURL: this.serverUrl, | ||
baseURL: isDeepL ? this.serverUrl : undefined, | ||
headers, | ||
responseType: responseAsStream ? 'stream' : 'text', | ||
timeout: timeoutMs, | ||
|
@@ -147,19 +149,26 @@ export class HttpClient { | |
/** | ||
* Makes API request retrying if necessary, and returns (as Promise) response. | ||
* @param method HTTP method, for example 'GET' | ||
* @param url Path to endpoint, excluding base server URL. | ||
* @param url Path to endpoint excluding the base server URL for DeepL API requests, or the full absolute URL for webpage requests. | ||
* @param options Additional options controlling request. | ||
* @param responseAsStream Set to true if the return type is IncomingMessage. | ||
* @return Fulfills with status code and response (as text or stream). | ||
* @return Fulfills with status code, content type, and response (as text or stream). | ||
*/ | ||
async sendRequestWithBackoff<TContent extends string | IncomingMessage>( | ||
method: HttpMethod, | ||
url: string, | ||
options?: SendRequestOptions, | ||
responseAsStream = false, | ||
): Promise<{ statusCode: number; content: TContent }> { | ||
): Promise<{ statusCode: number; content: TContent; contentType?: string }> { | ||
// Classify the request target. DeepL API calls pass a path relative to the base server
// URL, which `new URL()` cannot parse without a base and therefore throws on. An absolute
// URL parses successfully and identifies a request to an external webpage.
// The previous `!!new URL(url)` evaluated to true for every parseable URL, so webpage
// requests were also classified as DeepL requests and kept the Authorization header.
let isDeepLUrl: boolean;
try {
    void new URL(url);
    // Parsing succeeded: `url` is absolute, so this is not a DeepL API path.
    isDeepLUrl = false;
} catch {
    // Parsing failed: `url` is a relative endpoint path for the DeepL API.
    isDeepLUrl = true;
}
|
||
options = options === undefined ? {} : options; | ||
logInfo(`Request to DeepL API ${method} ${url}`); | ||
logInfo(`${isDeepLUrl ? 'Request to DeepL API' : 'Request to webpage'} ${method} ${url}`); | ||
logDebug(`Request details: ${options.data}`); | ||
const backoff = new BackoffTimer(); | ||
let response, error; | ||
|
@@ -170,8 +179,14 @@ export class HttpClient { | |
url, | ||
timeoutMs, | ||
responseAsStream, | ||
isDeepLUrl, | ||
options, | ||
); | ||
|
||
if (!isDeepLUrl && axiosRequestConfig.headers) { | ||
delete axiosRequestConfig.headers.Authorization; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is where I strip the API key if the request is going to an external domain. |
||
} | ||
|
||
try { | ||
response = await HttpClient.sendAxiosRequest<TContent>(axiosRequestConfig); | ||
error = undefined; | ||
|
@@ -199,8 +214,12 @@ export class HttpClient { | |
} | ||
|
||
if (response !== undefined) { | ||
const { statusCode, content } = response; | ||
logInfo(`DeepL API response ${method} ${url} ${statusCode}`); | ||
const { statusCode, content, contentType } = response; | ||
logInfo( | ||
`${ | ||
isDeepLUrl ? 'DeepL API response' : 'Webpage response' | ||
} ${method} ${url} ${statusCode}${!isDeepLUrl ? ` ${contentType}` : ''}`, | ||
); | ||
if (!responseAsStream) { | ||
logDebug('Response details:', { content: content }); | ||
} | ||
|
@@ -217,7 +236,7 @@ export class HttpClient { | |
*/ | ||
private static async sendAxiosRequest<TContent extends string | IncomingMessage>( | ||
axiosRequestConfig: AxiosRequestConfig, | ||
): Promise<{ statusCode: number; content: TContent }> { | ||
): Promise<{ statusCode: number; content: TContent; contentType?: string }> { | ||
try { | ||
const response = await axios.request(axiosRequestConfig); | ||
|
||
|
@@ -227,7 +246,12 @@ export class HttpClient { | |
response.data = JSON.stringify(response.data); | ||
} | ||
} | ||
return { statusCode: response.status, content: response.data }; | ||
|
||
return { | ||
statusCode: response.status, | ||
content: response.data, | ||
contentType: response.headers['content-type'], | ||
}; | ||
} catch (axios_error_raw) { | ||
const axiosError = axios_error_raw as AxiosError; | ||
const message: string = axiosError.message || ''; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ import { | |
GlossaryNotFoundError, | ||
QuotaExceededError, | ||
TooManyRequestsError, | ||
WebsiteDownloadError, | ||
} from './errors'; | ||
import { GlossaryEntries } from './glossaryEntries'; | ||
import { | ||
|
@@ -645,6 +646,37 @@ export class Translator { | |
} | ||
} | ||
|
||
/**
 * Downloads the HTML of the specified webpage, uploads it to DeepL to translate into the given
 * target language, waits for translation to complete, then downloads the translated webpage to
 * the specified output path. The HTML is submitted under the filename 'webpage.html' unless
 * `options` supplies its own filename.
 * @param webpageUrl String or URL containing the URL of the webpage to be translated.
 * @param outputFile String containing file path to create translated document, or Stream or
 *     FileHandle to write translated document content.
 * @param sourceLang Language code of input document, or null to use auto-detection.
 * @param targetLang Language code of language to translate into.
 * @param options Optional DocumentTranslateOptions object containing additional options controlling translation.
 * @return Fulfills with a DocumentStatus object for the completed translation. You can use the
 *     billedCharacters property to check how many characters were billed for the document.
 * @throws {WebsiteDownloadError} If the webpage response's content type does not include 'text/html'.
 * @throws {Error} If a file already exists at the output file path.
 * @throws {DocumentTranslationError} If any error occurs during document upload, translation or
 *     download. The `documentHandle` property of the error may be used to recover the document.
 */
async translateWebpage(
    webpageUrl: string | URL,
    outputFile: string | fs.WriteStream | fs.promises.FileHandle,
    sourceLang: SourceLanguageCode | null,
    targetLang: TargetLanguageCode,
    options?: DocumentTranslateOptions,
): Promise<DocumentStatus> {
    // Fetch the page first; the document translation only starts once the HTML is in hand.
    const pageHtml = await this.getContentFromWebpage(webpageUrl);
    const htmlBuffer = Buffer.from(pageHtml);

    // Caller-supplied options are spread last so they take precedence over the default filename.
    const translateOptions = { filename: 'webpage.html', ...options };

    return this.translateDocument(
        htmlBuffer,
        outputFile,
        sourceLang,
        targetLang,
        translateOptions,
    );
}
|
||
/** | ||
* Uploads specified document to DeepL to translate into target language, and returns handle associated with the document. | ||
* @param inputFile String containing file path, stream containing file data, or FileHandle. | ||
|
@@ -709,6 +741,28 @@ export class Translator { | |
} | ||
} | ||
|
||
/**
 * Downloads the HTML of the specified webpage and uploads it to DeepL to translate into the
 * target language, returning the handle associated with the document. The HTML is submitted
 * under the filename 'webpage.html' unless `options` supplies its own filename.
 * @param webpageUrl String or URL containing the URL of the webpage to be translated.
 * @param sourceLang Language code of input document, or null to use auto-detection.
 * @param targetLang Language code of language to translate into.
 * @param options Optional DocumentTranslateOptions object containing additional options controlling translation.
 * @return Fulfills with DocumentHandle associated with the in-progress translation.
 * @throws {WebsiteDownloadError} If the webpage response's content type does not include 'text/html'.
 */
async uploadWebpage(
    webpageUrl: string | URL,
    sourceLang: SourceLanguageCode | null,
    targetLang: TargetLanguageCode,
    options?: DocumentTranslateOptions,
): Promise<DocumentHandle> {
    const pageHtml = await this.getContentFromWebpage(webpageUrl);
    const htmlBuffer = Buffer.from(pageHtml);

    // Caller-supplied options are spread last so they take precedence over the default filename.
    const uploadOptions = { filename: 'webpage.html', ...options };

    return this.uploadDocument(htmlBuffer, sourceLang, targetLang, uploadOptions);
}
|
||
/** | ||
* Retrieves the status of the document translation associated with the given document handle. | ||
* @param handle Document handle associated with document. | ||
|
@@ -1003,6 +1057,18 @@ export class Translator { | |
return libraryInfoString; | ||
} | ||
|
||
/**
 * Downloads the content of the given webpage with a GET request and returns it as a string.
 * The response status code and content are passed to checkStatusCode before the content type
 * is inspected.
 * @param webpageUrl String or URL of the webpage to download; converted with toString() for the request.
 * @return Fulfills with the content of the response.
 * @throws {WebsiteDownloadError} If the response content type is missing or does not include 'text/html'.
 */
private async getContentFromWebpage(webpageUrl: string | URL): Promise<string> { | ||
// NOTE(review): this request goes through the shared httpClient; confirm that
// sendRequestWithBackoff does not attach the DeepL Authorization header for external URLs.
const { statusCode, content, contentType } = | ||
await this.httpClient.sendRequestWithBackoff<string>('GET', webpageUrl.toString()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, using this method will leak the DeepL API key to this website (as it's in the headers). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ended up stripping the Authorization header from the request before it gets sent if the request is a webpage request. It seemed more straightforward than making a whole new function. See my other comment. |
||
await checkStatusCode(statusCode, content); | ||
|
||
// A missing content type also fails this check, because the optional chain yields undefined.
if (!contentType?.includes('text/html')) { | ||
throw new WebsiteDownloadError('URL to translate must return HTML'); | ||
} | ||
|
||
return content; | ||
} | ||
|
||
/** | ||
* HttpClient implements all HTTP requests and retries. | ||
* @private | ||
|
Uh oh!
There was an error while loading. Please reload this page.