Skip to content

Commit d79c5c0

Browse files
authored
Add lite transcription support (#4)
1 parent ec88e47 commit d79c5c0

File tree

7 files changed

+533
-75
lines changed

7 files changed

+533
-75
lines changed

README.md

Lines changed: 108 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,34 @@ pip install salad-cloud-transcription
2626

2727
```python
2828
from salad_cloud_transcription import SaladCloudTranscriptionSdk
29+
from salad_cloud_transcription_sdk.models.transcription_engine import TranscriptionEngine
30+
from salad_cloud_transcription_sdk.models.transcription_request import TranscriptionRequest
31+
from salad_cloud_transcription_sdk.models.transcription_job_input import TranscriptionJobInput
2932

3033
# Initialize the SDK
3134
sdk = SaladCloudTranscriptionSdk(api_key="your_api_key")
3235

33-
# Transcribe an audio file
36+
# Setup the request
37+
request_object = TranscriptionRequest(
38+
options=TranscriptionJobInput(
39+
language_code="en",
40+
return_as_file=False,
41+
sentence_level_timestamps=True,
42+
word_level_timestamps=True,
43+
diarization=True,
44+
srt=True
45+
),
46+
metadata={"project": "example_project"}
47+
)
48+
49+
# Transcribe a video file using the Full Transcription engine
3450
result = sdk.transcription_client.transcribe(
35-
"path/to/audio.mp3",
36-
auto_poll = True)
51+
"path/to/video.mp4",
52+
organization_name="your_organization_name",
53+
request=request_object,
54+
engine=TranscriptionEngine.Full,
55+
auto_poll=True
56+
)
3757

3858
# Print the transcription
3959
print(result.text)
@@ -59,6 +79,33 @@ If you need to set or update the API key after initializing the SDK, you can use
5979
sdk.set_api_key("YOUR_API_KEY")
6080
```
6181

82+
## Transcription Engines
83+
The SDK supports two transcription engines: `Full` and `Lite`. Select one with the `engine` parameter of the `transcribe` method; when omitted, it defaults to `Full`.
84+
85+
When using the `Lite` engine, the request object has to specify explicit defaults for a few of the properties:
86+
87+
```python
88+
request = TranscriptionRequest(
89+
options=TranscriptionJobInput(
90+
language_code="en",
91+
return_as_file=True,
92+
translate="to_eng",
93+
sentence_level_timestamps=True,
94+
word_level_timestamps=True,
95+
diarization=True,
96+
sentence_diarization=True,
97+
srt=True,
98+
99+
# Adding required parameters with null/empty values
100+
summarize=0,
101+
custom_vocabulary="",
102+
llm_translation=[],
103+
srt_translation=[],
104+
),
105+
metadata={"test_id": "integration_test", "environment": "testing"},
106+
)
107+
```
108+
62109
## Sample Usage
63110

64111
### The *source* parameter
@@ -74,18 +121,35 @@ When a remote file is specified, that URL is passed as-is to the transcription e
74121

75122
```python
76123
from salad_cloud_transcription import SaladCloudTranscriptionSdk
124+
from salad_cloud_transcription_sdk.models.transcription_engine import TranscriptionEngine
125+
from salad_cloud_transcription_sdk.models.transcription_request import TranscriptionRequest
126+
from salad_cloud_transcription_sdk.models.transcription_job_input import TranscriptionJobInput
77127

78128
# Initialize the SDK
79129
sdk = SaladCloudTranscriptionSdk(api_key="your_api_key")
80130

131+
# Setup the request
132+
request_object = TranscriptionRequest(
133+
options=TranscriptionJobInput(
134+
language_code="en",
135+
return_as_file=False,
136+
sentence_level_timestamps=True,
137+
word_level_timestamps=True,
138+
diarization=True,
139+
srt=True
140+
),
141+
metadata={"project": "example_project"}
142+
)
143+
81144
# Start a transcription job and wait for the result
82-
# When the job is processed, this function returns a InferenceEndpointJob
83145
result = sdk.transcription_client.transcribe(
84-
source = "path/to/audio.mp3",
85-
auto_poll = True)
146+
source="path/to/audio.mp3",
147+
organization_name="your_organization_name",
148+
request=request_object,
149+
auto_poll=True
150+
)
86151

87-
# The output property of the InferenceEndpointJob is a either a TranscriptionJobFileOutput
88-
# or a TranscriptionJobOutput. You can print it to examine job results.
152+
# Print the transcription job output
89153
print(result.output)
90154
```
91155

@@ -97,13 +161,28 @@ from salad_cloud_transcription import SaladCloudTranscriptionSdk
97161
# Initialize the SDK
98162
sdk = SaladCloudTranscriptionSdk(api_key="your_api_key")
99163

164+
# Setup the request
165+
request_object = TranscriptionRequest(
166+
options=TranscriptionJobInput(
167+
language_code="en",
168+
return_as_file=False,
169+
sentence_level_timestamps=True,
170+
word_level_timestamps=True,
171+
diarization=True,
172+
srt=True
173+
),
174+
metadata={"project": "example_project"}
175+
)
176+
100177
# Start a transcription job. auto_poll = False
101-
job = sdk.transcription_client.start_transcription_job(
102-
source = "path/to/audio.mp3")
178+
job = sdk.transcription_client.transcribe(
179+
source = "path/to/audio.mp3",
180+
request = request_object,
181+
auto_poll = False)
103182

104183
# Poll for the job status
105184
while True:
106-
job = self._get_transcription_job_internal(organization_name, job.id_)
185+
job = sdk.transcription_client.get_transcription_job(organization_name, job.id_)
107186
if job.status in [
108187
Status.SUCCEEDED.value,
109188
Status.FAILED.value,
@@ -118,17 +197,31 @@ if job.status == Status.SUCCEEDED.value:
118197

119198
### Start a Transcription Job and Get Updates via a Webhook
120199

121-
First, initialize a transcription job.
122-
123200
```python
124201
from salad_cloud_transcription import SaladCloudTranscriptionSdk
202+
from salad_cloud_transcription_sdk.models.transcription_request import TranscriptionRequest
203+
from salad_cloud_transcription_sdk.models.transcription_job_input import TranscriptionJobInput
125204

126205
# Initialize the SDK
127206
sdk = SaladCloudTranscriptionSdk(api_key="your_api_key")
128207

208+
# Setup the request
209+
request_object = TranscriptionRequest(
210+
options=TranscriptionJobInput(
211+
language_code="en",
212+
return_as_file=False,
213+
sentence_level_timestamps=True,
214+
word_level_timestamps=True,
215+
diarization=True,
216+
srt=True
217+
),
218+
metadata={"project": "example_project"}
219+
)
220+
129221
# Start a transcription job with a webhook URL
130-
job = sdk.transcription_client.start_transcription_job(
131-
source = "path/to/audio.mp3",
222+
job = sdk.transcription_client.transcribe(
223+
source="path/to/audio.mp3",
224+
request=request_object,
132225
webhook_url="https://your-webhook-endpoint.com"
133226
)
134227

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from enum import Enum
2+
3+
4+
class TranscriptionEngine(Enum):
    """
    Enum representing the different transcription engine options.

    Options:
    - Full: Full transcription engine which supports all features
    - Lite: Lightweight transcription engine with fewer features, aimed at
      being faster
    """

    # The member values are the plain strings "full" and "lite".
    Full = "full"
    Lite = "lite"

src/salad_cloud_transcription_sdk/models/transcription_job_output.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -112,18 +112,20 @@ class TranscriptionJobOutput(BaseModel):
112112
:type sentence_level_timestamps: List[SentenceTimestamp]
113113
:param srt_content: SRT formatted content for subtitles
114114
:type srt_content: str
115-
:param summary: Summary of the transcription content
116-
:type summary: str
117-
:param llm_translation: Translations of the transcription in different languages
118-
:type llm_translation: Dict[str, str]
119-
:param srt_translation: Translations of the SRT content in different languages
120-
:type srt_translation: Dict[str, str]
121115
:param duration_in_seconds: Duration of the audio in seconds
122116
:type duration_in_seconds: float
123-
:param duration: Duration in hours (optional)
124-
:type duration: float
125117
:param processing_time: Processing time in seconds
126118
:type processing_time: float
119+
:param summary: Summary of the transcription content (optional)
120+
:type summary: Optional[str]
121+
:param llm_translation: Translations of the transcription in different languages (optional)
122+
:type llm_translation: Optional[Dict[str, str]]
123+
:param srt_translation: Translations of the SRT content in different languages (optional)
124+
:type srt_translation: Optional[Dict[str, str]]
125+
:param duration: Duration in hours (optional)
126+
:type duration: Optional[float]
127+
:param overall_processing_time: Overall processing time in seconds (optional)
128+
:type overall_processing_time: Optional[float]
127129
"""
128130

129131
def __init__(
@@ -132,12 +134,13 @@ def __init__(
132134
word_segments: List[Dict[str, Any]],
133135
sentence_level_timestamps: List[Dict[str, Any]],
134136
srt_content: str,
135-
summary: str,
136-
llm_translation: Dict[str, str],
137-
srt_translation: Dict[str, str],
138137
duration_in_seconds: float,
139-
duration: float,
140138
processing_time: float,
139+
summary: Optional[str] = None, # optional in Lite
140+
llm_translation: Optional[Dict[str, str]] = None, # optional in Lite
141+
srt_translation: Optional[Dict[str, str]] = None, # optional in Lite
142+
duration: Optional[float] = None, # optional in Lite
143+
overall_processing_time: Optional[float] = None, # optional in Lite
141144
**kwargs,
142145
):
143146
self.text = self._define_str("text", text)
@@ -146,14 +149,19 @@ def __init__(
146149
SentenceTimestamp(**sentence) for sentence in sentence_level_timestamps
147150
]
148151
self.srt_content = self._define_str("srt_content", srt_content)
149-
self.summary = self._define_str("summary", summary)
152+
self.summary = summary
150153
self.llm_translation = llm_translation
151154
self.srt_translation = srt_translation
152155
self.duration_in_seconds = self._define_number(
153156
"duration_in_seconds", duration_in_seconds
154157
)
155-
self.duration = self._define_number("duration", duration)
158+
self.duration = self._define_number("duration", duration) if duration else None
156159
self.processing_time = self._define_number("processing_time", processing_time)
160+
self.overall_processing_time = (
161+
self._define_number("overall_processing_time", overall_processing_time)
162+
if overall_processing_time
163+
else None
164+
)
157165
self._kwargs = kwargs
158166

159167
def to_dict(self) -> Dict[str, Any]:
@@ -162,21 +170,31 @@ def to_dict(self) -> Dict[str, Any]:
162170
:return: Dictionary representation of this instance
163171
:rtype: Dict[str, Any]
164172
"""
165-
return {
173+
result = {
166174
"text": self.text,
167175
"word_segments": [segment.to_dict() for segment in self.word_segments],
168176
"sentence_level_timestamps": [
169177
sentence.to_dict() for sentence in self.sentence_level_timestamps
170178
],
171179
"srt_content": self.srt_content,
172-
"summary": self.summary,
173-
"llm_translation": self.llm_translation,
174-
"srt_translation": self.srt_translation,
175180
"duration_in_seconds": self.duration_in_seconds,
176-
"duration": self.duration,
177181
"processing_time": self.processing_time,
178182
}
179183

184+
# Add optional fields if they exist
185+
if self.summary is not None:
186+
result["summary"] = self.summary
187+
if self.llm_translation is not None:
188+
result["llm_translation"] = self.llm_translation
189+
if self.srt_translation is not None:
190+
result["srt_translation"] = self.srt_translation
191+
if self.duration is not None:
192+
result["duration"] = self.duration
193+
if self.overall_processing_time is not None:
194+
result["overall_processing_time"] = self.overall_processing_time
195+
196+
return result
197+
180198
@classmethod
181199
def from_json(
182200
cls, json_data: Union[str, bytes, Dict[str, Any]]

src/salad_cloud_transcription_sdk/net/environment/environment.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from enum import Enum
66
from urllib.parse import urlparse
77

8-
TRANSCRIPTION_ENDPOINT_NAME = "transcribe"
8+
FULL_TRANSCRIPTION_ENDPOINT_NAME = "transcribe"
9+
LITE_TRANSCRIPTION_ENDPOINT_NAME = "transcription-lite"
910

1011

1112
class Environment(Enum):

0 commit comments

Comments
 (0)