
Commit 4424db5

Logging clean up + IT TN fix (#118)
* fix utils and it TN (Signed-off-by: Evelina <[email protected]>)
* clean up (Signed-off-by: Evelina <[email protected]>)
* fix logging (Signed-off-by: Evelina <[email protected]>)
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix format (Signed-off-by: Evelina <[email protected]>)
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* fix format (Signed-off-by: Evelina <[email protected]>)
* fix format (Signed-off-by: Evelina <[email protected]>)
* add IT TN to CI (Signed-off-by: Evelina <[email protected]>)
* update patch (Signed-off-by: Evelina <[email protected]>)

---------

Signed-off-by: Evelina <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 0ded21a commit 4424db5

65 files changed: +239 additions, -247 deletions (large commit; only a subset of the changed files is shown below)


Jenkinsfile

Lines changed: 10 additions & 27 deletions
@@ -23,8 +23,8 @@ pipeline {
     VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
     SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
     ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-27-23-0'
+    IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
-
   }
   stages {

@@ -188,7 +188,7 @@ pipeline {
         failFast true
         parallel {
           stage('L0: FR TN grammars') {
-            steps {
+            steps {
               sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}'
             }
           }
@@ -200,7 +200,7 @@ pipeline {

           }
         }
-        stage('L0: Create HU TN/ITN Grammars') {
+        stage('L0: Create VI ITN & HU TN & IT TN') {
           when {
             anyOf {
               branch 'main'
@@ -209,38 +209,21 @@ pipeline {
           }
           failFast true
           parallel {
+            stage('L0: VI ITN grammars') {
+              steps {
+                sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
+              }
+            }
             stage('L0: HU TN grammars') {
               steps {
                 sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
               }
             }
-            // stage('L0: HU ITN grammars') {
-            //   steps {
-            //     sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hu --text="száz " --cache_dir ${HU_TN_CACHE}'
-            //   }
-            // }
-          }
-        }
-        stage('L0: Create VI TN/ITN Grammars') {
-          when {
-            anyOf {
-              branch 'main'
-              changeRequest target: 'main'
-            }
-          }
-          failFast true
-          parallel {
-            // stage('L0: VI TN grammars') {
-            //   steps {
-            //     sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="2" --cache_dir ${VI_TN_CACHE}'
-            //   }
-            // }
-            stage('L0: VI ITN grammars') {
+            stage('L0: IT TN grammars') {
               steps {
-                sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
+                sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}'
               }
             }
-
           }
         }
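The new 'L0: IT TN grammars' stage exercises the same normalize.py entry point as the other language stages, now pointed at the new IT_TN_CACHE. For reference, the check can also be reproduced outside Jenkins through the Python API; this is only a sketch, and the exact Normalizer constructor arguments are assumed from the public API rather than taken from this commit:

# Hypothetical local reproduction of the new CI step (not part of this commit).
# Assumes nemo_text_processing is installed and Normalizer accepts these arguments.
from nemo_text_processing.text_normalization.normalize import Normalizer

it_normalizer = Normalizer(input_case="cased", lang="it", cache_dir="/tmp/it_tn_cache")
print(it_normalizer.normalize("122", verbose=False))  # expected to verbalize the number, e.g. "centoventidue"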

nemo_text_processing/inverse_text_normalization/ar/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
-
 import pynini
 from nemo_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
 from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize import VerbalizeFst
 from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil
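Every file in this commit swaps the stdlib import logging for a shared logger object imported from nemo_text_processing.utils.logging. That module itself is not among the files shown here, so the following is only an assumed minimal shape of such a module, sufficient for the new import and the logger.info(...) calls to work:

# Assumed sketch of nemo_text_processing/utils/logging.py (not shown in this commit).
import logging

logger = logging.getLogger("nemo_text_processing")
if not logger.handlers:  # guard against adding duplicate handlers on repeated imports
    _handler = logging.StreamHandler()
    _handler.setFormatter(logging.Formatter("[NeMo-text-processing] %(levelname)s: %(message)s"))
    logger.addHandler(_handler)
    logger.setLevel(logging.INFO)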

nemo_text_processing/inverse_text_normalization/ar/taggers/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
-
 import pynini
 from nemo_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
 from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize import VerbalizeFst
 from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil

nemo_text_processing/inverse_text_normalization/ar/taggers/tokenize_and_classify.py

Lines changed: 3 additions & 4 deletions
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 import os

 import pynini
@@ -31,6 +30,7 @@
 )
 from nemo_text_processing.text_normalization.ar.taggers.tokenize_and_classify import ClassifyFst as TNClassifyFst
 from nemo_text_processing.text_normalization.en.graph_utils import INPUT_LOWER_CASED
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil


@@ -62,9 +62,9 @@ def __init__(
         far_file = os.path.join(cache_dir, f"ar_itn_{input_case}.far")
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
-            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
+            logger.info(f"ClassifyFst.fst was restored from {far_file}.")
         else:
-            logging.info(f"Creating ClassifyFst grammars.")
+            logger.info(f"Creating ClassifyFst grammars.")
             tn_classify = TNClassifyFst(
                 input_case='cased', deterministic=True, cache_dir=cache_dir, overwrite_cache=True
             )
@@ -109,4 +109,3 @@ def __init__(

         if far_file:
             generator_main(far_file, {"tokenize_and_classify": self.fst})
-            logging.info(f"ClassifyFst grammars are saved to {far_file}.")
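The restore-or-build logic in this hunk repeats, with different FAR names and grammar builders, in the de, en, es, and es_en taggers below. Distilled into a simplified sketch (names follow the ar file above; generator_main is the FAR export helper the real files import from their language's graph_utils, passed in here to avoid assuming an import path):

# Simplified illustration of the caching pattern shared by the ClassifyFst constructors.
import os
import pynini
from nemo_text_processing.utils.logging import logger

def restore_or_build(cache_dir, input_case, overwrite_cache, build_fn, generator_main):
    far_file = os.path.join(cache_dir, f"ar_itn_{input_case}.far")
    if not overwrite_cache and os.path.exists(far_file):
        fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
        logger.info(f"ClassifyFst.fst was restored from {far_file}.")
    else:
        logger.info("Creating ClassifyFst grammars.")
        fst = build_fn()  # builds the tokenize-and-classify grammar
        generator_main(far_file, {"tokenize_and_classify": fst})
    return fst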

nemo_text_processing/inverse_text_normalization/ar/verbalizers/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
-
 import pynini
 from nemo_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
 from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize import VerbalizeFst
 from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
+from nemo_text_processing.utils.logging import logger

nemo_text_processing/inverse_text_normalization/de/taggers/tokenize_and_classify.py

Lines changed: 3 additions & 4 deletions
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 import os

 import pynini
@@ -46,6 +45,7 @@
     delete_space,
     generator_main,
 )
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil


@@ -78,9 +78,9 @@ def __init__(
         far_file = os.path.join(cache_dir, f"de_itn_{input_case}.far")
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
-            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
+            logger.info(f"ClassifyFst.fst was restored from {far_file}.")
         else:
-            logging.info(f"Creating ClassifyFst grammars.")
+            logger.info(f"Creating ClassifyFst grammars.")
             tn_cardinal_tagger = TNCardinalTagger(deterministic=False)
             tn_date_tagger = TNDateTagger(cardinal=tn_cardinal_tagger, deterministic=False)
             tn_decimal_tagger = TNDecimalTagger(cardinal=tn_cardinal_tagger, deterministic=False)
@@ -147,4 +147,3 @@ def __init__(

         if far_file:
             generator_main(far_file, {"tokenize_and_classify": self.fst})
-            logging.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/en/taggers/tokenize_and_classify.py

Lines changed: 3 additions & 4 deletions
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 import os

 import pynini
@@ -36,6 +35,7 @@
     delete_space,
     generator_main,
 )
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil


@@ -67,9 +67,9 @@ def __init__(
         far_file = os.path.join(cache_dir, f"en_itn_{input_case}.far")
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
-            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
+            logger.info(f"ClassifyFst.fst was restored from {far_file}.")
         else:
-            logging.info(f"Creating ClassifyFst grammars.")
+            logger.info(f"Creating ClassifyFst grammars.")
             cardinal = CardinalFst(input_case=input_case)
             cardinal_graph = cardinal.fst

@@ -116,4 +116,3 @@ def __init__(

         if far_file:
             generator_main(far_file, {"tokenize_and_classify": self.fst})
-            logging.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/es/taggers/tokenize_and_classify.py

Lines changed: 3 additions & 4 deletions
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 import os

 import pynini
@@ -36,6 +35,7 @@
     delete_space,
     generator_main,
 )
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil


@@ -67,9 +67,9 @@ def __init__(
         far_file = os.path.join(cache_dir, f"es_itn_{input_case}.far")
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
-            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
+            logger.info(f"ClassifyFst.fst was restored from {far_file}.")
         else:
-            logging.info(f"Creating ClassifyFst grammars.")
+            logger.info(f"Creating ClassifyFst grammars.")

             cardinal = CardinalFst()
             cardinal_graph = cardinal.fst
@@ -121,4 +121,3 @@ def __init__(

         if far_file:
             generator_main(far_file, {"tokenize_and_classify": self.fst})
-            logging.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/es_en/taggers/tokenize_and_classify.py

Lines changed: 4 additions & 4 deletions
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 import os

 import pynini
@@ -49,6 +48,7 @@
     delete_space,
     generator_main,
 )
+from nemo_text_processing.utils.logging import logger
 from pynini.lib import pynutil


@@ -85,9 +85,9 @@ def __init__(
         far_file = os.path.join(cache_dir, f"es_en_itn_{input_case}.far")
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
-            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
+            logger.info(f"ClassifyFst.fst was restored from {far_file}.")
         else:
-            logging.info(f"Creating ClassifyFst grammars.")
+            logger.info(f"Creating ClassifyFst grammars.")

             cardinal = CardinalFst()
             cardinal_graph = cardinal.fst
@@ -174,4 +174,4 @@ def __init__(

         if far_file:
             generator_main(far_file, {"tokenize_and_classify": self.fst})
-            logging.info(f"ClassifyFst grammars are saved to {far_file}.")
+            logger.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/fr/graph_utils.py

Lines changed: 2 additions & 2 deletions
@@ -13,14 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import logging
 import os
 import string
 from pathlib import Path
 from typing import Dict

 import pynini
 from nemo_text_processing.inverse_text_normalization.fr.utils import get_abs_path
+from nemo_text_processing.utils.logging import logger
 from pynini import Far
 from pynini.examples import plurals
 from pynini.export import export
@@ -80,7 +80,7 @@ def generator_main(file_name: str, graphs: Dict[str, pynini.FstLike]):
     for rule, graph in graphs.items():
         exporter[rule] = graph.optimize()
     exporter.close()
-    logging.info(f'Created {file_name}')
+    logger.info(f'Created {file_name}')


 def get_plurals(fst):
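generator_main writes each optimized grammar into a FAR archive and now reports the output path through the shared logger; the ClassifyFst constructors above read that archive back with pynini.Far. A small hypothetical round trip, assuming the fr graph_utils module is importable from an installed package:

# Toy round trip (illustrative only, not part of this commit).
import pynini
from nemo_text_processing.inverse_text_normalization.fr.graph_utils import generator_main

toy_fst = pynini.cross("deux", "2").optimize()  # stands in for a real grammar
generator_main("/tmp/toy.far", {"tokenize_and_classify": toy_fst})  # logs "Created /tmp/toy.far"
restored = pynini.Far("/tmp/toy.far", mode="r")["tokenize_and_classify"]  # same load pattern as ClassifyFst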
