From b52b6e61bfd5ab8b05984d1b212219827bbff3a4 Mon Sep 17 00:00:00 2001 From: Jiaxiang Li Date: Wed, 2 Sep 2020 18:01:05 +0800 Subject: [PATCH] fix encoding problem. @gaowenxin85 --- .gitignore | 2 ++ analysis/.gitignore | 1 + code/__pycache__/.gitignore | 2 ++ code/augment.py | 4 ++-- data/.gitignore | 1 + output/.gitignore | 1 + 6 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 analysis/.gitignore create mode 100644 code/__pycache__/.gitignore create mode 100644 data/.gitignore create mode 100644 output/.gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2b7d12d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.Rproj.user +EDA_NLP_for_Chinese.Rproj diff --git a/analysis/.gitignore b/analysis/.gitignore new file mode 100644 index 0000000..da4d722 --- /dev/null +++ b/analysis/.gitignore @@ -0,0 +1 @@ +debug.Rmd diff --git a/code/__pycache__/.gitignore b/code/__pycache__/.gitignore new file mode 100644 index 0000000..fe529a5 --- /dev/null +++ b/code/__pycache__/.gitignore @@ -0,0 +1,2 @@ +eda.cpython-36.pyc +eda.cpython-37.pyc diff --git a/code/augment.py b/code/augment.py index 75e4432..9765689 100644 --- a/code/augment.py +++ b/code/augment.py @@ -34,8 +34,8 @@ def gen_eda(train_orig, output_file, alpha, num_aug=9): - writer = open(output_file, 'w') - lines = open(train_orig, 'r').readlines() + writer = open(output_file, 'w', encoding = "UTF-8") + lines = open(train_orig, 'r', encoding = 'UTF-8').readlines() print("正在使用EDA生成增强语句...") for i, line in enumerate(lines): diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..ebfc91a --- /dev/null +++ b/data/.gitignore @@ -0,0 +1 @@ +train.txt diff --git a/output/.gitignore b/output/.gitignore new file mode 100644 index 0000000..f13b103 --- /dev/null +++ b/output/.gitignore @@ -0,0 +1 @@ +train_augmented.txt