In [1]:
import setup_env
from setup_env import device
--------------------------------------------------------------------------------
=== Hardware Acceleration ===
PyTorch version: 2.10.0+cu128
Using NVIDIA GPU (CUDA)
CUDA version: 12.8
GPU name: NVIDIA GeForce RTX 5070 Ti
GPU count: 1
Total GPU memory: 15.92 GB
Allocated memory: 0.00 GB
Free memory: 15.92 GB
Device: cuda
=== Matplotlib Settings ===
✅ Font: NanumGothic
=== System Info ===
OS: Ubuntu 24.04.3 LTS (Noble Numbat)
Kernel: 6.6.87.2-microsoft-standard-WSL2
Architecture: x86_64
Python: 3.12.3
Working directory: /workspace/ai-deeplearning/tutorial
=== Library Versions ===
NumPy: 2.4.2
Pandas: 3.0.1
Matplotlib: 3.10.7
Scikit-learn: 1.7.2
OpenCV: Not installed → !pip install -q opencv-python
Pillow: 12.0.0
Seaborn: 0.13.2
TensorFlow: Not installed → !pip install -q tensorflow
Transformers: 5.2.0
TorchVision: 0.25.0+cu128
=== Environment setup completed ===
--------------------------------------------------------------------------------
=== Visualizing Test Plot (Wide View) ===
=== GPU Usage Code Snippet === Device set to: cuda ---------------------------------------- # 아래 코드를 복사해서 모델과 데이터를 GPU로 보내세요: model = YourModel().to(device) data = data.to(device) ---------------------------------------- === Environment setup completed === --------------------------------------------------------------------------------
Seq2Seq (Sequence-to-Sequence)¶
시퀀스(순서가 있는 데이터의 나열)를 입력받아 시퀀스를 출력하는 모델
| 예시 | |
|---|---|
| 번역 | "나는 밥을 먹었다" → "I ate rice" |
| 요약 | 긴 문장 → 짧은 문장 |
| 챗봇 | 질문 → 대답 |
핵심 구조: 인코더 → 디코더
- 인코더: 입력 시퀀스 전체를 하나의 벡터로 압축
- 디코더: 그 벡터를 받아 출력 시퀀스를 한 토큰씩 생성
| 연도 | 기술 | 핵심 방식 | 한계 |
|---|---|---|---|
| ~2007 | 규칙 기반 (SYSTRAN) | 언어학자가 문법 규칙 수작업 | 관용어, 문맥, 예외 처리 불가 |
| 2007~2016 | 통계 기반 (SMT) | 대용량 병렬 코퍼스에서 확률 학습 | 구절 단위 번역 → 문맥 유실 |
| 2016.11 | 신경망 번역 (GNMT) | Seq2Seq + Attention, 문장 전체 처리 | LSTM 순차처리 → 병렬화 불가, 긴 문장 약함 |
| 2017 | Transformer | Attention Is All You Need, LSTM 제거 | 추론 속도 문제 |
opus100 데이터셋¶
100개 언어 쌍의 병렬 번역 문장 쌍 모음
- 출처: OPUS 프로젝트 (공개 다국어 코퍼스)
- Hugging Face에서 바로 로드 가능
from datasets import load_dataset
ds = load_dataset("opus100", "en-ko")
데이터 구조¶
| 컬럼 | 예시 |
|---|---|
translation.en |
"I ate rice with a friend yesterday." |
translation.ko |
"나는 어제 친구와 밥을 먹었다." |
데이터 크기¶
| split | 문장 쌍 수 |
|---|---|
| train | 1,000,000 |
| validation | 2,000 |
| test | 2,000 |
검증 방식¶
입력(영어) → 모델 → 예측(한국어)를 정답(한국어)과 BLEU로 비교
입력: "I ate rice"
정답: "나는 밥을 먹었다"
예측: "나는 밥을 먹었어"
BLEU: 부분 일치 점수 계산
왜 영→한인가?¶
결과를 우리가 직접 눈으로 보고 판단할 수 있기 때문. BLEU 점수 + 육안 확인 두 가지로 검증 가능.
In [2]:
import os
from datasets import load_dataset
data_dir = "./data/opus100_en_ko"
if os.path.exists(data_dir):
print("이미 존재합니다. 스킵합니다.")
else:
print("다운로드 중...")
ds = load_dataset("opus100", "en-ko")
ds.save_to_disk(data_dir)
print("완료:", data_dir)
다운로드 중...
Saving the dataset (0/1 shards): 0%| | 0/2000 [00:00<?, ? examples/s]
Saving the dataset (0/1 shards): 0%| | 0/1000000 [00:00<?, ? examples/s]
Saving the dataset (0/1 shards): 0%| | 0/2000 [00:00<?, ? examples/s]
완료: ./data/opus100_en_ko
In [3]:
from datasets import load_from_disk
ds = load_from_disk("./data/opus100_en_ko")
print("데이터셋 구조:")
print(ds)
print("\n--- train 샘플 3개 ---")
for i in range(3):
pair = ds["train"][i]["translation"]
print(f"EN: {pair['en']}")
print(f"KO: {pair['ko']}")
print()
데이터셋 구조:
DatasetDict({
test: Dataset({
features: ['translation'],
num_rows: 2000
})
train: Dataset({
features: ['translation'],
num_rows: 1000000
})
validation: Dataset({
features: ['translation'],
num_rows: 2000
})
})
--- train 샘플 3개 ---
EN: They're shaped like a bus.
KO: 할머니처럼 만들었지만.. ? 엉망이지만..
EN: I ain't fishing' 'em out.
KO: 그거 꺼내려다가는
EN: You are torturing god's creatures in an age where we have the technology that no longer requires us to.
KO: 선생님은 이 기술력이 있는 시대에 그러지 않아도 되는데도 신의 피조물을 괴롭히고 있다고요
1.seq2seq + Attention + LSTM¶
- 토크나이저 & 어휘 사전 구축
- 영어/한국어 각각 토큰화
- 단어 → 인덱스 매핑 (vocab) 만들기
- 데이터셋 & 데이터로더
- 토큰 → 인덱스 변환
- 패딩,
토큰 처리 - PyTorch Dataset/DataLoader 구성
- 모델 구현
- Encoder (LSTM)
- Decoder (LSTM)
- Attention 메커니즘
- Seq2Seq 전체 연결
- 학습
- 손실함수 (CrossEntropy)
- Teacher Forcing
- 학습 루프
- 평가 & 번역 테스트
- BLEU 스코어
- 실제 문장 번역해보기
In [7]:
!pip install sentencepiece
Collecting sentencepiece Downloading sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (10 kB) Downloading sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (1.4 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 11.8 MB/s 0:00:00 Installing collected packages: sentencepiece Successfully installed sentencepiece-0.2.1 WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
In [4]:
import sentencepiece as spm
import os
from datasets import load_from_disk
# 2. 데이터 로드
ds = load_from_disk("./data/opus100_en_ko")
# 3. sentencepiece 학습용 텍스트 파일 저장
os.makedirs("./data/spm", exist_ok=True)
en_path = "./data/spm/train_en.txt"
ko_path = "./data/spm/train_ko.txt"
if not os.path.exists(en_path):
print("텍스트 파일 저장 중...")
with open(en_path, "w") as f_en, open(ko_path, "w") as f_ko:
for item in ds["train"]:
f_en.write(item["translation"]["en"] + "\n")
f_ko.write(item["translation"]["ko"] + "\n")
print("완료")
# 4. 토크나이저 학습
en_model_path = "./data/spm/spm_en"
ko_model_path = "./data/spm/spm_ko"
if not os.path.exists(en_model_path + ".model"):
print("영어 토크나이저 학습 중...")
spm.SentencePieceTrainer.train(
input=en_path,
model_prefix=en_model_path,
vocab_size=8000,
model_type="bpe",
pad_id=0, unk_id=1, bos_id=2, eos_id=3,
pad_piece="<pad>", unk_piece="<unk>", bos_piece="<sos>", eos_piece="<eos>"
)
print("완료")
if not os.path.exists(ko_model_path + ".model"):
print("한국어 토크나이저 학습 중...")
spm.SentencePieceTrainer.train(
input=ko_path,
model_prefix=ko_model_path,
vocab_size=8000,
model_type="bpe",
pad_id=0, unk_id=1, bos_id=2, eos_id=3,
pad_piece="<pad>", unk_piece="<unk>", bos_piece="<sos>", eos_piece="<eos>"
)
print("완료")
# 5. 토크나이저 로드 및 테스트
sp_en = spm.SentencePieceProcessor()
sp_ko = spm.SentencePieceProcessor()
sp_en.load(en_model_path + ".model")
sp_ko.load(ko_model_path + ".model")
# 6. 샘플 확인
sample = ds["train"][0]["translation"]
print("\n--- 원문 ---")
print("EN:", sample["en"])
print("KO:", sample["ko"])
print("\n--- 토크나이징 ---")
en_tokens = sp_en.encode(sample["en"], out_type=str)
ko_tokens = sp_ko.encode(sample["ko"], out_type=str)
print("EN tokens:", en_tokens)
print("KO tokens:", ko_tokens)
print("\n--- 인덱스 변환 ---")
en_ids = sp_en.encode(sample["en"])
ko_ids = sp_ko.encode(sample["ko"])
print("EN ids:", en_ids)
print("KO ids:", ko_ids)
print("\n어휘 사전 크기:", sp_en.get_piece_size(), "/", sp_ko.get_piece_size())
텍스트 파일 저장 중... 완료 영어 토크나이저 학습 중...
sentencepiece_trainer.cc(78) LOG(INFO) Starts training with :
trainer_spec {
input: ./data/spm/train_en.txt
input_format:
model_prefix: ./data/spm/spm_en
model_type: BPE
vocab_size: 8000
self_test_sample_size: 0
character_coverage: 0.9995
input_sentence_size: 0
shuffle_input_sentence: 1
seed_sentencepiece_size: 1000000
shrinking_factor: 0.75
max_sentence_length: 4192
num_threads: 16
num_sub_iterations: 2
max_sentencepiece_length: 16
split_by_unicode_script: 1
split_by_number: 1
split_by_whitespace: 1
split_digits: 0
pretokenization_delimiter:
treat_whitespace_as_suffix: 0
allow_whitespace_only_pieces: 0
required_chars:
byte_fallback: 0
vocabulary_output_piece_score: 1
train_extremely_large_corpus: 0
seed_sentencepieces_file:
hard_vocab_limit: 1
use_all_vocab: 0
unk_id: 1
bos_id: 2
eos_id: 3
pad_id: 0
unk_piece: <unk>
bos_piece: <sos>
eos_piece: <eos>
pad_piece: <pad>
unk_surface: ⁇
enable_differential_privacy: 0
differential_privacy_noise_level: 0
differential_privacy_clipping_threshold: 0
}
normalizer_spec {
name: nmt_nfkc
add_dummy_prefix: 1
remove_extra_whitespaces: 1
escape_whitespaces: 1
normalization_rule_tsv:
}
denormalizer_spec {}
trainer_interface.cc(355) LOG(INFO) SentenceIterator is not specified. Using MultiFileSentenceIterator.
trainer_interface.cc(186) LOG(INFO) Loading corpus: ./data/spm/train_en.txt
trainer_interface.cc(148) LOG(INFO) Loaded 1000000 lines
trainer_interface.cc(411) LOG(INFO) Loaded all 1000000 sentences
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <pad>
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <unk>
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <sos>
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <eos>
trainer_interface.cc(432) LOG(INFO) Normalizing sentences...
trainer_interface.cc(541) LOG(INFO) all chars count=40598225
trainer_interface.cc(552) LOG(INFO) Done: 99.9516% characters are covered.
trainer_interface.cc(562) LOG(INFO) Alphabet size=84
trainer_interface.cc(563) LOG(INFO) Final character coverage=0.999516
trainer_interface.cc(594) LOG(INFO) Done! preprocessed 1000000 sentences.
trainer_interface.cc(600) LOG(INFO) Tokenizing input sentences with whitespace: 1000000
trainer_interface.cc(611) LOG(INFO) Done! 276910
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=895887 min_freq=237
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=235059 size=20 all=4004 active=2064 piece=it
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=134769 size=40 all=5417 active=3477 piece=▁W
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=100155 size=60 all=6755 active=4815 piece=le
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=71246 size=80 all=8093 active=6153 piece=▁we
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=54588 size=100 all=9778 active=7838 piece=▁D
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=53536 min_freq=2979
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=45769 size=120 all=11192 active=2340 piece=▁an
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=36646 size=140 all=12251 active=3399 piece=ant
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=28674 size=160 all=13654 active=4802 piece=▁who
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=25910 size=180 all=15091 active=6239 piece=ive
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=23228 size=200 all=16392 active=7540 piece=▁whe
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=23112 min_freq=2715
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=20773 size=220 all=17599 active=2200 piece=ook
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=19108 size=240 all=19211 active=3812 piece=kay
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=16929 size=260 all=20759 active=5360 piece=▁she
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=15240 size=280 all=21957 active=6558 piece=rou
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=13888 size=300 all=22838 active=7439 piece=▁sp
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=13871 min_freq=2021
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=12873 size=320 all=23999 active=2242 piece=ice
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=11944 size=340 all=24856 active=3099 piece=other
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=10844 size=360 all=25765 active=4008 piece=▁too
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=9989 size=380 all=26652 active=4895 piece=▁If
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=9307 size=400 all=27367 active=5610 piece=▁more
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=9283 min_freq=1504
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=8649 size=420 all=28331 active=2325 piece=▁something
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=8011 size=440 all=29053 active=3047 piece=▁My
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=7627 size=460 all=29970 active=3964 piece=old
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=7167 size=480 all=30891 active=4885 piece=▁Who
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=6802 size=500 all=31753 active=5747 piece=ob
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=6765 min_freq=1176
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=6411 size=520 all=32711 active=2393 piece=▁under
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5998 size=540 all=33492 active=3174 piece=ious
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5624 size=560 all=34260 active=3942 piece=ty
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5293 size=580 all=34922 active=4604 piece=▁these
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5069 size=600 all=35588 active=5270 piece=▁rec
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=5066 min_freq=959
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4848 size=620 all=36327 active=2479 piece=▁kid
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4624 size=640 all=36930 active=3082 piece=▁stop
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4393 size=660 all=37774 active=3926 piece=▁Did
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4190 size=680 all=38471 active=4623 piece=ub
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4039 size=700 all=38936 active=5088 piece=ail
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=4034 min_freq=815
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3893 size=720 all=39847 active=2775 piece=▁best
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3739 size=740 all=40269 active=3197 piece=▁bad
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3592 size=760 all=40703 active=3631 piece=▁ind
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3469 size=780 all=41531 active=4459 piece=side
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3345 size=800 all=42010 active=4938 piece=▁Bec
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=3342 min_freq=705
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3264 size=820 all=42876 active=2956 piece=▁ph
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3140 size=840 all=43567 active=3647 piece=▁family
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3063 size=860 all=44185 active=4265 piece=ere
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2954 size=880 all=44721 active=4801 piece=ib
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2854 size=900 all=45559 active=5639 piece=▁prom
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2849 min_freq=614
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2775 size=920 all=46173 active=2883 piece=▁room
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2708 size=940 all=46542 active=3252 piece=▁seen
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2629 size=960 all=47065 active=3775 piece=▁rest
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2571 size=980 all=47470 active=4180 piece=▁fin
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2498 size=1000 all=47867 active=4577 piece=ph
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2484 min_freq=549
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2406 size=1020 all=48353 active=2770 piece=ween
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2355 size=1040 all=48939 active=3356 piece=ying
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2298 size=1060 all=49555 active=3972 piece=▁face
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2239 size=1080 all=50062 active=4479 piece=vent
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2176 size=1100 all=50660 active=5077 piece=▁true
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2174 min_freq=496
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2126 size=1120 all=50916 active=2789 piece=▁Sc
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2087 size=1140 all=51353 active=3226 piece=▁count
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2037 size=1160 all=52130 active=4003 piece=lect
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1994 size=1180 all=52626 active=4499 piece=▁month
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1947 size=1200 all=52998 active=4871 piece=anc
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1947 min_freq=455
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1903 size=1220 all=53529 active=3070 piece=ets
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1854 size=1240 all=54003 active=3544 piece=▁trans
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1809 size=1260 all=54514 active=4055 piece=▁sub
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1770 size=1280 all=55091 active=4632 piece=ys
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1735 size=1300 all=55729 active=5270 piece=▁supposed
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1733 min_freq=416
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1702 size=1320 all=56196 active=3252 piece=als
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1658 size=1340 all=56774 active=3830 piece=▁comes
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1629 size=1360 all=57022 active=4078 piece=▁created
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1596 size=1380 all=57414 active=4470 piece=▁needs
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1572 size=1400 all=57864 active=4920 piece=▁fall
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1568 min_freq=390
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1551 size=1420 all=58123 active=3141 piece=▁den
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1514 size=1440 all=58677 active=3695 piece=▁Thanks
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1489 size=1460 all=59045 active=4063 piece=olog
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1460 size=1480 all=59645 active=4663 piece=▁afraid
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1428 size=1500 all=59899 active=4917 piece=ative
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1427 min_freq=360
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1395 size=1520 all=60231 active=3287 piece=▁Con
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1375 size=1540 all=60636 active=3692 piece=ized
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1336 size=1560 all=61312 active=4368 piece=▁exp
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1310 size=1580 all=61625 active=4681 piece=▁Ge
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1292 size=1600 all=62208 active=5264 piece=▁hey
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1290 min_freq=335
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1264 size=1620 all=62513 active=3415 piece=▁sweet
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1245 size=1640 all=62838 active=3740 piece=kes
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1229 size=1660 all=63401 active=4303 piece=HP
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1218 size=1680 all=63655 active=4557 piece=▁cause
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1196 size=1700 all=63898 active=4800 piece=▁Sir
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1195 min_freq=314
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1173 size=1720 all=64338 active=3630 piece=▁married
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1157 size=1740 all=64733 active=4025 piece=▁lives
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1146 size=1760 all=65252 active=4544 piece=▁outside
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1118 size=1780 all=65535 active=4827 piece=▁aren
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1099 size=1800 all=65991 active=5283 piece=▁asking
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1097 min_freq=294
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1081 size=1820 all=66264 active=3573 piece=▁id
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1062 size=1840 all=66780 active=4089 piece=ution
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1038 size=1860 all=67135 active=4444 piece=els
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1024 size=1880 all=67531 active=4840 piece=reet
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1008 size=1900 all=67873 active=5182 piece=▁everybody
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1007 min_freq=277
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=994 size=1920 all=68256 active=3777 piece=▁shouldn
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=981 size=1940 all=68682 active=4203 piece=▁peace
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=960 size=1960 all=69132 active=4653 piece=irth
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=946 size=1980 all=69588 active=5109 piece=▁Everything
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=929 size=2000 all=69857 active=5378 piece=eredith
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=928 min_freq=261
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=916 size=2020 all=70232 active=3865 piece=▁Show
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=904 size=2040 all=70548 active=4181 piece=▁surpr
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=894 size=2060 all=70799 active=4432 piece=▁sort
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=887 size=2080 all=71098 active=4731 piece=▁eff
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=869 size=2100 all=71510 active=5143 piece=▁contin
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=869 min_freq=247
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=860 size=2120 all=71679 active=3737 piece=▁caus
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=852 size=2140 all=71967 active=4025 piece=▁respect
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=843 size=2160 all=72314 active=4372 piece=▁needed
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=830 size=2180 all=72708 active=4766 piece=):
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=817 size=2200 all=72949 active=5007 piece=▁squ
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=817 min_freq=235
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=804 size=2220 all=73295 active=3967 piece=▁fair
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=792 size=2240 all=73549 active=4221 piece=▁tot
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=783 size=2260 all=73802 active=4474 piece=▁PHP
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=773 size=2280 all=74254 active=4926 piece=gram
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=766 size=2300 all=74452 active=5124 piece=▁Fl
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=766 min_freq=226
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=755 size=2320 all=74779 active=3990 piece=alley
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=744 size=2340 all=75150 active=4361 piece=▁El
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=735 size=2360 all=75499 active=4710 piece=rif
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=723 size=2380 all=75918 active=5129 piece=ored
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=714 size=2400 all=76245 active=5456 piece=▁road
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=713 min_freq=214
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=704 size=2420 all=76337 active=3894 piece=▁cent
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=694 size=2440 all=76522 active=4079 piece=▁dro
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=687 size=2460 all=76882 active=4439 piece=▁astray
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=678 size=2480 all=77043 active=4600 piece=▁Remember
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=671 size=2500 all=77252 active=4809 piece=▁ign
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=671 min_freq=206
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=665 size=2520 all=77645 active=4246 piece=▁piece
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=657 size=2540 all=78121 active=4722 piece=▁arm
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=646 size=2560 all=78372 active=4973 piece=▁bear
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=640 size=2580 all=78628 active=5229 piece=▁Mic
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=636 size=2600 all=78848 active=5449 piece=eg
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=636 min_freq=197
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=631 size=2620 all=79174 active=4190 piece=▁stick
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=626 size=2640 all=79324 active=4340 piece=▁standing
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=621 size=2660 all=79418 active=4434 piece=And
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=616 size=2680 all=79700 active=4716 piece=▁quiet
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=609 size=2700 all=80052 active=5068 piece=▁soc
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=609 min_freq=189
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=605 size=2720 all=80290 active=4233 piece=atter
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=598 size=2740 all=80751 active=4694 piece=▁viol
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=588 size=2760 all=80925 active=4868 piece=▁incre
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=584 size=2780 all=81191 active=5134 piece=▁situation
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=577 size=2800 all=81388 active=5331 piece=▁dance
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=576 min_freq=183
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=567 size=2820 all=81845 active=4527 piece=zzie
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=560 size=2840 all=82225 active=4907 piece=▁agree
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=554 size=2860 all=82555 active=5237 piece=dden
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=550 size=2880 all=82775 active=5457 piece=asure
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=542 size=2900 all=82928 active=5610 piece=▁forgiveness
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=541 min_freq=175
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=536 size=2920 all=83204 active=4423 piece=▁Wor
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=531 size=2940 all=83344 active=4563 piece=▁doll
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=527 size=2960 all=83514 active=4733 piece=▁lay
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=524 size=2980 all=83757 active=4976 piece=▁David
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=519 size=3000 all=84001 active=5220 piece=▁value
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=518 min_freq=169
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=514 size=3020 all=84173 active=4373 piece=▁stuck
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=511 size=3040 all=84434 active=4634 piece=▁cust
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=507 size=3060 all=84587 active=4787 piece=▁missed
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=502 size=3080 all=84715 active=4915 piece=ilty
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=497 size=3100 all=85056 active=5256 piece=pping
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=496 min_freq=163
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=493 size=3120 all=85375 active=4549 piece=▁crap
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=488 size=3140 all=85646 active=4820 piece=ural
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=484 size=3160 all=86041 active=5215 piece=▁round
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=481 size=3180 all=86166 active=5340 piece=▁yesterday
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=475 size=3200 all=86455 active=5629 piece=▁Mag
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=474 min_freq=157
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=470 size=3220 all=86671 active=4515 piece=engers
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=467 size=3240 all=86871 active=4715 piece=▁push
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=463 size=3260 all=87099 active=4943 piece=oman
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=459 size=3280 all=87246 active=5090 piece=▁Dra
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=454 size=3300 all=87497 active=5341 piece=ground
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=454 min_freq=152
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=450 size=3320 all=87747 active=4604 piece=▁transgress
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=445 size=3340 all=87891 active=4748 piece=▁prec
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=441 size=3360 all=88162 active=5019 piece=▁Dec
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=438 size=3380 all=88398 active=5255 piece=▁imm
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=435 size=3400 all=88497 active=5354 piece=izz
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=435 min_freq=147
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=433 size=3420 all=88738 active=4611 piece=▁seat
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=430 size=3440 all=88867 active=4740 piece=▁Anything
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=427 size=3460 all=89193 active=5066 piece=▁prefer
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=422 size=3480 all=89379 active=5252 piece=▁Yo
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=417 size=3500 all=89446 active=5319 piece=▁secure
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=417 min_freq=143
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=414 size=3520 all=89685 active=4710 piece=▁bare
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=410 size=3540 all=89881 active=4906 piece=enger
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=407 size=3560 all=90096 active=5121 piece=▁elect
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=404 size=3580 all=90359 active=5384 piece=▁equal
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=400 size=3600 all=90445 active=5470 piece=▁vir
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=400 min_freq=139
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=397 size=3620 all=90678 active=4749 piece='.
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=395 size=3640 all=90890 active=4961 piece=▁beyond
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=392 size=3660 all=91072 active=5143 piece=ino
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=390 size=3680 all=91273 active=5344 piece=▁polit
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=388 size=3700 all=91592 active=5663 piece=▁interested
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=387 min_freq=135
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=384 size=3720 all=91789 active=4777 piece=▁Que
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=382 size=3740 all=91963 active=4951 piece=▁college
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=379 size=3760 all=92176 active=5164 piece=load
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=375 size=3780 all=92389 active=5377 piece=▁ghost
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=372 size=3800 all=92696 active=5684 piece=▁club
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=372 min_freq=131
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=369 size=3820 all=92886 active=4820 piece=▁Med
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=368 size=3840 all=93084 active=5018 piece=▁female
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=366 size=3860 all=93249 active=5183 piece=▁nervous
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=363 size=3880 all=93462 active=5396 piece=▁spirit
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=360 size=3900 all=93735 active=5669 piece=Muhammad
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=360 min_freq=127
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=356 size=3920 all=93920 active=4872 piece=ples
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=353 size=3940 all=94085 active=5037 piece=izing
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=351 size=3960 all=94299 active=5251 piece=▁feelings
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=348 size=3980 all=94552 active=5504 piece=▁sees
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=347 size=4000 all=94791 active=5743 piece=▁weight
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=347 min_freq=123
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=344 size=4020 all=94923 active=4866 piece=▁greatest
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=341 size=4040 all=95124 active=5067 piece=rees
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=339 size=4060 all=95228 active=5171 piece=▁daddy
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=336 size=4080 all=95412 active=5355 piece=▁barely
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=333 size=4100 all=95618 active=5561 piece=▁Aren
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=333 min_freq=120
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=331 size=4120 all=95787 active=4949 piece=▁president
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=328 size=4140 all=95999 active=5161 piece=▁liar
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=325 size=4160 all=96237 active=5399 piece=▁Cap
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=324 size=4180 all=96387 active=5549 piece=▁property
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=322 size=4200 all=96619 active=5781 piece=▁sand
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=322 min_freq=116
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=320 size=4220 all=96871 active=5070 piece=▁Absolutely
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=318 size=4240 all=96993 active=5192 piece=itation
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=316 size=4260 all=97112 active=5311 piece=▁recompense
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=313 size=4280 all=97251 active=5450 piece=born
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=311 size=4300 all=97373 active=5572 piece=▁walked
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=310 min_freq=113
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=308 size=4320 all=97431 active=4927 piece=▁bathroom
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=306 size=4340 all=97615 active=5111 piece=▁loud
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=305 size=4360 all=97674 active=5170 piece=▁folder
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=303 size=4380 all=97918 active=5414 piece=▁employ
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=300 size=4400 all=98255 active=5751 piece=rig
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=300 min_freq=110
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=299 size=4420 all=98480 active=5082 piece=itions
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=296 size=4440 all=98602 active=5204 piece=olf
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=294 size=4460 all=98782 active=5384 piece=aches
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=292 size=4480 all=99023 active=5625 piece=▁sust
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=289 size=4500 all=99224 active=5826 piece=cho
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=289 min_freq=107
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=288 size=4520 all=99411 active=5125 piece=▁prev
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=286 size=4540 all=99493 active=5207 piece=PS
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=284 size=4560 all=99647 active=5361 piece=zen
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=281 size=4580 all=99836 active=5550 piece=Ch
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=280 size=4600 all=100036 active=5750 piece=▁cases
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=280 min_freq=104
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=279 size=4620 all=100105 active=5071 piece=▁chicken
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=277 size=4640 all=100239 active=5205 piece=▁driver
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=275 size=4660 all=100415 active=5381 piece=▁failure
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=273 size=4680 all=100503 active=5469 piece=▁Ugh
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=271 size=4700 all=100685 active=5651 piece=oph
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=271 min_freq=102
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=270 size=4720 all=100809 active=5113 piece=▁witnesses
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=268 size=4740 all=101018 active=5322 piece=▁ep
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=267 size=4760 all=101201 active=5505 piece=▁fet
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=266 size=4780 all=101320 active=5624 piece=▁arrived
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=264 size=4800 all=101552 active=5856 piece=ected
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=264 min_freq=99
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=263 size=4820 all=101613 active=5130 piece=▁Empire
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=261 size=4840 all=101809 active=5326 piece=zing
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=260 size=4860 all=102014 active=5531 piece=▁init
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=259 size=4880 all=102232 active=5749 piece=▁theirs
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=257 size=4900 all=102318 active=5835 piece=▁sett
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=257 min_freq=97
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=255 size=4920 all=102483 active=5276 piece=▁Cent
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=253 size=4940 all=102543 active=5336 piece=▁Jake
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=252 size=4960 all=102687 active=5480 piece=▁arrogant
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=250 size=4980 all=102897 active=5690 piece=▁beer
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=249 size=5000 all=103113 active=5906 piece=▁nurse
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=249 min_freq=95
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=248 size=5020 all=103221 active=5263 piece=▁animal
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=247 size=5040 all=103331 active=5373 piece=▁directory
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=245 size=5060 all=103488 active=5530 piece=▁Girl
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=243 size=5080 all=103515 active=5557 piece=ops
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=242 size=5100 all=103787 active=5829 piece=▁opening
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=242 min_freq=93
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=240 size=5120 all=103928 active=5331 piece=▁bug
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=239 size=5140 all=104029 active=5432 piece=isf
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=238 size=5160 all=104271 active=5674 piece=▁total
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=236 size=5180 all=104528 active=5931 piece=ources
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=235 size=5200 all=104779 active=6182 piece=▁Dri
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=235 min_freq=91
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=234 size=5220 all=104877 active=5326 piece=▁responsibility
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=232 size=5240 all=105055 active=5504 piece=▁60
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=231 size=5260 all=105206 active=5655 piece=▁Pen
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=230 size=5280 all=105272 active=5721 piece=▁apostles
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=228 size=5300 all=105430 active=5879 piece=▁Jess
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=228 min_freq=89
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=227 size=5320 all=105571 active=5406 piece=▁bal
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=226 size=5340 all=105730 active=5565 piece=▁attend
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=224 size=5360 all=105838 active=5673 piece=uary
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=223 size=5380 all=106038 active=5873 piece=istic
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=222 size=5400 all=106227 active=6062 piece=Yeah
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=222 min_freq=87
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=221 size=5420 all=106516 active=5601 piece=anced
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=220 size=5440 all=106672 active=5757 piece=unting
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=219 size=5460 all=106741 active=5826 piece=▁memories
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=218 size=5480 all=106982 active=6067 piece=▁exchange
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=216 size=5500 all=107135 active=6220 piece=▁sne
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=216 min_freq=85
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=215 size=5520 all=107283 active=5500 piece=▁crowd
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=214 size=5540 all=107395 active=5612 piece=▁functions
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=212 size=5560 all=107529 active=5746 piece=▁Eat
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=211 size=5580 all=107634 active=5851 piece=▁Arab
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=210 size=5600 all=107669 active=5886 piece=▁Naz
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=210 min_freq=83
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=209 size=5620 all=107730 active=5437 piece=▁cal
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=208 size=5640 all=107874 active=5581 piece=ett
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=208 size=5660 all=108095 active=5802 piece=▁Fucking
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=207 size=5680 all=108218 active=5925 piece=▁player
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=206 size=5700 all=108345 active=6052 piece=▁beating
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=206 min_freq=81
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=205 size=5720 all=108462 active=5535 piece=aining
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=204 size=5740 all=108597 active=5670 piece=edient
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=203 size=5760 all=108813 active=5886 piece=▁remote
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=201 size=5780 all=108901 active=5974 piece=FF
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=200 size=5800 all=109111 active=6184 piece=oura
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=200 min_freq=79
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=199 size=5820 all=109247 active=5581 piece=▁Sun
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=198 size=5840 all=109371 active=5705 piece=▁enjoin
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=197 size=5860 all=109493 active=5827 piece=▁spare
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=196 size=5880 all=109571 active=5905 piece=▁brilliant
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=194 size=5900 all=109747 active=6081 piece=▁Po
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=194 min_freq=78
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=193 size=5920 all=109860 active=5590 piece=gend
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=192 size=5940 all=109983 active=5713 piece=ago
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=192 size=5960 all=110162 active=5892 piece=▁Spanish
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=190 size=5980 all=110295 active=6025 piece=▁3,
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=189 size=6000 all=110370 active=6100 piece=▁1,
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=189 min_freq=77
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=188 size=6020 all=110431 active=5569 piece=udy
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=188 size=6040 all=110580 active=5718 piece=▁produc
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=187 size=6060 all=110880 active=6018 piece=▁Cons
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=186 size=6080 all=111010 active=6148 piece=▁ju
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=186 size=6100 all=111191 active=6329 piece=CHUCKLES
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=186 min_freq=75
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=185 size=6120 all=111315 active=5684 piece=▁member
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=184 size=6140 all=111408 active=5777 piece=▁Muslims
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=183 size=6160 all=111536 active=5905 piece=▁refuse
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=182 size=6180 all=111688 active=6057 piece=▁interp
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=181 size=6200 all=111851 active=6220 piece=▁range
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=181 min_freq=74
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=180 size=6220 all=111976 active=5718 piece=▁Glory
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=179 size=6240 all=112018 active=5760 piece=▁fro
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=178 size=6260 all=112051 active=5793 piece=IL
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=178 size=6280 all=112271 active=6013 piece=▁actions
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=177 size=6300 all=112434 active=6176 piece=▁ability
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=177 min_freq=72
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=176 size=6320 all=112633 active=5821 piece=▁mommy
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=175 size=6340 all=112692 active=5880 piece=erable
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=174 size=6360 all=112744 active=5932 piece=kin
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=174 size=6380 all=112902 active=6090 piece=▁Whether
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=173 size=6400 all=113079 active=6267 piece=▁Tony
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=173 min_freq=71
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=173 size=6420 all=113101 active=5675 piece=▁righteousness
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=172 size=6440 all=113303 active=5877 piece=▁nerve
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=171 size=6460 all=113454 active=6028 piece=▁talks
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=170 size=6480 all=113564 active=6138 piece=▁sexy
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=169 size=6500 all=113623 active=6197 piece=ifies
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=169 min_freq=69
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=169 size=6520 all=113679 active=5716 piece=▁hitting
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=168 size=6540 all=113833 active=5870 piece=▁wave
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=167 size=6560 all=113935 active=5972 piece=▁chain
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=166 size=6580 all=114041 active=6078 piece=▁Year
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=165 size=6600 all=114214 active=6251 piece=spring
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=165 min_freq=68
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=164 size=6620 all=114320 active=5811 piece=▁Kate
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=164 size=6640 all=114339 active=5830 piece=▁Washington
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=163 size=6660 all=114459 active=5950 piece=▁arrange
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=162 size=6680 all=114704 active=6195 piece=▁metal
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=161 size=6700 all=114785 active=6276 piece=▁ded
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=161 min_freq=67
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=160 size=6720 all=114913 active=5858 piece=got
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=160 size=6740 all=115071 active=6016 piece=▁hunting
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=159 size=6760 all=115185 active=6130 piece=▁Wilson
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=158 size=6780 all=115423 active=6368 piece=▁Dylan
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=157 size=6800 all=115445 active=6390 piece=oved
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=157 min_freq=66
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=157 size=6820 all=115539 active=5858 piece=▁possibility
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=156 size=6840 all=115688 active=6007 piece=▁crush
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=155 size=6860 all=115752 active=6071 piece=ahn
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=155 size=6880 all=115899 active=6218 piece=▁chances
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=154 size=6900 all=116045 active=6364 piece=▁cord
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=154 min_freq=65
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=153 size=6920 all=116142 active=5894 piece=▁pled
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=152 size=6940 all=116289 active=6041 piece=ju
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=152 size=6960 all=116511 active=6263 piece=▁noble
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=151 size=6980 all=116679 active=6431 piece=▁Laur
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=151 size=7000 all=116689 active=6441 piece=▁Elizabeth
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=151 min_freq=64
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=150 size=7020 all=116818 active=5964 piece=▁Breat
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=150 size=7040 all=116828 active=5974 piece=▁testimony
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=149 size=7060 all=117030 active=6176 piece=▁sweat
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=148 size=7080 all=117149 active=6295 piece=▁Net
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=148 size=7100 all=117243 active=6389 piece=▁colors
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=148 min_freq=63
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=147 size=7120 all=117341 active=5960 piece=▁wow
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=147 size=7140 all=117352 active=5971 piece=▁tracking
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=146 size=7160 all=117554 active=6173 piece=▁dates
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=145 size=7180 all=117604 active=6223 piece=▁WAS
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=145 size=7200 all=117673 active=6292 piece=idences
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=145 min_freq=62
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=144 size=7220 all=117801 active=6005 piece=umps
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=144 size=7240 all=117870 active=6074 piece=▁Princess
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=143 size=7260 all=118158 active=6362 piece=▁Hud
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=143 size=7280 all=118219 active=6423 piece=▁agreement
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=142 size=7300 all=118370 active=6574 piece=▁loan
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=142 min_freq=61
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=142 size=7320 all=118419 active=5966 piece=▁students
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=141 size=7340 all=118637 active=6184 piece=▁shell
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=140 size=7360 all=118806 active=6353 piece=▁coc
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=140 size=7380 all=118838 active=6385 piece=▁neglect
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=139 size=7400 all=118984 active=6531 piece=people
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=139 min_freq=60
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=139 size=7420 all=119031 active=5983 piece=▁stands
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=138 size=7440 all=119170 active=6122 piece=olm
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=138 size=7460 all=119362 active=6314 piece=▁joking
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=137 size=7480 all=119523 active=6475 piece=point
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=137 size=7500 all=119583 active=6535 piece=▁ending
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=137 min_freq=59
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=136 size=7520 all=119659 active=6056 piece=Have
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=136 size=7540 all=119726 active=6123 piece=▁Solomon
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=135 size=7560 all=119872 active=6269 piece=▁Oper
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=135 size=7580 all=119928 active=6325 piece=▁parking
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=134 size=7600 all=120080 active=6477 piece=▁types
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=134 min_freq=58
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=133 size=7620 all=120190 active=6114 piece=rome
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=133 size=7640 all=120271 active=6195 piece=▁Arabic
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=132 size=7660 all=120377 active=6301 piece=▁Guy
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=132 size=7680 all=120427 active=6351 piece=▁mortal
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=131 size=7700 all=120619 active=6543 piece=wan
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=131 min_freq=57
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=131 size=7720 all=120764 active=6155 piece=▁loser
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=131 size=7740 all=120768 active=6159 piece=▁Americans
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=130 size=7760 all=120984 active=6375 piece=ifier
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=130 size=7780 all=121059 active=6450 piece=▁destiny
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=129 size=7800 all=121205 active=6596 piece=▁Ji
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=129 min_freq=56
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=129 size=7820 all=121324 active=6172 piece=▁choices
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=128 size=7840 all=121463 active=6311 piece=▁Jas
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=128 size=7860 all=121504 active=6352 piece=▁deserved
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=127 size=7880 all=121768 active=6616 piece=▁Rand
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=126 size=7900 all=121866 active=6714 piece=▁AL
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=126 min_freq=55
trainer_interface.cc(689) LOG(INFO) Saving model: ./data/spm/spm_en.model
trainer_interface.cc(701) LOG(INFO) Saving vocabs: ./data/spm/spm_en.vocab
완료 한국어 토크나이저 학습 중... 완료 --- 원문 --- EN: They're shaped like a bus. KO: 할머니처럼 만들었지만.. ? 엉망이지만.. --- 토크나이징 --- EN tokens: ['▁They', "'", 're', '▁sha', 'ped', '▁like', '▁a', '▁bus', '.'] KO tokens: ['▁할머니', '처럼', '▁만들었', '지만', '..', '▁?', '▁엉망', '이지만', '..'] --- 인덱스 변환 --- EN ids: [299, 7939, 9, 388, 1829, 197, 6, 970, 7929] KO ids: [2338, 325, 5704, 76, 10, 1659, 3793, 4437, 10] 어휘 사전 크기: 8000 / 8000
sentencepiece_trainer.cc(78) LOG(INFO) Starts training with :
trainer_spec {
input: ./data/spm/train_ko.txt
input_format:
model_prefix: ./data/spm/spm_ko
model_type: BPE
vocab_size: 8000
self_test_sample_size: 0
character_coverage: 0.9995
input_sentence_size: 0
shuffle_input_sentence: 1
seed_sentencepiece_size: 1000000
shrinking_factor: 0.75
max_sentence_length: 4192
num_threads: 16
num_sub_iterations: 2
max_sentencepiece_length: 16
split_by_unicode_script: 1
split_by_number: 1
split_by_whitespace: 1
split_digits: 0
pretokenization_delimiter:
treat_whitespace_as_suffix: 0
allow_whitespace_only_pieces: 0
required_chars:
byte_fallback: 0
vocabulary_output_piece_score: 1
train_extremely_large_corpus: 0
seed_sentencepieces_file:
hard_vocab_limit: 1
use_all_vocab: 0
unk_id: 1
bos_id: 2
eos_id: 3
pad_id: 0
unk_piece: <unk>
bos_piece: <sos>
eos_piece: <eos>
pad_piece: <pad>
unk_surface: ⁇
enable_differential_privacy: 0
differential_privacy_noise_level: 0
differential_privacy_clipping_threshold: 0
}
normalizer_spec {
name: nmt_nfkc
add_dummy_prefix: 1
remove_extra_whitespaces: 1
escape_whitespaces: 1
normalization_rule_tsv:
}
denormalizer_spec {}
trainer_interface.cc(355) LOG(INFO) SentenceIterator is not specified. Using MultiFileSentenceIterator.
trainer_interface.cc(186) LOG(INFO) Loading corpus: ./data/spm/train_ko.txt
trainer_interface.cc(382) LOG(WARNING) Found too long line (4754 > 4192).
trainer_interface.cc(384) LOG(WARNING) Too long lines are skipped in the training.
trainer_interface.cc(385) LOG(WARNING) The maximum length can be changed with --max_sentence_length=<size> flag.
trainer_interface.cc(411) LOG(INFO) Loaded all 999999 sentences
trainer_interface.cc(418) LOG(INFO) Skipped 1 too long sentences.
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <pad>
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <unk>
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <sos>
trainer_interface.cc(427) LOG(INFO) Adding meta_piece: <eos>
trainer_interface.cc(432) LOG(INFO) Normalizing sentences...
trainer_interface.cc(541) LOG(INFO) all chars count=20981598
trainer_interface.cc(552) LOG(INFO) Done: 99.9501% characters are covered.
trainer_interface.cc(562) LOG(INFO) Alphabet size=1466
trainer_interface.cc(563) LOG(INFO) Final character coverage=0.999501
trainer_interface.cc(594) LOG(INFO) Done! preprocessed 999999 sentences.
trainer_interface.cc(600) LOG(INFO) Tokenizing input sentences with whitespace: 999999
trainer_interface.cc(611) LOG(INFO) Done! 649036
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=267477 min_freq=353
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=45736 size=20 all=102318 active=8745 piece=▁우
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=32620 size=40 all=105758 active=12185 piece=▁해
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=24933 size=60 all=109528 active=15955 piece=▁시
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=20410 size=80 all=113094 active=19521 piece=▁만
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=17221 size=100 all=117174 active=23601 piece=▁잘
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=16956 min_freq=297
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=14988 size=120 all=119405 active=7967 piece=▁있는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=13185 size=140 all=121593 active=10155 piece=▁왜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=11582 size=160 all=124073 active=12635 piece=예요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=10417 size=180 all=127061 active=15623 piece=▁괜찮
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=9607 size=200 all=128919 active=17481 piece=▁무슨
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=9578 min_freq=265
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=8862 size=220 all=130945 active=8445 piece=▁필요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=8227 size=240 all=132702 active=10202 piece=▁to
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=7761 size=260 all=134355 active=11855 piece=▁2
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=7343 size=280 all=137015 active=14515 piece=▁어디
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=6803 size=300 all=138728 active=16228 piece=▁S
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=6785 min_freq=243
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=6349 size=320 all=140497 active=8646 piece=▁우린
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=6031 size=340 all=142847 active=10996 piece=ot
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5824 size=360 all=144627 active=12776 piece=▁베
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5591 size=380 all=146817 active=14966 piece=▁그건
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5311 size=400 all=148357 active=16506 piece=▁환
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=5262 min_freq=221
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5061 size=420 all=150277 active=9294 piece=들에게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4853 size=440 all=151667 active=10684 piece=ve
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4644 size=460 all=153819 active=12836 piece=▁뭘
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4469 size=480 all=155699 active=14716 piece=▁몇
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4317 size=500 all=157796 active=16813 piece=▁간
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=4305 min_freq=201
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4188 size=520 all=158977 active=8970 piece=▁빨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4047 size=540 all=161406 active=11399 piece=구나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3898 size=560 all=162508 active=12501 piece=▁이게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3724 size=580 all=164354 active=14347 piece=▁뒤
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3574 size=600 all=165238 active=15231 piece=▁여기서
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=3563 min_freq=188
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3430 size=620 all=166973 active=9978 piece=▁고마
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3308 size=640 all=169468 active=12473 piece=▁얼마나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3201 size=660 all=170760 active=13765 piece=▁본
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3113 size=680 all=172347 active=15352 piece=▁선생
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3039 size=700 all=174444 active=17449 piece=▁출
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=3034 min_freq=173
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2951 size=720 all=175527 active=9750 piece=▁믿는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2843 size=740 all=177011 active=11234 piece=▁만나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2770 size=760 all=178146 active=12369 piece=이고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2719 size=780 all=179615 active=13838 piece=▁do
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2638 size=800 all=180809 active=15032 piece=▁레
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2637 min_freq=165
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2564 size=820 all=182211 active=10335 piece=▁나한테
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2494 size=840 all=183369 active=11493 piece=겠습니다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2404 size=860 all=184643 active=12767 piece=▁너희를
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2341 size=880 all=185583 active=13707 piece=▁죄송
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2270 size=900 all=186613 active=14737 piece=▁앉
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2265 min_freq=157
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2208 size=920 all=188631 active=11317 piece=▁슬
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2161 size=940 all=189958 active=12644 piece=▁커
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2116 size=960 all=191829 active=14515 piece=▁스스로
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2085 size=980 all=193301 active=15987 piece=▁물론
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2025 size=1000 all=194215 active=16901 piece=이는
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2025 min_freq=147
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1975 size=1020 all=195631 active=10932 piece=▁이르
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1927 size=1040 all=197357 active=12658 piece=▁이리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1881 size=1060 all=198463 active=13764 piece=all
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1845 size=1080 all=199839 active=15140 piece=▁���와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1794 size=1100 all=201105 active=16406 piece=서요
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1794 min_freq=139
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1756 size=1120 all=202266 active=11092 piece=인가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1719 size=1140 all=204061 active=12887 piece=▁부인
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1690 size=1160 all=205250 active=14076 piece=▁첫
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1654 size=1180 all=206585 active=15411 piece=▁성서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1617 size=1200 all=208276 active=17102 piece=▁님
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1615 min_freq=132
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1584 size=1220 all=209755 active=11865 piece=City
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1549 size=1240 all=211194 active=13304 piece=하신
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1523 size=1260 all=212720 active=14830 piece=▁as
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1496 size=1280 all=214251 active=16361 piece=▁갖고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1474 size=1300 all=215430 active=17540 piece=▁우릴
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1473 min_freq=126
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1443 size=1320 all=216523 active=11863 piece=▁하여금
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1424 size=1340 all=217650 active=12990 piece=▁모세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1396 size=1360 all=218609 active=13949 piece=자들이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1372 size=1380 all=219763 active=15103 piece=▁플
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1347 size=1400 all=220594 active=15934 piece=▁나타
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1347 min_freq=120
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1323 size=1420 all=221199 active=11620 piece=▁합
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1297 size=1440 all=222798 active=13219 piece=▁Oh
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1281 size=1460 all=224360 active=14781 piece=시는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1263 size=1480 all=225428 active=15849 piece=▁아래
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1241 size=1500 all=226691 active=17112 piece=ss
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1241 min_freq=116
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1225 size=1520 all=227485 active=12056 piece=▁섹
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1199 size=1540 all=228231 active=12802 piece=으시
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1177 size=1560 all=229586 active=14157 piece=▁솔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1156 size=1580 all=230706 active=15277 piece=수가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1140 size=1600 all=232049 active=16620 piece=▁ab
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1140 min_freq=111
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1126 size=1620 all=233351 active=12890 piece=에서는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1107 size=1640 all=234454 active=13993 piece=▁허락
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1091 size=1660 all=236269 active=15808 piece=▁맘
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1076 size=1680 all=236916 active=16455 piece=ure
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1063 size=1700 all=237603 active=17142 piece=스럽
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1062 min_freq=106
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1053 size=1720 all=238892 active=13037 piece=▁자들은
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1034 size=1740 all=240162 active=14307 piece=스크
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1024 size=1760 all=241360 active=15505 piece=▁됐어요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1012 size=1780 all=242319 active=16464 piece=▁책임
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=994 size=1800 all=243364 active=17509 piece=▁욕
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=994 min_freq=103
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=983 size=1820 all=244367 active=13120 piece=▁현세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=970 size=1840 all=245627 active=14380 piece=▁세계
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=960 size=1860 all=246530 active=15283 piece=.0
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=949 size=1880 all=247995 active=16748 piece=▁out
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=940 size=1900 all=248915 active=17668 piece=실에
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=939 min_freq=99
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=930 size=1920 all=250076 active=13502 piece=▁뒤에
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=919 size=1940 all=251077 active=14503 piece=하러
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=909 size=1960 all=252127 active=15553 piece=▁그와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=901 size=1980 all=253190 active=16616 piece=▁하게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=891 size=2000 all=254251 active=17677 piece=▁오늘은
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=890 min_freq=96
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=880 size=2020 all=255207 active=13668 piece=▁있었던
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=871 size=2040 all=255967 active=14428 piece=▁흐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=864 size=2060 all=257434 active=15895 piece=디어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=852 size=2080 all=258922 active=17383 piece=▁우주
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=836 size=2100 all=259549 active=18010 piece=▁윌
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=836 min_freq=92
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=822 size=2120 all=260556 active=13945 piece=하실
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=815 size=2140 all=261763 active=15152 piece=▁닫
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=809 size=2160 all=262954 active=16343 piece=이었어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=800 size=2180 all=264170 active=17559 piece=▁베푸
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=791 size=2200 all=264779 active=18168 piece=셔서
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=790 min_freq=89
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=780 size=2220 all=266257 active=14659 piece=▁있잖아요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=774 size=2240 all=267095 active=15497 piece=▁이들은
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=766 size=2260 all=267791 active=16193 piece=▁캠
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=761 size=2280 all=268839 active=17241 piece=▁탐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=754 size=2300 all=269394 active=17796 piece=ck
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=754 min_freq=87
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=746 size=2320 all=270495 active=14527 piece=▁흘
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=741 size=2340 all=271160 active=15192 piece=▁인턴
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=733 size=2360 all=272377 active=16409 piece=▁거래
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=724 size=2380 all=273303 active=17335 piece=▁어쨌든
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=717 size=2400 all=274497 active=18529 piece=▁일단
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=717 min_freq=84
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=709 size=2420 all=275265 active=14491 piece=▁삭
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=701 size=2440 all=275987 active=15213 piece=▁su
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=695 size=2460 all=276541 active=15767 piece=▁뭐라
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=688 size=2480 all=277542 active=16768 piece=▁운전
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=684 size=2500 all=278557 active=17783 piece=▁느껴
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=684 min_freq=82
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=680 size=2520 all=279476 active=14815 piece=▁need
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=673 size=2540 all=280217 active=15556 piece=▁지역
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=664 size=2560 all=280927 active=16266 piece=▁그분께
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=657 size=2580 all=281678 active=17017 piece=pen
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=650 size=2600 all=282601 active=17940 piece=▁요원
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=650 min_freq=79
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=643 size=2620 all=283280 active=14759 piece=▁불가능
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=637 size=2640 all=284008 active=15487 piece=▁없잖아
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=631 size=2660 all=284691 active=16170 piece=ont
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=626 size=2680 all=285413 active=16892 piece=▁살고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=619 size=2700 all=286145 active=17624 piece=▁time
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=618 min_freq=78
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=611 size=2720 all=287008 active=15164 piece=▁부터
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=604 size=2740 all=287527 active=15683 piece=▁깜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=598 size=2760 all=288582 active=16738 piece=ven
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=594 size=2780 all=289433 active=17589 piece=움을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=590 size=2800 all=290882 active=19038 piece=▁든
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=590 min_freq=75
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=587 size=2820 all=291668 active=15310 piece=▁소유
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=582 size=2840 all=292400 active=16042 piece=한텐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=577 size=2860 all=292918 active=16560 piece=▁어쩔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=571 size=2880 all=293510 active=17152 piece=▁동의
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=566 size=2900 all=294485 active=18127 piece=▁u
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=566 min_freq=74
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=562 size=2920 all=295272 active=15499 piece=▁구해
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=557 size=2940 all=295992 active=16219 piece=▁만나서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=551 size=2960 all=296899 active=17126 piece=▁총을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=547 size=2980 all=297583 active=17810 piece=▁하느뇨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=540 size=3000 all=298624 active=18851 piece=▁가질
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=539 min_freq=72
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=535 size=3020 all=299231 active=15511 piece=▁아무런
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=529 size=3040 all=299646 active=15926 piece=▁잘됐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=525 size=3060 all=300790 active=17070 piece=감을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=522 size=3080 all=301624 active=17904 piece=▁된다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=518 size=3100 all=302222 active=18502 piece=▁값
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=518 min_freq=71
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=515 size=3120 all=302908 active=15768 piece=▁나타나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=511 size=3140 all=303848 active=16708 piece=▁이래
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=507 size=3160 all=304491 active=17351 piece=▁보통
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=504 size=3180 all=305149 active=18009 piece=▁자세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=500 size=3200 all=306023 active=18883 piece=▁생각해요
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=499 min_freq=69
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=495 size=3220 all=306598 active=15877 piece=▁주소
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=492 size=3240 all=307101 active=16380 piece=대는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=490 size=3260 all=308004 active=17283 piece=▁가져가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=486 size=3280 all=308715 active=17994 piece=으라
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=480 size=3300 all=309634 active=18913 piece=하진
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=480 min_freq=68
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=477 size=3320 all=310283 active=15933 piece=▁상황이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=474 size=3340 all=310881 active=16531 piece=으니까요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=471 size=3360 all=311747 active=17397 piece=부를
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=468 size=3380 all=312723 active=18373 piece=wn
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=465 size=3400 all=313335 active=18985 piece=▁흑
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=465 min_freq=66
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=462 size=3420 all=314104 active=16395 piece=명한
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=458 size=3440 all=314703 active=16994 piece=▁밝혀
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=455 size=3460 all=315558 active=17849 piece=키는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=453 size=3480 all=316534 active=18825 piece=ings
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=450 size=3500 all=317420 active=19711 piece=▁메세
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=450 min_freq=65
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=447 size=3520 all=318261 active=16702 piece=▁동물
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=444 size=3540 all=318931 active=17372 piece=▁건지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=442 size=3560 all=319442 active=17883 piece=▁used
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=439 size=3580 all=320237 active=18678 piece=▁있으니까
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=435 size=3600 all=320798 active=19239 piece=▁불행
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=435 min_freq=63
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=432 size=3620 all=321373 active=16577 piece=▁송
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=430 size=3640 all=321887 active=17091 piece=는거야
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=427 size=3660 all=322532 active=17736 piece=tt
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=425 size=3680 all=323108 active=18312 piece=년에
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=423 size=3700 all=323939 active=19143 piece=▁똑같은
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=422 min_freq=62
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=419 size=3720 all=324679 active=16925 piece=▁충만하시니라
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=416 size=3740 all=325378 active=17624 piece=▁미치
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=414 size=3760 all=325902 active=18148 piece=▁길에서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=411 size=3780 all=326722 active=18968 piece=▁av
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=408 size=3800 all=327150 active=19396 piece=▁x
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=408 min_freq=61
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=405 size=3820 all=327874 active=17064 piece=▁르
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=403 size=3840 all=328228 active=17418 piece=▁센
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=401 size=3860 all=329204 active=18394 piece=▁사랑을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=398 size=3880 all=329744 active=18934 piece=▁옷을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=396 size=3900 all=330405 active=19595 piece=▁까지
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=396 min_freq=60
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=395 size=3920 all=330932 active=17034 piece=▁우리와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=393 size=3940 all=331613 active=17715 piece=▁결혼식
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=391 size=3960 all=332102 active=18204 piece=▁같다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=389 size=3980 all=332657 active=18759 piece=▁드디어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=387 size=4000 all=333080 active=19182 piece=▁컴퓨터
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=387 min_freq=59
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=384 size=4020 all=333595 active=17140 piece=▁거리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=382 size=4040 all=334299 active=17844 piece=해야지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=380 size=4060 all=335107 active=18652 piece=▁목소
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=378 size=4080 all=335602 active=19147 piece=▁뿌
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=377 size=4100 all=336490 active=20035 piece=▁의사가
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=376 min_freq=58
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=375 size=4120 all=337358 active=17692 piece=▁응급
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=373 size=4140 all=337654 active=17988 piece=).
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=371 size=4160 all=338136 active=18470 piece=▁섹시
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=368 size=4180 all=338812 active=19146 piece=일세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=367 size=4200 all=339569 active=19903 piece=께서는
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=367 min_freq=57
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=365 size=4220 all=340267 active=17621 piece=▁않도록
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=363 size=4240 all=340815 active=18169 piece=▁new
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=361 size=4260 all=341160 active=18514 piece=▁핵
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=360 size=4280 all=342023 active=19377 piece=났어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=358 size=4300 all=342634 active=19988 piece=ody
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=358 min_freq=56
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=357 size=4320 all=343124 active=17611 piece=▁속에서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=355 size=4340 all=343687 active=18174 piece=▁있거든
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=353 size=4360 all=344231 active=18718 piece=번호
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=352 size=4380 all=344801 active=19288 piece=ield
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=350 size=4400 all=345560 active=20047 piece=▁증오
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=350 min_freq=55
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=348 size=4420 all=346191 active=17860 piece=어질
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=345 size=4440 all=346878 active=18547 piece=ec
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=343 size=4460 all=347771 active=19440 piece=▁부러
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=341 size=4480 all=348133 active=19802 piece=▁전지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=339 size=4500 all=348738 active=20407 piece=▁가정
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=339 min_freq=54
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=338 size=4520 all=349580 active=18218 piece=▁which
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=336 size=4540 all=350226 active=18864 piece=▁몇몇
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=335 size=4560 all=351157 active=19795 piece=▁밖에서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=333 size=4580 all=351527 active=20165 piece=▁친척
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=331 size=4600 all=352148 active=20786 piece=▁찾은
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=331 min_freq=53
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=329 size=4620 all=352780 active=18224 piece=▁내려가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=327 size=4640 all=353229 active=18673 piece=▁대지를
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=325 size=4660 all=353849 active=19293 piece=▁올바른
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=323 size=4680 all=354393 active=19837 piece=하다는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=320 size=4700 all=354992 active=20436 piece=았지
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=320 min_freq=52
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=319 size=4720 all=355676 active=18376 piece=▁해변
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=317 size=4740 all=356335 active=19035 piece=▁누르
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=316 size=4760 all=357247 active=19947 piece=▁따라와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=314 size=4780 all=357630 active=20330 piece=▁슬프
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=312 size=4800 all=358224 active=20924 piece=증을
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=312 min_freq=52
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=310 size=4820 all=359200 active=18829 piece=▁좋다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=308 size=4840 all=359625 active=19254 piece=▁어딜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=307 size=4860 all=360056 active=19685 piece=▁식으로
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=305 size=4880 all=360599 active=20228 piece=▁곳은
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=303 size=4900 all=361188 active=20817 piece=▁마련
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=303 min_freq=51
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=302 size=4920 all=361764 active=18580 piece=▁없다면
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=300 size=4940 all=362364 active=19180 piece=▁수고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=298 size=4960 all=362935 active=19751 piece=셔도
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=297 size=4980 all=363568 active=20384 piece=▁where
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=295 size=5000 all=364085 active=20901 piece=▁고통을
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=295 min_freq=50
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=293 size=5020 all=364498 active=18616 piece=와서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=292 size=5040 all=364963 active=19081 piece=▁어차피
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=291 size=5060 all=365459 active=19577 piece=▁item
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=289 size=5080 all=366064 active=20182 piece=▁스튜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=287 size=5100 all=366576 active=20694 piece=▁ke
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=287 min_freq=49
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=286 size=5120 all=367161 active=18910 piece=▁대니
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=285 size=5140 all=367651 active=19400 piece=▁이제는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=283 size=5160 all=368329 active=20078 piece=▁부복
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=282 size=5180 all=369002 active=20751 piece=▁걸요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=281 size=5200 all=369539 active=21288 piece=▁자리에
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=281 min_freq=48
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=279 size=5220 all=369859 active=18795 piece=▁맺
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=278 size=5240 all=370562 active=19498 piece=▁클리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=277 size=5260 all=370970 active=19906 piece=▁중재
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=275 size=5280 all=371353 active=20289 piece=디슨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=274 size=5300 all=372007 active=20943 piece=졌다
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=274 min_freq=48
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=273 size=5320 all=372463 active=18981 piece=onst
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=272 size=5340 all=372911 active=19429 piece=▁찾기
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=270 size=5360 all=373265 active=19783 piece=ru
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=269 size=5380 all=373834 active=20352 piece=▁사랑하지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=267 size=5400 all=374215 active=20733 piece=▁벌어
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=267 min_freq=47
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=266 size=5420 all=374782 active=19245 piece=집니다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=265 size=5440 all=375582 active=20045 piece=▁느낄
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=264 size=5460 all=376219 active=20682 piece=한테도
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=263 size=5480 all=376619 active=21082 piece=▁재능
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=262 size=5500 all=377327 active=21790 piece=▁흔들
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=262 min_freq=47
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=261 size=5520 all=378118 active=19612 piece=nder
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=260 size=5540 all=378470 active=19964 piece=▁read
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=258 size=5560 all=378823 active=20317 piece=▁퀸
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=257 size=5580 all=379475 active=20969 piece=▁오게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=256 size=5600 all=379938 active=21432 piece=▁감독
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=256 min_freq=46
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=255 size=5620 all=380562 active=19569 piece=▁인터뷰
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=254 size=5640 all=381178 active=20185 piece=▁리처드
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=252 size=5660 all=381736 active=20743 piece=▁밑에
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=251 size=5680 all=382360 active=21367 piece=▁여깄
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=250 size=5700 all=382924 active=21931 piece=▁고맙다
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=250 min_freq=45
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=249 size=5720 all=383381 active=19601 piece=▁있겠죠
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=247 size=5740 all=383740 active=19960 piece=▁거만
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=246 size=5760 all=384299 active=20519 piece=하기로
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=245 size=5780 all=384677 active=20897 piece=스토
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=244 size=5800 all=385143 active=21363 piece=▁닦
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=244 min_freq=45
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=244 size=5820 all=385533 active=19612 piece=▁거역하겠느뇨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=242 size=5840 all=385854 active=19933 piece=질이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=242 size=5860 all=386363 active=20442 piece=▁뭐든지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=241 size=5880 all=386733 active=20812 piece=▁해야할
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=239 size=5900 all=387392 active=21471 piece=성에
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=239 min_freq=44
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=238 size=5920 all=387966 active=19881 piece=▁잠을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=237 size=5940 all=388450 active=20365 piece=▁내부
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=236 size=5960 all=389118 active=21033 piece=▁오스
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=235 size=5980 all=389542 active=21457 piece=▁불어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=234 size=6000 all=389857 active=21772 piece=▁마르
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=234 min_freq=44
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=233 size=6020 all=390277 active=19874 piece=수술
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=232 size=6040 all=390707 active=20304 piece=▁찔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=232 size=6060 all=390929 active=20526 piece=▁그분에게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=231 size=6080 all=391428 active=21025 piece=arch
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=230 size=6100 all=392011 active=21608 piece=시니라
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=230 min_freq=43
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=229 size=6120 all=392386 active=19963 piece=▁배열
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=228 size=6140 all=392834 active=20411 piece=▁연설
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=227 size=6160 all=393240 active=20817 piece=▁찍어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=226 size=6180 all=393549 active=21126 piece=던지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=226 size=6200 all=394109 active=21686 piece=onstellation
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=225 min_freq=42
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=225 size=6220 all=394519 active=20114 piece=▁pass
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=224 size=6240 all=395023 active=20618 piece=▁된다는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=223 size=6260 all=395489 active=21084 piece=▁어느날
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=222 size=6280 all=395976 active=21571 piece=▁도망가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=221 size=6300 all=396705 active=22300 piece=▁conf
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=221 min_freq=42
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=220 size=6320 all=397322 active=20438 piece=▁제목
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=219 size=6340 all=397827 active=20943 piece=▁고생
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=218 size=6360 all=398390 active=21506 piece=나도
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=218 size=6380 all=398985 active=22101 piece=▁내려놔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=217 size=6400 all=399386 active=22502 piece=▁주민
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=217 min_freq=41
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=216 size=6420 all=399839 active=20397 piece=▁술을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=215 size=6440 all=400424 active=20982 piece=▁열매
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=214 size=6460 all=401107 active=21665 piece=▁가리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=213 size=6480 all=401735 active=22293 piece=음이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=212 size=6500 all=402185 active=22743 piece=수로
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=212 min_freq=41
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=212 size=6520 all=402739 active=20598 piece=▁사람한테
trainer_interface.cc(689) LOG(INFO) Saving model: ./data/spm/spm_ko.model
trainer_interface.cc(701) LOG(INFO) Saving vocabs: ./data/spm/spm_ko.vocab
dating active symbols. max_freq=5262 min_freq=221
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=5061 size=420 all=150277 active=9294 piece=들에게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4853 size=440 all=151667 active=10684 piece=ve
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4644 size=460 all=153819 active=12836 piece=▁뭘
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4469 size=480 all=155699 active=14716 piece=▁몇
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4317 size=500 all=157796 active=16813 piece=▁간
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=4305 min_freq=201
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4188 size=520 all=158977 active=8970 piece=▁빨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=4047 size=540 all=161406 active=11399 piece=구나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3898 size=560 all=162508 active=12501 piece=▁이게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3724 size=580 all=164354 active=14347 piece=▁뒤
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3574 size=600 all=165238 active=15231 piece=▁여기서
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=3563 min_freq=188
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3430 size=620 all=166973 active=9978 piece=▁고마
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3308 size=640 all=169468 active=12473 piece=▁얼마나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3201 size=660 all=170760 active=13765 piece=▁본
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3113 size=680 all=172347 active=15352 piece=▁선생
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=3039 size=700 all=174444 active=17449 piece=▁출
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=3034 min_freq=173
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2951 size=720 all=175527 active=9750 piece=▁믿는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2843 size=740 all=177011 active=11234 piece=▁만나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2770 size=760 all=178146 active=12369 piece=이고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2719 size=780 all=179615 active=13838 piece=▁do
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2638 size=800 all=180809 active=15032 piece=▁레
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2637 min_freq=165
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2564 size=820 all=182211 active=10335 piece=▁나한테
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2494 size=840 all=183369 active=11493 piece=겠습니다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2404 size=860 all=184643 active=12767 piece=▁너희를
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2341 size=880 all=185583 active=13707 piece=▁죄송
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2270 size=900 all=186613 active=14737 piece=▁앉
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2265 min_freq=157
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2208 size=920 all=188631 active=11317 piece=▁슬
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2161 size=940 all=189958 active=12644 piece=▁커
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2116 size=960 all=191829 active=14515 piece=▁스스로
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2085 size=980 all=193301 active=15987 piece=▁물론
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=2025 size=1000 all=194215 active=16901 piece=이는
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=2025 min_freq=147
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1975 size=1020 all=195631 active=10932 piece=▁이르
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1927 size=1040 all=197357 active=12658 piece=▁이리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1881 size=1060 all=198463 active=13764 piece=all
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1845 size=1080 all=199839 active=15140 piece=▁���와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1794 size=1100 all=201105 active=16406 piece=서요
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1794 min_freq=139
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1756 size=1120 all=202266 active=11092 piece=인가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1719 size=1140 all=204061 active=12887 piece=▁부인
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1690 size=1160 all=205250 active=14076 piece=▁첫
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1654 size=1180 all=206585 active=15411 piece=▁성서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1617 size=1200 all=208276 active=17102 piece=▁님
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1615 min_freq=132
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1584 size=1220 all=209755 active=11865 piece=City
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1549 size=1240 all=211194 active=13304 piece=하신
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1523 size=1260 all=212720 active=14830 piece=▁as
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1496 size=1280 all=214251 active=16361 piece=▁갖고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1474 size=1300 all=215430 active=17540 piece=▁우릴
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1473 min_freq=126
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1443 size=1320 all=216523 active=11863 piece=▁하여금
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1424 size=1340 all=217650 active=12990 piece=▁모세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1396 size=1360 all=218609 active=13949 piece=자들이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1372 size=1380 all=219763 active=15103 piece=▁플
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1347 size=1400 all=220594 active=15934 piece=▁나타
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1347 min_freq=120
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1323 size=1420 all=221199 active=11620 piece=▁합
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1297 size=1440 all=222798 active=13219 piece=▁Oh
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1281 size=1460 all=224360 active=14781 piece=시는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1263 size=1480 all=225428 active=15849 piece=▁아래
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1241 size=1500 all=226691 active=17112 piece=ss
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1241 min_freq=116
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1225 size=1520 all=227485 active=12056 piece=▁섹
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1199 size=1540 all=228231 active=12802 piece=으시
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1177 size=1560 all=229586 active=14157 piece=▁솔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1156 size=1580 all=230706 active=15277 piece=수가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1140 size=1600 all=232049 active=16620 piece=▁ab
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1140 min_freq=111
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1126 size=1620 all=233351 active=12890 piece=에서는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1107 size=1640 all=234454 active=13993 piece=▁허락
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1091 size=1660 all=236269 active=15808 piece=▁맘
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1076 size=1680 all=236916 active=16455 piece=ure
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1063 size=1700 all=237603 active=17142 piece=스럽
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=1062 min_freq=106
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1053 size=1720 all=238892 active=13037 piece=▁자들은
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1034 size=1740 all=240162 active=14307 piece=스크
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1024 size=1760 all=241360 active=15505 piece=▁됐어요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=1012 size=1780 all=242319 active=16464 piece=▁책임
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=994 size=1800 all=243364 active=17509 piece=▁욕
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=994 min_freq=103
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=983 size=1820 all=244367 active=13120 piece=▁현세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=970 size=1840 all=245627 active=14380 piece=▁세계
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=960 size=1860 all=246530 active=15283 piece=.0
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=949 size=1880 all=247995 active=16748 piece=▁out
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=940 size=1900 all=248915 active=17668 piece=실에
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=939 min_freq=99
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=930 size=1920 all=250076 active=13502 piece=▁뒤에
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=919 size=1940 all=251077 active=14503 piece=하러
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=909 size=1960 all=252127 active=15553 piece=▁그와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=901 size=1980 all=253190 active=16616 piece=▁하게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=891 size=2000 all=254251 active=17677 piece=▁오늘은
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=890 min_freq=96
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=880 size=2020 all=255207 active=13668 piece=▁있었던
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=871 size=2040 all=255967 active=14428 piece=▁흐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=864 size=2060 all=257434 active=15895 piece=디어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=852 size=2080 all=258922 active=17383 piece=▁우주
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=836 size=2100 all=259549 active=18010 piece=▁윌
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=836 min_freq=92
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=822 size=2120 all=260556 active=13945 piece=하실
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=815 size=2140 all=261763 active=15152 piece=▁닫
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=809 size=2160 all=262954 active=16343 piece=이었어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=800 size=2180 all=264170 active=17559 piece=▁베푸
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=791 size=2200 all=264779 active=18168 piece=셔서
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=790 min_freq=89
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=780 size=2220 all=266257 active=14659 piece=▁있잖아요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=774 size=2240 all=267095 active=15497 piece=▁이들은
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=766 size=2260 all=267791 active=16193 piece=▁캠
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=761 size=2280 all=268839 active=17241 piece=▁탐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=754 size=2300 all=269394 active=17796 piece=ck
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=754 min_freq=87
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=746 size=2320 all=270495 active=14527 piece=▁흘
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=741 size=2340 all=271160 active=15192 piece=▁인턴
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=733 size=2360 all=272377 active=16409 piece=▁거래
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=724 size=2380 all=273303 active=17335 piece=▁어쨌든
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=717 size=2400 all=274497 active=18529 piece=▁일단
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=717 min_freq=84
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=709 size=2420 all=275265 active=14491 piece=▁삭
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=701 size=2440 all=275987 active=15213 piece=▁su
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=695 size=2460 all=276541 active=15767 piece=▁뭐라
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=688 size=2480 all=277542 active=16768 piece=▁운전
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=684 size=2500 all=278557 active=17783 piece=▁느껴
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=684 min_freq=82
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=680 size=2520 all=279476 active=14815 piece=▁need
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=673 size=2540 all=280217 active=15556 piece=▁지역
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=664 size=2560 all=280927 active=16266 piece=▁그분께
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=657 size=2580 all=281678 active=17017 piece=pen
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=650 size=2600 all=282601 active=17940 piece=▁요원
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=650 min_freq=79
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=643 size=2620 all=283280 active=14759 piece=▁불가능
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=637 size=2640 all=284008 active=15487 piece=▁없잖아
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=631 size=2660 all=284691 active=16170 piece=ont
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=626 size=2680 all=285413 active=16892 piece=▁살고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=619 size=2700 all=286145 active=17624 piece=▁time
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=618 min_freq=78
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=611 size=2720 all=287008 active=15164 piece=▁부터
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=604 size=2740 all=287527 active=15683 piece=▁깜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=598 size=2760 all=288582 active=16738 piece=ven
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=594 size=2780 all=289433 active=17589 piece=움을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=590 size=2800 all=290882 active=19038 piece=▁든
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=590 min_freq=75
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=587 size=2820 all=291668 active=15310 piece=▁소유
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=582 size=2840 all=292400 active=16042 piece=한텐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=577 size=2860 all=292918 active=16560 piece=▁어쩔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=571 size=2880 all=293510 active=17152 piece=▁동의
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=566 size=2900 all=294485 active=18127 piece=▁u
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=566 min_freq=74
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=562 size=2920 all=295272 active=15499 piece=▁구해
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=557 size=2940 all=295992 active=16219 piece=▁만나서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=551 size=2960 all=296899 active=17126 piece=▁총을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=547 size=2980 all=297583 active=17810 piece=▁하느뇨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=540 size=3000 all=298624 active=18851 piece=▁가질
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=539 min_freq=72
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=535 size=3020 all=299231 active=15511 piece=▁아무런
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=529 size=3040 all=299646 active=15926 piece=▁잘됐
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=525 size=3060 all=300790 active=17070 piece=감을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=522 size=3080 all=301624 active=17904 piece=▁된다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=518 size=3100 all=302222 active=18502 piece=▁값
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=518 min_freq=71
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=515 size=3120 all=302908 active=15768 piece=▁나타나
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=511 size=3140 all=303848 active=16708 piece=▁이래
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=507 size=3160 all=304491 active=17351 piece=▁보통
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=504 size=3180 all=305149 active=18009 piece=▁자세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=500 size=3200 all=306023 active=18883 piece=▁생각해요
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=499 min_freq=69
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=495 size=3220 all=306598 active=15877 piece=▁주소
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=492 size=3240 all=307101 active=16380 piece=대는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=490 size=3260 all=308004 active=17283 piece=▁가져가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=486 size=3280 all=308715 active=17994 piece=으라
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=480 size=3300 all=309634 active=18913 piece=하진
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=480 min_freq=68
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=477 size=3320 all=310283 active=15933 piece=▁상황이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=474 size=3340 all=310881 active=16531 piece=으니까요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=471 size=3360 all=311747 active=17397 piece=부를
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=468 size=3380 all=312723 active=18373 piece=wn
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=465 size=3400 all=313335 active=18985 piece=▁흑
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=465 min_freq=66
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=462 size=3420 all=314104 active=16395 piece=명한
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=458 size=3440 all=314703 active=16994 piece=▁밝혀
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=455 size=3460 all=315558 active=17849 piece=키는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=453 size=3480 all=316534 active=18825 piece=ings
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=450 size=3500 all=317420 active=19711 piece=▁메세
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=450 min_freq=65
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=447 size=3520 all=318261 active=16702 piece=▁동물
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=444 size=3540 all=318931 active=17372 piece=▁건지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=442 size=3560 all=319442 active=17883 piece=▁used
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=439 size=3580 all=320237 active=18678 piece=▁있으니까
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=435 size=3600 all=320798 active=19239 piece=▁불행
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=435 min_freq=63
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=432 size=3620 all=321373 active=16577 piece=▁송
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=430 size=3640 all=321887 active=17091 piece=는거야
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=427 size=3660 all=322532 active=17736 piece=tt
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=425 size=3680 all=323108 active=18312 piece=년에
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=423 size=3700 all=323939 active=19143 piece=▁똑같은
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=422 min_freq=62
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=419 size=3720 all=324679 active=16925 piece=▁충만하시니라
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=416 size=3740 all=325378 active=17624 piece=▁미치
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=414 size=3760 all=325902 active=18148 piece=▁길에서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=411 size=3780 all=326722 active=18968 piece=▁av
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=408 size=3800 all=327150 active=19396 piece=▁x
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=408 min_freq=61
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=405 size=3820 all=327874 active=17064 piece=▁르
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=403 size=3840 all=328228 active=17418 piece=▁센
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=401 size=3860 all=329204 active=18394 piece=▁사랑을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=398 size=3880 all=329744 active=18934 piece=▁옷을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=396 size=3900 all=330405 active=19595 piece=▁까지
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=396 min_freq=60
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=395 size=3920 all=330932 active=17034 piece=▁우리와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=393 size=3940 all=331613 active=17715 piece=▁결혼식
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=391 size=3960 all=332102 active=18204 piece=▁같다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=389 size=3980 all=332657 active=18759 piece=▁드디어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=387 size=4000 all=333080 active=19182 piece=▁컴퓨터
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=387 min_freq=59
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=384 size=4020 all=333595 active=17140 piece=▁거리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=382 size=4040 all=334299 active=17844 piece=해야지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=380 size=4060 all=335107 active=18652 piece=▁목소
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=378 size=4080 all=335602 active=19147 piece=▁뿌
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=377 size=4100 all=336490 active=20035 piece=▁의사가
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=376 min_freq=58
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=375 size=4120 all=337358 active=17692 piece=▁응급
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=373 size=4140 all=337654 active=17988 piece=).
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=371 size=4160 all=338136 active=18470 piece=▁섹시
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=368 size=4180 all=338812 active=19146 piece=일세
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=367 size=4200 all=339569 active=19903 piece=께서는
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=367 min_freq=57
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=365 size=4220 all=340267 active=17621 piece=▁않도록
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=363 size=4240 all=340815 active=18169 piece=▁new
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=361 size=4260 all=341160 active=18514 piece=▁핵
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=360 size=4280 all=342023 active=19377 piece=났어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=358 size=4300 all=342634 active=19988 piece=ody
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=358 min_freq=56
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=357 size=4320 all=343124 active=17611 piece=▁속에서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=355 size=4340 all=343687 active=18174 piece=▁있거든
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=353 size=4360 all=344231 active=18718 piece=번호
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=352 size=4380 all=344801 active=19288 piece=ield
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=350 size=4400 all=345560 active=20047 piece=▁증오
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=350 min_freq=55
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=348 size=4420 all=346191 active=17860 piece=어질
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=345 size=4440 all=346878 active=18547 piece=ec
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=343 size=4460 all=347771 active=19440 piece=▁부러
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=341 size=4480 all=348133 active=19802 piece=▁전지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=339 size=4500 all=348738 active=20407 piece=▁가정
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=339 min_freq=54
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=338 size=4520 all=349580 active=18218 piece=▁which
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=336 size=4540 all=350226 active=18864 piece=▁몇몇
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=335 size=4560 all=351157 active=19795 piece=▁밖에서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=333 size=4580 all=351527 active=20165 piece=▁친척
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=331 size=4600 all=352148 active=20786 piece=▁찾은
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=331 min_freq=53
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=329 size=4620 all=352780 active=18224 piece=▁내려가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=327 size=4640 all=353229 active=18673 piece=▁대지를
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=325 size=4660 all=353849 active=19293 piece=▁올바른
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=323 size=4680 all=354393 active=19837 piece=하다는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=320 size=4700 all=354992 active=20436 piece=았지
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=320 min_freq=52
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=319 size=4720 all=355676 active=18376 piece=▁해변
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=317 size=4740 all=356335 active=19035 piece=▁누르
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=316 size=4760 all=357247 active=19947 piece=▁따라와
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=314 size=4780 all=357630 active=20330 piece=▁슬프
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=312 size=4800 all=358224 active=20924 piece=증을
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=312 min_freq=52
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=310 size=4820 all=359200 active=18829 piece=▁좋다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=308 size=4840 all=359625 active=19254 piece=▁어딜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=307 size=4860 all=360056 active=19685 piece=▁식으로
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=305 size=4880 all=360599 active=20228 piece=▁곳은
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=303 size=4900 all=361188 active=20817 piece=▁마련
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=303 min_freq=51
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=302 size=4920 all=361764 active=18580 piece=▁없다면
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=300 size=4940 all=362364 active=19180 piece=▁수고
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=298 size=4960 all=362935 active=19751 piece=셔도
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=297 size=4980 all=363568 active=20384 piece=▁where
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=295 size=5000 all=364085 active=20901 piece=▁고통을
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=295 min_freq=50
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=293 size=5020 all=364498 active=18616 piece=와서
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=292 size=5040 all=364963 active=19081 piece=▁어차피
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=291 size=5060 all=365459 active=19577 piece=▁item
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=289 size=5080 all=366064 active=20182 piece=▁스튜
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=287 size=5100 all=366576 active=20694 piece=▁ke
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=287 min_freq=49
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=286 size=5120 all=367161 active=18910 piece=▁대니
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=285 size=5140 all=367651 active=19400 piece=▁이제는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=283 size=5160 all=368329 active=20078 piece=▁부복
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=282 size=5180 all=369002 active=20751 piece=▁걸요
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=281 size=5200 all=369539 active=21288 piece=▁자리에
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=281 min_freq=48
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=279 size=5220 all=369859 active=18795 piece=▁맺
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=278 size=5240 all=370562 active=19498 piece=▁클리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=277 size=5260 all=370970 active=19906 piece=▁중재
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=275 size=5280 all=371353 active=20289 piece=디슨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=274 size=5300 all=372007 active=20943 piece=졌다
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=274 min_freq=48
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=273 size=5320 all=372463 active=18981 piece=onst
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=272 size=5340 all=372911 active=19429 piece=▁찾기
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=270 size=5360 all=373265 active=19783 piece=ru
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=269 size=5380 all=373834 active=20352 piece=▁사랑하지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=267 size=5400 all=374215 active=20733 piece=▁벌어
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=267 min_freq=47
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=266 size=5420 all=374782 active=19245 piece=집니다
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=265 size=5440 all=375582 active=20045 piece=▁느낄
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=264 size=5460 all=376219 active=20682 piece=한테도
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=263 size=5480 all=376619 active=21082 piece=▁재능
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=262 size=5500 all=377327 active=21790 piece=▁흔들
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=262 min_freq=47
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=261 size=5520 all=378118 active=19612 piece=nder
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=260 size=5540 all=378470 active=19964 piece=▁read
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=258 size=5560 all=378823 active=20317 piece=▁퀸
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=257 size=5580 all=379475 active=20969 piece=▁오게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=256 size=5600 all=379938 active=21432 piece=▁감독
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=256 min_freq=46
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=255 size=5620 all=380562 active=19569 piece=▁인터뷰
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=254 size=5640 all=381178 active=20185 piece=▁리처드
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=252 size=5660 all=381736 active=20743 piece=▁밑에
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=251 size=5680 all=382360 active=21367 piece=▁여깄
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=250 size=5700 all=382924 active=21931 piece=▁고맙다
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=250 min_freq=45
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=249 size=5720 all=383381 active=19601 piece=▁있겠죠
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=247 size=5740 all=383740 active=19960 piece=▁거만
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=246 size=5760 all=384299 active=20519 piece=하기로
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=245 size=5780 all=384677 active=20897 piece=스토
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=244 size=5800 all=385143 active=21363 piece=▁닦
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=244 min_freq=45
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=244 size=5820 all=385533 active=19612 piece=▁거역하겠느뇨
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=242 size=5840 all=385854 active=19933 piece=질이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=242 size=5860 all=386363 active=20442 piece=▁뭐든지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=241 size=5880 all=386733 active=20812 piece=▁해야할
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=239 size=5900 all=387392 active=21471 piece=성에
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=239 min_freq=44
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=238 size=5920 all=387966 active=19881 piece=▁잠을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=237 size=5940 all=388450 active=20365 piece=▁내부
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=236 size=5960 all=389118 active=21033 piece=▁오스
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=235 size=5980 all=389542 active=21457 piece=▁불어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=234 size=6000 all=389857 active=21772 piece=▁마르
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=234 min_freq=44
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=233 size=6020 all=390277 active=19874 piece=수술
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=232 size=6040 all=390707 active=20304 piece=▁찔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=232 size=6060 all=390929 active=20526 piece=▁그분에게
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=231 size=6080 all=391428 active=21025 piece=arch
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=230 size=6100 all=392011 active=21608 piece=시니라
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=230 min_freq=43
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=229 size=6120 all=392386 active=19963 piece=▁배열
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=228 size=6140 all=392834 active=20411 piece=▁연설
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=227 size=6160 all=393240 active=20817 piece=▁찍어
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=226 size=6180 all=393549 active=21126 piece=던지
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=226 size=6200 all=394109 active=21686 piece=onstellation
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=225 min_freq=42
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=225 size=6220 all=394519 active=20114 piece=▁pass
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=224 size=6240 all=395023 active=20618 piece=▁된다는
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=223 size=6260 all=395489 active=21084 piece=▁어느날
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=222 size=6280 all=395976 active=21571 piece=▁도망가
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=221 size=6300 all=396705 active=22300 piece=▁conf
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=221 min_freq=42
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=220 size=6320 all=397322 active=20438 piece=▁제목
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=219 size=6340 all=397827 active=20943 piece=▁고생
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=218 size=6360 all=398390 active=21506 piece=나도
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=218 size=6380 all=398985 active=22101 piece=▁내려놔
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=217 size=6400 all=399386 active=22502 piece=▁주민
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=217 min_freq=41
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=216 size=6420 all=399839 active=20397 piece=▁술을
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=215 size=6440 all=400424 active=20982 piece=▁열매
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=214 size=6460 all=401107 active=21665 piece=▁가리
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=213 size=6480 all=401735 active=22293 piece=음이
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=212 size=6500 all=402185 active=22743 piece=수로
bpe_model_trainer.cc(159) LOG(INFO) Updating active symbols. max_freq=212 min_freq=41
bpe_model_trainer.cc(268) LOG(INFO) Added: freq=212 size=6520 all=402739 active=20598 piece=▁사람한테
trainer_interface.cc(689) LOG(INFO) Saving model: ./data/spm/spm_ko.model
trainer_interface.cc(701) LOG(INFO) Saving vocabs: ./data/spm/spm_ko.vocab
In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
# 1. Dataset
class TranslationDataset(Dataset):
def __init__(self, data, sp_en, sp_ko, max_len=50):
self.data = data
self.sp_en = sp_en
self.sp_ko = sp_ko
self.max_len = max_len
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
pair = self.data[idx]["translation"]
# 인코딩 + <eos> 추가, max_len 제한
en_ids = self.sp_en.encode(pair["en"])[:self.max_len] + [3] # 3=<eos>
ko_ids = [2] + self.sp_ko.encode(pair["ko"])[:self.max_len] + [3] # 2=<sos>, 3=<eos>
return torch.tensor(en_ids), torch.tensor(ko_ids)
# 2. 패딩 collate 함수
def collate_fn(batch):
en_batch, ko_batch = zip(*batch)
en_batch = pad_sequence(en_batch, batch_first=True, padding_value=0) # 0=<pad>
ko_batch = pad_sequence(ko_batch, batch_first=True, padding_value=0)
return en_batch, ko_batch
# 3. DataLoader
train_dataset = TranslationDataset(ds["train"].select(range(100000)), sp_en, sp_ko)
valid_dataset = TranslationDataset(ds["validation"], sp_en, sp_ko)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(valid_dataset, batch_size=256, shuffle=False, collate_fn=collate_fn)
# 4. 확인
en_batch, ko_batch = next(iter(train_loader))
print("영어 배치 shape:", en_batch.shape) # (64, max_len)
print("한국어 배치 shape:", ko_batch.shape)
print("\n영어 샘플:", en_batch[0])
print("한국어 샘플:", ko_batch[0])
영어 배치 shape: torch.Size([256, 51])
한국어 배치 shape: torch.Size([256, 52])
영어 샘플: tensor([ 474, 7939, 7924, 94, 125, 7929, 3, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0])
한국어 샘플: tensor([ 2, 6169, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0])
In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# 1. Encoder
class Encoder(nn.Module):
def __init__(self, vocab_size, embed_dim, hidden_dim, n_layers, dropout):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
self.lstm = nn.LSTM(embed_dim, hidden_dim, n_layers,
batch_first=True, dropout=dropout)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
# x: (batch, src_len)
embedded = self.dropout(self.embedding(x)) # (batch, src_len, embed_dim)
outputs, (hidden, cell) = self.lstm(embedded) # outputs: (batch, src_len, hidden_dim)
return outputs, hidden, cell
# 2. Attention
class Attention(nn.Module):
def __init__(self, hidden_dim):
super().__init__()
self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
self.v = nn.Linear(hidden_dim, 1, bias=False)
def forward(self, hidden, encoder_outputs):
# hidden: (batch, hidden_dim) — 디코더 현재 hidden
# encoder_outputs: (batch, src_len, hidden_dim)
src_len = encoder_outputs.shape[1]
hidden = hidden.unsqueeze(1).repeat(1, src_len, 1) # (batch, src_len, hidden_dim)
energy = torch.tanh(self.attn(torch.cat([hidden, encoder_outputs], dim=2)))
attention = self.v(energy).squeeze(2) # (batch, src_len)
return F.softmax(attention, dim=1) # 가중치 합=1
# 3. Decoder
class Decoder(nn.Module):
def __init__(self, vocab_size, embed_dim, hidden_dim, n_layers, dropout, attention):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
self.attention = attention
self.lstm = nn.LSTM(embed_dim + hidden_dim, hidden_dim, n_layers,
batch_first=True, dropout=dropout)
self.fc_out = nn.Linear(hidden_dim * 2, vocab_size)
self.dropout = nn.Dropout(dropout)
def forward(self, trg, hidden, cell, encoder_outputs):
# trg: (batch,) — 현재 토큰 하나
trg = trg.unsqueeze(1) # (batch, 1)
embedded = self.dropout(self.embedding(trg)) # (batch, 1, embed_dim)
attn_weights = self.attention(hidden[-1], encoder_outputs) # (batch, src_len)
attn_weights = attn_weights.unsqueeze(1) # (batch, 1, src_len)
context = torch.bmm(attn_weights, encoder_outputs) # (batch, 1, hidden_dim)
lstm_input = torch.cat([embedded, context], dim=2) # (batch, 1, embed_dim+hidden_dim)
output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
prediction = self.fc_out(torch.cat([output, context], dim=2).squeeze(1)) # (batch, vocab_size)
return prediction, hidden, cell
# 4. Seq2Seq
class Seq2Seq(nn.Module):
def __init__(self, encoder, decoder, device):
super().__init__()
self.encoder = encoder
self.decoder = decoder
self.device = device
def forward(self, src, trg, teacher_forcing_ratio=0.5):
# src: (batch, src_len), trg: (batch, trg_len)
batch_size = src.shape[0]
trg_len = trg.shape[1]
trg_vocab_size = self.decoder.fc_out.out_features
outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(self.device)
encoder_outputs, hidden, cell = self.encoder(src)
trg_input = trg[:, 0] # <sos>
for t in range(1, trg_len):
prediction, hidden, cell = self.decoder(trg_input, hidden, cell, encoder_outputs)
outputs[:, t, :] = prediction
# Teacher Forcing: 50% 확률로 정답을 다음 입력으로
teacher_force = torch.rand(1).item() < teacher_forcing_ratio
trg_input = trg[:, t] if teacher_force else prediction.argmax(1)
return outputs
# 5. 모델 초기화
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
EMBED_DIM = 256
HIDDEN_DIM = 512
N_LAYERS = 2
DROPOUT = 0.5
EN_VOCAB = sp_en.get_piece_size() # 8000
KO_VOCAB = sp_ko.get_piece_size() # 8000
attn = Attention(HIDDEN_DIM)
encoder = Encoder(EN_VOCAB, EMBED_DIM, HIDDEN_DIM, N_LAYERS, DROPOUT)
decoder = Decoder(KO_VOCAB, EMBED_DIM, HIDDEN_DIM, N_LAYERS, DROPOUT, attn)
model = Seq2Seq(encoder, decoder, device).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss(ignore_index=0)
print(f"모델 파라미터 수: {sum(p.numel() for p in model.parameters()):,}")
print(f"device: {device}")
모델 파라미터 수: 21,226,304 device: cuda
In [24]:
from tqdm import tqdm
def train_epoch(model, loader, optimizer, criterion, clip=1.0):
model.train()
epoch_loss = 0
for src, trg in tqdm(loader, desc="Train", leave=False):
src, trg = src.to(device), trg.to(device)
optimizer.zero_grad()
output = model(src, trg)
output = output[:, 1:, :].reshape(-1, KO_VOCAB)
trg = trg[:, 1:].reshape(-1)
loss = criterion(output, trg)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
epoch_loss += loss.item()
return epoch_loss / len(loader)
def valid_epoch(model, loader, criterion):
model.eval()
epoch_loss = 0
with torch.no_grad():
for src, trg in tqdm(loader, desc="Valid", leave=False):
src, trg = src.to(device), trg.to(device)
output = model(src, trg, teacher_forcing_ratio=0.0)
output = output[:, 1:, :].reshape(-1, KO_VOCAB)
trg = trg[:, 1:].reshape(-1)
loss = criterion(output, trg)
epoch_loss += loss.item()
return epoch_loss / len(loader)
N_EPOCHS = 10
best_valid_loss = float("inf")
history = {"train_loss": [], "valid_loss": []}
for epoch in range(N_EPOCHS):
train_loss = train_epoch(model, train_loader, optimizer, criterion)
valid_loss = valid_epoch(model, valid_loader, criterion)
history["train_loss"].append(train_loss)
history["valid_loss"].append(valid_loss)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model.state_dict(), "./data/seq2seq_best.pt")
saved = "✅"
else:
saved = ""
print(f"Epoch {epoch+1:02d} | Train: {train_loss:.3f} | Valid: {valid_loss:.3f} {saved}")
Epoch 01 | Train: 7.060 | Valid: 6.944 ✅
Epoch 02 | Train: 6.587 | Valid: 6.816 ✅
Epoch 03 | Train: 6.320 | Valid: 6.725 ✅
Epoch 04 | Train: 6.105 | Valid: 6.712 ✅
Epoch 05 | Train: 5.924 | Valid: 6.710 ✅
Epoch 06 | Train: 5.765 | Valid: 6.747
Epoch 07 | Train: 5.608 | Valid: 6.746
Epoch 08 | Train: 5.467 | Valid: 6.815
Epoch 09 | Train: 5.332 | Valid: 6.881
Epoch 10 | Train: 5.191 | Valid: 6.919
In [40]:
import matplotlib.pyplot as plt
plt.figure(figsize=(16, 4))
plt.plot(history["train_loss"], label="Train")
plt.plot(history["valid_loss"], label="Valid")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Seq2Seq Training History")
plt.legend()
plt.tight_layout()
plt.show()
In [26]:
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
import matplotlib.pyplot as plt
# 1. 번역 함수
def translate(model, sentence, sp_en, sp_ko, device, max_len=50):
model.eval()
tokens = sp_en.encode(sentence) + [3] # <eos>
src = torch.tensor(tokens).unsqueeze(0).to(device)
with torch.no_grad():
encoder_outputs, hidden, cell = model.encoder(src)
trg_input = torch.tensor([2]).to(device) # <sos>
result = []
for _ in range(max_len):
with torch.no_grad():
output, hidden, cell = model.decoder(trg_input, hidden, cell, encoder_outputs)
pred_token = output.argmax(1).item()
if pred_token == 3: # <eos>
break
result.append(pred_token)
trg_input = torch.tensor([pred_token]).to(device)
return sp_ko.decode(result)
# 2. BLEU 평가
def evaluate_bleu(model, dataset, sp_en, sp_ko, device, n=500):
refs, hyps = [], []
for i in range(min(n, len(dataset))):
pair = dataset.data[i]["translation"]
pred = translate(model, pair["en"], sp_en, sp_ko, device)
ref = sp_ko.encode(pair["ko"], out_type=str)
hyp = sp_ko.encode(pred, out_type=str)
refs.append([ref])
hyps.append(hyp)
return corpus_bleu(refs, hyps) * 100
bleu = evaluate_bleu(model, valid_dataset, sp_en, sp_ko, device)
print(f"BLEU Score: {bleu:.2f}")
# 3. 샘플 번역 5개
print("\n--- 샘플 번역 ---")
for i in range(5):
pair = valid_dataset.data[i]["translation"]
pred = translate(model, pair["en"], sp_en, sp_ko, device)
print(f"EN: {pair['en']}")
print(f"정답: {pair['ko']}")
print(f"예측: {pred}")
print()
# 4. 시각화
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
axes[0].plot(history["train_loss"], label="Train")
axes[0].plot(history["valid_loss"], label="Valid")
axes[0].set_title("Training History")
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Loss")
axes[0].legend()
axes[1].bar(["BLEU"], [bleu], color="steelblue")
axes[1].set_ylim(0, 100)
axes[1].set_title("BLEU Score")
axes[1].set_ylabel("Score")
for i, v in enumerate([bleu]):
axes[1].text(i, v + 1, f"{v:.2f}", ha="center")
plt.tight_layout()
plt.show()
BLEU Score: 0.00 --- 샘플 번역 --- EN: Yeah, a lot of it. 정답: 네, 무척요. 예측: 그래, 그거 다 EN: I'll set up some tests. Shep, 정답: 날 뚫어지게 쳐다보는데 그만 해요 예측: 제가 커피하겠습니다 EN: Look, I don't like it any more than you do, but if you help me, I promise to keep you safe. 정답: 이봐 나도 너만큼 안 내켜 그래도 날 도우면 내가 보호해주지 예측: 내가,,, 내가 별로 더하지 않겠지만 하지만 내가 도와드릴 수 EN: Like, what does that even mean? 정답: 뭔 뜻이야? 예측: 무슨 뜻이죠? EN: She becomes the story. 정답: 리즈가 영웅이 되고 있어요 예측: 그녀는에에 대해
Transformer¶
"Attention Is All You Need" - Google, 2017
LSTM을 완전히 제거하고 Attention만으로 인코더-디코더를 구성한 모델.
Seq2Seq와 비교¶
| Seq2Seq (LSTM) | Transformer | |
|---|---|---|
| 핵심 구조 | LSTM + Attention | Attention only |
| 처리 방식 | 순차 (느림) | 병렬 (빠름) |
| 장거리 의존성 | 약함 | 강함 |
| GPU 활용 | 낮음 | 높음 |
핵심 개념 3가지¶
1. Self-Attention 문장 내 모든 단어가 서로를 직접 참조 → "it"이 무엇을 가리키는지 문장 전체를 보고 판단
2. Multi-Head Attention Attention을 여러 개 병렬로 실행 → 다양한 관점에서 동시에 문맥 파악
3. Positional Encoding 순서 정보가 없는 Attention에 위치 정보를 주입 → LSTM은 순차처리라 자동으로 순서를 알지만, Transformer는 별도로 알려줘야 함
In [27]:
import torch
import torch.nn as nn
import math
# 1. Positional Encoding
class PositionalEncoding(nn.Module):
def __init__(self, embed_dim, dropout=0.1, max_len=5000):
super().__init__()
self.dropout = nn.Dropout(dropout)
pe = torch.zeros(max_len, embed_dim)
position = torch.arange(0, max_len).unsqueeze(1).float()
div_term = torch.exp(torch.arange(0, embed_dim, 2).float() * (-math.log(10000.0) / embed_dim))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0) # (1, max_len, embed_dim)
self.register_buffer('pe', pe)
def forward(self, x):
x = x + self.pe[:, :x.size(1)]
return self.dropout(x)
# 2. Transformer 모델
class TransformerModel(nn.Module):
def __init__(self, src_vocab, trg_vocab, embed_dim, n_heads,
n_encoder_layers, n_decoder_layers, ff_dim, dropout):
super().__init__()
self.src_embedding = nn.Embedding(src_vocab, embed_dim, padding_idx=0)
self.trg_embedding = nn.Embedding(trg_vocab, embed_dim, padding_idx=0)
self.pos_encoding = PositionalEncoding(embed_dim, dropout)
self.transformer = nn.Transformer(
d_model=embed_dim,
nhead=n_heads,
num_encoder_layers=n_encoder_layers,
num_decoder_layers=n_decoder_layers,
dim_feedforward=ff_dim,
dropout=dropout,
batch_first=True
)
self.fc_out = nn.Linear(embed_dim, trg_vocab)
self.scale = math.sqrt(embed_dim)
def make_pad_mask(self, x):
# 패딩 위치 True
return x == 0
def forward(self, src, trg):
src_pad_mask = self.make_pad_mask(src) # (batch, src_len)
trg_pad_mask = self.make_pad_mask(trg) # (batch, trg_len)
trg_len = trg.size(1)
# Causal mask: 미래 토큰 못 보게
causal_mask = nn.Transformer.generate_square_subsequent_mask(trg_len).to(src.device)
src_emb = self.pos_encoding(self.src_embedding(src) * self.scale)
trg_emb = self.pos_encoding(self.trg_embedding(trg) * self.scale)
output = self.transformer(
src_emb, trg_emb,
tgt_mask=causal_mask,
src_key_padding_mask=src_pad_mask,
tgt_key_padding_mask=trg_pad_mask,
memory_key_padding_mask=src_pad_mask
)
return self.fc_out(output) # (batch, trg_len, trg_vocab)
# 3. 모델 초기화
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_tf = TransformerModel(
src_vocab=EN_VOCAB,
trg_vocab=KO_VOCAB,
embed_dim=256,
n_heads=8,
n_encoder_layers=3,
n_decoder_layers=3,
ff_dim=512,
dropout=0.1
).to(device)
print(f"파라미터 수: {sum(p.numel() for p in model_tf.parameters()):,}")
print(f"device: {device}")
파라미터 수: 10,106,688 device: cuda
In [28]:
# 1. 옵티마이저 & 손실함수
criterion_tf = nn.CrossEntropyLoss(ignore_index=0)
optimizer_tf = optim.Adam(model_tf.parameters(), lr=0.0001)
# 2. 학습/검증 함수
def train_epoch_tf(model, loader, optimizer, criterion):
model.train()
epoch_loss = 0
for src, trg in tqdm(loader, desc="Train", leave=False):
src, trg = src.to(device), trg.to(device)
trg_input = trg[:, :-1]
trg_label = trg[:, 1:]
optimizer.zero_grad()
output = model(src, trg_input)
output = output.reshape(-1, KO_VOCAB)
loss = criterion(output, trg_label.reshape(-1))
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
epoch_loss += loss.item()
return epoch_loss / len(loader)
def valid_epoch_tf(model, loader, criterion):
model.eval()
epoch_loss = 0
with torch.no_grad():
for src, trg in tqdm(loader, desc="Valid", leave=False):
src, trg = src.to(device), trg.to(device)
trg_input = trg[:, :-1]
trg_label = trg[:, 1:]
output = model(src, trg_input)
output = output.reshape(-1, KO_VOCAB)
loss = criterion(output, trg_label.reshape(-1))
epoch_loss += loss.item()
return epoch_loss / len(loader)
# 3. 학습 루프
N_EPOCHS = 10
best_valid_loss = float("inf")
history_tf = {"train_loss": [], "valid_loss": []}
for epoch in range(N_EPOCHS):
train_loss = train_epoch_tf(model_tf, train_loader, optimizer_tf, criterion_tf)
valid_loss = valid_epoch_tf(model_tf, valid_loader, criterion_tf)
history_tf["train_loss"].append(train_loss)
history_tf["valid_loss"].append(valid_loss)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model_tf.state_dict(), "./data/transformer_best.pt")
saved = "✅"
else:
saved = ""
print(f"Epoch {epoch+1:02d} | Train: {train_loss:.3f} | Valid: {valid_loss:.3f} {saved}")
Epoch 01 | Train: 7.331 | Valid: 7.014 ✅
Epoch 02 | Train: 6.884 | Valid: 6.778 ✅
Epoch 03 | Train: 6.675 | Valid: 6.623 ✅
Epoch 04 | Train: 6.526 | Valid: 6.495 ✅
Epoch 05 | Train: 6.404 | Valid: 6.384 ✅
Epoch 06 | Train: 6.289 | Valid: 6.273 ✅
Epoch 07 | Train: 6.178 | Valid: 6.164 ✅
Epoch 08 | Train: 6.070 | Valid: 6.068 ✅
Epoch 09 | Train: 5.966 | Valid: 5.986 ✅
Epoch 10 | Train: 5.865 | Valid: 5.900 ✅
In [39]:
import matplotlib.pyplot as plt
plt.figure(figsize=(16, 4))
plt.plot(history["train_loss"], label="Train")
plt.plot(history["valid_loss"], label="Valid")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Seq2Seq Training History")
plt.legend()
plt.tight_layout()
plt.show()
In [37]:
from nltk.translate.bleu_score import corpus_bleu
import matplotlib.pyplot as plt
# 1. 번역 함수
def translate_tf(model, sentence, sp_en, sp_ko, device, max_len=50):
model.eval()
tokens = sp_en.encode(sentence) + [3]
src = torch.tensor(tokens).unsqueeze(0).to(device)
trg = torch.tensor([[2]]).to(device) # <sos>
for _ in range(max_len):
with torch.no_grad():
output = model(src, trg)
pred_token = output[:, -1, :].argmax(1).item()
if pred_token == 3: # <eos>
break
trg = torch.cat([trg, torch.tensor([[pred_token]]).to(device)], dim=1)
return sp_ko.decode(trg.squeeze(0)[1:].tolist())
# 2. BLEU 평가
def evaluate_bleu_tf(model, dataset, sp_en, sp_ko, device, n=500):
refs, hyps = [], []
for i in range(min(n, len(dataset))):
pair = dataset.data[i]["translation"]
pred = translate_tf(model, pair["en"], sp_en, sp_ko, device)
ref = sp_ko.encode(pair["ko"], out_type=str)
hyp = sp_ko.encode(pred, out_type=str)
refs.append([ref])
hyps.append(hyp)
return corpus_bleu(refs, hyps) * 100
bleu_tf = evaluate_bleu_tf(model_tf, valid_dataset, sp_en, sp_ko, device)
print(f"BLEU Score: {bleu_tf:.2f}")
# 3. 샘플 번역 5개
print("\n--- 샘플 번역 ---")
for i in range(5):
pair = valid_dataset.data[i]["translation"]
pred = translate_tf(model_tf, pair["en"], sp_en, sp_ko, device)
print(f"EN: {pair['en']}")
print(f"정답: {pair['ko']}")
print(f"예측: {pred}")
print()
# 4. 시각화
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
axes[0].plot(history_tf["train_loss"], label="Train")
axes[0].plot(history_tf["valid_loss"], label="Valid")
axes[0].set_title("Transformer Training History")
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Loss")
axes[0].legend()
axes[1].bar(["BLEU"], [bleu_tf], color="coral")
axes[1].set_ylim(0, 100)
axes[1].set_title("BLEU Score")
axes[1].set_ylabel("Score")
axes[1].text(0, bleu_tf + 1, f"{bleu_tf:.2f}", ha="center")
plt.tight_layout()
plt.show()
BLEU Score: 0.00 --- 샘플 번역 --- EN: Yeah, a lot of it. 정답: 네, 무척요. 예측: - 그래, 네, 네 EN: I'll set up some tests. Shep, 정답: 날 뚫어지게 쳐다보는데 그만 해요 예측: - 난 그냥 ⁇ 고, 난 그냥 ⁇ 고 EN: Look, I don't like it any more than you do, but if you help me, I promise to keep you safe. 정답: 이봐 나도 너만큼 안 내켜 그래도 날 도우면 내가 보호해주지 예측: 내가 당신이 당신을 널 못 했으면 EN: Like, what does that even mean? 정답: 뭔 뜻이야? 예측: - 무슨 일이 있어? EN: She becomes the story. 정답: 리즈가 영웅이 되고 있어요 예측: - 그 사람야