In [1]:
################################################################################
# 랜덤 시드 값 설정
################################################################################
import os
# os.environ['PYTHONHASHSEED'] = '0'
# os.environ['CUDA_VISIBLE_DEVICES'] = ''
# import numpy as np
# import tensorflow as tf
# import random
# seed_value = 42
# np.random.seed(seed_value)
# tf.random.set_seed(seed_value)
# random.seed(seed_value)
################################################################################
In [2]:
cuda_version = 'Cuda not installed'
try:
import pycuda.driver as cuda
import pycuda.autoinit
# CUDA 장치가 있는지 확인 후 초기화
cuda.init()
if cuda.Device.count() > 0:
version = cuda.get_version()
cuda_version = f'CUDA Version: {version[0]}.{version[1]}'
else:
cuda_version = 'No CUDA-capable device found'
except ImportError:
cuda_version = 'pycuda not installed'
except Exception as e:
cuda_version = f'{str(e)}'
In [3]:
import sys
import keras
import tensorflow as tf
import numpy as np
import matplotlib
print("-"*80)
print(f"Python version : {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
print("Keras version : " + keras.__version__)
print("Tensorflow version : " + tf.__version__)
print(f"CUDA version : {cuda_version}")
print(f"Numpy version : {np.__version__}")
print("Matplotlib version: " + matplotlib.__version__)
print("-"*80)
2024-11-17 21:08:08.787626: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2024-11-17 21:08:08.976450: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-11-17 21:08:08.976484: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-11-17 21:08:09.005488: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2024-11-17 21:08:09.065900: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-11-17 21:08:09.836962: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
-------------------------------------------------------------------------------- Python version : 3.11.10 Keras version : 2.15.0 Tensorflow version : 2.15.0 CUDA version : CUDA Version: 12.2 Numpy version : 1.26.4 Matplotlib version: 3.9.2 --------------------------------------------------------------------------------
- 1-2. 패키지 의존성확인
pip freeze > requirements.txt
pip install -r requirements.txt
# requirements.txt 파일에서 file:// 경로를 가진 줄을 삭제하는 스크립트
with open('requirements.txt', 'r') as f:
lines = f.readlines()
with open('requirements_clean.txt', 'w') as f:
for line in lines:
if 'file://' not in line:
f.write(line)
In [4]:
pip freeze
absl-py==2.1.0 accelerate==1.0.0 aggdraw==1.3.19 aiohappyeyeballs==2.4.3 aiohttp==3.10.9 aiosignal==1.3.1 ann_visualizer==2.5 anyio==4.6.0 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 arrow==1.3.0 asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work astunparse==1.6.3 async-lru==2.0.4 attrs==24.2.0 babel==2.16.0 beautifulsoup4==4.12.3 bleach==6.1.0 cachetools==5.5.0 certifi==2024.8.30 cffi==1.17.1 chardet==3.0.4 charset-normalizer==3.3.2 click==8.1.7 comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1710320294760/work contourpy==1.3.0 cycler==0.12.1 datasets==3.0.1 debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1725269156501/work decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work defusedxml==0.7.1 dill==0.3.8 entrypoints==0.4 exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1720869315914/work executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1725214404607/work fastjsonschema==2.20.0 fasttext==0.9.3 filelock==3.16.1 flatbuffers==24.3.25 fonttools==4.53.1 fqdn==1.5.1 frozenlist==1.4.1 fsspec==2024.6.1 gast==0.6.0 gensim==4.3.3 google-auth==2.34.0 google-auth-oauthlib==1.2.1 google-pasta==0.2.0 googletrans==4.0.0rc1 graphviz==0.20.3 grpcio==1.66.1 h11==0.14.0 h2==3.2.0 h5py==3.11.0 hpack==3.0.0 hstspreload==2024.9.1 httpcore==1.0.6 httpx==0.27.2 huggingface-hub==0.25.1 hyperframe==5.2.0 idna==2.10 imageio==2.36.0 imageio-ffmpeg==0.5.1 importlib_metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1726082825846/work ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1719845459717/work ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1725050136642/work ipywidgets==8.1.5 isoduration==20.11.0 jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work Jinja2==3.1.4 joblib==1.4.2 JPype1==1.5.0 json5==0.9.25 jsonpointer==3.0.0 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 jupyter==1.1.1 jupyter-console==6.6.3 jupyter-events==0.10.0 jupyter-lsp==2.2.5 jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1716472197302/work jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1710257359434/work jupyter_server==2.14.2 jupyter_server_terminals==0.5.3 jupyterlab==4.2.5 jupyterlab_pygments==0.3.0 jupyterlab_server==2.27.3 jupyterlab_widgets==3.0.13 kagglehub==0.3.0 keras==2.15.0 keras-nlp==0.15.0 keras-tuner==1.4.7 kiwisolver==1.4.7 kobert-transformers==0.6.0 konlpy==0.6.0 kt-legacy==1.0.5 libclang==18.1.1 lxml==5.3.0 Mako==1.3.5 Markdown==3.7 markdown-it-py==3.0.0 MarkupSafe==2.1.5 matplotlib==3.9.2 matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1713250518406/work mdurl==0.1.2 mecab-python3==1.0.9 mistune==0.8.4 ml-dtypes==0.2.0 mpmath==1.3.0 multidict==6.1.0 multiprocess==0.70.16 namex==0.0.8 nbclient==0.10.0 nbconvert==5.6.1 nbformat==5.10.4 nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work networkx==3.3 nltk==3.9.1 notebook==7.2.2 notebook_shim==0.2.4 numpy==1.26.4 nvidia-cublas-cu12==12.1.3.1 nvidia-cuda-cupti-cu12==12.1.105 nvidia-cuda-nvrtc-cu12==12.1.105 nvidia-cuda-runtime-cu12==12.1.105 nvidia-cudnn-cu12==9.1.0.70 nvidia-cufft-cu12==11.0.2.54 nvidia-curand-cu12==10.3.2.106 nvidia-cusolver-cu12==11.4.5.107 nvidia-cusparse-cu12==12.1.0.106 nvidia-nccl-cu12==2.20.5 nvidia-nvjitlink-cu12==12.6.77 nvidia-nvtx-cu12==12.1.105 oauthlib==3.2.2 opt-einsum==3.3.0 optree==0.12.1 overrides==7.7.0 packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1718189413536/work pandas==2.2.2 pandocfilters==1.5.1 parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1712320355065/work patsy==0.5.6 pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1706113125309/work pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work pillow==10.4.0 platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1726315398971/work prometheus_client==0.21.0 prompt_toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1718047967974/work propcache==0.2.0 protobuf==4.25.4 psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1725737916418/work ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl pure_eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1721585709575/work pyarrow==17.0.0 pyasn1==0.6.1 pyasn1_modules==0.4.1 pybind11==2.13.6 pycparser==2.22 pycuda==2024.1.2 pydot @ file:///home/conda/feedstock_root/build_artifacts/pydot_1726737228028/work Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1714846767233/work pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1724616129934/work python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1709299778482/work python-json-logger==2.0.7 pytools==2024.1.14 pytz==2024.2 PyYAML==6.0.2 pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1725448927736/work referencing==0.35.1 regex==2024.9.11 requests==2.32.3 requests-oauthlib==2.0.0 rfc3339-validator==0.1.4 rfc3986==1.5.0 rfc3986-validator==0.1.1 rich==13.8.1 rouge_score==0.1.2 rpds-py==0.20.0 rsa==4.9 safetensors==0.4.5 scikit-learn==1.5.2 scipy==1.13.1 seaborn==0.13.2 Send2Trash==1.8.3 sentence-transformers==3.1.1 sentencepiece==0.2.0 six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work smart-open==7.0.5 sniffio==1.3.1 soupsieve==2.6 stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work statsmodels==0.14.4 sympy==1.13.3 tensorboard==2.15.2 tensorboard-data-server==0.7.2 tensorflow==2.15.0 tensorflow-estimator==2.15.0 tensorflow-hub==0.16.1 tensorflow-io-gcs-filesystem==0.37.1 tensorflow-text==2.15.0 termcolor==2.4.0 terminado==0.18.1 testpath==0.6.0 textblob==0.18.0.post0 tf_keras==2.15.1 threadpoolctl==3.5.0 tinycss2==1.3.0 tokenizers==0.20.0 torch==2.4.1 tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1724956126282/work tqdm==4.66.5 traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1713535121073/work transformers==4.45.2 triton==3.0.0 types-python-dateutil==2.9.0.20241003 typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1717802530399/work tzdata==2024.1 uri-template==1.3.0 urllib3==2.2.3 visualkeras==0.1.3 wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work webcolors==24.8.0 webencodings==0.5.1 websocket-client==1.8.0 Werkzeug==3.0.4 widgetsnbextension==4.0.13 wordcloud==1.9.3 wrapt==1.14.1 xxhash==3.5.0 yarl==1.14.0 zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1726248574750/work Note: you may need to restart the kernel to use updated packages.
- 1-3. Matplot & Numpy 환경설정
sudo apt-get install fonts-nanum* # 폰트 설치
sudo fc-cache -fv # 캐시 제거
sudo fc-list | grep nanum # 폰트 설치 확인
rm -rf ~/.cache/matplotlib/* # matplotlib 캐시 제거
In [5]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import numpy as np
# NumPy 출력 설정: 모든 값이 출력되도록 설정
np.set_printoptions(threshold=np.inf)
# NumPy 출력 설정: 배열이 한 줄로 출력되도록 설정
np.set_printoptions(linewidth=np.inf) # 출력 라인의 길이를 무한대로 설정
# Pandas 옵션 설정: 텍스트 생략 없이 출력
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.8f}'.format
# 나눔 폰트 설정
plt.rc('font', family='NanumGothic')
mpl.rcParams['axes.unicode_minus'] = False # 유니코드 마이너스를 일반 마이너스로 변경
# 폰트가 제대로 설정되었는지 확인
print([f.name for f in fm.fontManager.ttflist if 'Nanum' in f.name])
['NanumMyeongjo', 'NanumGothic', 'NanumMyeongjo', 'NanumSquareRound', 'NanumBarunpen', 'NanumBarunpen', 'NanumMyeongjo YetHangul', 'Nanum Brush Script', 'NanumSquare_ac', 'NanumGothicCoding', 'NanumGothic', 'NanumGothic', 'NanumBarunGothic YetHangul', 'NanumSquare_ac', 'NanumBarunGothic', 'NanumSquare', 'NanumSquareRound', 'NanumGothic Eco', 'NanumBarunGothic', 'NanumMyeongjo Eco', 'NanumMyeongjo Eco', 'NanumMyeongjo', 'NanumGothicCoding', 'NanumBarunGothic', 'NanumGothic Eco', 'NanumSquareRound', 'NanumSquare', 'NanumSquare', 'NanumSquare_ac', 'NanumSquareRound', 'NanumGothic Eco', 'NanumSquare_ac', 'NanumMyeongjo Eco', 'NanumSquare', 'Nanum Pen Script', 'NanumGothic', 'NanumMyeongjo Eco', 'NanumGothic Eco', 'NanumBarunGothic']
In [6]:
import pandas as pd
import os
import csv
data_dir = 'data'
ibk_fname = os.path.join(data_dir, 'NIM_RAW_IBK_20180101_20240831.csv')
bok_fname = os.path.join(data_dir, 'NIM_RAW_BOK_20180101_20240831.csv')
news_fname = os.path.join(data_dir, 'NIM_RAW_NEWS_20180101_20241021.csv')
# 데이터 로드
ibk_raw_df = pd.read_csv(ibk_fname, header=0, encoding='utf-8-sig')
bok_raw_df = pd.read_csv(bok_fname, header=0, encoding='utf-8-sig')
news_raw_df = pd.read_csv(news_fname, header=0, encoding='utf-8-sig', sep='|', quoting=csv.QUOTE_NONE, on_bad_lines='skip')
# 데이터 크기 확인
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
print(f"NEWS 데이터 크기: {news_raw_df.shape}")
IBK 데이터 크기: (2435, 44) BOK 데이터 크기: (2435, 26) NEWS 데이터 크기: (59664, 3)
In [7]:
pd.concat([ibk_raw_df.head(), ibk_raw_df.tail()])
Out[7]:
| 기준일자 | 이자수익자산 | 원화이자수익자산 | 원화예치금 | 원화유가증권 | 원화대출채권 | 원화대출금 | 기업자금대출금 | 가계자금대출금 | 원화매입어음 | 지급보증대지급금 | 팩토링채권 | 신용카드채권 | 환매조건부채권매수 | 원화대손충당금 | 기타운용 | 외화이자수익자산 | 외화예치금 | 외화유가증권 | 외화대출채권 | 외화대출금 | 매입외환 | 외화대손충당금 | 이자비용부채 | 원화이자비용부채 | 원화예수금 | 양도성예수금 | 원화차입금 | 환매조건부채권매도 | 매출어음 | 신용카드채권매출 | 원화사채 | 기타조달 | 외화이자비용부채 | 외화예수금 | 외화차입금 | 외화사채 | 순이자스프레드 | 원화예대금리차 | 외화예대금리차 | 원화순이자마진 | 외화순이자마진 | 순이자마진 | NIM | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 |
| 1 | 2018-01-02 | 3.30370000 | 3.28490000 | 0.00000000 | 1.88920000 | 0.82660000 | 1.37550000 | 0.00000000 | 1.87570000 | 17.30820000 | 0.00000000 | 1.64430000 | 0.00000000 | 2.44000000 | 1.62340000 | 2.03230000 | 0.65510000 | 1.21390000 | 0.40950000 | 0.23930000 | 2.22970000 | 0.65600000 | 1.51500000 | 0.11050000 | 0.00000000 | 3.28780000 | 3.54050000 | 4.77070000 | 1.83640000 | 2.00150000 | 2.61520000 | 1.80540000 | 1.18440000 | 1.88800000 | 1.36740000 | 3.28040000 | 2.64490000 | 1.32090000 | 3.19660000 | 0.00000000 | 0.00000000 | 1.46290000 | 1.66020000 | 1.93750000 |
| 2 | 2018-01-03 | 3.30460000 | 3.28650000 | 0.00000000 | 1.86650000 | 0.81830000 | 1.37690000 | 0.00000000 | 1.88050000 | 17.30720000 | 0.00000000 | 1.64810000 | 0.00000000 | 2.43130000 | 1.61540000 | 2.03230000 | 0.65070000 | 1.20610000 | 0.40930000 | 0.24370000 | 2.22800000 | 0.65480000 | 1.49380000 | 0.10980000 | 0.00000000 | 3.29130000 | 3.55340000 | 4.77530000 | 1.83690000 | 2.00590000 | 2.62340000 | 1.81450000 | 1.28380000 | 1.89060000 | 1.37140000 | 3.29250000 | 2.64230000 | 1.32420000 | 3.20470000 | 0.00000000 | 0.00000000 | 1.46330000 | 1.63850000 | 1.93970000 |
| 3 | 2018-01-04 | 3.30570000 | 3.28690000 | 0.00000000 | 1.94860000 | 0.82360000 | 1.38100000 | 0.00000000 | 1.88290000 | 17.30350000 | 0.00000000 | 1.65010000 | 0.00000000 | 2.41730000 | 1.61140000 | 2.03230000 | 0.64250000 | 1.20050000 | 0.41090000 | 0.17700000 | 2.23640000 | 0.65570000 | 1.50620000 | 0.11800000 | 0.00000000 | 3.28880000 | 3.55240000 | 4.78130000 | 1.83800000 | 2.00480000 | 2.62370000 | 1.81190000 | 1.36580000 | 1.88990000 | 1.37100000 | 3.29240000 | 2.64920000 | 1.32390000 | 3.20680000 | 0.00000000 | 0.00000000 | 1.46440000 | 1.59280000 | 1.93960000 |
| 4 | 2018-01-05 | 3.30690000 | 3.28860000 | 0.00000000 | 1.96320000 | 0.81520000 | 1.38570000 | 0.00000000 | 1.88490000 | 17.29030000 | 0.00000000 | 1.64920000 | 0.00000000 | 2.42770000 | 1.60960000 | 2.03230000 | 0.65840000 | 1.19500000 | 0.41460000 | 0.19720000 | 2.23380000 | 0.65780000 | 1.53630000 | 0.10810000 | 0.00000000 | 3.29350000 | 3.55060000 | 4.79310000 | 1.83870000 | 2.00130000 | 2.62280000 | 1.81020000 | 1.38920000 | 1.89510000 | 1.37090000 | 3.29190000 | 2.64800000 | 1.32400000 | 3.20890000 | 0.00000000 | 0.00000000 | 1.47170000 | 1.54810000 | 1.93780000 |
| 2430 | 2024-08-27 | 4.43510000 | 4.44640000 | 0.00000000 | 3.74340000 | 1.76090000 | 3.29630000 | 0.00000000 | 1.57740000 | 15.86680000 | 0.00000000 | 3.61760000 | 0.00000000 | 5.67640000 | 3.85510000 | 1.52310000 | 0.54660000 | 1.90690000 | 1.94820000 | 2.65820000 | 2.02860000 | 2.01430000 | 3.08060000 | 3.30550000 | 0.00000000 | 4.44700000 | 4.62900000 | 6.00450000 | 3.80760000 | 1.69920000 | 3.10640000 | 3.00760000 | 0.09190000 | 3.05640000 | 2.78950000 | 4.38160000 | 4.31680000 | 2.74080000 | 4.31820000 | 0.00000000 | 0.00000000 | 2.83920000 | 3.50000000 | 1.64300000 |
| 2431 | 2024-08-28 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 |
| 2432 | 2024-08-29 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 |
| 2433 | 2024-08-30 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 |
| 2434 | 2024-08-31 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 | 0.00000000 |
In [8]:
pd.concat([bok_raw_df.head(), bok_raw_df.tail()])
Out[8]:
| 기준일자 | 한국은행 기준금리 | 정부대출금 금리 | 무역금융지원 금리 | 영세자영업자지원 금리 | 신성장/일자리지원 금리 | 설비투자지원 금리 | 지방중소기업지원 금리 | 자금조정 금리 | 자금조정 예금금리 | 원/미국달러 | 원/위안 | 원/일본엔 | 원/유로 | KOSPI지수 | KOSPI거래량 | KOSPI거래대금 | KOSPI외국인 순매수 | 주식시장-거래량 | 주식시장-거래대금 | 시가총액 | KOSDAQ지수 | KOSDAQ거래량 | KOSDAQ거래대금 | KOSDAQ외국인 순매수 | NIM | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.50000000 | 1.63800000 | 0.50000000 | 0.50000000 | 0.50000000 | 0.75000000 | 0.75000000 | 2.50000000 | 0.50000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.00000000 |
| 1 | 2018-01-02 | 1.50000000 | 1.63800000 | 0.50000000 | 0.50000000 | 0.50000000 | 0.75000000 | 0.75000000 | 2.50000000 | 0.50000000 | 1071.40000000 | 163.65000000 | 951.05000000 | 1286.91000000 | 2479.65000000 | 23026.00000000 | 46007.00000000 | 2657.00000000 | 26221.00000000 | 47864.00000000 | 16135080.00000000 | 812.45000000 | 98920.00000000 | 66490.00000000 | 878.00000000 | 1.93750000 |
| 2 | 2018-01-03 | 1.50000000 | 1.63800000 | 0.50000000 | 0.50000000 | 0.50000000 | 0.75000000 | 0.75000000 | 2.50000000 | 0.50000000 | 1064.30000000 | 163.82000000 | 948.11000000 | 1283.76000000 | 2486.35000000 | 32734.00000000 | 59619.00000000 | 4006.00000000 | 33109.00000000 | 60196.00000000 | 16176340.00000000 | 822.31000000 | 120310.00000000 | 81576.00000000 | 600.00000000 | 1.93970000 |
| 3 | 2018-01-04 | 1.50000000 | 1.63800000 | 0.50000000 | 0.50000000 | 0.50000000 | 0.75000000 | 0.75000000 | 2.50000000 | 0.50000000 | 1065.40000000 | 163.90000000 | 946.64000000 | 1279.81000000 | 2466.46000000 | 32963.00000000 | 67930.00000000 | 3396.00000000 | 33384.00000000 | 68963.00000000 | 16049160.00000000 | 808.01000000 | 134645.00000000 | 83403.00000000 | -35.00000000 | 1.93960000 |
| 4 | 2018-01-05 | 1.50000000 | 1.63800000 | 0.50000000 | 0.50000000 | 0.50000000 | 0.75000000 | 0.75000000 | 2.50000000 | 0.50000000 | 1065.50000000 | 163.92000000 | 944.72000000 | 1286.17000000 | 2497.52000000 | 30529.00000000 | 61571.00000000 | 2466.00000000 | 30877.00000000 | 63175.00000000 | 16249700.00000000 | 828.03000000 | 122915.00000000 | 77241.00000000 | 1855.00000000 | 1.93780000 |
| 2430 | 2024-08-27 | 3.50000000 | 3.54300000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 4.00000000 | 3.00000000 | 1323.90000000 | 186.00000000 | 917.75000000 | 1478.27000000 | 2689.25000000 | 25974.00000000 | 79511.00000000 | -4075.00000000 | 26406.00000000 | 80507.00000000 | 21994790.00000000 | 764.95000000 | 83334.00000000 | 67956.00000000 | -16.00000000 | 1.64300000 |
| 2431 | 2024-08-28 | 3.50000000 | 3.54300000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 4.00000000 | 3.00000000 | 1330.40000000 | 186.73000000 | 925.21000000 | 1487.85000000 | 2689.83000000 | 27638.00000000 | 104033.00000000 | -3435.00000000 | 28077.00000000 | 105675.00000000 | 22005920.00000000 | 762.50000000 | 86858.00000000 | 70211.00000000 | -808.00000000 | 0.00000000 |
| 2432 | 2024-08-29 | 3.50000000 | 3.54300000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 4.00000000 | 3.00000000 | 1334.70000000 | 187.39000000 | 925.17000000 | 1484.79000000 | 2662.28000000 | 28469.00000000 | 112966.00000000 | -4208.00000000 | 28884.00000000 | 114647.00000000 | 21782580.00000000 | 756.04000000 | 83787.00000000 | 73351.00000000 | -241.00000000 | 0.00000000 |
| 2433 | 2024-08-30 | 3.50000000 | 3.54300000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 4.00000000 | 3.00000000 | 1335.30000000 | 187.56000000 | 921.75000000 | 1479.58000000 | 2674.31000000 | 26695.00000000 | 97111.00000000 | -3790.00000000 | 27885.00000000 | 103408.00000000 | 21874610.00000000 | 767.66000000 | 82330.00000000 | 74018.00000000 | 1939.00000000 | 0.00000000 |
| 2434 | 2024-08-31 | 3.50000000 | 3.54300000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 2.00000000 | 4.00000000 | 3.00000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.00000000 |
In [9]:
pd.concat([news_raw_df.head(), news_raw_df.tail()])
Out[9]:
| 기준일자 | 제목 | 기사 | |
|---|---|---|---|
| 0 | 2018-01-01 | 현대캐피탈, 삼성화재 꺾고 1위 탈환…IBK기업은행 3연승(종합) | 신영석, 속공 13득점 등 17득점 IBK기업은행, 메디 33득점 앞세워 현대건설 격파 현대캐피탈이 '숙명의 라이벌' 삼성화재에 짜릿한 역전승을 거두고 1위 자리를 탈환했다. 현대캐피탈은 1일 천안 유관순체육관에서 열린 프로배구 도드람 2017-2018 V 리그 남자부 4라운드 홈 경기에서 삼성화재를 세트... |
| 1 | 2018-01-01 | ‘새해 3점 출발’ IBK기업은행, 현대건설 꺾고 10승 고지 올라 | IBK기업은행이 1일 화성실내체육관에서 열린 ‘도드람 2017~2018 V리그’ 현대건설과 4라운드 맞대결에서 세트 스코어 3-1 승리를 거뒀다. 승점 3점을 추가하며 10승 고지에 올라 상위권 추격에 박차를 가했다. IBK기업은행은 높이에 강점을 가진 현대건설을 상대로 블로킹에서 압도적인 모습을 보이며 1... |
| 2 | 2018-01-01 | 프로배구 기업은행, 현대건설 꺾고 3연승 | 프로배구에서 여자부 3위 기업은행이 2위 현대건설을 꺾고 3연승을 달렸습니다. 기업은행은 경기도 화성종합실내체육관에서 열린 도드람 V리그 정규리그 홈경기에서 현대건설을 3대 1로 물리쳤습니다. 외국인 선수 메디가 양 팀 최다인 33점을 터뜨리며 승리에 앞장섰습니다. 3연승을 질주한... |
| 3 | 2018-01-01 | '메디 33득점' 기업은행, 현대건설 제압…불붙은 순위경쟁 | IBK기업은행이 새해 첫 경기에서 현대건설을 잡고 순위 경쟁에 불을 붙였다. 기업은행은 1일 화성실내체육관에서 열린 '도드람 2017-2018 V-리그' 여자부 4라운드 경기에서 현대건설을 세트 스코어 3-1(25-21 25-21 19-25 25-17)로 제압했다. 안방에서 승점 3점을 챙긴 기업은행(10승 6패·승점 29)은 2위... |
| 4 | 2018-01-01 | '호흡 척척' IBK기업은행, 시선 강탈 '신입 행원 응원전' | IBK기업은행이 새해 첫 날 신입 행원의 열띤 응원에 승리로 보답했다. 이정철 감독이 이끄는 IBK기업은행은 1일 화성 실내체육관에서 열린 도드람 2017~2018 V-리그 여자부 현대건설과의 4라운드 맞대결에서 세트스코어 3-1(25-21, 25-15, 19-25, 25-17)로 승리했다. 이날 1세트 종료 후 관중들의 시선은 일제히... |
| 59659 | 2024-10-21 | 상반기 은행권 가계대출 연체잔액 3.2조↑…10년만 최대치 | 고금리·고물가 장기화로 은행권의 가계대출 연체잔액이 올해 상반기 기준으로 3조원을 넘어선 것으로 나타났다. 21일 금융감독원에 따르면 올해... 고신용자들이 주 고객인 은행권의 가계대출 연체잔액이 크게 늘어나면서 고금리 장기화로 차주들의 상환 부담이 만만치 않음을 보여준다. 최근... |
| 59660 | 2024-10-21 | 무의미한 은행권 '퇴직연금 수익률' 경쟁… 정작 가입자 89% '초저위험 상... | 은행권이 퇴직연금 수익률 제고를 위해 사전지정운영제도(디폴트옵션)를 운영하고 있으나 제도 도입 취지가 유명무실하다는 지적이 제기되고 있다.... 이런 와중에 초저위험 상품과 고위험 상품의 중간 위험성격인 디딤펀드마저 은행권이 외면하고 있는 상황이다. 결국 은행권이 퇴직연금 수익률 제고에... |
| 59661 | 2024-10-21 | [더벨]금투협 주도 디딤펀드, 은행권은 '묵묵부답' | 있지만 은행권의 반응은 아직 냉담한 것으로 나타났다. 순차적으로 판매사를 늘려가고 있는 증권업권과 달리 은행업권에서는 아직 판매사로 나선 곳이 없는 상태다. 디딤펀드가 성공적으로 안착하기 위해서는 판매사들의 참여가 중요하다고 예상되는 만큼 은행권 움직임에 눈길이 쏠리고 있다. 16일... |
| 59662 | 2024-10-21 | 이달 주담대 증가세 반토막… 집단대출 문턱까지 더 조인다 | 금융당국과 은행권이 ‘가계부채와의 전쟁’을 본격화하면서 10월 들어 가계대출 증가세가 한풀 꺾였다. 가계대출 증가세를 주도했던 주택담보대출... 특히 은행권의 대출 문턱이 높아진 가운데 집단대출과 제2금융권 ‘풍선 효과’가 새로운 뇌관이 될 것이라 보고 여기에 초점을 맞추고 있다. 20일... |
| 59663 | 2024-10-21 | 김병환 금융위원장, ‘제4인뱅’ 설립 속도 낼까 | 올해 은행권의 주요 이슈 중 하나는 제4인터넷전문은행(제4인뱅)의 출범이다. 하지만 아직까지 제4인뱅 설립을 위한 행보는 지지부진한 상태다. 현재... 이에 금융위는 5개월 만에 은행권 경영·영업 관행 제도 개선 방안을 발표하며 은행권 경쟁을 촉진하기 위한 제도 개선을 추진하기로 했다. 특히 은행권에... |
- IBK 기업은행 데이터 Feature Engineering(불필요 변수 제거)
- 순이자스프레드(C) = A - B : 이자수익자산(A), 이자비용부채(B)변수 삭제
- 원화예대금리차(C1) = A13 - B11 : 원화대출채권(A13), 원화예수금(B11)변수삭제 ∴B11변수는 원래 없음
- 외화예대금리차(C2) = A23 - B21 : 외화대출채권(A23), 외화예수금(B21)변수삭제
In [10]:
ibk_feature_drop_column1 = ['이자수익자산', '원화대출채권', '외화대출채권', '이자비용부채', '외화예수금']
ibk_feature_drop_column1 = list(set(ibk_feature_drop_column1))
print(ibk_raw_df[ibk_feature_drop_column1])
ibk_raw_df = ibk_raw_df.drop(columns=ibk_feature_drop_column1)
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
이자수익자산 외화대출채권 이자비용부채 외화예수금 원화대출채권 0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 1 3.30370000 0.23930000 0.11050000 1.36740000 0.82660000 2 3.30460000 0.24370000 0.10980000 1.37140000 0.81830000 3 3.30570000 0.17700000 0.11800000 1.37100000 0.82360000 4 3.30690000 0.19720000 0.10810000 1.37090000 0.81520000 ... ... ... ... ... ... 2430 4.43510000 2.65820000 3.30550000 2.78950000 1.76090000 2431 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 2432 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 2433 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 2434 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 [2435 rows x 5 columns] IBK 데이터 크기: (2435, 39)
- IBK 기업은행 데이터 Feature Engineering(상관계수로 제거)
In [11]:
import pandas as pd
import numpy as np
# 첫 번째와 마지막 컬럼을 제외한 데이터프레임 추출
df_without_first_last = ibk_raw_df.iloc[:, 1:]
# 상관 행렬 계산
corr_matrix = df_without_first_last.corr()
# 상관계수가 0.95 이상인 변수 쌍 찾기 (자기 자신 제외)
high_corr_pairs = []
for i in range(len(corr_matrix.columns)):
for j in range(i + 1, len(corr_matrix.columns)):
if abs(corr_matrix.iloc[i, j]) >= 0.9945:
high_corr_pairs.append((corr_matrix.columns[i], corr_matrix.columns[j], corr_matrix.iloc[i, j]))
# 결과 출력
if high_corr_pairs:
print("IBK DataFrame - 상관계수가 0.995 이상인 변수 쌍:")
for pair in high_corr_pairs:
print(f"{pair[0]} - {pair[1]}: {pair[2]:.3f}")
else:
print("IBK DataFrame - 상관계수가 0.995 이상인 변수 쌍이 없습니다.")
IBK DataFrame - 상관계수가 0.995 이상인 변수 쌍: 원화이자수익자산 - 원화예수금: 1.000 원화대출금 - 팩토링채권: 0.995 가계자금대출금 - 매출어음: 1.000 가계자금대출금 - NIM: 1.000 환매조건부채권매수 - 외화대손충당금: 0.995 원화대손충당금 - 외화대손충당금: 0.995 양도성예수금 - 외화차입금: 0.999 양도성예수금 - 원화예대금리차: 0.999 환매조건부채권매도 - 순이자스프레드: 0.999 매출어음 - NIM: 1.000 외화차입금 - 원화예대금리차: 1.000
- IBK 기업은행 데이터 Feature Engineering(상관계수로 제거)
- 원화대출금 - 팩토링채권: 0.995
- 원화대손충당금 - 외화대손충당금: 0.995
- 원화이자수익자산 - 원화예수금: 1.000 변수 삭제
- 환매조건부채권매수 - 외화대손충당금: 0.995
- 환매조건부채권매도 - 순이자스프레드: 0.999
- 가계자금대출금 - 매출어음: 1.000 변수 삭제
- 가계자금대출금 - NIM: 1.000 변수 삭제
- 매출어음 - NIM: 1.000 변수 삭제
- 외화차입금 - 원화예대금리차: 1.000 변수 삭제
- 양도성예수금 - 외화차입금: 0.999
- 양도성예수금 - 원화예대금리차: 0.999
In [12]:
ibk_feature_drop_column2 = ['팩토링채권', '외화대손충당금', '원화예수금', '순이자스프레드', '매출어음', '가계자금대출금', '매출어음', '외화차입금', '양도성예수금']
ibk_feature_drop_column2 = list(set(ibk_feature_drop_column2))
print(ibk_raw_df[ibk_feature_drop_column2])
ibk_raw_df = ibk_raw_df.drop(columns=ibk_feature_drop_column2)
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
팩토링채권 외화차입금 원화예수금 가계자금대출금 양도성예수금 매출어음 \
0 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
1 1.64430000 3.28040000 3.28780000 1.87570000 3.54050000 2.00150000
2 1.64810000 3.29250000 3.29130000 1.88050000 3.55340000 2.00590000
3 1.65010000 3.29240000 3.28880000 1.88290000 3.55240000 2.00480000
4 1.64920000 3.29190000 3.29350000 1.88490000 3.55060000 2.00130000
... ... ... ... ... ... ...
2430 3.61760000 4.38160000 4.44700000 1.57740000 4.62900000 1.69920000
2431 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
2432 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
2433 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
2434 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
외화대손충당금 순이자스프레드
0 0.00000000 0.00000000
1 1.51500000 1.32090000
2 1.49380000 1.32420000
3 1.50620000 1.32390000
4 1.53630000 1.32400000
... ... ...
2430 3.08060000 2.74080000
2431 0.00000000 0.00000000
2432 0.00000000 0.00000000
2433 0.00000000 0.00000000
2434 0.00000000 0.00000000
[2435 rows x 8 columns]
IBK 데이터 크기: (2435, 31)
- BOK 한국은행 데이터 Feature Engineering(불필요 변수 제거)
- 주식시장 : KOSPI + KOSDAQ 변수 삭제
- 원/달러 : 원/위안, 원/일본엔, 원/유로 변수 삭제
In [13]:
bok_feature_drop_column1 = ['원/위안', '원/일본엔', '원/유로', 'KOSPI지수', 'KOSPI거래량', 'KOSPI거래대금', 'KOSPI외국인 순매수', 'KOSDAQ지수', 'KOSDAQ거래량', 'KOSDAQ거래대금', 'KOSDAQ외국인 순매수']
bok_feature_drop_column1 = list(set(bok_feature_drop_column1))
print(bok_raw_df[bok_feature_drop_column1])
bok_raw_df = bok_raw_df.drop(columns=bok_feature_drop_column1)
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
KOSPI거래대금 KOSPI거래량 KOSDAQ지수 KOSDAQ외국인 순매수 \
0 NaN NaN NaN NaN
1 46007.00000000 23026.00000000 812.45000000 878.00000000
2 59619.00000000 32734.00000000 822.31000000 600.00000000
3 67930.00000000 32963.00000000 808.01000000 -35.00000000
4 61571.00000000 30529.00000000 828.03000000 1855.00000000
... ... ... ... ...
2430 79511.00000000 25974.00000000 764.95000000 -16.00000000
2431 104033.00000000 27638.00000000 762.50000000 -808.00000000
2432 112966.00000000 28469.00000000 756.04000000 -241.00000000
2433 97111.00000000 26695.00000000 767.66000000 1939.00000000
2434 NaN NaN NaN NaN
KOSDAQ거래대금 KOSPI외국인 순매수 원/일본엔 KOSPI지수 원/위안 \
0 NaN NaN NaN NaN NaN
1 66490.00000000 2657.00000000 951.05000000 2479.65000000 163.65000000
2 81576.00000000 4006.00000000 948.11000000 2486.35000000 163.82000000
3 83403.00000000 3396.00000000 946.64000000 2466.46000000 163.90000000
4 77241.00000000 2466.00000000 944.72000000 2497.52000000 163.92000000
... ... ... ... ... ...
2430 67956.00000000 -4075.00000000 917.75000000 2689.25000000 186.00000000
2431 70211.00000000 -3435.00000000 925.21000000 2689.83000000 186.73000000
2432 73351.00000000 -4208.00000000 925.17000000 2662.28000000 187.39000000
2433 74018.00000000 -3790.00000000 921.75000000 2674.31000000 187.56000000
2434 NaN NaN NaN NaN NaN
KOSDAQ거래량 원/유로
0 NaN NaN
1 98920.00000000 1286.91000000
2 120310.00000000 1283.76000000
3 134645.00000000 1279.81000000
4 122915.00000000 1286.17000000
... ... ...
2430 83334.00000000 1478.27000000
2431 86858.00000000 1487.85000000
2432 83787.00000000 1484.79000000
2433 82330.00000000 1479.58000000
2434 NaN NaN
[2435 rows x 11 columns]
BOK 데이터 크기: (2435, 15)
- BOK 한국은행 데이터 Feature Engineering(상관계수로 제거)
In [14]:
import pandas as pd
import numpy as np
# 첫 번째와 마지막 컬럼을 제외한 데이터프레임 추출
df_without_first_last = bok_raw_df.iloc[:, 1:]
# 상관 행렬 계산
corr_matrix = df_without_first_last.corr()
# 상관계수가 0.95 이상인 변수 쌍 찾기 (자기 자신 제외)
high_corr_pairs = []
for i in range(len(corr_matrix.columns)):
for j in range(i + 1, len(corr_matrix.columns)):
if abs(corr_matrix.iloc[i, j]) >= 0.985:
high_corr_pairs.append((corr_matrix.columns[i], corr_matrix.columns[j], corr_matrix.iloc[i, j]))
# 결과 출력
if high_corr_pairs:
print("BOK DataFrame - 상관계수가 0.99 이상인 변수 쌍:")
for pair in high_corr_pairs:
print(f"{pair[0]} - {pair[1]}: {pair[2]:.3f}")
else:
print("BOK DataFrame - 상관계수가 0.985 이상인 변수 쌍이 없습니다.")
BOK DataFrame - 상관계수가 0.99 이상인 변수 쌍: 정부대출금 금리 - 설비투자지원 금리: 0.985 정부대출금 금리 - 지방중소기업지원 금리: 0.985 무역금융지원 금리 - 영세자영업자지원 금리: 1.000 무역금융지원 금리 - 신성장/일자리지원 금리: 1.000 무역금융지원 금리 - 설비투자지원 금리: 0.987 무역금융지원 금리 - 지방중소기업지원 금리: 0.987 무역금융지원 금리 - 자금조정 예금금리: 0.988 영세자영업자지원 금리 - 신성장/일자리지원 금리: 1.000 영세자영업자지원 금리 - 설비투자지원 금리: 0.987 영세자영업자지원 금리 - 지방중소기업지원 금리: 0.987 영세자영업자지원 금리 - 자금조정 예금금리: 0.988 신성장/일자리지원 금리 - 설비투자지원 금리: 0.987 신성장/일자리지원 금리 - 지방중소기업지원 금리: 0.987 신성장/일자리지원 금리 - 자금조정 예금금리: 0.988 설비투자지원 금리 - 지방중소기업지원 금리: 1.000
- BOK 한국은행 데이터 Feature Engineering(상관계수로 제거)
- 무역금융지원 금리 : 영세자영업자지원 금리(1.00), 신성장/일자리지원 금리(1.00), 설비투자지원 금리(0.99), 지방중소기업지원 금리(0.99), 자금조정 예금금리(0.99) 변수 삭제
- 영세자영업자지원 금리 : 신성장/일자리지원 금리(1.00), 설비투자지원 금리(0.99), 지방중소기업지원 금리(0.99), 자금조정 예금금리(0.99) 변수 삭제
- 신성장/일자리지원 금리 : 설비투자지원 금리(0.99), 지방중소기업지원 금리(0.99), 자금조정 예금금리(0.99) 변수 삭제
- 설비투자지원 금리 : 지방중소기업지원 금리(1.00) 변수 삭제
In [15]:
bok_feature_drop_column2 = ['영세자영업자지원 금리', '신성장/일자리지원 금리', '설비투자지원 금리', '지방중소기업지원 금리', '자금조정 예금금리']
bok_feature_drop_column2 = list(set(bok_feature_drop_column2))
print(bok_raw_df[bok_feature_drop_column2])
bok_raw_df = bok_raw_df.drop(columns=bok_feature_drop_column2)
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
자금조정 예금금리 설비투자지원 금리 신성장/일자리지원 금리 영세자영업자지원 금리 지방중소기업지원 금리 0 0.50000000 0.75000000 0.50000000 0.50000000 0.75000000 1 0.50000000 0.75000000 0.50000000 0.50000000 0.75000000 2 0.50000000 0.75000000 0.50000000 0.50000000 0.75000000 3 0.50000000 0.75000000 0.50000000 0.50000000 0.75000000 4 0.50000000 0.75000000 0.50000000 0.50000000 0.75000000 ... ... ... ... ... ... 2430 3.00000000 2.00000000 2.00000000 2.00000000 2.00000000 2431 3.00000000 2.00000000 2.00000000 2.00000000 2.00000000 2432 3.00000000 2.00000000 2.00000000 2.00000000 2.00000000 2433 3.00000000 2.00000000 2.00000000 2.00000000 2.00000000 2434 3.00000000 2.00000000 2.00000000 2.00000000 2.00000000 [2435 rows x 5 columns] BOK 데이터 크기: (2435, 10)
- IBK 기업은행, BOK 한국은행 Feature Engineering(평균이 0인 변수 제거)
In [16]:
# Describe the dataframe and find columns where the mean is 0
ibk_zero_mean_columns = ibk_raw_df.describe().transpose()
ibk_zero_mean_columns = ibk_zero_mean_columns[ibk_zero_mean_columns['mean'] == 0].index.tolist()
print(ibk_zero_mean_columns)
ibk_raw_df = ibk_raw_df.drop(columns=ibk_zero_mean_columns)
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
['원화예치금', '기업자금대출금', '지급보증대지급금', '신용카드채권', '원화이자비용부채', '외화예대금리차', '원화순이자마진'] IBK 데이터 크기: (2435, 24)
In [17]:
# Describe the dataframe and find columns where the mean is 0
bok_zero_mean_columns = bok_raw_df.describe().transpose()
bok_zero_mean_columns = bok_zero_mean_columns[bok_zero_mean_columns['mean'] == 0].index.tolist()
print(bok_zero_mean_columns)
bok_raw_df = bok_raw_df.drop(columns=bok_zero_mean_columns)
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
[] BOK 데이터 크기: (2435, 10)
- 3-2. Missing Data
- NIM값이 0인 일자는 데이터 삭제
- 변수의 평균, 중앙값, 최빈값을로 대체하지 않고 삭제함
In [18]:
# NIM 값이 null이거나 0인 행을 제거하는 코드
ibk_raw_df = ibk_raw_df[ibk_raw_df['NIM'].notnull() & (ibk_raw_df['NIM'] != 0)]
bok_raw_df = bok_raw_df[bok_raw_df['NIM'].notnull() & (bok_raw_df['NIM'] != 0)]
# 데이터 크기 확인
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
# Find matching dates and compare the lengths
matching_dates = pd.merge(ibk_raw_df[['기준일자']], bok_raw_df[['기준일자']], on='기준일자', how='inner')
# Compare counts
ibk_count = len(ibk_raw_df)
bok_count = len(bok_raw_df)
matching_count = len(matching_dates)
ibk_count, bok_count, matching_count
IBK 데이터 크기: (1646, 24) BOK 데이터 크기: (1646, 10)
Out[18]:
(1646, 1646, 1646)
- IBK 기업은행, BOK 한국은행 데이터 결측치 처리
- 변수의 평균, 중앙값, 최빈값으로 대체하지 않고 삭제함
In [19]:
import matplotlib.pyplot as plt
import numpy as np
# 1. IBK 데이터 결측치 개수 계산 (첫 번째, 마지막 컬럼 제외)
ibk_missing_counts = ibk_raw_df.iloc[:, 1:-1].isnull().sum()
# 2. BOK 데이터 결측치 개수 계산 (첫 번째, 마지막 컬럼 제외)
bok_missing_counts = bok_raw_df.iloc[:, 1:-1].isnull().sum()
# 3. 막대 그래프 그리기 (좌측: IBK, 우측: BOK)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 10), sharey=True)
# 4. IBK 데이터 결측치 시각화 (파란색 계열)
ax1.bar(ibk_missing_counts.index, ibk_missing_counts.values, color='skyblue')
ax1.set_title("IBK 데이터 결측치 개수")
ax1.set_xlabel("열 (Columns)")
ax1.set_ylabel("결측치 개수 (Missing Count)")
ax1.set_xticks(range(len(ibk_missing_counts.index))) # 틱 위치 고정
ax1.set_xticklabels(ibk_missing_counts.index, rotation=45) # 틱 레이블 설정
# 막대 안쪽에 결측치 개수 표시 (0인 경우 표시 안 함)
for i, v in enumerate(ibk_missing_counts.values):
if v > 0:
ax1.text(i, v / 2, str(v), ha='center', va='center', color='black')
# 5. BOK 데이터 결측치 시각화 (초록색 계열)
ax2.bar(bok_missing_counts.index, bok_missing_counts.values, color='lightgreen')
ax2.set_title("BOK 데이터 결측치 개수")
ax2.set_xlabel("열 (Columns)")
ax2.set_xticks(range(len(bok_missing_counts.index))) # 틱 위치 고정
ax2.set_xticklabels(bok_missing_counts.index, rotation=45) # 틱 레이블 설정
# 막대 안쪽에 결측치 개수 표시 (0인 경우 표시 안 함)
for i, v in enumerate(bok_missing_counts.values):
if v > 0:
ax2.text(i, v / 2, str(v), ha='center', va='center', color='black')
# 6. 레이아웃 조정 및 그래프 표시
plt.tight_layout()
plt.show()
In [20]:
# 1. IBK 데이터에서 결측치가 있는 행의 기준일자 추출
ibk_missing_dates = ibk_raw_df[ibk_raw_df.isnull().any(axis=1)]['기준일자'].tolist()
# 2. BOK 데이터에서 결측치가 있는 행의 기준일자 추출
bok_missing_dates = bok_raw_df[bok_raw_df.isnull().any(axis=1)]['기준일자'].tolist()
# 3. 두 리스트를 합침 (중복된 값은 제거)
combined_missing_dates = list(set(ibk_missing_dates + bok_missing_dates))
# 4. 결측치가 있는 기준일자 리스트 출력
print(f"IBK와 BOK에서 결측치가 있는 기준일자 리스트: {combined_missing_dates}")
ibk_raw_df = ibk_raw_df[~ibk_raw_df['기준일자'].isin(combined_missing_dates)]
bok_raw_df = bok_raw_df[~bok_raw_df['기준일자'].isin(combined_missing_dates)]
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
IBK와 BOK에서 결측치가 있는 기준일자 리스트: ['2022-12-30', '2019-03-31', '2023-12-29', '2018-12-31', '2020-12-31', '2019-12-31', '2021-12-31'] IBK 데이터 크기: (1639, 24) BOK 데이터 크기: (1639, 10)
- 3-3. Outlier Data
- IBK 기업은행, BOK 한국은행 이상치 데이터 처리
In [21]:
import matplotlib.pyplot as plt
import numpy as np
# 1. 이상치 계산을 위한 IQR 기준 함수
def detect_outliers_iqr(df):
outliers_count = {}
for column in df.columns:
if df[column].dtype in ['int64', 'float64']: # 숫자형 데이터만 이상치 계산
Q1 = df[column].quantile(0.25) # 1사분위수
Q3 = df[column].quantile(0.75) # 3사분위수
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# 이상치 개수 계산
outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
outliers_count[column] = outliers.shape[0] # 이상치 개수 기록
return outliers_count
# 2. IBK 데이터에서 이상치 계산
ibk_outliers = detect_outliers_iqr(ibk_raw_df)
# 3. BOK 데이터에서 이상치 계산
bok_outliers = detect_outliers_iqr(bok_raw_df)
# 4. 막대 그래프 그리기 (좌측: IBK, 우측: BOK)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 10), sharey=True)
# 5. IBK 데이터 이상치 시각화 (파란색 계열)
ax1.bar(ibk_outliers.keys(), ibk_outliers.values(), color='skyblue')
ax1.set_title("IBK 데이터 변수별 이상치 개수")
ax1.set_xlabel("열 (Columns)")
ax1.set_ylabel("이상치 개수 (Outlier Count)")
ax1.set_xticks(range(len(ibk_outliers.keys()))) # 틱 위치 설정
ax1.set_xticklabels(ibk_outliers.keys(), rotation=45)
# 막대 위에 이상치 개수 표시 (IBK 데이터)
for i, v in enumerate(ibk_outliers.values()):
ax1.text(i, v + 0.5, str(v), ha='center', va='bottom')
# 6. BOK 데이터 이상치 시각화 (초록색 계열)
ax2.bar(bok_outliers.keys(), bok_outliers.values(), color='lightgreen')
ax2.set_title("BOK 데이터 변수별 이상치 개수")
ax2.set_xlabel("열 (Columns)")
ax2.set_xticks(range(len(bok_outliers.keys()))) # 틱 위치 설정
ax2.set_xticklabels(bok_outliers.keys(), rotation=45)
# 막대 위에 이상치 개수 표시 (BOK 데이터)
for i, v in enumerate(bok_outliers.values()):
ax2.text(i, v + 0.5, str(v), ha='center', va='bottom')
# 7. 레이아웃 조정 및 그래프 표시
plt.tight_layout()
plt.show()
In [22]:
# Correct the error and ensure x-axis ticks are spaced every 30 days
import pandas as pd
import matplotlib.pyplot as plt
# NIM 컬럼만 추출 (단일 헤더 가정)
nim_data = ibk_raw_df['NIM'] # NIM 데이터 추출
# 이상치 정의 (IQR 방법)
Q1 = nim_data.quantile(0.25)
Q3 = nim_data.quantile(0.75)
IQR = Q3 - Q1
# 이상치 범위 설정
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# 이상치 탐지
outliers = nim_data[(nim_data < lower_bound) | (nim_data > upper_bound)]
# 영업일자 추출
std_dates = ibk_raw_df['기준일자']
# 그래프 그리기
plt.figure(figsize=(25, 10))
# NIM 데이터 플롯
plt.plot(std_dates, nim_data, label="NIM values")
# 이상치 값 플롯
plt.scatter(std_dates[outliers.index], outliers.values, color='r', label="Outliers", zorder=2)
# 이상치에 영업일자와 값 표시
for idx, value in outliers.items(): # 변경된 부분
plt.text(idx, value, f"{std_dates.iloc[idx]}: {value:.4f}", fontsize=10, color='black', ha='left', va='bottom')
# x축에 영업일자 30개마다 하나씩 표시
plt.xticks(ticks=range(0, len(std_dates), 30), labels=std_dates.iloc[::30], rotation=45)
# 하한과 상한 값 추가하여 타이틀 설정
plt.title(f"NIM Values with Outliers\n(하한: {lower_bound:.4f}, 상한: {upper_bound:.4f})")
plt.ylabel("NIM")
plt.legend()
plt.grid(True)
# 그래프 보여주기
plt.show()
In [23]:
# 1. NIM 값에 대한 이상치 기준 설정 (IQR 방식)
def find_outlier_dates_for_NIM(df):
Q1 = df['NIM'].quantile(0.25) # 1사분위수
Q3 = df['NIM'].quantile(0.75) # 3사분위수
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# 이상치 조건
outliers = df[(df['NIM'] < lower_bound) | (df['NIM'] > upper_bound)]
# 이상치가 있는 기준일자만 리스트로 반환
outlier_dates = outliers['기준일자'].tolist()
return outlier_dates
# 2. NIM 값에 이상치가 있는 기준일자 리스트 반환
nim_outlier_dates = find_outlier_dates_for_NIM(ibk_raw_df)
# 4. 결측치가 있는 기준일자 리스트 출력
print(f"NIM값에 이상치가 있는 기준일자 리스트: {nim_outlier_dates}")
ibk_raw_df = ibk_raw_df[~ibk_raw_df['기준일자'].isin(nim_outlier_dates)]
bok_raw_df = bok_raw_df[~bok_raw_df['기준일자'].isin(nim_outlier_dates)]
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
NIM값에 이상치가 있는 기준일자 리스트: ['2018-01-30', '2018-01-31', '2018-05-30'] IBK 데이터 크기: (1636, 24) BOK 데이터 크기: (1636, 10)
In [24]:
import seaborn as sns
import matplotlib.pyplot as plt
# 기준일자를 제외한 변수들 간의 상관관계를 크게 히트맵으로 시각화
def plot_full_variable_correlation_heatmap(df):
plt.figure(figsize=(25, 10)) # 더 큰 사이즈로 설정
sns.heatmap(df.corr(), annot=True, fmt='.2f', cmap='YlGnBu', linewidths=0.5, vmin=-1, vmax=1)
plt.title('변수들 간의 상관관계 히트맵 (기준일자 제외)')
plt.show()
# 기준일자를 제외한 나머지 변수들의 상관관계 히트맵 크게 그리기
df_without_date = ibk_raw_df.drop(columns=['기준일자'])
plot_full_variable_correlation_heatmap(df_without_date)
In [25]:
import seaborn as sns
import matplotlib.pyplot as plt
# BOK 데이터셋을 대상으로 상관관계 히트맵을 다른 색상으로 그리기
def plot_full_variable_correlation_heatmap_bok(df):
plt.figure(figsize=(25, 10)) # 더 큰 사이즈로 설정
sns.heatmap(df.corr(), annot=True, fmt='.2f', cmap='YlGn', linewidths=0.5, vmin=-1, vmax=1) # 다른 색상 적용
plt.title('BOK 변수들 간의 상관관계 히트맵 (기준일자 제외)')
plt.show()
# 기준일자를 제외한 BOK 데이터셋의 변수들 간의 상관관계 히트맵 그리기
df_without_date_bok = bok_raw_df.drop(columns=['기준일자'])
plot_full_variable_correlation_heatmap_bok(df_without_date_bok)
- 정렬 및 인덱스 재생성
In [26]:
ibk_raw_df = ibk_raw_df.sort_values(by="기준일자").reset_index(drop=True)
bok_raw_df = bok_raw_df.sort_values(by="기준일자").reset_index(drop=True)
In [27]:
# nim_news_df에 NIM이라는 컬럼 만들고 0으로 초기화하기
news_raw_df['NIM'] = 0
# 기준일자별 첫 번째 NIM 값을 가져옴 (중복 제거)
ibk_unique_df = ibk_raw_df.drop_duplicates(subset=['기준일자']).set_index('기준일자')
# 기준일자를 기준으로 news_raw_df의 NIM 값을 ibk_raw_df의 NIM 값으로 업데이트
news_raw_df['NIM'] = news_raw_df['기준일자'].map(ibk_unique_df['NIM'])
# news_raw_df에서 ibk_raw_df와 기준일자가 일치하는 데이터만 남김 (문자열 기준)
news_raw_df = news_raw_df[news_raw_df['기준일자'].isin(ibk_unique_df.index)]
# 1.nim_ibk_df에 있는 기준일자만 nim_news_df에 남기기
pd.concat([news_raw_df.head(), news_raw_df.tail()], ignore_index=True)
# 데이터 크기 확인
print(f"IBK 데이터 크기: {ibk_raw_df.shape}")
print(f"BOK 데이터 크기: {bok_raw_df.shape}")
print(f"NEWS 데이터 크기: {news_raw_df.shape}")
IBK 데이터 크기: (1636, 24) BOK 데이터 크기: (1636, 10) NEWS 데이터 크기: (39264, 4)
- Mecab 설치 :
- 1.Mecab 설치(https://islibrary.tistory.com/53)
- 2.사용자 사전 추가(https://tape22.tistory.com/6)
In [28]:
from konlpy.tag import Mecab
# Mecab 인스턴스 생성
mecab = Mecab()
# 테스트할 문장
sentence = "기업은행에서 금융 업무를 처리합니다."
# 형태소 분석
result = mecab.morphs(sentence)
# 결과 출력
print(result)
['기업은행', '에서', '금융', '업무', '를', '처리', '합니다', '.']
In [29]:
from konlpy.tag import Mecab
# 제목과 기사를 합쳐서 '뉴스' 컬럼 생성
news_raw_df['뉴스'] = news_raw_df['제목'] + ' ' + news_raw_df['기사']
# Mecab 객체 생성
mecab = Mecab()
# 품사 필터링 설정 (명사, 동사, 형용사, 외국어, 고유명사 등)
allowed_pos = [
'NNG', 'NNP', 'NNB', 'NP', 'NR', # 명사 관련
'VV', 'VA', 'VX', 'VCP', 'VCN', # 동사 및 형용사
#'MM', 'MAG', 'MAJ', 'IC', # 부사 및 감탄사
'SL' # 외국어
# 'SH', 'SN' # 한자, 숫자
]
# 불용어 리스트 정의
stopwords = ['의','가','이','은','들','는','좀','잘','걍','과','도','를','으로','자','에','와','한','하다','속','것','등','때','또','로','고','인','듯','있','아','하','게','되','수','보','않','없','나','사','주','아니','못','그','같','때문','위','두','말','그렇','다','만','요','때','년','월','일','뒤','전','후','중','때','앞','뒤','또','곳','앞','뒤']
# 형태소 분석 후 지정된 품사만 토큰화하고, 불용어 제거하는 함수 정의
def tokenize_and_remove_stopwords(text):
# Mecab으로 형태소 분석 및 품사 필터링
tokens = [word for word, pos in mecab.pos(text) if pos in allowed_pos]
# 불용어 제거
filtered_tokens = [word for word in tokens if word not in stopwords]
return filtered_tokens
# 제목과 기사에 대해 각각 토큰화 수행 및 불용어 제거
news_raw_df['뉴스_TOKEN'] = news_raw_df['뉴스'].apply(tokenize_and_remove_stopwords)
# 결과 미리보기
print(news_raw_df[['뉴스', '뉴스_TOKEN']].head())
뉴스 \
24 기업은행, 무기계약직 정규직전환 추진 무기계약직 신규 채용 없어…'준정규직 처우개선을 위한 노사 공동 선언문' 기업은행 노사는 올해 상반기 인사 이후 이른 시일 안에 무기계약직의 정규직 전환을 추진하겠다고 2일 밝혔다. 기업은행 노사는 이날 시무식에서 이런 내용을 담은 '준정규직 처우개선을 위한 노사 공동 선언문'을 발표했다. 현재...
25 IBK기업은행 2018년 시무식 개최 IBK기업은행이 2일 오전 서울 중구 을지로 본점에서 김도진 은행장과 임직원 400여명이 참석한 가운데 2018년 시무식을 열었다. 김 행장은 신년사를 통해 올해 중소기업 대출 시장과 디지털 금융 시장에서 일대 격전이 예상된다며 ▲중기금융시장, 압도적인 차별로 선도 ▲디지털 혁신인재 1만명 육성...
26 [신년사] 김도진 기업은행장 가계대출 규제와 정부의 중소기업 지원정책 강화로 경쟁은행 모두가 중기대출 시장을 정조준하고 있습니다. 또한, 디지털 금융 분야는 인재확보와 혁신기술 도입, 플랫폼 구축 등 모든 영역으로 경쟁이 확산하고 있습니다. 저는 올해도 변함없이 고객과 현장을 가장 중요한 경영의 축으로 삼을 것입니다. 고객...
27 시중은행 새해 화두는 ‘디지털 강화’ 이대훈 NH농협은행장 김도진 IBK기업은행장 이대훈 NH농협은행장은 신년사를 통해 “미래를 대비한 새로운 사업기반을 찾아야 한다”며 “이를 위해 핀테크를 활용한 이종업종과의 융·복합을 추진할 것”이라고 말했다. 농협은행은 지주 차원에서 디지털금융 부문 조직과 디지털금융 최고책임자(CDO) 자리를...
28 주요 금융지주 회장·은행장들 신년사 속 경영 키워드는 김도진 IBK기업은행장은 "올해 중소기업 대출시장과 디지털 금융 시장에서 일대 격전이 예상된다"면서 "중기금융시장을 압도적인 차별화로 선도할 것이며, 디지털 혁신인재 1만명을 육성할 것"이라고 밝혔다. 더불어 동반자 금융을 통한 역동적인 창업시장을 조성한다는 계획이다. 은성수 한국수출입은행장은...
뉴스_TOKEN
24 [기업은행, 무기, 계약직, 정규직, 전환, 추진, 무기, 계약직, 신규, 채용, 정규직, 처우, 개선, 노사, 공동, 선언문, 기업은행, 노사, 올해, 상반기, 인사, 이후, 시일, 안, 무기, 계약직, 정규직, 전환, 추진, 기업은행, 노사, 이날, 시무식, 내용, 담, 정규직, 처우, 개선, 노사, 공동, 선언문, 발표]
25 [IBK, 기업, 은행, 시무식, 개최, IBK, 기업, 은행, 오전, 서울, 중구, 을지로, 본점, 김도진, 은행장, 임직원, 참석, 가운데, 시무식, 열, 김, 행장, 신년사, 올해, 중소기업, 대출, 시장, 디지털, 금융, 시장, 격전, 예상, 중기, 금융, 시장, 압도, 차별, 선도, 디지털, 혁신, 육성]
26 [신년사, 김도진, 기업, 은행장, 가계, 대출, 규제, 정부, 중소기업, 지원, 정책, 강화, 경쟁, 은행, 모두, 중기, 대출, 시장, 정조준, 디지털, 금융, 분야, 인재, 확보, 혁신, 기술, 도입, 플랫폼, 구축, 영역, 경쟁, 확산, 저, 올해, 고객, 현장, 중요, 경영, 축, 삼, 고객]
27 [시중, 은행, 새해, 화두, 디지털, 강화, 이대훈, NH, 농협, 은행장, 김도진, IBK, 기업, 은행장, 대훈, NH, 농협, 은행장, 신년사, 미래, 대비, 사업, 기반, 찾, 핀테크, 활용, 이종업, 종과, 융, 복합, 추진, 농협, 은행, 지주, 차원, 디지털, 금융, 부문, 조직, 디지털, 금융, 최고, 책임자, CDO, 자리]
28 [주요, 금융, 지주, 회장, 은행장, 신년사, 경영, 키워드, 김도진, IBK, 기업, 은행장, 올해, 중소기업, 대출, 시장, 디지털, 금융, 시장, 격전, 예상, 중기, 금융, 시장, 압도, 차별, 선도, 디지털, 혁신, 육성, 더불, 동반자, 금융, 역동, 창업, 시장, 조성, 계획, 성수, 한국, 수출입, 은행장]
In [30]:
from konlpy.tag import Mecab
# Mecab 객체 생성
mecab = Mecab()
# 모든 문장에서 부사 추출
adverbs = news_raw_df['뉴스'].apply(lambda sentence: [word for word, pos in mecab.pos(sentence) if pos in ('MAG', 'MM')])
# 결과를 리스트 형태로 합치기 (중복 제거 가능)
all_adverbs = set([adverb for sublist in adverbs for adverb in sublist])
print("부사 목록:", all_adverbs)
부사 목록: {'무사히', '다행히', '최근', '하도', '빡', '흔들', '응당', '일시', '펄펄', '그대로', '일찍', '유세', '활짝', '즉시', '공손히', '만일', '묵묵히', '으쓱', '속속', '유심히', '두', '상세히', '신중히', '걸핏하면', '절로', '곳', '실제', '버럭', '일제히', '한꺼번에', '뽀득', '다시금', '심지어', '갈팡질팡', '미처', '캉', '전', '발칵', '별반', '주렁주렁', '오직', '함게', '횡', '낱낱이', '서서히', '하하하', '꽉', '아연', '당장', '시끌시끌', '납작', '미끌', '깜깜', '다닥다닥', '흥', '드디어', '깨끗이', '더', '꼼꼼히', '째깍째깍', '쉬쉬', '쉽사리', '콕', '감감', '단지', '죽', '시급히', '똑', '제각각', '어정쩡', '내나', '땡', '칭', '덜', '빠짝', '동', '당연', '재빨리', '혹여나', '와이', '설마', '즈', '덜덜', '사실상', '팩', '지', '이내', '매', '우선', '즉각', '혹여', '자못', '박', '반드시', '혹', '깜작', '이리', '그나마', '썩', '대개', '그간', '특별히', '자세히', '화르르', '긴밀히', '다시', '당해', '안절부절', '웽', '시시각각', '인제', '정교히', '대강', '근', '요는', '싱글벙글', '나란히', '때마침', '척', '뚜렷이', '힘껏', '비교적', '참', '유달리', '이미', '다른', '쌩쌩', '맘껏', '가급적', '잔뜩', '거뜬히', '일체', '점점', '명실공히', '시원히', '한결같이', '허둥지둥', '벌벌', '여러모로', '사사건건', '성실히', '저리로', '지금껏', '단연', '마침', '뚝딱', '바삐', '똘똘', '다분히', '석', '하루빨리', '항상', '연차', '되려', '최대한', '도대체', '감히', '다름없이', '만약', '부랴부랴', '엄밀히', '한번', '모두', '뱅', '관계없이', '팽팽히', '과연', '그냥', '모조리', '마이', '매사', '마음대로', '골고루', '온갖', '훌쩍', '편히', '별', '서너', '거침없이', '턱', '짠', '전혀', '척하면', '아낌없이', '여실히', '튼튼히', '상관없이', '만지작', '들썩', '착착', '들쑥날쑥', '거듭', '각자', '텅', '터무니없이', '맘대로', '당당', '탕', '팅', '동동', '그리', '너도나도', '깊숙이', '빈틈없이', '득', '혹시나', '단단히', '머뭇', '특히', '두세', '이어', '생생', '깜짝', '광', '돌연', '이즘', '뎅', '텅텅', '충실히', '무난히', '캄캄', '진정', '차곡차곡', '아울러', '쏙', '거푸', '분주히', '그렇게', '실로', '쏙쏙', '철철', '별로', '쪼', '없이', '대폭', '핑', '덜컹', '시름시름', '제발', '제우', '만', '무려', '바이', '마냥', '고의로', '내내', '월등', '하여금', '아마', '꼬박꼬박', '탈탈', '월', '스스로', '기연', '아슬아슬', '쿠', '어렵사리', '호락호락', '그만큼', '가까이', '상당히', '원활히', '올', '매주', '팍팍', '바투', '오래', '당연히', '여지없이', '울퉁불퉁', '직접', '척척', '유독', '한가득', '아주', '슬쩍', '어제', '뚜렷히', '뒷', '끙끙', '오로지', '긍긍', '흔들흔들', '여신', '찔끔', '아무리', '이따금', '어김없이', '형편없이', '가만히', '하상', '어떻게', '일대', '내', '통틀어', '굉장히', '종종', '벅', '단번에', '어찌', '솔직히', '여간', '당초', '적잖이', '가령', '이제나저제나', '모', '머지않아', '언제든지', '잘못', '단숨에', '쑥쑥', '빨리', '견고히', '갈수록', '손수', '금', '연일', '널리', '몸소', '엎치락뒤치락', '자연스레', '윙', '언제', '연', '제', '획기적으로', '휘', '먼저', '멀리', '대단히', '헝', '크', '여러', '빠짐없이', '대거', '그제야', '곧', '꿈쩍', '쯔', '곰곰히', '얼마큼', '마음껏', '실지', '그', '금방', '급격히', '늘', '스무', '바로', '꽤', '슬그머니', '간단히', '끝없이', '딩', '사뭇', '아무래도', '푹', '간신히', '돈독히', '차츰', '각', '게다가', '두근두근', '몰래', '많이', '남몰래', '고루', '매차', '기껏', '길래', '뽀', '비단', '후딱', '사실', '새삼', '여느', '갑작스레', '되레', '모로', '번번이', '그저', '채', '탁', '언젠가', '여전히', '통째', '뭔', '가물가물', '상호', '내일', '톡톡', '실은', '제대로', '간간이', '설사', '새', '모레', '현재', '해마다', '껑충', '어느새', '차곡', '결국', '혹시', '자연히', '고사하고', '뒤숭숭', '반짝', '이토', '싱글', '소중히', '딴', '젤', '막힘없이', '마구', '다', '잠시', '꼼짝', '뚜벅뚜벅', '삐걱', '마무리', '모처럼', '한껏', '무작정', '힘없이', '명확히', '반짝반짝', '쿨', '끝내', '매일', '두어', '발벗고', '현저히', '높이', '픽', '솔솔', '아예', '빵빵', '매달', '장장', '강도높게', '우', '매우', '당근', '째깍', '화들짝', '자꾸', '쿠르', '쑥', '정', '그제', '매해', '제일', '매끼', '연방', '딱히', '욜로', '성시', '우뚝', '찌', '일부러', '한발', '오락가락', '그동안', '달리', '익히', '처음', '그만', '필연', '당', '주춤', '매월', '멋대로', '티', '엄정히', '꿈틀', '빈번히', '쿄', '흡사', '간절히', '따로', '연거푸', '이제', '틈틈이', '얼마나', '뚝뚝', '고스란히', '애초', '맥없이', '밍기적', '단호히', '억지로', '겹겹이', '분명히', '제멋대로', '낮', '맨', '다급히', '모다', '나몰라라', '멍', '홀로', '쇳', '절실히', '예외없이', '한없이', '부득이', '저', '곧바로', '팽팽', '헉헉', '유난히', '장차', '푸', '너무나', '고작', '왈', '본', '이냥', '분명', '걸핏', '이대로', '가뜩이나', '틱', '방긋', '신속히', '재차', '계속', '쓱', '격렬히', '연신', '활발히', '극진히', '마저', '통상', '조용히', '조만간', '말없이', '아무런', '지금', '안', '더불어', '우수수', '좀처럼', '진작', '닥', '요', '차일피일', '무조건', '하나하나', '전부', '갑자기', '오늘', '올해', '한참', '수', '씨이', '과감히', '아랫', '우르르', '행결', '생전', '가만', '이래저래', '뜻대로', '톡톡히', '수군수군', '면밀히', '마치', '그런', '굳이', '두루', '물론', '지극히', '조목조목', '삭', '무지', '똑같이', '잘', '직', '어서', '자릿', '키리', '첫', '강동', '뾰족', '출렁', '사뿐', '완전', '활활', '각별히', '흔쾌히', '조금', '빙그레', '가로', '멀찌감치', '예컨대', '고', '긴급히', '츄', '훨훨', '제법', '딱', '대체로', '마주', '떡', '뜬금없이', '은근슬쩍', '폭', '강력히', '너무', '부러', '마침내', '대략', '어차피', '꾹', '윗', '한층', '약', '밀접히', '당분간', '성큼', '영', '삐그덕', '일병', '현', '완전히', '함부로', '사수', '풋', '적어도', '아마도', '대놓고', '잠깐', '또', '니', '슬금슬금', '초', '때로', '괜히', '막상', '간간', '꽁꽁', '주로', '가득', '매번', '문턱', '충분히', '정말', '절대', '홀짝', '술술', '가뿐히', '월등히', '곧장', '평생', '왜', '엄연히', '한', '톡', '가까스로', '속절없이', '벌써', '한두', '쵸', '어영부영', '카', '상금', '모든', '엄격히', '되게', '삐끗', '변함없이', '무척', '온전히', '공히', '턱턱', '수상히', '꼼꼼', '털썩', '애먼', '선뜻', '일단', '다소', '온이', '꾸준히', '부쩍', '갓', '역', '진창', '어느', '분분히', '어떤', '통통', '쿡', '흔히', '무슨', '순', '감지덕지', '날로', '미리', '아랑곳없이', '방금', '눈치껏', '뻘뻘', '차라리', '열심히', '서로', '급기야', '찰랑찰랑', '든든히', '살랑', '자주', '킹', '급속히', '조용', '앙', '가리로', '요즘', '펄쩍', '실제로', '못', '출발', '욱', '즉석', '가장', '더더욱', '진짜', '새로', '종금', '싹', '가위', '새로이', '때이른', '무턱대고', '이제야', '소홀히', '조심스레', '멀다하고', '무진', '펑펑', '도로', '도저히', '속속들이', '허겁지겁', '확고히', '점차', '탈락', '흐지부지', '허우적', '함', '와르르', '적절히', '깊이', '쌩', '같이', '거꾸로', '실상', '보다', '옛', '미적미적', '워낙', '빙', '하나같이', '함께', '비록', '정작', '앞서', '저리', '쏠쏠히', '각각', '오랜', '촘촘히', '좀체', '이어서', '왕창', '확연히', '자구', '단', '영원히', '두둑이', '기왕이면', '수시로', '네', '갸우뚱', '어쩐지', '아직', '혼자', '꼬꼬', '엄중히', '그토록', '일일이', '마땅히', '발끈', '어느덧', '모모', '헹', '팡팡', '양', '자칫', '찬찬히', '세', '철저히', '슬슬', '투', '굳건히', '멈칫', '외려', '웬', '부글부글', '파', '주', '타', '모락모락', '큐', '저마다', '유례없이', '한바탕', '간당간당', '급히', '깊숙히', '겨우', '우물쭈물', '불쑥', '객관적으로', '넘', '조마조마', '쾅', '쉬', '줄곧', '끊임없이', '일찌감치', '온', '불과', '포', '짝', '오죽', '말랑말랑', '자그마치', '공고히', '휘청', '매년', '거의', '대체', '회', '우정', '턱없이', '쭉쭉', '달달', '일찍이', '확실히', '은근히', '찰칵', '버젓이', '팍', '극명히', '차근차근', '양대', '꼭', '흔', '각기', '극히', '그야말로', '꼼짝없이', '더욱', '결코', '아까', '확', '훨씬', '때아닌', '늘상', '축', '심히', '정확히', '이', '저런', '향후', '다달이', '한결', '찔끔찔끔', '따는', '뚝', '이루', '휙', '한창', '꿀꺽', '북적', '소폭', '봉', '여직', '일절', '쏠쏠', '첨', '웨', '통', '약간', '차차', '글루', '천천히', '단순히', '총', '원래', '못내', '살짝', '매시', '간편히', '이렇게', '당당히', '언제나', '조속히', '황', '나날이', '바짝', '몇몇', '얼만큼', '부단히', '저쪽', '좀', '헌', '이런', '착', '후끈', '원만히', '일양', '줄줄이', '옥신각신', '몇', '썰렁', '아무', '탄탄히'}
In [31]:
from konlpy.tag import Mecab
# Mecab 객체 생성
mecab = Mecab()
# 모든 문장에서 부사 추출
adverbs = news_raw_df['뉴스'].apply(lambda sentence: [word for word, pos in mecab.pos(sentence) if pos in ('MAJ', 'IC')])
# 결과를 리스트 형태로 합치기 (중복 제거 가능)
all_adverbs = set([adverb for sublist in adverbs for adverb in sublist])
print("감탄사 목록:", all_adverbs)
감탄사 목록: {'무', '엑', '오오', '아우', '오케이', '제기', '땡큐', '츠', '야', '아이', '개서', '이제', '워', '그람', '근데', '그러니까', '한편', '두', '웅', '예스', '헥', '이러다', '이러면', '아크', '하', '햇', '쓰', '하지만', '저', '헉', '이아', '푸', '에', '역시', '흥', '스', '후유', '그래서', '자', '옛', '원', '히', '안녕', '즐', '모', '아니', '흑', '또한', '기', '마스크', '아자', '글씨', '오마', '금', '단', '씨이', '네', '체', '또는', '오이', '우예', '아리', '아소', '피', '악', '움', '와우', '아이유', '헤이', '혹은', '크', '아뿔싸', '이야', '아니면', '헴', '아유', '음', '응', '도리어', '이씨', '그러나', '어쨌든', '그', '어', '및', '여하튼', '행이', '햐', '참', '서', '따', '그렇지만', '라라', '와', '머지', '글쎄', '뭐', '익', '앗', '하이', '여', '왜냐하면', '에이', '여보', '누', '열차', '에이치', '헤', '그래도', '마', '즉', '아', '이른바', '어쩌다', '후후', '다만', '호', '엄', '아이씨', '휴', '커', '그럼', '이', '예', '케', '잉', '치', '더욱이', '까', '오메', '머', '카', '그리고', '더구나', '워라', '오', '막', '왕', '그러면', '팅', '읍', '픽', '위', '유', '우', '아웅', '더군다나', '오히려', '허', '그러므로', '따라서', '라', '씨', '파이팅', '와따', '으', '그런데'}
In [32]:
from konlpy.tag import Mecab
# Mecab 객체 생성
mecab = Mecab()
# 모든 문장에서 부사 추출
adverbs = news_raw_df['뉴스'].apply(lambda sentence: [word for word, pos in mecab.pos(sentence) if pos in ('SL')])
# 결과를 리스트 형태로 합치기 (중복 제거 가능)
all_adverbs = set([adverb for sublist in adverbs for adverb in sublist])
print("외국어 목록:", all_adverbs)
외국어 목록: {'ATDPS', 'ADNOC', 'Community', 'project', 'his', 'According', 'ciio', 'Jump', 'successful', 'misconduct', 'Jhoo', 'WEH', 'Hybrid', 'Colon', 'health', 'Kb', 'CPMI', 'Lente', 'Space', 'Suite', 'Transaction', 'Track', 'Programing', 'Ex', 'Mix', 'Gurugram', 'MYCAR', 'gag', 'EPS', 'Play', 'He', 'andrew', 'Institutional', 'best', 'DVP', 'STA', 'PLA', 'Responsibility', 'Sharing', 'GLOBAL', 'Watch', 'ICE', 'SSIT', 'LX', 'issue', 'suspends', 'WTI', 'MEMO', 'FRN', 'returns', 'WFCC', 'KTWIZ', 'Your', 'Lounge', 'russa', '·', 'CSX', 'REC', 'Text', 'ONE', 'RV', 'Autumn', 'i', 'SNS', 'Interface', 'offers', 'line', 'BOOX', 'Pearl', 'Supplier', 'appointed', 'CB', 'Great', 'EC', 'Bee', 'LLM', 'AMRO', 'government', 'pak', 'CGB', 'PanOcean', 'billion', 'BaF', 'NGFW', 'TCE', 'CS', 'unfair', 'Bye', 'FC', 'IBKC', 'One', 'CBCNEWS', 'when', 'COE', 'General', 'CNBJOURNAL', 'failing', 'real', 'KOAFEC', 'past', 'nCov', 'Bro', 'Cariflex', 'five', 'Musique', 'DRX', 'BMW', 'Express', 'NZD', 'CBI', 'proof', 'JDC', 'BSA', 'DSL', 'receiving', 'ISS', 'IDX', 'day', 'CNN', 'private', 'SDS', 'Robot', 'EVISU', 'ITMO', 'Usance', 'SLL', 'Josef', 'https', 'ENM', 'PP', 'SSBT', 'LF', 'Ethics', 'PSG', 'IIF', 'ksh', 'align', 'DiY', 'AUX', 'MPRO', 'ENS', 'MOF', 'l', 'LEK', 'Best', 'merger', 'steel', 'Commission', 'ticketmonster', 'incidents', 'sportschosun', 'Mafia', 'Saif', 'MLCC', 'DKK', 'management', 'tech', 'LTD', 'search', 'was', 'viewers', 'jms', 'Cheer', 'preparations', 'PLUS', 'an', 'KOMBI', 'January', 'Indonesia', 'SAW', 'Xscan', 'Posco', 'Untact', 'Hospital', 'Unboxing', 'Games', 'KCB', 'activities', 'Retirement', 'spa', 'Score', 'Born', 'firm', 'it', 'ESS', 'HCIH', 'UB', 'Ent', 'Plaza', 'YONHAPNEWS', 'incruit', 'bottom', 'PRB', 'Yonhap', 'benefit', 'Partnership', 'WINTER', 'Tu', 'GB', 'Buffer', 'AutoRegression', 'v', 'against', 'their', 'Communication', 'KGC', 'today', 'Overair', 'NJ', 'DS', 'AMC', 'Gamser', 'info', 'Asiamoney', 'de', 'Crew', 'BU', 'Woori', 'IBKNY', 'ESI', 'ETS', 'pjh', 'Indian', 'Keo', 'Industry', 'UST', 'sectors', 'Cook', 'lazertinib', 'draws', 'Box', 'hefty', 'Jeju', 'Kudratov', 'tag', 'personal', 'CU', 'odd', 'League', 'EOS', 'WeBus', 'Jae', 'SKIET', 'KMH', 'CNS', 'Episode', 'GPS', 'GP', 'take', 'APCCAL', 'expand', 'ABC', 'Leading', 'Super', 'WATCHA', 'capitalization', 'ohmynews', 'MBC', 'QRNG', 'helpful', 'Star', 'push', 'Coming', 'Doosan', 'BOOKiT', 'form', 'EXIT', 'LSF', 'until', 'ETC', 'instgram', 'YSL', 'interest', 'EMI', 'WK', 'Fit', 'Meme', 'remained', 'CheilWorldwide', 'ywshin', 'Future', 'spring', 'Consultative', 'designs', 'Klaytn', 'ikpark', 'TALKS', 'vote', 'uTradeHub', 'estate', 'Jason', 'corp', 'RETC', 'Bond', 'Money', 'KB', 'ALL', 'Millennials', 'HOLD', 'shdmf', 'EXPERT', 'KTP', 'Authority', 'KAC', 'other', 'SF', 'JT', 'Member', 'Corbat', 'blocked', 'letter', 'mangusta', 'U', 'WEO', 'IFIS', 'UPS', 'TEU', 'release', 'Friday', 'ARS', 'Slice', 'IPM', 'ratings', 'month', 'Ventures', 'Henry', 'GC', 'Amundi', 'PPR', 'funds', 'RG', 'PTE', 'Stress', 'possible', 'bp', 'victories', 'P', 'digital', 'Greenwich', 'MMT', 'apnews', 'two', 'Challenge', 'DAY', 'FAANG', 'Sarl', 'CAR', 'Dae', 'IMO', 'CBS', 'between', 'KOFIH', 'KCCI', 'RFI', 'Brexit', 'rock', 'tockenized', 'Ulsan', 'Bruno', 'KPOK', 'save', 'partner', 'PIB', 'BO', 'BoAML', 'ING', 'felizkj', 'alwaysame', 'Beverage', 'GM', 'CSR', 'Request', 'Libor', 'superdoo', 'GIL', 'forest', 'UCLA', 'ECB', 'CIR', 'Exchanged', 'Machine', 'PNG', 'goGlobal', 'PHILIPPINE', 'Credit', 'mbc', 'TS', 'most', 'Lieu', 'xanadu', 'Kibo', 'DinnoLab', 'leadship', 'green', 'xconfind', 'PF', 'ETH', 'part', 'School', 'sharp', 'stock', 'Composite', 'Diop', 'initiative', 'Governance', 'PI', 'ABSTB', 'Only', 'Inside', 'back', 'Team', 'tip', 'Base', 'SPOTV', 'senior', 'April', 'Exit', 'LESS', 'currency', 'Rakuten', 'ALIO', 'NIGAA', 'CBSI', 'FIFA', 'FORUM', 'IRR', 'Korean', 'KAI', 'THE', 'GD', 'leehs', 'list', 'inflation', '×', 'TU', 'AA', 'MegaCARTI', 'make', 'nomination', 'JPT', 'H', 'GPT', 'FT', 'Gold', 'RPA', 'Enkhtubshin', 'minor', 'RF', 'Knee', 'Russian', 'SI', 'Choice', 'Personal', 'ymh', 'aspx', 'BUMI', 'CGT', 'ODM', 'TPG', 'reasonably', 'NCC', 'TNS', 'Build', 'suspected', 'year', 'stylelomo', 'purpose', 'Cambodia', 'WEHAGO', 'downward', 'BOK', 'KDN', 'Mover', 'establish', 'Fan', 'there', 'instagram', 'Quick', 'majority', 'Computer', 'Seller', 'sydkjs', 'STEAM', 'expansion', 'Facebook', 'SQI', 'CIS', 'CRM', 'diluted', 'rich', 'helping', 'individual', 'research', 'PLAZA', 'mini', 'csunell', 'Cell', 'KH', 'SSI', 'close', 'aymsdream', 'governor', 'Phèdre', 'SOFR', 'Xcopri', 'Energy', 'Currency', 'Change', 'Place', 'Kangwonland', 'Review', 'jumped', 'TradeHub', 'MLB', 'businesses', 'loan', 'DONGBU', 'Y', 'Attorney', 'Airlines', 'VisionFund', 'governance', 'Inno', 'exercise', 'Degradation', 'mironj', 'KOREA', 'KGGTF', 'DLG', 'WON', 'SMC', 'Internet', 'Arranger', 'QUICK', 'Normale', 'NIE', 'TIF', 'Vault', 'Mastercard', 'kosaf', 'Desk', 'KMAC', 'NICE', 'energy', 'Laundering', 'George', 'nowwego', 'combinator', 'cjg', 'leave', 'ASP', 'SUPEX', 'Tiem', 'NBFC', 'CORE', 'MOT', 'NXC', 'changing', 'success', 'craze', 'Start', 'InsighT', 'Statistics', 'Put', 'AFP', 'hacking', 'Alcoholic', 'Ocean', 'innovation', 'theme', 'AMD', 'presence', 'KKUJUN', 'Dong', 'Proof', 'UP', 'foray', 'Eximbank', 'Chairs', 'UNEP', 'UFJ', 'VJ', 'ACCC', 'consortium', 'wspark', 'Artificial', 'SK', 'knnews', 'CEO', 'BDC', 'JSC', 'artpark', 'provider', 'Angel', 'CD', 'Sponsored', 'mtkht', 'VISA', 'NFC', 'over', 'want', 'circlemin', 'OS', 'feedback', 'KAB', 'capital', 'scholarships', 'Contact', 'SEC', 'Fresh', 'MVP', 'UNIST', 'ECH', 'Sovannorak', 'refers', 'becoming', 'Party', 'star', 'marking', 'Loany', 'Copec', 'Ahmed', 'Industrial', 'EBITDA', 'Abiy', 'gutjy', 'Leo', 'b', 'institutional', 'Antanium', 'ksm', 'chocrystal', 'Normality', 'Mall', 'eq', 'Why', 'authorities', 'KODA', 'IFT', 'young', 'MRI', 'SR', 'Exclusive', 'quality', 'Women', 'LPGA', 'Fitch', 'Qatar', 'Practice', 'tie', 'WIS', 'foreign', 'khkim', 'DR', 'Huang', 'ljin', 'grad', 'pressured', 'Plogging', 'sylee', 'clerks', 'IDP', 'CBAM', 'IPTV', 'WIKI', 'non', 'held', 'Suisse', 'motivational', 'Heart', 'HTS', 'KorElecTerm', 'CCSI', 'DTI', 'KIND', 'hyung', 'CK', 'CLINIC', 'CSS', 'TLTRO', 'iOS', 'tCO', 'Feul', 'Since', 'Vietnam', 'filer', 'Rate', 'Federation', 'earlier', 'laborplus', 'Caltex', 'CBCD', 'Project', 'food', 'Du', 'LA', 'GKL', 'ajunews', 'sanha', 'unnerving', 'Goals', 'AVM', 'International', 'photothink', 'Aside', 'Policy', 'lovus', 'PX', 'Paperless', 'plans', 'Offline', 'j', 'Closed', 'Toolkit', 'OTS', 'BANK', 'Monetary', 'Awards', 'ibkc', 'inspires', 'Through', 'its', 'investors', 'CG', 'KCGI', 'newskija', 'iBK', 'MSCI', 'KOSPI', 'days', 'secretary', 'marcel', 'reliable', 'current', 'co', 'one', 'CQI', 'SPO', 'appointment', 'justice', 'MSPP', 'protest', 'NASA', 'Outlook', 'Perry', 'Original', 'handsome', 'notch', 'warns', 'WB', 'apos', 'Sumishin', 'IMF', 'CEV', 'Trade', 'KPMG', 'Celly', 'merge', 'demo', 'SV', 'Feb', 'citing', 'α', 'ISSB', 'World', 'losses', 'Daily', 'potential', 'DLF', 'able', 'PAYCO', 'For', 'BIS', 'FLEX', 'WWF', 'Generali', 'transformation', 'promote', 'inews', 'Ontact', 'run', 'Youth', 'FINMA', 'GMoMA', 'skyedaily', 'Hold', 'ing', 'license', 'Chef', 'Online', 'Identity', 'PET', 'Decenter', 'Fis', 'gradually', 'profit', 'American', 'zissue', 'ICR', 'Processor', 'Specifically', 'No', 'Shades', 'KATA', 'partnership', 'Doom', 'IPS', 'MapSea', 'MLA', 'hyunmin', 'North', 'seephoto', 'INDAWA', 'G', 'recapitalize', 'SKYPASS', 'leekb', 'Carbon', 'Award', 'down', 'App', 'WeWork', 'sent', 'boom', 'AMOREPACIFIC', 'charge', 'Away', 'MSME', 'Chemical', 'From', 'consolidated', 'FPSB', 'CtrlS', 'Society', 'Nature', 'SEOUL', 'Customer', 'financing', 'alarm', 'PO', 'kkna', 'tf', 'raising', 'JOB', 'tril', 'providing', 'COWAY', 'NCAA', 'ODA', 'Group', 'leadership', 'UntacT', 'KWPF', 'Asset', 'Metarverse', 'next', 'esg', 'wuchoi', 'Indonesian', 'DRB', 'munonam', 'RA', 'Cher', 'paperless', 'shared', 'coinlocker', 'SMBC', 'OFF', 'OK', 'skyedialy', 'hands', 'Corporation', 'Workflow', 'grow', 'whan', 'bridge', 'MATERIALS', 'implement', 'MUFG', 'gsm', 'X', 'Privilege', 'AgensGraph', 'Networking', 'MILEnjoy', 'GAM', 'BASE', 'Month', 'GLN', 'Prevention', 'Jin', 'capacity', 'SIRIUS', 'CVS', 'Extended', 'OEM', 'center', 'KSSB', 'chc', 'CGFS', 'them', 'SARS', 'List', 'EO', 'Aa', 'Ni', 'HSD', 'SWC', 'NASH', 'acquire', 'Magazine', 'CISO', 'venture', 'Day', 'safety', 'chosun', 'VAN', 'mna', 'ETF', 'PBA', 'Biz', 'record', 'SKT', 'Later', 'RE', 'New', 'Forfaiting', 'GDP', 'Goldfajn', 'hij', 'ej', 'IDB', 'SA', 'Retail', 'mln', 'Electronics', 'NORILLC', 'TPS', 'delivered', 'SCMP', 'launching', 'Drean', 'weeks', 'Dooray', 'kch', 'Export', 'stoweon', 'CTK', 'made', 'relief', 'doing', 'CFTC', 'Is', 'Hyperledger', 'last', 'has', 'this', 'NBC', 'components', 'html', 'LOI', 'Asian', 'Sin', 'DIY', 'lock', 'amivantamab', 'KIA', 'local', 'plan', 'Wei', 'ays', 'KUOS', 'Nations', 'ER', 'BOKFSC', 'PQ', 'especially', 'Net', 'IRA', 'TA', 'Stay', 'used', 'Gettyimagesbank', 'Vs', 'ENA', 'provisions', 'Cockpit', 'Shared', 'MCN', 'nh', 'ABL', 'Stars', 'grant', 'DaaS', 'strong', 'BHC', 'TBML', 'later', 'youngtiger', 'Fixed', 'SAC', 'ASEAN', 'HDFC', 'Univ', 'manufacturer', 'three', 'AAA', 'Pass', 'skitsch', 'Iranian', 'Rule', 'KOTA', 'Infinity', 'machine', 'KAIST', 'flounder', 'w', 'UNDP', 'Knowledge', 'Demo', 'MATE', 'Car', 'Unicorn', 'FDS', 'Present', 'months', 'HK', 'JLPS', 'industry', 'AT', 'CSO', 'outbreak', 'Repowering', 'TIPS', 'SSAFY', 'System', 'Meet', 'CMS', 'series', 'Exectutives', 'MATRIX', 'KDR', 'sERP', 'jungkim', 'webpoem', 'EDCF', 'surged', 'NH', 'medical', 'years', 'LCDC', 'perpetual', 'ITX', 'COMNEXT', 'crypto', 'mis', 'GH', 'TCO', 'Detection', 'hacho', 'inspection', 'FunFun', 'kovopr', 'cement', 'ABF', 'COVID', 'Hob', 'FAST', 'People', 'South', 'Doing', 'CCyB', 'XBOX', 'STEEL', 'finmall', 'VOGO', 'Off', 'PAGODA', 'sowon', 'Southeast', 'Hein', 'GIO', 'Chem', 'Water', 'MOR', 'naver', 'DNV', 'SISAMAGAZINE', 'medium', 'ago', 'aliexpress', 'DGI', 'FIMA', 'Russia', 'Week', 'REIZ', 'Age', 'PSD', 'Income', 'KSD', 'dkkim', 'motto', 'Friend', 'seizure', 'LACP', 'I', 'Tool', 'Service', 'Date', 'Omaliss', 'finalize', 'commissions', 'JP', 'Spasms', 'BMI', 'Wednesday', 'as', 'mk', 'without', 'in', 'MW', 'under', 'consumers', 'Investment', 'EU', 'Peace', 'check', 'UOB', 'OE', 'aftermath', 'gojongwin', 'SJM', 'Ajban', 'SSG', 'RTGS', 'kbasket', 'chak', 'Fun', 'Helen', 'Boom', 'HMI', 'cooperation', 'NYDFS', 'SICPA', 'Dual', 'JBIC', 'Facility', 'APAC', 'TNFD', 'role', 'Asia', 'Things', 'Committee', 'Masahiko', 'RFMS', 'Products', 'RegTech', 'FINDA', 'Metal', 'CABEI', 'recovered', 'fallout', 'Tokai', 'ethics', 'Storage', 'Reality', 'TR', 'IoT', 'CFO', 'ST', 'nation', 'Academy', 'RUN', 'Environmental', 'up', 'HIGH', 'rate', 'mjk', 'Pianissimo', 'SO', 'Jerzy', 'acquisitions', 'violated', 'FRONT', 'ISA', 'only', 'gets', 'fotogyoo', 'downgrade', 'Navigating', 'NB', 'NICEabc', 'LP', 'PBI', 'scale', 'Get', 'Winner', 'kyunghyang', 'meeting', 'US', 'Artprice', 'hjlee', 'Monster', 'surpassed', 'HAAH', 'ML', 'IGS', 'FUN', 'more', 'Festival', 'EZ', 'trend', 'THAAD', 'shpark', 'Interactive', 'gmail', 'Discovery', 'CKD', 'WF', 'National', 'Holdings', 'CDS', 'Round', 'film', 'FADU', 'cap', 'As', 'Heroes', 'XAI', 'NW', 'ssimo', 'Wong', 'NWS', 'Option', 'SaaS', 'Set', 'HUG', 'approved', 'Illinois', 'MOU', 'KOVO', 'NCM', 'Factoring', 'Primary', 'persona', 'should', 'SSANGYONGCNE', 'seodw', 'MBO', 'Oil', 'vticket', 'income', 'pay', 'business', 'MOMENT', 'Eastern', 'PCB', 'PDA', 'son', 'HKN', 'advance', 'supporting', 'palletizing', 'urges', 'Peer', 'IFF', 'at', 'abc', 'ES', 'Practitioner', 'Manah', 'trillion', 'sale', 'PRO', 'all', 'Barclays', 'KPGA', 'SCK', 'PBR', 'Desktop', 'wan', 'sbg', 'SBA', 'Leverage', 'IR', 'Class', 'pic', 'arm', 'Jane', 'Incitygram', 'Cost', 'parachute', 'Europe', 'Launch', 'WIN', 'KaPick', 'Data', 'Crypto', 'Resilience', 'Flybit', 'NSFR', 'LB', 'vsGS', 'property', 'ADT', 'Interchange', 'Even', 'Fed', 'fixed', 'EarlyTect', 'GGS', 'requires', 'ISO', 'kbs', 'Bang', 'biz', 'PayPal', 'Liiv', 'maintain', 'View', 'KEITI', 'bink', 'MONEY', 'pilly', 'EGS', 'matching', 'hee', 'ACE', 'Flagship', 'cole', 'MOOMIN', 'MAU', 'Management', 'jpg', 'bhc', 'purchase', 'NOK', 'Pet', 'Hypothesis', 'anti', 'Medium', 'Image', 'FDI', 'Sanghi', 'Anthony', 'SUDARMA', 'NIV', 'agile', 'Covid', 'percent', 'lender', 'Mandated', 'Hyun', 'Point', 'Multi', 'EXperience', 'operation', 'V', 'CM', 'Based', 'DREAM', 'thunder', 'confirmed', 'based', 'Camera', 'QuB', 'photolee', 'Next', 'SDAMC', 'EBRD', 'Shinhan', 'ontact', 'corporate', 'AML', 'rules', 'Air', 'SDC', 'Greater', 'NBIM', 'trip', 'Statista', 'BIG', 'heeva', 'ynalee', 'APEC', 'XV', 'MBM', 'D', 'BIZ', 'hby', 'freepik', 'Facilitation', 'IN', 'Hall', 'SKB', 'www', 'TEN', 'around', 'Provisioning', 'Sierra', 'Passport', 'ECOS', 'period', 'track', 'LTE', 'crucial', 'SN', 'Zhihong', 'TK', 'selling', 'soo', 'wholesale', 'MBK', 'JPM', 'beat', 'Filer', 'taking', 'Association', 'Environment', 'ECONOMYTALK', 'Namdong', 'Yoo', 'PER', 'RSP', 'FY', 'CFL', 'actively', 'CYS', '÷', 'FESTIVAL', 'Software', 'DDP', 'kindergarten', 'achieved', 'give', 'BD', 'HOT', 'NGFS', 'gaining', 'Countdown', 'bps', 'BM', 'DRAM', 'Steatohepatitis', 'Pic', 'SAMIC', 'YP', 'PEJ', 'SIEM', 'ET', 'Individual', 'WELCOME', 'Digital', 'continue', 'HTML', 'MUZIKTIGER', 'BRV', 'Ripple', 'AUM', 'Africa', 'NIM', 'STO', 'finder', 'letters', 'KCSI', 'CNBNEWS', 'lab', 'mile', 'syoh', 'MWC', 'CTR', 'KOEN', 'CCMM', 'BOE', 'permanent', 'knowledge', 'AED', 'CCM', 'Huxley', 'BMSI', 'Green', 'bkjung', 'alongside', 'Serve', 'Yoon', 'ELF', 'TMM', 'vessel', 'KS', 'Council', 'daum', 'official', 'MD', 'BCS', 'ICBC', 'AIM', 'IF', 'Brand', 'Sa', 'Big', 'MFI', 'CAC', 'ITF', 'DWEC', 'placed', 'CLO', 'Bid', 'Foreign', 'developing', 'sj', 'LGD', 'LSE', 'chan', 'Revie', 'Display', 'VVIP', 'double', 'jeong', 'Blochain', 'Busan', 'VI', 'Korea', 'MyCar', 'rating', 'VIMO', 'Regtech', 'etbot', 'DC', 'un', 'BC', 'SHINHAN', 'NEWS', 'hscity', 'responding', 'Buku', 'APP', 'RookieWM', 'CAO', 'invest', 'QF', 'Card', 'x', 'posted', 'Special', 'De', 'OJK', 'KBP', 'Van', 'Dawn', 'Saxy', 'FAIR', 'Mn', 'KBotSAM', 'KSQI', 'services', 'union', 'JPG', 'PIMS', 'TIS', 'OECD', 'vsKGC', 'BDO', 'exculpate', 'yoon', 'Technology', 'Stove', 'LOTTE', 'reported', 'Travel', 'REDD', 'BYOC', 'Finnvera', 'YOLO', 'BGF', 'Senior', 'EAD', 'cany', 'Fair', 'Rath', 'SKY', 'DAMOA', 'Equality', 'low', 'KTR', 'state', 'BEP', 'cargo', 'ACCESS', 'CC', 'Insurance', 'UX', 'directors', 'Fin', 'Bernardo', 'Anh', 'CFP', 'III', 'ends', 'POE', 'jhid', 'blackdesert', 'UPA', 'ARD', 'keeps', 'eco', 'lose', 'Kuroda', 'BLASH', 'organize', 'SOFIA', 'AC', 'jykim', 'Citibank', 'm', 'Pick', 'get', 'Api', 'CR', 'sycoolguy', 'CJENM', 'nearly', 'OCI', 'KoCAM', 'SpinX', 'Bot', 'Krakatau', 'BBQ', 'brokerage', 'Jan', 'POSCO', 'jordanh', 'provides', 'create', 'BBVA', 'encouraged', 'CAS', 'BW', 'border', 'pya', 'Guarantee', 'marathon', 'MCG', 'but', 'VAR', 'Jussi', 'abcd', 'Zone', 'Job', 'Upgrade', 'risk', 'Market', 'IB', 'Swap', 'questions', 'NIPA', 'Kg', 'This', 'Dec', 'Visa', 'figures', 'DLS', 'IFDS', 'bolster', 'OPEC', 'FCEDA', 'KMR', 'Virtual', 'Cello', 'OKcal', 'Trusted', 'Nguyen', 'largest', 'IDR', 'Forum', 'KPI', 'Some', 'shakeup', 'Escrow', 'kysplanet', 'opened', 'hrsong', 'Times', 'kiup', 'NIFS', 'set', 'VDI', 'manpower', 'OTID', 'Stock', 'Hope', 'Ro', 'the', 'requests', 'Office', 'bear', 'YG', 'Copyright', 'BSI', 'Pacific', 'ryan', 'Bert', 'TCB', 'Explorer', 'order', 'MTN', 'MTC', 'AIA', 'Planning', 'GNI', 'TIP', 'CNT', 'FTSE', 'Kato', 'Thomas', 'sympathy', 'suspend', 'guarantee', 'YH', 'inks', 'FMI', 'SM', 'segye', 'AI', 'sources', 'seats', 'SMPA', 'sponsors', 'kit', 'Sunday', 'law', 'bok', 'Plastic', 'Private', 'STL', 'efforts', 'Their', 'KBO', 'Diversity', 'KBS', 'transactions', 'controlling', 'TSS', 'Institute', 'HR', 'Persona', 'Triple', 'be', 'GPTW', 'major', 'rapped', 'Greening', 'OO', 'Thi', 'ECONOMY', 'CH', 'Singapore', 'join', 'WBSC', 'KRX', 'economic', 'left', 'color', 'Ode', 'Kiat', 'DBJ', 'took', 'Mason', 'jm', 'basis', 'heemangfdn', 'OTP', 'AMPC', 'CHEMICAL', 'EDPF', 'jhope', 'YMCA', 'LMFP', 'FGI', 'win', 'Thein', 'productive', 'that', 'BIN', 'park', 'Kindergarten', 'fufus', 'bank', 'Publication', 'UAE', 'VIG', 'exceeds', 'My', 'SBJ', 'Silicon', 'WBG', 'ARKO', 'Ceruzzi', 'Up', 'seta', 'ministry', 'increased', 'Phnom', 'entry', 'KOFIX', 'Checkmarx', 'heard', 'iMBC', 'jhkang', 'expenses', 'AFC', 'Renewable', 'Vice', 'Working', 'ROA', 'Reengineering', 'Response', 'ESG', 'DX', 'SOL', 'MAR', 'DDoS', 'DNA', 'Extension', 'taehoonlim', 'CNG', 'receives', 'movie', 'Satisfaction', 'XXV', 'YALOO', 'HIC', 'fun', 'MBS', 'purposes', 'SBS', 'gains', 'DIRECT', 'BMP', 'Hwaseong', 'JPY', 'proceed', 'SKS', 'school', 'how', 'APT', 'FULL', 'vowed', 'TALK', 'imnews', 'KF', 'PWP', 'Innovation', 'scchoo', 'Virgin', 'showed', 'm', 'MMORPG', 'Good', 'Shining', 'light', 'High', 'establishing', 'Nov', 'being', 'February', 'eight', 'Westerly', 'DART', 'York', 'future', 'DHK', 'whicks', 'Ethereum', 'affected', 'IRNA', 'Wave', 'Souvenir', 'roguehy', 'Savings', 'CCL', 'Central', 'Haarasilta', 'HCN', 'DP', 'LFP', 'Yi', 'good', 'till', 'JBANK', 'Whan', 'came', 'Former', 'EMEAP', 'Master', 'joint', 'busan', 'SMR', 'TradeClub', 'KJ', 'University', 'issued', 'DSS', 'LEPP', 'free', 'contract', 'NMS', 'KPS', 'AP', 'seat', 'Guillamon', 'Nihon', 'QLED', 'RBA', 'sending', 'Deep', 'OCR', 'Report', 'Chairman', 'FARM', 'IPA', 'right', 'Kakao', 'swap', 'AK', 'into', 'results', 'FATF', 'BSK', 'C', 'YJC', 'Gymnasium', 'Develop', 'Proposal', 'KCPI', 'finance', 'managed', 'yonhap', 'OJT', 'Principles', 'SL', 'Study', 'WANT', 'blackdesertm', 'Wealth', 'CDP', 'We', 'seeks', 'YK', 'downgrading', 'eye', 'Ba', 'YO', 'Reset', 'MHE', 'Single', 'Piano', 'CP', 'SDI', 'Gangwon', 'Protect', 'kr', 'More', 'STORY', 'rises', 'friendly', 'story', 'Quality', 'ERP', 'bluesoda', 'YES', 'WORLD', 'countries', 'LeYN', 'PE', 'youth', 'Wa', 'Shop', 'SPORTS', 'EOD', 'BCP', 'heads', 'uncertain', 'CKM', 'True', 'mjkang', 'LawTalk', 'Deal', 'electronic', 'from', 'costs', 'wild', 'VFX', 'QoQ', 'AFPK', 'Meeting', 'organizational', 'Allset', 'BigData', 'vs', 'such', 'Supervisory', 'bot', 'cross', 'example', 'EY', 'Lock', 'Knight', 'W', 'July', 'KIST', 'KITA', 'dividends', 'tournament', 'setting', 'Story', 'MERS', 'Rising', 'At', 'floor', 'Module', 'EFA', 'Redefine', 'BNW', 'information', 'dongsoo', 'Cellfie', 'IMFC', 'BTIG', 'PNS', 'Livewith', 'CNBC', 'RCPS', 'eBay', 'CARD', 'Dichterliebe', 'shadow', 'TRS', 'LTC', 'Zero', 'SPIE', 'height', 'indigent', 'Idle', 'WSI', 'pop', 'ARKK', 'Won', 'decoration', 'Jordan', 'Certificate', 'award', 'PCCU', 'gooeunmo', 'LCK', 'KTNET', 'bigger', 'number', 'Transformation', 'Tmap', 'formed', 'qoq', 'igon', 'air', 'put', 'mm', 'boost', 'Bpifrance', 'WKBL', 'Tie', 'YBS', 'out', 'by', 'IRUKEY', 'Maeil', 'competitiveness', 'Kickoff', 'MCI', 'six', 'sector', 'MoneyStation', 'PLC', 'Altos', 'CIB', 'WINI', 'decided', 'Banks', 'received', 'GOP', 'PKO', 'Chang', 'Custody', 'small', 'HKIAC', 'NSCLC', 'MSA', 'SOAR', 'BFI', 'NIA', 'cm', 'Neutral', 'SSC', 'GMP', 'JA', 'Both', 'maru', 'TechFin', 'ISDS', 'high', 'Flow', 'SSF', 'click', 'Lee', 'Ruud', 'STIZ', 'u', 'Governors', 'PGA', 'Programming', 'fff', 'TSB', 'Santander', 'Pay', 'Youngone', 'CTO', 'translation', 'RDP', 'Suk', 'younger', 'Share', 'Typed', 'MEMBERSHIP', 'JGTO', 'SDGs', 'Payments', 'milestone', 'HANATOUR', 'PBS', 'dbeorlf', 'Da', 'Refund', 'TICN', 'KBIZ', 'SMFG', 'Syndicated', 'org', 'strategy', 'closer', 'holjjak', 'SiC', 'TEAM', 'PG', 'launches', 'jyj', 'photo', 'konplash', 'Soloists', 'KYC', 'Burrito', 'Son', 'Now', 'FSS', 'chosunbiz', 'DHJM', 'leading', 'AS', 'HKETO', 'THECHEAT', 'Seoul', 'BOT', 'agrees', 'VTM', 'Neurocloud', 'Perkasa', 'Free', 'iMESSAGE', 'Dung', 'modest', 'MT', 'RCBC', 'Kwieciński', 'inducted', 'KISTI', 'kicked', 'EUR', 'hwan', 'iguffaw', 'MR', 'BTO', 'SAR', 'Guermazi', 'Rated', 'ming', 'off', 'officials', 'Reserve', 'Kick', 'plant', 'CTA', 'Moodys', 'JPMorgan', 'EIC', 'Work', 'Hana', 'KOICA', 'Clean', 'SVB', 'surveillance', 'ICS', 'March', 'krona', 'Human', 'wowtv', 'By', 'winningest', 'id', 'vulnerable', 'HBC', 'DLT', 'Character', 'security', 'Heads', 'eSIM', 'STB', 'iNnovation', 'seoul', 'DAC', 'Invitational', 'attempts', 'Goodjob', 'Michael', 'DO', 'Blacknumbers', 'coronavirus', 'saver', 'SOS', 'Show', 'Fi', 'Fallen', 'SRT', 'prerequisite', 'ecos', 'global', 'aircraft', 'dahora', 'Customizing', 'ISM', 'WIZ', 'Hot', 'slack', 'TD', 'position', 'Program', 'IBT', 'ilty', 'KDMA', 'trust', 'sky', 'GRI', 'Firm', 'CCR', 'klk', 'Rating', 'Sham', 'LOUNGE', 'JW', 'President', 'JTC', 'com', 'half', 'restored', 'GoodNews', 'FIU', 'regional', 'maker', 'Resources', 'iu', 'HANARO', 'aa', 'PB', 'SJ', 'each', 'ACademy', 'FX', 'MMF', 'Content', 'DA', 'SBI', 'RGT', 'Leadership', 'dips', 'siri', 'LIFE', 'wiz', 'DSME', 'XRP', 'BPS', 'wish', 'TI', 'Mmoney', 'EV', 'aims', 'pseudojm', 'BBS', 'Baek', 'RCS', 'Targets', 'Avon', 'credit', 'Soft', 'inauguration', 'neighboring', 'measure', 'Buyer', 'engaging', 'px', 'Deposit', 'KIKO', 'CO', 'hsjung', 'Hanwha', 'Partners', 'HNP', 'Ye', 'opposition', 'Sh', 'KOMPAS', 'Marketin', 'RegS', 'KT', 'eun', 'Initiative', 'STX', 'East', 'civil', 'ICSID', 'TJ', 'WREI', 'KDB', 'Lab', 'seankim', 'NBA', 'hankooki', 'CNB', 'Northeast', 'KTB', 'Davis', 'top', 'can', 'ESMO', 'ADS', 'Classic', 'led', 'some', 'Construction', 'BEFF', 'CIO', 'STIC', 'LGNCS', 'ICAK', 'BRAVO', 'ELT', 'jtbc', 'SLUSH', 'paperkiller', 'min', 'LGU', 'FinTech', 'ROBO', 'kdy', 'ddakbom', 'SOC', 'tried', 'COP', 'NAND', 'Tele', 'rnt', 'Wise', 'ibkbox', 'WHITEPAPERSC', 'CEREMA', 'Process', 'ibkonejob', 'Always', 'USD', 'SERVICE', 'CBT', 'Traded', 'EYEBALL', 'PS', 'Universe', 'While', 'KSA', 'BOX', 'decline', 'Agile', 'remarks', 'chairman', 'Bonds', 'feared', 'SHOPPING', 'supporters', 'SMEFF', 'DONGSUH', 'AWARD', 'SUMMER', 'Small', 'Promise', 'opening', 'Defending', 'BPA', 'absolute', 'CET', 'region', 'eXpert', 'Shinbo', 'sportssoeul', 'Ltd', 'DSR', 'IGE', 'EDD', 'yoy', 'Penh', 'mtn', 'Seong', 'team', 'Kim', 'decision', 'commercial', 'loss', 'LS', 'fed', 'note', 'SmilePay', 'caused', 'IT', 'KO', 'http', 'PRM', 'Joseph', 'Giarraputo', 'ELB', 'Citi', 'IP', 'ccbb', 'Imagine', 'MASTER', 'Optical', 'INSTEX', 'Sustainable', 'Treasurer', 'joined', 'workers', 'aber', 'MAINTAIN', 'moves', 'Promoter', 'vice', 'WNBA', 'Cargill', 'sdw', 'Bridge', 'our', 'Better', 'STARTUPTODAY', 'FSC', 'ASMR', 'including', 'ifland', 'KNPS', 'respective', 'savings', 'Mitraniaga', 'produce', 'Connecting', 'GOLD', 'PRESS', 'ratio', 'Table', 'GOGO', 'HSBC', 'Altteulsaltteul', 'JLPGA', 'Economic', 'financier', 'Loan', 'Moon', 'Focus', 'BankSign', 'ku', 'partially', 'BofA', 'Teller', 'Vladivostok', 'Bureau', 'EMP', 'WHITEPAPER', 'ECG', 'Trio', 'WPNA', 'groom', 'RFHIC', 'WEPs', 'Dr', 'NextONE', 'ILJIN', 'HDC', 'KCG', 'laundering', 'United', 'c', 'KSS', 'CAMS', 'launch', 'alleged', 'YOUTH', 'MRT', 'clients', 'webex', 'Deforestation', 'bonds', 'Wear', 'INFE', 'exposure', 'greenlight', 'Installment', 'Chamber', 'Derivative', 'map', 'LOUP', 'khan', 'With', 'multiple', 'Cheong', 'press', 'institution', 'FKI', 'GFE', 'DID', 'MY', 'opens', 'MSI', 'NYT', 'talks', 'fined', 'States', 'OCED', 'Furthermore', 'hyuk', 'Task', 'chief', 'RM', 'Bintang', 'It', 'TDF', 'company', 'SITI', 'Directors', 'reappointment', 'program', 'INSIGHTKOREA', 'Aramco', 'PDR', 'accelerator', 'CI', 'suwu', 'seiyu', 'must', 'YoY', 'LINE', 'Fraser', 'LSK', 'tuney', 'LC', 'PR', 'CER', 'CEA', 'Compact', 'nowcasting', 'CSM', 'KEB', 'SPA', 'briefing', 'group', 'F', 'repeat', 'Cho', 'rates', 'operations', 'Today', 'KES', 'g', 'GTL', 'Kong', 'eXplainable', 'Gender', 'NYSE', 'Meal', 'Blur', 'UPI', 'TECH', 'VIX', 'Young', 'TWV', 'KSP', 'accommodate', 'Mate', 'newsis', 'OLED', 'MO', 'through', 'TOSS', 'KEIRETSU', 'funding', 'Landscape', 'repo', 'icon', 'new', 'VC', 'Oscar', 'Ratings', 'markets', 'TFT', 'EuroStoxx', 'Bo', 'arranged', 'CMA', 'SH', 'myjs', 'f', 'ELS', 'KANGOL', 'none', 'Prop', 'KINDEX', 'KPX', 'Consolidated', 'ANZ', 'ASK', 'SLR', 'wooKim', 'Properties', 'Together', 'MC', 'donga', 'MyID', 'Province', 'BCBS', 'term', 'Guitar', 'CVT', 'JC', 'ljh', 'M', 'infusion', 'Camp', 'Banker', 'kyh', 'Banking', 'UAM', 'jung', 'HOSPITAL', 'PDF', 'Coding', 'PJICO', 'DB', 'PC', 'UN', 'esjang', 'SNB', 'fnRASSI', 'Trasformation', 'TFEB', 'shop', 'overseas', 'SQ', 'invested', 'Rich', 'also', 'investment', 'Monday', 'WTS', 'Agreement', 'jangwz', 'outlook', 'MMA', 'Credila', 'Places', 'Swe', 'BIDV', 'SCD', 'SUPER', 'Valentin', 'GFC', 'Analytics', 'MAINBiZ', 'privatized', 'shinhan', 'BNEF', 'Pulse', 'referring', 'fire', 'LIVING', 'LTV', 'worldbankgroup', 'least', 'Out', 'dong', 'tellers', 'SMEs', 'WENEED', 'MBA', 'NK', 'paygos', 'SKC', 'deepriver', 'stood', 'KWood', 'Seminar', 'Repo', 'mind', 'Arm', 'R', 'viola', 'o', 'who', 'LTA', 'Video', 'sports', 'regulatory', 'America', 'history', 'DCM', 'zzang', 'forum', 'news', 'latter', 'Survey', 'flows', 'coming', 'BANKING', 'Sovereign', 'leads', 'NZBA', 'theWM', 'Reducing', 'third', 'NEXT', 'n', 'Paribas', 'TP', 'wjr', 'Evaluation', 'virtual', 'Linux', 'facebook', 'growth', 'Bayesian', 'S', 'Influencer', 'Overnight', 'KFX', 'KMDA', 'API', 'Thin', 'POST', 'Inssait', 'PL', 'Netflix', 'Whisper', 'e', 'ECS', 'HDAC', 'KBSN', 'ISMS', 'L', 'AHD', 'jhson', 'Growth', 'trade', 'Layered', 'currently', 'bbb', 'DJ', 'Tech', 'IND', 'CFRA', 'ACSI', 'finances', 'WSJ', 'NBCI', 'final', 'Fn', 'Jafurah', 'LVMH', 'BBF', 'hmgp', 'pricing', 'hun', 'VS', 'principal', 'edaily', 'Hub', 'CBC', 'IFC', 'me', 'Liquidity', 'generation', 'ojo', 'RAM', 'EKB', 'Min', 'Operation', 'DLI', 'ace', 'Chinese', 'VM', 'may', 'intended', 'zheng', 'hot', 'TOGETHER', 'Fraud', 'Rights', 'CCO', 'Plug', 'THISCovery', 'dream', 'very', 'Index', 'SWIFT', 'DPS', 'BTL', 'Gather', 'mark', 'rosehans', 'asiae', 'UAMCO', 'XIBK', 'lax', 'psykims', 'balance', 'isaac', 'Funds', 'HMM', 'Corp', 'sbs', 'coddy', 'MAGA', 'STT', 'EXA', 'growing', 'KIWOOM', 'Robotics', 'pass', 'CFT', 'during', 'KDI', 'phonalist', 'II', 'OH', 'than', 'goham', 'EST', 'Series', 'CJ', 'tue', 'thebell', 'pangbin', 'upandup', 'District', 'similar', 'AR', 'rose', 'Actis', 'DFS', 'Non', 'Disclosure', 'AESA', 'ko', 'Tab', 'Elegy', 'youtube', 'ajh', 'OnePASS', 'Spirit', 'ubc', 'place', 'BG', 'csun', 'LCD', 'MMO', 'Discover', 'sc', 'using', 'SOTWO', 'HB', 'photocdj', 'TRASS', 'which', 'Counter', 'RFR', 'HKMA', 'st', 'CMB', 'eCommerce', 'hwaru', 'Hi', 'graphics', 'Manager', 'bbeom', 'Fine', 'Supervision', 'interview', 'MyData', 'Agris', 'LTI', 'SSM', 'jhmoon', 'BAT', 'heraldcorp', 'suncho', 'SamsungEng', 'jean', 'Officer', 'IRP', 'CPC', 'lending', 'ETRI', 'han', 'B', 'Be', 'LCC', 'DT', 'KCSG', 'mishandling', 'Cash', 'Ministry', 'AFoCO', 'yesphoto', 'Practitioners', 'earnings', 'Covert', 'core', 'a', 'FM', 'previously', 'BNG', 'NongHyup', 'Pandemic', 'Dian', 'All', 'jin', 'achieves', 'BitGo', 'Perpetual', 'ARCVERSE', 'RTMS', 'Happy', 'Pixabay', 'offerings', 'second', 'Silvertown', 'robust', 'amin', 'general', 'ECA', 'CSI', 'Iran', 'buying', 'OVID', 'KBN', 'regulation', 'before', 'GO', 'Professionals', 'touch', 'DL', 'strengthen', 'HMC', 'CGV', 'Foot', 'Caixa', 'Tier', 'plenty', 'outperform', 'On', 'deal', 'buffer', 'Volleyball', 'Re', 'Speech', 'Make', 'care', 'Back', 'FAN', 'SC', 'php', 'trigger', 'jkimmina', 'AG', 'KENCA', 'Relations', 'designed', 'approval', 'Enerbility', 'Chief', 'key', 'FINANCE', 'Δ', 'recent', 'Maekyung', 'vulnerabilities', 'bit', 'quickly', 'Democracy', 'move', 'Gap', 'FORCA', 'CBIRC', 'TKG', 'SBTI', 'ALBERT', 'backlash', 'sold', 'Pham', 'mandate', 'STM', 'Learning', 'Mynd', 'FUNd', 'ABLEFI', 'BUSAN', 'popocar', 'KISA', 'TIPA', 'moving', 'utzza', 'joze', 'willfully', 'SDR', 'Sales', 'Weekly', 'syf', 'Movie', 'Business', 'goodnim', 'KOTAN', 'picks', 'DN', 't', 'promoted', 'products', 'DDR', 'are', 'SPC', 'COFIX', 'DSC', 'Tuesday', 'PWM', 'Excellence', 'HMR', 'Chung', 'Georgetown', 'Parasite', 'Board', 'Do', 'Southern', 'disciplinary', 'Earnings', 'entering', 'minfo', 'Debt', 'kg', 'revenue', 'CAPA', 'could', 'BPCE', 'planning', 'Bit', 'CVC', 'Select', 'BANKiT', 'asking', 'ryuryu', 'million', 'CDBC', 'India', 'JKL', 'TCC', 'km', 'Joy', 'baa', 'scholarship', 'Ca', 'ever', 'tv', 'Host', 'offer', 'executive', 'BCA', 'EOI', 'hkmpooh', 'alexei', 'mush', 'message', 'FTA', 'view', 'FCP', 'unit', 'Style', 'SSO', 'CATUS', 'Champ', 'FOMC', 'kanjo', 'FIVB', 'BSB', 'Matthew', 'NEMAME', 'filing', 'zip', 'surisuri', 'signatures', 'YGPA', 'JV', 'buy', 'NCR', 'service', 'dr', 'cover', 'startups', 'Ae', 'Additionally', 'OF', 'winner', 'Club', 'Effect', 'Sustainability', 'psnews', 'Haruhiko', 'UFG', 'worker', 'Damien', 'visited', 'COM', 'market', 'GS', 'Subcustodian', 'stoo', 'shareholders', 'TM', 'yatoya', 'TOPCIT', 'branch', 'MnM', 'process', 'MK', 'Lewis', 'Handsome', 'First', 'Learn', 'Valley', 'MOEF', 'FAQ', 'chanIndustrial', 'wiki', 'Indicator', 'Care', 'Investments', 'ADVISER', 'stake', 'Manunggal', 'YWCA', 'price', 'Finnq', 'AMS', 'Diet', 'LNG', 'LBVAR', 'Framework', 'VIP', 'LDS', 'DongwonF', 'Kasa', 'money', 'Stewardship', 'rule', 'WooTube', 'kimhs', 'GFSN', 'LOC', 'Treasury', 'Indawa', 'jieunlee', 'opportunity', 'Valderrabano', 'Gumi', 'provided', 'Food', 'SMTH', 'NAL', 'token', 'Shareholder', 'activity', 'Memories', 'Payment', 'Ring', 'carpos', 'Double', 'IRGC', 'Buy', 'directing', 'cut', 'Robo', 'related', 'Solution', 'DWS', 'Workplace', 'Ghosh', 'ANTA', 'FICC', 'recovery', 'SEK', 'IIT', 'ANNA', 'starting', 'several', 'Challenges', 'KFDA', 'IDC', 'keeping', 'taeho', 'BI', 'DNK', 'eID', 'Executive', 'FXON', 'joins', 'gained', 'kdb', 'Sign', 'ibk', 'Airbus', 'MTB', 'ad', 'component', 'HBM', 'you', 'TV', 'after', 'ABS', 'LED', 'SOLUTIONS', 'jobs', 'doremi', 'UNI', 'KKH', 'SURE', 'OCIO', 'Standard', 'companies', 'Agency', 'Force', 'time', 'vegan', 'The', 'pixabay', 'Infantile', 'Interview', 'VINA', 'Frank', 'KCS', 'CDO', 'reports', 'INTERNATIONAL', 'face', 'KPC', 'PK', 'ahead', 'advised', 'Rookie', 'MG', 'SPV', 'net', 'WP', 'pdj', 'Orchestration', 'backed', 'retiring', 'CBO', 'dadazon', 'Carstens', 'zero', 'KSI', 'altos', 'now', 'Epidemic', 'kane', 'hs', 'IS', 'WGBI', 'FSB', 'JUMP', 'Heavy', 'KTX', 'outstanding', 'respectively', 'Tap', 'ADAS', 'sh', 'KICOX', 'participation', 'ywkim', 'investments', 'CT', 'level', 'SBTi', 'Top', 'CCB', 'Financial', 'KAL', 'yna', 'MTID', 'ECO', 'BoA', 'according', 'Arup', 'dahyeji', 'strum', 'accounts', 'soonsin', 'division', 'FDIC', 'sang', 'MTS', 'acquisition', 'Reconstruction', 'Over', 'HANWHA', 'return', 'NNA', 'portfolio', 'Startup', 'signed', 'CAD', 'STR', 'Bottom', 'trues', 'MMDA', 'KIC', 'jye', 'Year', 'Sanjeev', 'GKD', 'Square', 'expulsion', 'SPAC', 'kms', 'Trust', 'LCR', 'Getty', 'ABB', 'Al', 'Geely', 'QR', 'PLAY', 'Outperform', 'Intan', 'largely', 'OT', 'industries', 'UBS', 'yes', 'SUMMARY', 'hjkwon', 'Of', 'KCC', 'gnnews', 'AICC', 'product', 'iH', 'disappointing', 'Adalberto', 'zeroground', 'employees', 'ceremony', 'fairyhkj', 'Promotion', 'A', 'Saving', 'Insight', 'GW', 'eFX', 'NCT', 'professional', 'GHOS', 'barred', 'itM', 'MVNO', 'rated', 'syyoon', 'icpa', 'Emory', 'Pro', 'tenure', 'mbn', 'POP', 'Balance', 'TJB', 'Economist', 'said', 'Doctor', 'Bizapplication', 'NEW', 'Cosmos', 'reimburse', 'COO', 'PAY', 'GLAD', 'Apple', 'Regulation', 'Talk', 'Equator', 'Donga', 'Choi', 'Recognition', 'Blue', 'president', 'EDI', 'man', 'LIBOR', 'Park', 'first', 'highest', 'GEI', 'Winter', 'Closing', 'Jong', 'Jun', 'CPU', 'newly', 'difficulties', 'Agri', 'Trading', 'operating', 'Demoday', 'KBRI', 'Venture', 'WM', 'HUFS', 'Invest', 'Voice', 'Byoung', 'Sell', 'Polish', 'CMIG', 'pointer', 'within', 'Tomorrow', 'GA', 'Large', 'yongin', 'To', 'SAMSUNG', 'Byung', 'NOW', 'ljm', 'TAX', 'TikTok', 'vitaminji', 'hhkim', 'Emissions', 'city', 'CBDC', 'Robotic', 'Value', 'GCF', '・', 'kb', 'hankyung', 'Global', 'GEPP', 'Petrolimex', 'together', 'China', 'COSMAX', 'bln', 'MIGA', 'ygjung', 'shops', 'AUD', 'BIK', 'AICPA', 'Gyeongsang', 'COCO', 'Paris', 'expecting', 'TOUR', 'code', 'MDS', 'Km', 'NCsoft', 'HYUNDAI', 'Stop', 'POS', 'Letter', 'Bok', 'KIAT', 'Pre', 'have', 'N', 'Asean', 'Purpose', 'CDCS', 'INTERNATIO', 'NIS', 'Texas', 'eXcellence', 'aT', 'Phone', 'State', 'latest', 'AppCAMO', 'meddle', 'BBC', 'DAEKYO', 'pjy', 'HE', 'yjkim', 'Minsur', 'Science', 'ICJ', 'CBB', 'MDM', 'Kensho', 'taskforce', 'LH', 'yeul', 'focuses', 'KBW', 'tn', 'Capital', 'PCAF', 'bkhong', 'CRO', 'closed', 'Add', 'BB', 'FBI', 'CDRO', 'comes', 'Ilan', 'kimhyun', 'crowd', 'DFI', 'SEACEN', 'on', 'Warjiyo', 'DSINFRA', 'TOB', 'restaurant', 'Kyung', 'SLS', 'allocated', 'GST', 'museum', 'Financing', 'appointing', 'accelerate', 'open', 'IBKS', 'SW', 'road', 'bookkeeping', 'OSB', 'Securities', 'Bank', 'Center', 'Myanmar', 'DVI', 'IC', 'SUV', 'Test', 'say', 'temper', 'Taipei', 'halimkoo', 'BUY', 'continuing', 'Key', 'jgsm', 'FANG', 'kmib', 'Kids', 'allows', 'board', 'LEAD', 'Secured', 'BS', 'crashed', 'Economics', 'Amazon', 'Provider', 'OPB', 'newspim', 'Metaverse', 'failure', 'agency', 'with', 'nyusos', 'Let', 'LOG', 'Overweight', 'BL', 'HF', 'kbj', 'IPO', 'OFAC', 'Moody', 'HD', 'FINTECHPOST', 'SGI', 'Viability', 'notice', 'Concept', 'big', 'Price', 'KIXX', 'Branch', 'ALM', 'Dream', 'DJI', 'paper', 'IRRBB', 'Token', 'Oct', 'CLASS', 'Corporate', 'compact', 'pressure', 'jeonsh', 'Thursday', 'APK', 'English', 'HBsmith', 'POWER', 'Preview', 'Wood', 'EXPO', 'Kaminski', 'fnnews', 'people', 'IBK×', 'OPUS', 'Call', 'FCF', 'ISLANDS', 'Wings', 'NCSI', 'obtained', 'KGI', 'champions', 'HKD', 'helps', 'RP', 'foothold', 'NF', 'units', 'errors', 'DailyWith', 'KPHA', 'Managed', 'Doosanfc', 'VND', 'Security', 'Images', 'GET', 'Auto', 'COMPANY', 'NAVER', 'SNT', 'yyko', 'Loyalty', 'QA', 'ECM', 'SIB', 'Secondary', 'OCC', 'Enterprise', 'HPE', 'overtake', 'banks', 'COIN', 'Front', 'cloud', 'JTBC', 'kjbank', 'GEN', 'KG', 'specializes', 'Hong', 'Finance', 'GRS', 'Automation', 'Who', 'width', 'lite', 'of', 's', 'ytn', 'DOCUMENT', 'everywhere', 'SDG', 'structure', 'warning', 'virus', 'unimpressive', 'ID', 'VG', 'book', 'silent', 'burgeoning', 'KORIBOR', 'looked', 'accordance', 'enjoying', 'Bills', 'comprehensive', 'utredehub', 'J', 'however', 'STF', 'Sweet', 'ICO', 'week', 'hye', 'etnews', 'were', 'EXO', 'wshan', 'MARS', 'achievement', 'Fame', 'Art', 'KOTRA', 'NBP', 'robot', 'receive', 'NY', 'HCI', 'keep', 'tae', 'mydaily', 'CE', 'Competency', 'ESM', 'WISE', 'country', 'VKBIA', 'imports', 'Assessment', 'sportsseoul', 'deposit', 'GoGreen', 'Scale', 'IBKSB', 'Agustin', 'CNTN', 'Multiple', 'LEI', 'MiCA', 'Emerging', 'kate', 'graduate', 'times', 'SUSBA', 'case', 'LMS', 'internal', 'ICONLOOP', 'DCXI', 'Mileage', 'Saturday', 'ROE', 'agreed', 'Exchange', 'LG', 'Newspaper', 'office', 'site', 'rumor', 'Scraping', 'demian', 'had', 'd', 'ISP', 'headache', 'HANA', 'krnaver', 'saw', 'IBM', 'Cooperation', 'PPP', 'RWA', 'CRP', 'support', 'Transformaion', 'Economy', 'An', 'comply', 'WHITEPAPERIBK', 'benefits', 'LOL', 'thanks', 'AJ', 'self', 'VNL', 'Palma', 'Development', 'Take', 'performance', 'Committed', 'BNK', 'YouTube', 'NSP', 'Complex', 'vows', 'why', 'Tbk', 'FBW', 'junokong', 'Linked', 'job', 'won', 'maintains', 'ADB', 'BaaS', 'WA', 'lower', 'bjko', 'PaaS', 'go', 'Startups', 'Easy', 'lgh', 'compliance', 'been', 'background', 'VLC', 'early', 'BTC', 'Fishing', 'bright', 'Social', 'CAN', 'updates', 'CPOD', 'Tower', 'Plus', 'demonstrates', 'TechEmerge', 'SMB', 'KIHOILBO', 'Jeolla', 'establishment', 'Information', 'Hansol', 'pct', 'Ali', 'FSCM', 'IM', 'KECI', 'Smart', 'fn', 'th', 'SCFI', 'ABA', 'labor', 'STOP', 'due', 'quarter', 'BPI', 'KITIA', 'RBNZ', 'STP', 'CCTV', 'Gherkin', 'RFQ', 'In', 'Line', 'WI', 'Product', 'LIMS', 'Mohammed', 'hankookilbo', 'namu', 'misocamera', 'GTX', 'ESL', 'NSW', 'chul', 'supply', 'Poland', 'Electronic', 'PPCB', 'After', 'ULA', 'ICT', 'appoint', 'chembank', 'began', 'img', 'ABD', 'NFT', 'MBN', 'FRB', 'Open', 'amid', 'RFP', 'SHINSEGAE', 'Nonghyup', 'eyes', 'partners', 'kimkim', 'MA', 'Domestic', 'Post', 'Boutheina', 'Executives', 'PCE', 'BOA', 'text', 'Its', 'follow', 'FI', 'ILO', 'Spark', 'pick', 'BK', 'Web', 'FA', 'Empowerment', 'Makhtar', 'VLOC', 'Charity', 'POC', 'PT', 'Sept', 'Government', 'BOJ', 'zenism', 'lenders', 'economies', 'SME', 'SeSAC', 'MIP', 'Vector', 'LASER', 'Head', 'DEMO', 'Batman', 'Pension', 'even', 'usance', 'RBI', 'JB', 'PPCBank', 'collected', 'Vision', 'ADFD', 'NC', 'Media', 'sedaily', 'hy', 'course', 'KOFR', 'Yip', 'proposal', 'NVIDIA', 'former', 'is', 'HAI', 'presented', 'he', 'EMV', 'Laziz', 'kt', 'Incheon', 'Livestock', 'DoDream', 'NO', 'Fitness', 'UNGC', 'rely', 'REI', 'LIG', 'Asiana', 'KEA', 'MZ', 'VOA', 'DIP', 'Exporsure', 'end', 'diplomatic', 'FIS', 'adequate', 'Wish', 'art', 'Anti', 'BaFin', 'economy', 'Aviation', 'Cover', 'reach', 'BNP', 'Wanna', 'Eye', 'JBPOS', 'KEPCO', 'acm', 'Leaders', 'O', 'MES', 'Investor', 'Cellissimo', 'reshuffle', 'spend', 'NGO', 'Daegu', 'lunches', 'or', 'WE', 'mktv', 'Packet', 'practice', 'assumed', 'event', 'CPI', 'Hofman', 'share', 'Z', 'Life', 'Copyrights', 'tekken', 'NHN', 'NPL', 'sized', 'KCD', 'Quant', 'EDC', 'bond', 'MENITH', 'YTN', 'ICC', 'central', 'Materiality', 'AfDB', 'Self', 'pg', 'UCC', 'Lead', 'RBC', 'Signal', 'public', 'OTT', 'Paper', 'NI', 'higher', 'CES', 'SimpliVity', 'MORE', 'PD', 'Public', 'Islamic', 'to', 'while', 'Japan', 'HaR', 'Nextdecade', 'became', 'gsoo', 'fund', 'long', 'LIVE', 'trading', 'choyh', 'person', 'four', 'Repricing', 'well', 'KLPGA', 'VR', 'SOHO', 'Remimazolam', 'Street', 'PSMOR', 'seriously', 'FDA', 'Sentiment', 'Cup', 'UNG', 'ABN', 'banking', 'Part', 'Gang', 'KCI', 'BNDCC', 'following', 'Target', 'Crisis', 'Begin', 'since', 'Under', 'dropped', 'IDA', 'BoH', 'woo', 'eunin', 'Q', 'Must', 'needy', 'authority', 'involving', 'KAMC', 'Win', 'considering', 'Suwaidi', 'consultant', 'CAMP', 'about', 'Festina', 'AE', 'slipped', 'PEF', 'T', 'these', 'DGB', 'Bae', 'Op', 'SofTech', 'CXO', 'Alliance', 'Meta', 'Boot', 'Will', 'SAFE', 'URL', 'CFA', 'getting', 'KIP', 'MHN', 'OTC', 'IaaS', 'May', 'debt', 'Exhibition', 'Love', 'BIFC', 'snowball', 'IATA', 'ETN', 'account', 'for', 'KDAC', 'Immediate', 'bizwatch', 'EBS', 'KIEP', 'airline', 'iM', 'FlawedAmmyy', 'tax', 'TF', 'Changing', 'Baa', 'economidaily', 'Scientist', 'Strength', 'News', 'Gov', 'EBN', 'SSA', 'operate', 'yGOS', 'Tranche', 'ATM', 'MS', 'KR', 'Mid', 'Item', 'khoon', 'Techcombank', 'BNPL', 'GCSI', 'gain', 'KOBC', 'K', 'HSCEI', 'cbs', 'Direct', 'KSC', 'customers', 'KTOA', 'KIMST', 'PASS', 'Hero', 'RTI', 'OMS', 'Town', 'KPLGA', 'logs', 'agreement', 'KIB', 'QCA', 'become', 'lkn', 'IFRS', 'announced', 'no', 'whether', 'SP', 'Link', 'BA', 'ISC', 'k', 'Niaga', 'Transport', 'bumpy', 'Stocks', 'Role', 'Intelligence', 'But', 'named', 'entity', 'CEB', 'mt', 'Read', 'controversial', 'GX', 'Ratio', 'Portfolio', 'ON', 'Diby', 'Coverage', 'futures', 'Refresh', 'milpark', 'Bumi', 'Feasibility', 'TKI', 'HL', 'seven', 'Not', 'IMD', 'increase', 'BTS', 'p', 'Considering', 'Wallet', 'ks', 'SUM', 'loans', 'IBk', 'CNY', 'Two', 'PwC', 'cursor', 'Covered', 'DHL', 'step', 'Company', 'LAB', 'nCOv', 'Bar', 'SB', 'ABCP', 'KCGS', 'poll', 'Model', 'aiming', 'Simple', 'tobacco', 'jc', 'FRM', 'frozen', 'SBSCNBC', 'Have', 'PICK', 'baikal', 'Yellow', 'reform', 'Paing', 'Layer', 'leverage', 'yang', 'financial', 'Dragon', 'dwlim', 'Dear', 'expect', 'KNF', 'European', 'and', 'Fintech', 'KBI', 'Fund', 'PhotoCure', 'among', 'Targe', 'Communications', 'Valuation', 'technology', 'pallet', 'Mr', 'COOP', 'CSPA', 'MX', 'Import', 'RB', 'Kraton', 'Hey', 'Relationship', 'Network', 'GFANZ', 'PCA', 'WHO', 'Cloud', 'PoC', 'Strategic', 'mobile', 'yeongnam', 'stopped', 'result', 'shape', 'Wind', 'Herald', 'IBK', 'soars', 'Services', 'tensions', 'FLY', 'TalkTalk', 'Nikko', 'ground', 'ONA', 'DBS', 'Jakarta', 'SUNJIN', 'SCB', 'TOP', 'MB', 'Ji', 'Knock', 'Consumer', 'BBB', 'UI', 'Platform', 'VIPCLASS', 'E', 'MOU', 'JOL', 'pmkeul', 'Perfomance', 'Strong', 'sharing', 'MP', 'parth', 'WISET', 'structured', 'Continuity', 'psy', 'indirect', 'evolving', 'KODEX', 'friendtolife', 'matches', 'NYCB', 'PCI', 'Pool', 'spent', 'Taxonomy', 'ACTION', 'KEXIM', 'Unit', 'watchdog', 'Capex', 'NCS', 'Home', 'NamhaeChem', 'Dealer', 'jioh', 'peace', 'PAYCO×SC', 'tootsie', 'will', 'enterprises', 'head', 'Application', 'firms', 'retail', 'Co', 'CMO', 'shares'}
In [33]:
from konlpy.tag import Mecab
# Mecab 객체 생성
mecab = Mecab()
# 모든 문장에서 부사 추출
adverbs = news_raw_df['뉴스'].apply(lambda sentence: [word for word, pos in mecab.pos(sentence) if pos in ('SH')])
# 결과를 리스트 형태로 합치기 (중복 제거 가능)
all_adverbs = set([adverb for sublist in adverbs for adverb in sublist])
print("한자 목록:", all_adverbs)
한자 목록: {'企', '盃', '鳥', '創', '乳', '讀', '募', '輸', '女', '三井住友', '支', '協', '技', '啄', '三菱', '李', '十駕', '熙', '亞', '四面楚歌', '深', '來', '檢', '彦', '兼', '尼', '兆', '百聞', '治', '巨', '賢', '債', '春', '四時貨色', '故', '來', '犬', '红', '華', '而', '勞', '鴛', '同', '百年', '清', '投', '勞', '試', '百', '逅', '隆', '三年七月十一日', '劉', '述', '弗'}
In [34]:
from collections import Counter
# 뉴스_TOKEN 컬럼에서 모든 토큰을 합친 리스트 생성
all_tokens = [token for tokens in news_raw_df['뉴스_TOKEN'] for token in tokens]
# 토큰의 빈도를 계산
token_counts = Counter(all_tokens)
# 상위 100개의 가장 많이 나오는 단어 출력
most_common_tokens = token_counts.most_common(100)
# 전체 토큰 개수 계산 (중복 포함)
total_token_count = sum(token_counts.values())
# 중복되지 않는 토큰 개수 계산 (고유한 토큰)
unique_token_count = len(set(all_tokens))
# 결과 출력
print(f"전체 토큰 개수(중복 포함): {total_token_count}")
print(f"고유한 토큰 개수(중복 제거): {unique_token_count}\n")
# 상위 100개의 가장 많이 나오는 단어 출력
for word, count in most_common_tokens:
print(f"{word}: {count}")
전체 토큰 개수(중복 포함): 1744774 고유한 토큰 개수(중복 제거): 30161 은행: 90706 IBK: 45180 기업: 45060 금융: 30128 대출: 18506 금리: 14914 투자: 14100 억: 13574 한국은행: 12432 지원: 11020 기업은행: 10991 한국: 10325 증권: 8797 조: 8667 중소기업: 6991 따르: 6123 은행장: 6118 기준: 6105 분기: 5309 서비스: 5211 시장: 5162 하나: 5158 우리: 5047 올해: 4993 고객: 4926 이번: 4405 협약: 4391 서울: 4271 경제: 4238 지난해: 4189 업무: 4158 규모: 4119 가계: 4030 KB: 4024 자금: 4015 코로나: 4002 성장: 4000 국내: 3938 대상: 3902 펀드: 3899 증가: 3807 사업: 3746 상품: 3638 영업: 3490 저축: 3477 받: 3372 제공: 3369 실적: 3361 신용: 3198 신한은행: 3187 혁신: 3178 기자: 3155 농협: 3144 지역: 3137 본부: 3121 기관: 3118 대비: 3113 확대: 3074 전망: 3072 시중: 3071 부: 3037 행장: 3022 목표: 2985 디지털: 2925 경영: 2909 채용: 2897 위원회: 2896 사진: 2842 국민은행: 2813 최근: 2812 출시: 2805 경기: 2740 최대: 2726 사회: 2719 체결: 2696 가능: 2678 발표: 2665 주가: 2626 진행: 2620 창업: 2588 관련: 2578 카드: 2576 정책: 2541 인상: 2523 유지: 2513 대표: 2476 장: 2470 씨티: 2444 개최: 2431 당국: 2427 강화: 2387 상승: 2367 뱅크: 2358 윤종원: 2358 거래: 2352 통화: 2323 창공: 2320 오: 2275 높: 2261 예금: 2236
- Word Cloud
In [35]:
from PIL import Image
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
# 배경 이미지 불러오기
background_image_path = 'image/nim_wordcloud_background.png' # 로컬 환경에 맞게 경로 수정
background_image = np.array(Image.open(background_image_path))
# 한글 폰트 경로 설정 (예시로 나눔고딕 사용)
font_path = '/usr/share/fonts/truetype/nanum/NanumGothic.ttf' # 로컬 환경에 맞게 수정
# 타이틀_OKT과 내용_OKT 데이터를 결합한 후 리스트로 변환
tokens = news_raw_df['뉴스_TOKEN'].tolist()
# 빈 리스트가 아닌 경우만 필터링하여 하나의 텍스트로 변환
tokens = [token_list for token_list in tokens if token_list] # 빈 리스트 제거
all_tokens = [token for token_list in tokens for token in token_list] # 모든 토큰 합침
# 단어 빈도수 계산
token_counts = Counter(all_tokens)
# 텍스트가 비어 있지 않은지 확인
if len(token_counts) > 0:
# 워드클라우드 생성 (단어 빈도수 반영)
wordcloud = WordCloud(
font_path=font_path,
width=1250,
height=500,
max_words=500, # 최대 단어 수를 500개로 확장
background_color='white', # 진한 회색 배경
# background_color='#E9E9E9', # 진한 회색 배경
mask=background_image,
colormap='coolwarm', # 파란 계열 컬러맵 설정
# colormap='viridis', # 파란 계열 컬러맵 설정
# colormap='Blues', # 파란 계열 컬러맵 설정
# colormap='Purples', # 파란 계열 컬러맵 설정
# colormap='Spectral', # 파란 계열 컬러맵 설정
max_font_size=200, # 최대 폰트 크기 설정
min_font_size=10 # 최소 폰트 크기 설정
).generate_from_frequencies(token_counts)
# 워드클라우드 시각화 (figsize 크게 조정, dpi 설정)
plt.figure(figsize=(25, 10), dpi=100) # 크기를 크게 조정
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
# plt.title('2018.1.1 ~ 2024.6.30 뉴스', fontsize=20)
plt.show()
else:
print("토큰화된 리스트가 비어 있습니다.")
- Word2Vec Embedding
In [36]:
from gensim.models import Word2Vec
embedding_size = 100 # 임베딩 차원 수
w2v_model = Word2Vec(sentences = news_raw_df['뉴스_TOKEN'], vector_size = 100, window = 5, min_count = 5, workers = 4, sg = 0)
In [37]:
# 완성된 임베딩 매트릭스의 크기 확인
w2v_model.wv.vectors.shape
print(w2v_model.wv.most_similar("금리"))
print(w2v_model.wv.most_similar("금융"))
[('코픽스', 0.6482152342796326), ('우대금리', 0.6257911920547485), ('bp', 0.6239266991615295), ('p', 0.6158222556114197), ('낮추', 0.6054574847221375), ('인하', 0.598855197429657), ('주담', 0.5980218052864075), ('주기', 0.5960594415664673), ('여기', 0.5940781235694885), ('내리', 0.5920925140380859)]
[('최근', 0.4365432858467102), ('방안', 0.4197574853897095), ('회장단', 0.4196576774120331), ('주효', 0.41465094685554504), ('강화', 0.40485095977783203), ('당부', 0.4033476710319519), ('연합회', 0.40214526653289795), ('추진', 0.3992335796356201), ('방침', 0.39621081948280334), ('KNF', 0.3949715495109558)]
- 단어 임베딩 유사도 시각화(Word Embedding Similarity Visualization)
In [38]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import numpy as np
# 두 개의 주요 키워드와 색상
keywords = ["금리", "금융"]
colormaps = ['YlGn', 'Blues']
fig, ax = plt.subplots(1, 2, figsize=(25, 10))
for idx, (keyword, cmap_name) in enumerate(zip(keywords, colormaps)):
# 각 키워드와 가장 유사한 단어 30개 가져오기
similar_words = w2v_model.wv.most_similar(keyword, topn=30)
# 유사 단어 및 벡터 추출
words = [keyword] + [word for word, _ in similar_words]
vectors = [w2v_model.wv[word] for word in words]
similarities = [1.0] + [sim for _, sim in similar_words]
# PCA로 2차원으로 축소
pca = PCA(n_components=2)
vecs_2d = pca.fit_transform(vectors)
# 컬러맵 설정 및 버블 색상
cmap = plt.get_cmap(cmap_name)
bubble_colors = [cmap(i / len(words)) for i in range(len(words))]
# 중심 단어와 유사 단어 간 연결선 추가
for i in range(1, len(words)):
ax[idx].plot([vecs_2d[0, 0], vecs_2d[i, 0]], [vecs_2d[0, 1], vecs_2d[i, 1]], color='lightgray', linestyle='--', linewidth=0.5)
# 버블 차트 그리기
bubble_sizes = np.array(similarities) * 1000 # 유사도에 따른 크기 조정
ax[idx].scatter(vecs_2d[:, 0], vecs_2d[:, 1], s=bubble_sizes, color=bubble_colors, alpha=0.8)
# 각 단어에 레이블 추가
for i, word in enumerate(words):
# ax[idx].text(vecs_2d[i, 0] + 0.05, vecs_2d[i, 1] + 0.05, word, fontsize=9, ha='right')
# ax[idx].text(vecs_2d[i, 0], vecs_2d[i, 1] + 0.5, word, fontsize=12, ha='center', va='bottom', weight='bold')
random_offset = np.random.uniform(0.5, 1.5) # -0.1에서 0.3 사이의 무작위 오프셋 생성
ax[idx].text(vecs_2d[i, 0], vecs_2d[i, 1] + random_offset, word, fontsize=13, ha='center', va='bottom')
# y축 제한을 설정하여 텍스트 공간 확보
y_min, y_max = vecs_2d[:, 1].min(), vecs_2d[:, 1].max()
margin = (y_max - y_min) * 0.2 # 위아래 여백 추가
ax[idx].set_ylim(y_min - margin, y_max + margin)
# subplot 제목과 축 설정
ax[idx].set_title(f"Words Similar to '{keyword}'", fontsize=18)
# 레이아웃 조정 및 전체 출력
plt.tight_layout()
plt.show()
In [39]:
from gensim.models import Word2Vec
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
embedding_dim = 100 # 임베딩 차원 수
# 각 뉴스 토큰을 평균 풀링한 임베딩으로 변환
def get_average_embedding(tokens):
embeddings = [w2v_model.wv[token] if token in w2v_model.wv else np.zeros(embedding_dim) for token in tokens]
if embeddings:
return np.mean(embeddings, axis=0) # 평균 풀링하여 고정된 길이의 벡터로 변환
else:
return np.zeros(embedding_dim) # 빈 토큰의 경우 0 벡터 반환
# 평균 풀링된 임베딩을 '뉴스_EMBEDDING' 컬럼으로 저장
news_raw_df['뉴스_EMBEDDING'] = news_raw_df['뉴스_TOKEN'].apply(get_average_embedding)
# 4. 결과 미리보기
print(news_raw_df[['뉴스_TOKEN', '뉴스_EMBEDDING']].head())
뉴스_TOKEN \
24 [기업은행, 무기, 계약직, 정규직, 전환, 추진, 무기, 계약직, 신규, 채용, 정규직, 처우, 개선, 노사, 공동, 선언문, 기업은행, 노사, 올해, 상반기, 인사, 이후, 시일, 안, 무기, 계약직, 정규직, 전환, 추진, 기업은행, 노사, 이날, 시무식, 내용, 담, 정규직, 처우, 개선, 노사, 공동, 선언문, 발표]
25 [IBK, 기업, 은행, 시무식, 개최, IBK, 기업, 은행, 오전, 서울, 중구, 을지로, 본점, 김도진, 은행장, 임직원, 참석, 가운데, 시무식, 열, 김, 행장, 신년사, 올해, 중소기업, 대출, 시장, 디지털, 금융, 시장, 격전, 예상, 중기, 금융, 시장, 압도, 차별, 선도, 디지털, 혁신, 육성]
26 [신년사, 김도진, 기업, 은행장, 가계, 대출, 규제, 정부, 중소기업, 지원, 정책, 강화, 경쟁, 은행, 모두, 중기, 대출, 시장, 정조준, 디지털, 금융, 분야, 인재, 확보, 혁신, 기술, 도입, 플랫폼, 구축, 영역, 경쟁, 확산, 저, 올해, 고객, 현장, 중요, 경영, 축, 삼, 고객]
27 [시중, 은행, 새해, 화두, 디지털, 강화, 이대훈, NH, 농협, 은행장, 김도진, IBK, 기업, 은행장, 대훈, NH, 농협, 은행장, 신년사, 미래, 대비, 사업, 기반, 찾, 핀테크, 활용, 이종업, 종과, 융, 복합, 추진, 농협, 은행, 지주, 차원, 디지털, 금융, 부문, 조직, 디지털, 금융, 최고, 책임자, CDO, 자리]
28 [주요, 금융, 지주, 회장, 은행장, 신년사, 경영, 키워드, 김도진, IBK, 기업, 은행장, 올해, 중소기업, 대출, 시장, 디지털, 금융, 시장, 격전, 예상, 중기, 금융, 시장, 압도, 차별, 선도, 디지털, 혁신, 육성, 더불, 동반자, 금융, 역동, 창업, 시장, 조성, 계획, 성수, 한국, 수출입, 은행장]
뉴스_EMBEDDING
24 [-0.2936545, -0.063949786, 0.15546481, 0.3691246, 0.25956467, -0.45849842, -0.16506225, 0.29538772, -0.18597198, -0.7252315, 0.025675979, -0.05647864, -0.17351507, 0.53554535, 0.100842305, 0.054026872, -0.15745391, -0.3052879, 0.14987938, -0.39474043, 0.41084856, -0.14271416, -0.34552476, 0.19518717, -0.07114915, 0.46322712, -0.74219245, 0.06903728, 0.5592631, 0.02187683, -0.56512576, -0.020742724, -0.652881, -0.72009856, -0.25559312, -0.4809571, 0.7714682, 0.18630892, 0.24278232, 0.39209166, -0.26570046, 0.091245845, 0.3417717, -0.15243912, 0.12755617, -0.71489495, -0.40015712, 0.036072157, 0.30928764, -0.06459345, -0.77506375, 0.60363513, -0.39198413, -0.1095438, 0.52200097, -0.17368637, 0.33005458, -0.08554281, -0.16903882, 0.6626469, 0.26860502, -0.31727386, 0.16831318, 0.54661363, -0.22691324, -0.2213713, 0.13022773, -0.018926715, 0.5978344, 0.7009308, 0.3213291, -0.06116992, -0.47204834, 0.053395636, 0.41495168, 0.1398972, -0.00920044, 0.51387507, -0.20239796, -0.33881006, 0.06384138, 0.39957187, -0.22631931, 0.2703376, -0.3715142, -0.08272757, -0.009439395, 0.81558067, -0.056132033, -0.2033178, 0.06716417, 0.32786578, 0.2202653, 0.7835054, 0.49024755, 0.9157504, 0.02800904, 0.17644204, -0.290084, -0.06456937]
25 [-0.2975807, -0.20421638, -0.37442383, -0.07110159, 0.59014755, 0.12125732, 0.60117084, 0.8362974, 0.068440735, -0.51822335, 0.5107214, -0.22376877, 0.30631652, 0.2898636, -0.1981506, 0.36800513, -0.060740598, -0.19881113, -0.29918158, -0.3337409, 0.49441957, -0.47326252, -0.47912678, -0.25601766, 0.027651895, 0.46597135, -0.16599816, 0.361556, -0.30897662, -0.062810756, 0.10552911, -1.2679831, -0.106211625, -0.75202465, -0.52530086, 0.26791075, 0.79248756, 0.53838426, -0.13899043, 0.9164999, 0.0035071606, 0.12689179, 0.08354963, 0.40294522, -0.07831688, -0.98201007, -0.23519583, -0.68863493, 0.1205491, 0.27172065, 0.2242901, 0.25589004, -0.26702186, -0.15100779, 0.41484767, 0.108983286, -0.374988, 0.025507003, -0.3982625, 0.7523837, -0.43955246, 0.07485791, 0.78390574, 0.2136505, -0.3415865, -0.30612984, 0.45834386, 0.34307876, 0.48561114, 0.66802055, 0.13805439, 0.3547748, -0.24001221, -0.13986234, 0.42025846, -0.62192166, 0.12754987, 0.7708804, -0.21657385, -0.31876987, -0.1780718, -0.2224597, -0.12299407, 0.43107238, -0.71141946, -0.69743055, 0.40410796, 0.17466691, -0.032128792, -0.20082636, -0.44824558, 0.20884177, 1.0180194, 0.48581776, 0.24548782, 1.0574403, 0.11473683, 0.4316606, -0.2315027, -0.14678365]
26 [-0.42377922, 0.024577472, -1.1906108, -0.06739444, 0.108710915, -0.18691176, 0.66816133, 0.99571824, 0.02092722, -0.45816752, 0.10296306, 0.40871105, 0.46973583, 0.112247325, -0.10738673, 0.1922757, -0.24588741, -0.700075, 0.26101184, -0.325764, 0.75832814, -0.81085986, 0.021889204, 0.20574446, -0.545688, -0.13841736, -0.043123707, 0.07090131, -0.25888866, -0.5231019, -0.28532022, -0.86185557, -0.21649751, -0.76339245, 0.20253356, 0.43671095, 0.5341098, 0.085155495, -0.4540465, 1.0900004, -0.18638751, 0.47467944, 0.96845543, 0.2548339, -0.18340899, -1.0339344, -0.5534861, -0.11726243, 0.18133438, 0.50345504, -0.14004155, 0.5082815, 0.27887943, -0.39594567, 0.4477179, 0.03603548, -0.3756733, 0.45916605, -0.2493515, 0.24938972, -0.05120723, 0.23806353, 0.4731409, 0.7193707, -0.8359665, -0.62230533, 0.487687, 0.8493804, 0.37255606, 0.8178832, 0.08775585, -0.20260869, 0.051475864, 0.1508299, 0.63159126, -0.35328886, 0.11265301, 0.6617319, -0.272211, -0.15319678, 0.062918305, 0.0008279278, -0.29681253, 0.40470183, -0.6353815, -0.58790725, 0.2058634, 0.52653337, 0.18137056, -0.19361599, -0.16765231, -0.030727787, 0.88576245, 0.9781849, 0.2593181, 0.65262246, 0.19025251, 0.120107025, 0.5025016, -0.21913363]
27 [-0.25999814, 0.23773001, -0.740562, 0.38188815, -0.2486214, 0.19512695, 0.43561172, 1.0254062, -0.0697482, -0.49229512, 0.14953461, -0.061072297, 0.568503, 0.03566913, -0.34003806, -0.051359087, -0.03342707, -0.48989153, -0.26669854, -0.2795337, 0.7193236, -0.3662757, -0.19066596, 0.18272828, 0.17561668, 0.108593345, 0.07884654, 0.319849, 0.12399265, 0.03691339, -0.6173884, -1.0267603, 0.049208302, -1.0655564, 0.15319689, 0.5073724, 0.6513228, -0.30587053, -0.23791945, 0.42746, -0.3936097, 0.13167036, 0.34474495, 0.4968981, -0.37724283, -0.51537234, -0.54944175, -0.1251328, 0.09750848, 0.24724706, 0.08021895, 0.034549933, -0.40245003, -0.3480686, 0.37548828, 0.08623444, -0.44683436, -0.24506082, -0.42248634, 0.11496667, -0.074910216, 0.45474735, 0.09272769, 0.41920307, -0.44448358, -0.41468382, 0.5744027, 0.8429997, 0.16168453, 0.78796893, 0.33230177, -0.2059246, -0.0492663, -0.066606395, 0.5321139, -0.19388846, 0.4093176, 0.21567222, -0.16424467, 0.021607356, -0.2565405, 0.25989455, 0.004387416, 0.44521248, -1.0161549, 0.012800819, -0.21445175, -0.03618673, -0.32609162, -0.39460716, -0.13144654, 0.026473993, 0.58066005, 0.9627563, 0.44213736, 0.465126, -0.23279832, 0.5710766, -0.28808773, 0.14436409]
28 [-0.31400126, 0.19960853, -0.7782601, 0.05610985, 0.5714007, 0.18556274, 0.27871335, 0.8291547, 0.30857444, -0.68533665, 0.103632994, -0.056005336, 0.54998523, 0.023030404, -0.27579185, 0.48393977, 0.12383999, -0.37000954, -0.4007358, -0.39360115, 0.79164386, -0.47186348, -0.16375682, 0.2511676, -0.07175525, 0.46192142, -0.17501132, 0.5156904, -0.26812395, -0.091444895, -0.15124626, -1.5699885, -0.23379344, -1.0283003, -0.46527335, 0.4071721, 0.8264367, 0.38988957, -0.36066023, 1.042176, 0.27450803, 0.18871547, 0.45529434, 0.46309802, -0.051551946, -1.2206159, -0.23086688, -0.4751691, 0.24673831, 0.24975118, -0.07428119, 0.13170254, -0.0886228, -0.4086152, 0.26268467, 0.07418195, -0.636193, 0.1662906, -0.36955503, 0.4329298, -0.19749458, 0.45892987, 0.5549369, 0.6006121, -0.39635754, -0.5740201, 0.3979762, 0.7694749, 0.3038244, 0.4564403, -0.049281154, 0.015743267, -0.38954145, 0.27843216, 0.47843948, -0.78516215, 0.2876527, 0.62058836, -0.21174699, -0.04415966, -0.33965895, -0.12950593, -0.12796341, 0.6130193, -0.920191, -0.57228225, 0.25404623, 0.17659071, -0.26321748, 0.09613314, -0.35248315, 0.20174086, 0.9937319, 0.7463866, 0.4597625, 0.5179219, -0.02212865, 0.14931904, 0.15425003, -0.0043220273]
In [40]:
import numpy as np
import pandas as pd
def augment_with_shared_noise(df1, df2, num_copies=23, noise_level_ratio=0.000001):
"""
두 데이터프레임에 동일한 노이즈를 적용하여 증강된 데이터프레임을 반환합니다.
:param df1: 첫 번째 데이터프레임 (IBK 데이터)
:param df2: 두 번째 데이터프레임 (BOK 데이터)
:param num_copies: 각 기준일자별로 추가할 복제본의 개수
:param noise_level_ratio: 노이즈의 표준 편차 비율
:return: 증강된 두 데이터프레임 (동일한 노이즈가 적용된 데이터프레임)
"""
augmented_list1 = []
augmented_list2 = []
for date, raw_data1 in df1.groupby(df1.columns[0]):
raw_data2 = df2[df2[df2.columns[0]] == date]
if raw_data2.empty:
continue
# 원본 데이터 추가
augmented_list1.append(raw_data1)
augmented_list2.append(raw_data2)
# 그룹을 num_copies 만큼 복제
repeated_group1 = pd.DataFrame(np.repeat(raw_data1.iloc[:, 1:].values, num_copies, axis=0), columns=raw_data1.iloc[:, 1:].columns)
repeated_group2 = pd.DataFrame(np.repeat(raw_data2.iloc[:, 1:].values, num_copies, axis=0), columns=raw_data2.iloc[:, 1:].columns)
# 앞에 date 컬럼 추가
date_column1 = np.repeat(date, repeated_group1.shape[0])
date_column2 = np.repeat(date, repeated_group2.shape[0])
repeated_group1.insert(0, raw_data1.columns[0], date_column1)
repeated_group2.insert(0, raw_data2.columns[0], date_column2)
# 동일한 노이즈 생성
raw_noise = np.random.normal(0, noise_level_ratio, size=(repeated_group1.shape[0], 1))
nim_noise = np.random.normal(0, noise_level_ratio, size=(repeated_group1.shape[0], 1))
# 첫 번째 두 줄: 마지막 컬럼을 제외한 나머지 열에 raw_noise 적용
repeated_group1.iloc[:, 1:-1] += raw_noise
repeated_group2.iloc[:, 1:-1] += raw_noise
# 두 번째 두 줄: 마지막 컬럼에 nim_noise 적용
repeated_group1.iloc[:, -1] += nim_noise.flatten()
repeated_group2.iloc[:, -1] += nim_noise.flatten()
# 복제된 데이터를 원본 데이터 뒤에 추가
augmented_list1.append(repeated_group1)
augmented_list2.append(repeated_group2)
# 각 리스트를 하나의 데이터프레임으로 결합
augmented_df1 = pd.concat(augmented_list1, ignore_index=True)
augmented_df2 = pd.concat(augmented_list2, ignore_index=True)
return augmented_df1, augmented_df2
# 함수 사용 예시
ibk_raw_df, bok_raw_df = augment_with_shared_noise(ibk_raw_df, bok_raw_df, num_copies=23, noise_level_ratio=0.000001)
# IBK와 BOK 데이터의 동일한 행에서 NIM 값이 다른지 비교
def compare_nim_values(ibk_df, bok_df, target_column):
# IBK와 BOK의 NIM 값이 다른 행을 찾기
differing_nim_rows = ibk_df[ibk_df[target_column] != bok_df[target_column]]
if differing_nim_rows.empty:
print("모든 행에서 IBK와 BOK의 NIM 값이 동일합니다.")
else:
print(f"총 {len(differing_nim_rows)}개의 행에서 IBK와 BOK의 NIM 값이 다릅니다.")
return differing_nim_rows
# NIM 값이 다른 행 조사
differing_nim_rows = compare_nim_values(ibk_raw_df, bok_raw_df, 'NIM')
# NIM 값이 다른 행 출력
if differing_nim_rows is not None:
differing_nim_rows.head()
모든 행에서 IBK와 BOK의 NIM 값이 동일합니다.
In [41]:
import matplotlib.pyplot as plt
# 그래프 설정 (아래위로 배치)
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(25, 10), sharex=False)
# 증강 전 타겟 (NIM) 시각화 (24개 단위 평균)
ax1.plot(ibk_raw_df['NIM'].iloc[::24].index, ibk_raw_df['NIM'].iloc[::24].values, color='blue', label='Original NIM (24개 단위 평균)')
ax1.set_title("증강 전 타겟 (NIM)")
ax1.set_xlabel("일자 (24개 단위 평균)")
ax1.set_ylabel("NIM 값")
ax1.legend()
# 증강 후 타겟 (NIM) 시각화 (전체 데이터)
ax2.plot(ibk_raw_df['NIM'].index, ibk_raw_df['NIM'].values, color='orange', label='Augmented NIM')
ax2.set_title("증강 후 타겟 (NIM)")
ax2.set_xlabel("일자")
ax2.set_ylabel("NIM 값")
ax2.legend()
# 레이아웃 조정 및 그래프 표시
plt.tight_layout()
plt.show()
In [42]:
from statsmodels.tsa.stattools import adfuller
stat, p_value, lags, nobs, crit, icb = adfuller(ibk_raw_df['NIM'])
print('-'*80)
print(f'ADF Test 결과')
print('-'*80)
print(f'ADF Statistic: {stat}')
print(f'p-value: {p_value}')
print(f'Critical Values: {crit}')
print('-'*80)
# ADF Test 결과
# ADF Statistic: -1.769986684955549
# p-value: 0.3954609507253797
# Critical Values: {'1%': -3.430516765985009, '5%': -2.861613706693738, '10%': -2.566809231713139}
--------------------------------------------------------------------------------
ADF Test 결과
--------------------------------------------------------------------------------
ADF Statistic: -1.7699605928076725
p-value: 0.39547407311447247
Critical Values: {'1%': -3.430516765985009, '5%': -2.861613706693738, '10%': -2.566809231713139}
--------------------------------------------------------------------------------
In [43]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
# 시계열 분해 (추세, 계절성, 잔차)
decomposition = seasonal_decompose(ibk_raw_df['NIM'].iloc[::24], model='additive', period=30)
# 분해된 각 구성 요소
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
# ADF 테스트 함수 정의
def adf_test(series, name):
result = adfuller(series.dropna())
print(f'ADF Test for {name}')
print('ADF Statistic:', result[0])
print('p-value:', result[1])
print('Critical Values:', result[4])
print('---\n')
return result
# 추세, 계절성, 잔차에 대한 ADF 테스트 수행
# 추세 (Trend) >> 귀무가설(H₀): 시계열 데이터에 추세가 있으며, 비정성적이다.
result = adf_test(trend, 'Trend')
trend_p_value = result[1]
# 계절성 (Seasonality) >> 귀무가설(H₀): 시계열 데이터에 계절성이 있으며, 비정성적이다.
result = adf_test(seasonal, 'Seasonality')
seasonal_p_value = result[1]
# 잔차 (Residual) >> 귀무가설(H₀): 시계열 데이터에 단위근이 존재하여 정상성을 만족하지 않는다. (즉, 잔차에도 주기적인 패턴이나 변화가 있을 수 있다)
result = adf_test(residual, 'Residual')
residual_p_value = result[1]
# 분해 결과 시각화 (크기 25, 10, x축 표시 없이 그리기)
fig, axs = plt.subplots(3, 1, figsize=(25, 10))
# 추세 (Trend)
axs[0].plot(trend, label='Trend', color='green')
axs[0].set_title('추세(Trend)')
axs[0].grid(True)
axs[0].set_xticks([]) # x축 표시 제거
axs[0].annotate(f'p-value: {trend_p_value:.6e}', xy=(0.5, 0.85), xycoords='axes fraction', fontsize=15, ha='center', bbox=dict(facecolor='white', edgecolor='red', boxstyle='round,pad=0.5'))
# 계절성 (Seasonality)
axs[1].plot(seasonal, label='Seasonality', color='orange')
axs[1].set_title('계절성(Seasonality)')
axs[1].grid(True)
axs[1].set_xticks([]) # x축 표시 제거
axs[1].annotate(f'p-value: {seasonal_p_value:.6e}', xy=(0.5, 0.85), xycoords='axes fraction', fontsize=15, ha='center', bbox=dict(facecolor='white', edgecolor='red', boxstyle='round,pad=0.5'))
# 잔차 (Residual)
axs[2].plot(residual, label='Residual', color='blue')
axs[2].set_title('잔차(Residual)')
axs[2].grid(True)
axs[2].set_xticks([]) # x축 표시 제거
axs[2].annotate(f'p-value: {residual_p_value:.6e}', xy=(0.5, 0.85), xycoords='axes fraction', fontsize=15, ha='center', bbox=dict(facecolor='white', edgecolor='red', boxstyle='round,pad=0.5'))
plt.tight_layout()
plt.show()
ADF Test for Trend
ADF Statistic: -1.4631400699462933
p-value: 0.5516602927995895
Critical Values: {'1%': -3.4344929153128296, '5%': -2.8633698507720933, '10%': -2.567744178825802}
---
ADF Test for Seasonality
ADF Statistic: -32.428803233709665
p-value: 0.0
Critical Values: {'1%': -3.4344181718827462, '5%': -2.8633368604103104, '10%': -2.567726611627638}
---
ADF Test for Residual
ADF Statistic: -18.220940400302357
p-value: 2.379943939209202e-30
Critical Values: {'1%': -3.4344642432857992, '5%': -2.8633571955690647, '10%': -2.5677374399794197}
---
| 연도 | 1월 | 2월 | 3월 | 4월 | 5월 | 6월 | 7월 | 8월 | 9월 | 10월 | 11월 | 12월 | 연간 합계 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2020 | 22 | 20 | 22 | 22 | 20 | 22 | 23 | 21 | 22 | 21 | 22 | 23 | 260 |
| 2021 | 20 | 20 | 23 | 22 | 20 | 22 | 22 | 22 | 22 | 21 | 22 | 23 | 259 |
| 2022 | 21 | 20 | 23 | 21 | 21 | 22 | 21 | 23 | 22 | 21 | 22 | 22 | 259 |
| 2023 | 21 | 20 | 23 | 20 | 22 | 22 | 21 | 23 | 21 | 22 | 22 | 21 | 258 |
| 2024 | 22 | 20 | 21 | 22 | 21 | 20 | 23 | 22 | 21 | 22 | 21 | 22 | 257 |
In [44]:
# 5일 후(120시간 후) NIM 값에서 현재 NIM 값을 빼서 TARGET 컬럼 생성
ibk_raw_df['TARGET'] = ibk_raw_df['NIM'].diff(-5 * 24)
bok_raw_df['TARGET'] = bok_raw_df['NIM'].diff(-5 * 24)
In [45]:
from statsmodels.tsa.stattools import adfuller
stat, p_value, lags, nobs, crit, icb = adfuller(ibk_raw_df['TARGET'].dropna())
print('-'*80)
print(f'ADF Test 결과')
print('-'*80)
print(f'ADF Statistic: {stat}')
print(f'p-value: {p_value}')
print(f'Critical Values: {crit}')
print('-'*80)
# ADF Test 결과
# ADF Statistic: -1.769986684955549
# p-value: 0.3954609507253797
# Critical Values: {'1%': -3.430516765985009, '5%': -2.861613706693738, '10%': -2.566809231713139}
--------------------------------------------------------------------------------
ADF Test 결과
--------------------------------------------------------------------------------
ADF Statistic: -15.901311308889962
p-value: 8.322937541457286e-29
Critical Values: {'1%': -3.430517277897847, '5%': -2.8616139329409513, '10%': -2.566809352138397}
--------------------------------------------------------------------------------
In [46]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
# 시계열 분해 (추세, 계절성, 잔차)
decomposition = seasonal_decompose(ibk_raw_df['TARGET'].iloc[::24].dropna(), model='additive', period=30)
# 분해된 각 구성 요소
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
# ADF 테스트 함수 정의
def adf_test(series, name):
result = adfuller(series.dropna())
print(f'ADF Test for {name}')
print('ADF Statistic:', result[0])
print('p-value:', result[1])
print('Critical Values:', result[4])
print('---\n')
return result
# 추세, 계절성, 잔차에 대한 ADF 테스트 수행
# 추세 (Trend) >> 귀무가설(H₀): 시계열 데이터에 추세가 있으며, 비정성적이다.
result = adf_test(trend, 'Trend')
trend_p_value = result[1]
# 계절성 (Seasonality) >> 귀무가설(H₀): 시계열 데이터에 계절성이 있으며, 비정성적이다.
result = adf_test(seasonal, 'Seasonality')
seasonal_p_value = result[1]
# 잔차 (Residual) >> 귀무가설(H₀): 시계열 데이터에 단위근이 존재하여 정상성을 만족하지 않는다. (즉, 잔차에도 주기적인 패턴이나 변화가 있을 수 있다)
result = adf_test(residual, 'Residual')
residual_p_value = result[1]
# 분해 결과 시각화 (크기 25, 10, x축 표시 없이 그리기)
fig, axs = plt.subplots(3, 1, figsize=(25, 10))
# 추세 (Trend)
axs[0].plot(trend, label='Trend', color='green')
axs[0].set_title('추세(Trend)')
axs[0].grid(True)
axs[0].set_xticks([]) # x축 표시 제거
axs[0].annotate(f'p-value: {trend_p_value:.6e}', xy=(0.5, 0.85), xycoords='axes fraction', fontsize=15, ha='center', bbox=dict(facecolor='white', edgecolor='red', boxstyle='round,pad=0.5'))
# 계절성 (Seasonality)
axs[1].plot(seasonal, label='Seasonality', color='orange')
axs[1].set_title('계절성(Seasonality)')
axs[1].grid(True)
axs[1].set_xticks([]) # x축 표시 제거
axs[1].annotate(f'p-value: {seasonal_p_value:.6e}', xy=(0.5, 0.85), xycoords='axes fraction', fontsize=15, ha='center', bbox=dict(facecolor='white', edgecolor='red', boxstyle='round,pad=0.5'))
# 잔차 (Residual)
axs[2].plot(residual, label='Residual', color='blue')
axs[2].set_title('잔차(Residual)')
axs[2].grid(True)
axs[2].set_xticks([]) # x축 표시 제거
axs[2].annotate(f'p-value: {residual_p_value:.6e}', xy=(0.5, 0.85), xycoords='axes fraction', fontsize=15, ha='center', bbox=dict(facecolor='white', edgecolor='red', boxstyle='round,pad=0.5'))
plt.tight_layout()
plt.show()
ADF Test for Trend
ADF Statistic: -7.560415022152772
p-value: 3.020516593452587e-11
Critical Values: {'1%': -3.4345060805811993, '5%': -2.8633756615919035, '10%': -2.5677472730713236}
---
ADF Test for Seasonality
ADF Statistic: -67.26793753871208
p-value: 0.0
Critical Values: {'1%': -3.434430865671321, '5%': -2.8633424632707527, '10%': -2.567729595112625}
---
ADF Test for Residual
ADF Statistic: -12.341290161447
p-value: 6.110331349377807e-23
Critical Values: {'1%': -3.434508723681996, '5%': -2.863376828187341, '10%': -2.567747894280675}
---
In [47]:
import matplotlib.pyplot as plt
# 그래프 설정 (아래위로 배치)
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(25, 10), sharex=False)
# 증강 전 타겟 (NIM) 시각화 (24개 단위 평균)
ax1.plot(ibk_raw_df['NIM'].iloc[::24].index, ibk_raw_df['NIM'].iloc[::24].values, color='blue', label='Original NIM (24개 단위 평균)')
ax1.set_title("증강 전 타겟 (NIM)")
ax1.set_xlabel("일자 (24개 단위 평균)")
ax1.set_ylabel("NIM 값")
ax1.legend()
# 증강 후 타겟 (NIM) 시각화 (전체 데이터)
ax2.plot(ibk_raw_df['TARGET'].dropna().index, ibk_raw_df['TARGET'].dropna().values, color='orange', label='타겟(차분)')
ax2.set_title("정성성 후 타겟(차분))")
ax2.set_xlabel("일자")
ax2.set_ylabel("차분 값")
ax2.legend()
# 레이아웃 조정 및 그래프 표시
plt.tight_layout()
plt.show()
In [48]:
# 1. IBK와 BOK 데이터프레임에서 TARGET이 NaN인 기준일자를 각각 추출
ibk_nan_dates = set(ibk_raw_df.loc[ibk_raw_df['TARGET'].isna(), '기준일자'])
bok_nan_dates = set(bok_raw_df.loc[bok_raw_df['TARGET'].isna(), '기준일자'])
# 2. 두 데이터프레임의 기준일자를 합집합으로 결합하고 정렬
unique_nan_dates = sorted(ibk_nan_dates | bok_nan_dates)
# 결과 출력
print("TARGET 값이 NaN인 중복되지 않은 기준일자 목록:")
print(unique_nan_dates)
TARGET 값이 NaN인 중복되지 않은 기준일자 목록: ['2024-08-21', '2024-08-22', '2024-08-23', '2024-08-26', '2024-08-27']
In [49]:
# 기준일자가 '2018-01-02'인 행 삭제 후 인덱스 재설정
ibk_raw_df = ibk_raw_df.loc[~ibk_raw_df['기준일자'].isin(unique_nan_dates)].reset_index(drop=True)
bok_raw_df = bok_raw_df.loc[~bok_raw_df['기준일자'].isin(unique_nan_dates)].reset_index(drop=True)
news_raw_df = news_raw_df.loc[~news_raw_df['기준일자'].isin(unique_nan_dates)].reset_index(drop=True)
# news_raw_df에 ibk_raw_df의 'NIM', 'TARGET' 컬럼을 붙임
news_raw_df['NIM'] = ibk_raw_df['NIM']
news_raw_df['TARGET'] = ibk_raw_df['TARGET']
print(ibk_raw_df.shape)
print(bok_raw_df.shape)
print(news_raw_df.shape)
(39144, 25) (39144, 11) (39144, 8)
In [50]:
# 두 데이터프레임에서 동일한 인덱스에 대해 NIM과 TARGET 값을 비교
# 우선 동일한 인덱스만을 추출하기 위해 IBK, BOK, NEWS 데이터프레임의 공통 인덱스를 찾습니다.
common_index = ibk_raw_df.index.intersection(bok_raw_df.index).intersection(news_raw_df.index)
# NIM 값과 TARGET 값을 비교하여 서로 다른 행을 찾음
mismatch_rows = []
for idx in common_index:
nim_ibk = ibk_raw_df.loc[idx, 'NIM']
nim_bok = bok_raw_df.loc[idx, 'NIM']
target_ibk = ibk_raw_df.loc[idx, 'TARGET']
target_bok = bok_raw_df.loc[idx, 'TARGET']
# NIM 값 또는 TARGET 값이 서로 다른 경우 해당 인덱스를 기록
if nim_ibk != nim_bok or target_ibk != target_bok:
mismatch_rows.append({
'Index': idx,
'IBK_NIM': nim_ibk,
'BOK_NIM': nim_bok,
'IBK_TARGET': target_ibk,
'BOK_TARGET': target_bok
})
# 결과를 데이터프레임으로 변환하여 출력
mismatch_df = pd.DataFrame(mismatch_rows)
print("NIM 값과 TARGET 값이 서로 다른 행:")
print(mismatch_df)
NIM 값과 TARGET 값이 서로 다른 행: Empty DataFrame Columns: [] Index: []
In [51]:
import numpy as np
total_cnt = len(ibk_raw_df)//24
train_cnt = int(total_cnt * 0.7)
val_cnt = int(total_cnt * 0.2)
test_cnt = total_cnt - train_cnt - val_cnt
total_size = total_cnt * 24
train_size = train_cnt * 24
val_size = val_cnt * 24
test_size = test_cnt * 24
print(f"total_size : {total_size} >> {total_size/24}일")
print(f"train_size : {train_size} >> {train_size/24}일")
print(f"val_size : {val_size} >> {val_size/24}일")
print(f"test_size : {test_size} >> {test_size/24}일")
total_size : 39144 >> 1631.0일 train_size : 27384 >> 1141.0일 val_size : 7824 >> 326.0일 test_size : 3936 >> 164.0일
In [52]:
import numpy as np
import matplotlib.pyplot as plt
# 사용자 정의 함수: 퍼센트와 건수를 파이 차트에 표시
def autopct_with_counts(pct, sizes):
total = sum(sizes)
count = int(pct * total / 100)
return f'{pct:.1f}%\n({count})'
# 데이터 설정
sizes = [train_size, val_size, test_size]
labels = ['Train', 'Validation', 'Test']
# 파이 차트와 바 차트를 동시에 그리는 시각화
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 10))
# 파이 차트 (좌측) - 폭을 줘서 입체적인 효과 및 건수 표시 추가
ax1.pie(sizes, labels=labels, autopct=lambda pct: autopct_with_counts(pct, sizes), startangle=90,
colors=['#1f77b4', '#aec7e8', '#6baed6'], explode=[0.05, 0.05, 0.05], shadow=True, textprops={'fontsize': 15})
# 바 차트 (우측) - 막대에 그림자 효과 추가 및 막대 안에 건수 표시
bars = ax2.bar(labels, sizes, color=['#1f77b4', '#aec7e8', '#6baed6'], edgecolor='black')
ax2.set_xlabel('Dataset')
ax2.set_ylabel('Size')
# 막대에 투명도를 주고 막대 안에 건수 표시 추가
for bar, size in zip(bars, sizes):
bar.set_alpha(0.8)
# 막대 내부에 건수 표시 (중앙에 위치)
ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() / 2, str(size), ha='center', va='center', fontsize=15, color='black')
plt.tight_layout()
plt.show()
In [53]:
import numpy as np
nim_date = ibk_raw_df['기준일자'].astype(str).str[:10].to_numpy()
original_nim_values = ibk_raw_df['NIM'].to_numpy()
original_target_values= ibk_raw_df['TARGET'].to_numpy()
target_values = ibk_raw_df['TARGET'].to_numpy()
for i in range(0, 241, 24):
print(f"nim_date[{i:03d}]: {nim_date[i]}, nim_values[{i:03d}]: {original_nim_values[i]:+.4f}, target_values[{i:03d}]: {original_target_values[i]:+.4f}")
nim_date[000]: 2018-01-02, nim_values[000]: +1.9375, target_values[000]: +0.0818 nim_date[024]: 2018-01-03, nim_values[024]: +1.9397, target_values[024]: -0.0084 nim_date[048]: 2018-01-04, nim_values[048]: +1.9396, target_values[048]: -0.0073 nim_date[072]: 2018-01-05, nim_values[072]: +1.9378, target_values[072]: -0.0300 nim_date[096]: 2018-01-08, nim_values[096]: +1.9551, target_values[096]: -0.0080 nim_date[120]: 2018-01-09, nim_values[120]: +1.8557, target_values[120]: -0.0750 nim_date[144]: 2018-01-10, nim_values[144]: +1.9481, target_values[144]: +0.1601 nim_date[168]: 2018-01-11, nim_values[168]: +1.9469, target_values[168]: -0.1933 nim_date[192]: 2018-01-12, nim_values[192]: +1.9678, target_values[192]: +0.0216 nim_date[216]: 2018-01-15, nim_values[216]: +1.9631, target_values[216]: -0.0175 nim_date[240]: 2018-01-16, nim_values[240]: +1.9307, target_values[240]: +0.0027
In [54]:
# train_size만큼의 데이터로 평균과 표준편차 계산
nim_train_data = original_nim_values[:train_size]
nim_train_mean = np.mean(nim_train_data)
nim_train_std = np.std(nim_train_data)
target_train_data = original_target_values[:train_size]
target_train_mean = np.mean(target_train_data)
target_train_std = np.std(target_train_data)
print('-'*80)
print(f"nim_train_mean: {nim_train_mean:+.8f}")
print(f"nim_train_std : {nim_train_std:+.8f}")
print('-'*80)
print(f"target_train_mean: {target_train_mean:+.8f}")
print(f"target_train_std : {target_train_std:+.8f}")
print('-'*80)
-------------------------------------------------------------------------------- nim_train_mean: +1.69604716 nim_train_std : +0.18829625 -------------------------------------------------------------------------------- target_train_mean: +0.00075180 target_train_std : +0.03153061 --------------------------------------------------------------------------------
In [55]:
import numpy as np
print("ibk_raw_df 표준화 :")
# 첫 번째 컬럼 제외한 나머지 데이터 가져오기(기준일자, TARGET 제외, NIM 포함)
nim_ibks_data = ibk_raw_df.iloc[:, 1:-1].values
# 데이터를 float 타입으로 변환, 변환할 수 없는 값은 NaN으로 처리
nim_ibks_data = np.array(nim_ibks_data, dtype=float)
print(nim_ibks_data[:10, :])
# train_size가 정수인지 확인 후 슬라이스
ibks_train_data = nim_ibks_data[:train_size]
# 훈련 데이터로 평균과 표준편차 계산 (각 열에 대해)
ibks_train_mean = np.mean(ibks_train_data, axis=0) # 열 기준 평균
ibks_train_std = np.std(ibks_train_data, axis=0) # 열 기준 표준편차
# 표준화: (x - mean) / std
nim_ibks_data = (nim_ibks_data - ibks_train_mean) / ibks_train_std
# 표준화된 데이터를 numpy 파일로 저장
np.save('data/numpy/nim_ibks_data.npy', nim_ibks_data)
print("nim_ibks_data 저장 완료: data/numpy/nim_ibks_data.npy")
# numpy 파일에서 ibk_raw_data 불러오기
nim_ibks_data = np.load('data/numpy/nim_ibks_data.npy')
print(nim_ibks_data[:10, :])
ibk_raw_df 표준화 : [[ 3.2849 1.8892 1.3755 17.3082 2.44 1.6234 2.0323 0.6551 1.2139 0.4095 2.2297 0.656 4.7707 1.8364 2.6152 1.8054 1.1844 1.888 2.6449 3.1966 1.4629 1.6602 1.9375 ] [ 3.28489906 1.88919906 1.37549906 17.30819906 2.43999906 1.62339906 2.03229906 0.65509906 1.21389906 0.40949906 2.22969906 0.65599906 4.77069906 1.83639906 2.61519906 1.80539906 1.18439906 1.88799906 2.64489906 3.19659906 1.46289906 1.66019906 1.93750034] [ 3.28490082 1.88920082 1.37550082 17.30820082 2.44000082 1.62340082 2.03230082 0.65510082 1.21390082 0.40950082 2.22970082 0.65600082 4.77070082 1.83640082 2.61520082 1.80540082 1.18440082 1.88800082 2.64490082 3.19660082 1.46290082 1.66020082 1.93749969] [ 3.28490131 1.88920131 1.37550131 17.30820131 2.44000131 1.62340131 2.03230131 0.65510131 1.21390131 0.40950131 2.22970131 0.65600131 4.77070131 1.83640131 2.61520131 1.80540131 1.18440131 1.88800131 2.64490131 3.19660131 1.46290131 1.66020131 1.93750031] [ 3.284902 1.889202 1.375502 17.308202 2.440002 1.623402 2.032302 0.655102 1.213902 0.409502 2.229702 0.656002 4.770702 1.836402 2.615202 1.805402 1.184402 1.888002 2.644902 3.196602 1.462902 1.660202 1.93750057] [ 3.28489874 1.88919874 1.37549874 17.30819874 2.43999874 1.62339874 2.03229874 0.65509874 1.21389874 0.40949874 2.22969874 0.65599874 4.77069874 1.83639874 2.61519874 1.80539874 1.18439874 1.88799874 2.64489874 3.19659874 1.46289874 1.66019874 1.93750059] [ 3.28490131 1.88920131 1.37550131 17.30820131 2.44000131 1.62340131 2.03230131 0.65510131 1.21390131 0.40950131 2.22970131 0.65600131 4.77070131 1.83640131 2.61520131 1.80540131 1.18440131 1.88800131 2.64490131 3.19660131 1.46290131 1.66020131 1.9375019 ] [ 3.28489811 1.88919811 1.37549811 17.30819811 2.43999811 1.62339811 2.03229811 0.65509811 1.21389811 0.40949811 2.22969811 0.65599811 4.77069811 1.83639811 2.61519811 1.80539811 1.18439811 1.88799811 2.64489811 3.19659811 1.46289811 1.66019811 1.93750007] [ 3.28489961 1.88919961 1.37549961 17.30819961 2.43999961 1.62339961 2.03229961 0.65509961 1.21389961 0.40949961 2.22969961 0.65599961 4.77069961 1.83639961 2.61519961 1.80539961 1.18439961 1.88799961 2.64489961 3.19659961 1.46289961 1.66019961 1.93750046] [ 3.28490082 1.88920082 1.37550082 17.30820082 2.44000082 1.62340082 2.03230082 0.65510082 1.21390082 0.40950082 2.22970082 0.65600082 4.77070082 1.83640082 2.61520082 1.80540082 1.18440082 1.88800082 2.64490082 3.19660082 1.46290082 1.66020082 1.93750101]] nim_ibks_data 저장 완료: data/numpy/nim_ibks_data.npy [[ 0.78164967 0.98740583 0.40947391 1.16488859 0.41430169 0.63326199 0.08316508 1.7230817 1.15192276 0.09237283 0.60186645 -0.09195606 0.36697266 0.39283116 0.72249536 0.89143848 0.46964555 0.63428204 0.93241689 0.79204667 1.5229571 0.94421722 1.28230297] [ 0.78164753 0.98740429 0.4094717 1.16488792 0.41430054 0.63326013 0.08316416 1.72307371 1.15191908 0.09236951 0.60186448 -0.09195841 0.36697036 0.39282829 0.72249216 0.89143558 0.46964338 0.63427834 0.93241505 0.79204449 1.52295521 0.94421554 1.28230479] [ 0.78165156 0.98740719 0.40947586 1.16488917 0.41430269 0.63326363 0.08316588 1.7230887 1.151926 0.09237573 0.60186818 -0.091954 0.36697468 0.39283367 0.72249817 0.89144102 0.46964745 0.63428528 0.9324185 0.79204859 1.52295876 0.94421869 1.28230134] [ 0.78165268 0.98740799 0.40947701 1.16488952 0.41430329 0.6332646 0.08316636 1.72309287 1.15192792 0.09237746 0.60186921 -0.09195277 0.36697588 0.39283516 0.72249984 0.89144254 0.46964859 0.63428721 0.93241946 0.79204973 1.52295974 0.94421957 1.28230459] [ 0.78165424 0.98740912 0.40947863 1.16489 0.41430412 0.63326595 0.08316703 1.72309868 1.15193061 0.09237987 0.60187065 -0.09195106 0.36697756 0.39283725 0.72250217 0.89144465 0.46965017 0.63428991 0.9324208 0.79205132 1.52296112 0.9442208 1.282306 ] [ 0.78164679 0.98740376 0.40947094 1.1648877 0.41430015 0.6332595 0.08316385 1.72307098 1.15191781 0.09236838 0.6018638 -0.09195921 0.36696957 0.39282732 0.72249107 0.89143459 0.46964263 0.63427707 0.93241442 0.79204374 1.52295456 0.94421497 1.28230608] [ 0.78165268 0.987408 0.40947701 1.16488952 0.41430329 0.6332646 0.08316636 1.72309287 1.15192793 0.09237746 0.60186921 -0.09195277 0.36697589 0.39283516 0.72249984 0.89144254 0.46964859 0.63428722 0.93241947 0.79204973 1.52295974 0.94421957 1.28231306] [ 0.78164535 0.98740273 0.40946946 1.16488725 0.41429938 0.63325825 0.08316324 1.72306563 1.15191534 0.09236616 0.60186248 -0.09196079 0.36696802 0.3928254 0.72248893 0.89143264 0.46964118 0.63427459 0.93241318 0.79204228 1.52295329 0.94421384 1.28230333] [ 0.78164877 0.98740518 0.40947298 1.16488831 0.41430121 0.63326121 0.0831647 1.72307834 1.15192121 0.09237143 0.60186562 -0.09195705 0.36697169 0.39282995 0.72249402 0.89143726 0.46964463 0.63428048 0.93241611 0.79204575 1.5229563 0.94421652 1.28230544] [ 0.78165156 0.98740719 0.40947586 1.16488917 0.41430269 0.63326363 0.08316588 1.7230887 1.151926 0.09237573 0.60186818 -0.091954 0.36697468 0.39283367 0.72249817 0.89144102 0.46964745 0.63428528 0.9324185 0.79204859 1.52295876 0.9442187 1.28230834]]
In [56]:
import numpy as np
import pandas as pd
# numpy 배열 불러오기
nim_ibks_data = np.load('data/numpy/nim_ibks_data.npy')
# 앞에 nim_date 추가, 뒤에 nim_values와 target_values 추가
combined_data = np.column_stack((nim_date, nim_ibks_data, original_nim_values, original_target_values))
# numpy 배열을 데이터프레임으로 변환
columns = ['nim_date'] + [f'feature_{i}' for i in range(nim_ibks_data.shape[1])] + ['original_nim_values', 'original_target_values']
nim_ibks_df = pd.DataFrame(combined_data, columns=columns)
# 데이터프레임을 CSV 파일로 저장
nim_ibks_df.to_csv('temp/nim_ibks_data_검증용.csv', index=False)
print("데이터가 'temp/nim_ibks_data_검증용.csv'로 저장되었습니다.")
데이터가 'temp/nim_ibks_data_검증용.csv'로 저장되었습니다.
In [57]:
import numpy as np
print("bok_train_std 표준화 :")
# 첫 번째 컬럼 제외한 나머지 데이터 가져오기
nim_boks_data = bok_raw_df.iloc[:, 1:-1].values
# 데이터를 float 타입으로 변환, 변환할 수 없는 값은 NaN으로 처리
nim_boks_data = np.array(nim_boks_data, dtype=float)
print(nim_boks_data[:10, :])
# train_size가 정수인지 확인 후 슬라이스
boks_train_data = nim_boks_data[:train_size]
# 훈련 데이터로 평균과 표준편차 계산 (각 열에 대해)
boks_train_mean = np.mean(boks_train_data, axis=0) # 열 기준 평균
boks_train_std = np.std(boks_train_data, axis=0) # 열 기준 표준편차
# 표준화: (x - mean) / std
nim_boks_data = (nim_boks_data - boks_train_mean) / boks_train_std
# 표준화된 데이터를 numpy 파일로 저장
np.save('data/numpy/nim_boks_data.npy', nim_boks_data)
print("nim_boks_data 저장 완료: data/numpy/nim_boks_data.npy")
# numpy 파일에서 ibk_raw_data 불러오기
nim_boks_data = np.load('data/numpy/nim_boks_data.npy')
print(nim_boks_data[:10, :])
bok_train_std 표준화 : [[1.50000000e+00 1.63800000e+00 5.00000000e-01 2.50000000e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750000e+00] [1.49999906e+00 1.63799906e+00 4.99999061e-01 2.49999906e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750034e+00] [1.50000082e+00 1.63800082e+00 5.00000823e-01 2.50000082e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93749969e+00] [1.50000131e+00 1.63800131e+00 5.00001313e-01 2.50000131e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750031e+00] [1.50000200e+00 1.63800200e+00 5.00001997e-01 2.50000200e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750057e+00] [1.49999874e+00 1.63799874e+00 4.99998740e-01 2.49999874e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750059e+00] [1.50000131e+00 1.63800131e+00 5.00001314e-01 2.50000131e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750190e+00] [1.49999811e+00 1.63799811e+00 4.99998111e-01 2.49999811e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750007e+00] [1.49999961e+00 1.63799961e+00 4.99999605e-01 2.49999961e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750046e+00] [1.50000082e+00 1.63800082e+00 5.00000824e-01 2.50000082e+00 1.07140000e+03 2.62210000e+04 4.78640000e+04 1.61350800e+07 1.93750101e+00]] nim_boks_data 저장 완료: data/numpy/nim_boks_data.npy [[ 0.6632433 0.80555514 0.57183998 0.69671882 -1.59971295 -1.14715363 -0.99772466 -0.30659251 1.28230297] [ 0.66324144 0.80555305 0.57183435 0.69671749 -1.59971297 -1.14715363 -0.99772466 -0.30659251 1.28230479] [ 0.66324493 0.80555697 0.57184492 0.69671997 -1.59971294 -1.14715363 -0.99772466 -0.30659251 1.28230134] [ 0.6632459 0.80555806 0.57184786 0.69672066 -1.59971293 -1.14715363 -0.99772466 -0.30659251 1.28230459] [ 0.66324726 0.80555959 0.57185196 0.69672163 -1.59971292 -1.14715363 -0.99772466 -0.30659251 1.282306 ] [ 0.6632408 0.80555233 0.57183242 0.69671704 -1.59971298 -1.14715363 -0.99772466 -0.30659251 1.28230608] [ 0.6632459 0.80555807 0.57184787 0.69672067 -1.59971293 -1.14715363 -0.99772466 -0.30659251 1.28231306] [ 0.66323956 0.80555093 0.57182865 0.69671615 -1.59971299 -1.14715363 -0.99772466 -0.30659251 1.28230333] [ 0.66324252 0.80555426 0.57183761 0.69671826 -1.59971296 -1.14715363 -0.99772466 -0.30659251 1.28230544] [ 0.66324493 0.80555697 0.57184493 0.69671998 -1.59971294 -1.14715363 -0.99772466 -0.30659251 1.28230834]]
In [58]:
# 평균 풀링된 임베딩 추출 및 numpy 배열로 변환
news_embedding = np.array(news_raw_df['뉴스_EMBEDDING'].tolist()) # (num_samples, 100)
# PCA로 차원 유지하여 임베딩 변환
pca = PCA(n_components=100)
news_embeddings_pca = pca.fit_transform(news_embedding)
# DataFrame에 PCA 변환된 임베딩을 저장
news_raw_df['뉴스_EMBEDDING_100'] = list(news_embeddings_pca)
# '뉴스_EMBEDDING_100'과 'NIM' 컬럼 추출
news_embed_data = np.array(news_raw_df['뉴스_EMBEDDING_100'].tolist()) # (num_samples, 100)
news_nim_data = np.array(news_raw_df['NIM'].tolist()).reshape(-1, 1) # (num_samples, 1)
# NIM 컬럼을 훈련 데이터로 표준화
news_train_data = news_nim_data[:train_size]
news_train_mean = np.mean(news_train_data)
news_train_std = np.std(news_train_data)
news_nim_data = (news_nim_data - news_train_mean) / news_train_std
# 뉴스 임베딩과 표준화된 NIM 값 결합
nim_news_data = np.hstack([news_embed_data, news_nim_data]) # (num_samples, 101)
# 결합된 데이터를 numpy 파일로 저장
np.save('data/numpy/nim_news_data.npy', nim_news_data)
# 저장된 파일 불러오기 및 확인
nim_news_data = np.load('data/numpy/nim_news_data.npy')
print("nim_news_data 표준화 :")
print(f"nim_news_data.shape: {nim_news_data.shape}")
print(nim_news_data[:10, :])
nim_news_data 표준화 : nim_news_data.shape: (39144, 101) [[-3.21269972e-01 4.25721923e-01 -3.52086240e-01 6.39228666e-01 5.28767333e-01 -9.67022364e-01 -3.44325945e-01 -2.30847090e-01 1.22352447e+00 2.45458207e-01 -7.55765683e-01 -3.65414217e-01 1.20298128e-01 -4.86304074e-01 -2.95807418e-01 5.46162719e-01 5.55639058e-01 -1.15033933e+00 -5.63106876e-01 -3.44310166e-01 -3.09211696e-02 -4.48014318e-02 -2.31431534e-01 -3.07388497e-01 -1.06730066e-02 -1.57194347e-01 6.59067386e-01 2.59235765e-01 -7.43907628e-02 3.27744041e-01 -4.37445740e-01 -2.24102351e-01 -2.29624634e-01 -1.06893989e-01 -3.70324180e-02 2.62310231e-02 7.27176592e-02 -5.69934075e-02 1.20357218e-01 5.21648238e-03 3.22776185e-01 1.07155038e-02 1.37041960e-01 1.47687856e-01 5.95450324e-02 2.87496824e-01 1.21037368e-02 2.24911523e-02 1.38345759e-01 3.70268930e-03 -2.10831639e-01 9.20462558e-02 2.26445057e-01 1.59386647e-01 2.15273854e-01 1.28179696e-01 -1.36940843e-01 -4.44203661e-02 8.21991116e-02 -2.49547563e-01 -7.04727233e-02 -5.29950160e-02 1.44789239e-01 1.84884282e-01 6.81178079e-02 2.30137795e-01 1.68263845e-02 -1.33812564e-01 2.83593880e-02 1.28650181e-02 -4.27196596e-02 -2.48349046e-02 -8.77755292e-02 7.46599920e-02 -8.91660051e-02 6.46458549e-02 -2.61607201e-02 2.51290398e-02 6.72080709e-02 -6.20256872e-04 1.02442439e-01 -4.16550801e-02 1.01890516e-01 -1.22049135e-02 9.18895848e-02 -2.88116433e-02 -5.97582966e-03 4.45473853e-02 2.51564855e-02 1.14570520e-01 -1.25357642e-02 1.55878033e-04 -3.28692335e-02 4.83670431e-02 -3.78450848e-02 4.66957552e-02 -2.89561512e-02 5.12417630e-02 -9.58678188e-03 -5.69626638e-02 1.28230297e+00] [-1.30576890e+00 -5.38964188e-01 1.31110424e-01 1.45180632e+00 -3.50945618e-01 -2.74151436e-01 -1.01303931e+00 -1.07883352e+00 2.51289204e-01 -6.07429407e-01 1.94365721e-01 -2.88463916e-01 -8.28129204e-01 1.55055151e-01 1.28618407e-01 -7.51208327e-01 -5.88566340e-02 -8.80124549e-02 -2.70386184e-01 6.73099222e-02 -3.30599765e-01 2.34785253e-01 2.04356632e-01 3.42687136e-01 3.75157001e-01 -2.81372935e-01 6.27055802e-02 3.85684253e-01 2.70104788e-01 -2.66381014e-01 -4.90956648e-01 -2.45607248e-01 1.20412043e-01 -1.55631362e-01 1.23070295e-01 1.93881787e-02 1.75469100e-01 -7.36672636e-02 -3.31112154e-02 3.88896906e-02 -6.98907877e-02 -9.07315719e-02 1.62365813e-02 4.32917245e-02 -1.29793506e-01 -1.51174043e-01 -5.37238444e-02 3.27988985e-01 1.61934107e-03 -6.32230354e-02 -9.03977904e-02 -1.41007790e-01 -1.20890848e-01 2.41315429e-01 9.37474585e-02 1.02785815e-01 1.48455023e-01 -1.26487465e-01 6.39170641e-02 3.19237640e-03 4.26666343e-02 -2.10277484e-01 8.99496377e-03 -2.64442825e-02 -5.57621335e-02 1.17964761e-02 -1.34700889e-02 5.02932129e-02 -2.52842658e-02 -7.72335679e-02 9.85545439e-02 1.51742079e-01 6.57555495e-02 -4.30089367e-02 -3.94387233e-02 8.37002993e-02 2.22470142e-02 -3.66661715e-02 -9.85167828e-02 -5.96211594e-02 1.56065808e-01 -1.08879299e-01 -1.17414990e-01 -5.16350032e-02 1.82690756e-02 1.29885237e-02 -4.58040806e-02 2.50447800e-02 -2.34057540e-02 -2.35293953e-02 4.93221952e-03 -5.01275375e-02 -3.36120790e-02 -8.09844933e-02 5.30705948e-02 5.73111157e-03 1.18357744e-02 9.12190249e-03 4.27269170e-02 -2.15149480e-02 1.28230479e+00] [ 1.80298902e-01 -1.50027174e+00 4.19122029e-01 1.11391361e+00 8.16388702e-01 4.02108680e-01 -1.47265521e+00 1.45233601e-01 5.19004059e-01 -9.75264671e-01 -1.29708753e-01 4.58754338e-02 1.74247953e-01 4.87775677e-01 -5.95867222e-01 -4.51018321e-01 -3.66926761e-01 -1.16012678e-01 -1.21957339e-01 -6.07197219e-02 2.10701895e-01 2.09249735e-01 -7.41814223e-02 6.13948841e-01 1.61169857e-01 1.41012670e-01 -2.29891577e-01 9.33399558e-02 -5.54969561e-03 -2.38702110e-01 -3.05838617e-01 1.12142846e-01 5.89067298e-02 -6.38989408e-02 8.76372412e-02 1.52084789e-01 1.17410953e-01 9.05251310e-02 3.21258967e-01 3.01861847e-02 1.21952521e-01 -2.60760024e-03 1.64084688e-02 9.29529088e-02 -1.33525291e-02 -8.96722159e-02 -8.47692794e-02 9.69188358e-02 1.30939460e-01 -2.02076322e-01 -1.23681977e-01 -1.84996882e-01 -2.67827099e-02 -7.40974267e-02 9.08866944e-02 -7.23709911e-02 1.90325980e-02 -1.15503171e-01 -1.87678821e-01 -3.42230217e-02 3.73324913e-02 2.03735932e-01 4.59584819e-03 3.70995013e-02 -2.60227592e-02 2.08684828e-01 1.81270364e-02 3.06477801e-02 -6.58411347e-02 -9.90582196e-02 -4.98292628e-02 5.75425819e-02 1.45598850e-02 3.35949196e-02 -7.17613088e-02 -3.38339653e-02 4.82667258e-02 7.52679219e-02 5.13305417e-02 3.12103033e-02 4.08338929e-02 -1.02357536e-01 -3.38210418e-02 -3.60488699e-02 6.37269726e-02 3.47748348e-02 7.08931623e-02 4.20765901e-02 -1.23684608e-02 5.04750183e-02 2.12338336e-02 6.92934230e-03 -1.32178219e-02 9.81234196e-03 2.37166171e-02 -4.62458089e-02 -2.99038983e-03 -2.35328812e-02 -1.10941417e-02 -7.16783241e-03 1.28230134e+00] [-1.01733204e+00 -8.68363504e-01 2.90595525e-01 4.50939520e-01 1.18121353e+00 -8.65090482e-01 -4.26211932e-01 -6.94817199e-01 -3.83429046e-01 -1.01022834e+00 -5.42709651e-02 2.62695296e-01 1.64884290e-01 2.67129378e-01 -3.36263722e-01 -7.92369731e-01 4.17727201e-02 -2.51804588e-01 -3.92099105e-01 -2.79003619e-01 6.56881584e-01 -3.06061458e-01 -1.25123554e-01 1.51121318e-01 1.46915832e-01 1.73057539e-01 -4.35388224e-01 -2.89454450e-01 -2.80835787e-01 1.23248126e-01 -3.03880296e-02 -2.78149054e-01 5.55632895e-01 -1.49925580e-01 -1.37513449e-01 -1.15598653e-01 4.30702975e-01 2.53120207e-01 1.30816841e-01 -7.21264260e-02 1.54528953e-01 1.73303827e-01 1.10298203e-01 -2.41805773e-01 1.63539839e-01 1.78097882e-01 -1.65327160e-01 8.02551630e-02 -3.00919046e-01 -3.90024796e-02 -1.78243387e-01 1.63740958e-03 -5.07516456e-02 -1.36444395e-02 1.95769064e-01 -1.92471721e-02 1.43736945e-02 5.20614633e-02 -1.31644746e-01 -1.44126031e-02 4.50768910e-02 -6.90899467e-02 1.38580050e-02 1.06710988e-01 -1.64584807e-01 1.11200167e-02 -1.03878294e-01 5.60211584e-02 2.83419964e-02 -5.65137618e-03 1.02834066e-01 4.22094307e-02 -1.75670401e-02 -1.13406305e-01 -1.53004062e-01 1.54011283e-02 9.62681453e-02 8.17058042e-02 -1.08774204e-02 -1.54644592e-01 3.78621826e-02 5.17711364e-02 6.47454952e-02 6.29523366e-02 -2.48572573e-03 6.14989460e-03 -5.12300189e-02 -4.10786235e-02 4.45220802e-03 5.85235781e-02 3.36176696e-02 -3.90017189e-02 -3.00415182e-02 -7.83515236e-02 -2.08036982e-02 6.93567619e-02 3.11544505e-02 -5.29258692e-03 -5.46181450e-03 3.81291670e-02 1.28230459e+00] [-6.35537258e-01 -7.88652918e-01 5.05977356e-01 1.99744840e+00 8.18018075e-02 -5.56292332e-01 -1.17200825e+00 -5.32438121e-01 -3.08672342e-01 -8.22804742e-01 9.23069998e-02 -3.55605482e-02 -5.15167863e-01 5.71313189e-01 -2.15937287e-01 -5.85257657e-01 -3.89727004e-01 -1.06987464e-01 -1.94299153e-01 1.99842306e-01 3.87441355e-01 -2.07694079e-01 2.04352154e-01 3.81120903e-01 -4.10722681e-03 -2.22701536e-02 3.60237793e-02 4.11293030e-01 1.93407539e-01 -3.09108281e-01 1.26664195e-01 -2.40280981e-01 5.16283911e-01 -1.47313118e-01 3.28316991e-01 -7.36525442e-02 -9.08486962e-02 1.42864138e-01 -9.60493534e-02 5.35346494e-02 -1.37765798e-02 -8.05569230e-02 -1.13682198e-02 6.06700931e-02 8.56152466e-02 7.14075711e-02 -4.26396785e-03 3.28382291e-01 -2.02444832e-01 -2.92036830e-01 -1.10017276e-01 -1.15564082e-01 -7.41276093e-02 2.43583415e-01 1.14271964e-01 -3.93560872e-02 4.48107054e-01 -8.97812811e-02 -2.13215846e-02 5.53847538e-02 1.76700368e-01 -9.29932965e-02 2.66930908e-01 1.36567478e-01 -1.11225845e-01 9.20075408e-02 6.30135281e-02 8.17109443e-02 -1.53729100e-01 -9.72230682e-02 5.89535165e-02 1.16877865e-01 7.03110364e-03 -1.25809083e-01 -8.74271519e-02 -3.37076583e-02 3.01637957e-02 7.42931308e-02 -2.03077846e-01 -7.98662192e-02 8.00031495e-02 -1.01923219e-01 -6.45972756e-02 4.90417047e-02 1.76806224e-01 6.94059355e-02 -6.85188849e-02 -1.80968096e-02 7.23166649e-03 1.08799323e-01 3.57952524e-02 -8.48567716e-02 -7.09268651e-03 -2.46810485e-02 6.40714487e-02 3.85997989e-02 1.47874763e-02 2.53550457e-02 5.01834835e-02 -1.03064419e-02 1.28230600e+00] [ 1.92403173e+00 -9.06651954e-01 3.45925612e-01 1.02024598e+00 -7.23989979e-01 8.79381622e-01 -6.90258264e-01 9.14184773e-01 2.68664563e-01 2.21018542e-01 -2.22348418e-02 3.04567699e-01 2.15455483e-01 8.49679806e-02 1.93949184e-01 2.86668224e-01 -5.11778817e-01 -1.53468583e-01 4.01886815e-01 6.93800394e-02 -2.02198996e-01 1.33213014e-01 1.74342644e-01 8.78588908e-02 -2.19816329e-03 -1.38317134e-01 -1.66678592e-01 -4.72567337e-01 -3.91933387e-01 -1.90025167e-01 -3.89011238e-01 -1.50954716e-01 -1.98773591e-01 4.12574775e-01 -3.14889975e-02 -6.08784998e-02 3.04334578e-02 1.69543764e-01 -1.65583741e-01 -1.58850073e-01 1.44643582e-01 3.69910163e-01 -2.86390486e-02 -3.61015234e-02 3.94083253e-02 -2.87614401e-01 -6.03650913e-02 -7.67296265e-02 1.97179294e-02 6.37607818e-02 3.57080021e-02 5.52461326e-02 1.40215766e-01 -1.81047532e-02 -5.35740670e-02 -1.32385272e-01 2.73121921e-01 7.53945708e-02 6.91357362e-02 1.80133027e-01 -1.12755837e-01 1.18390879e-01 9.71417990e-02 -9.79793596e-02 8.62659661e-02 -5.78292059e-02 1.02916006e-01 -7.64502304e-03 -1.20308884e-01 2.31508734e-02 -9.49047741e-02 -2.45140924e-02 -5.56500378e-02 -9.18271550e-02 -4.67029686e-02 1.19434295e-01 3.26407925e-02 -5.99616006e-03 -3.54270839e-02 -2.60247337e-03 6.12898848e-02 2.46778580e-02 -1.64067261e-02 -3.10123476e-04 -3.68817695e-02 -1.83276792e-01 -8.66330974e-02 -9.13201850e-03 -2.52741829e-02 -1.56894140e-02 -1.13471505e-02 -1.91800351e-03 2.17314305e-02 1.31481795e-02 -1.22045525e-02 2.61854171e-03 -1.85250753e-03 7.55013648e-03 -7.23273140e-02 9.30820281e-03 1.28230608e+00] [-4.43503705e-01 1.21190978e+00 2.27526756e+00 8.61053998e-01 4.96450951e-01 -1.10576027e-01 -4.88841805e-01 2.27681388e-01 -8.75512433e-02 2.96258700e-01 -5.11201084e-02 3.25762165e-01 2.41716377e-01 6.71144474e-01 -4.15789550e-01 -3.65616793e-01 -1.31112153e-01 -3.18736234e-01 1.27273149e-01 -7.41295590e-02 -2.54937328e-01 -1.91410179e-02 -3.21161591e-02 2.60261739e-01 -3.66179068e-02 -6.40712651e-02 -4.09942676e-01 1.87608917e-01 4.08066046e-02 8.80045247e-02 3.26679896e-01 -5.59056156e-02 -2.58782561e-01 -2.54115126e-02 9.46058370e-02 -9.14124296e-02 -1.58784609e-01 2.16818257e-03 9.06225213e-02 -3.48202163e-03 3.28290890e-01 2.10196894e-01 -1.78280214e-01 -8.73986838e-02 -6.87487000e-02 1.01218364e-01 -8.56778849e-02 -1.26383070e-01 -1.67912601e-01 -7.50062441e-02 -1.89698322e-02 -6.13106280e-02 -1.37099691e-01 -4.22772696e-02 6.83539452e-02 1.39371812e-01 8.77523271e-02 8.62325576e-03 8.60724203e-02 -1.43550589e-01 -3.87391855e-02 -1.44570484e-01 -1.33894022e-01 -2.41202228e-02 8.29372055e-02 -4.63896888e-02 4.88663076e-02 8.08844506e-02 -2.65122265e-02 -5.76159016e-02 4.16713861e-02 9.11568539e-02 4.46821497e-03 3.61224543e-02 1.07917562e-01 2.09562091e-02 8.63427809e-03 5.62428191e-02 5.75368009e-02 2.79757565e-02 3.56714601e-02 -5.51023519e-02 -2.62609334e-02 3.17442564e-02 -1.07712363e-01 4.63453120e-02 4.20320170e-02 -2.67719703e-02 4.44424674e-02 3.24943222e-02 -5.25302904e-02 6.53444441e-02 1.28916871e-02 4.96754944e-02 -4.93975719e-02 2.83177797e-02 6.61680495e-02 1.36363423e-02 -2.39008054e-02 -1.38093373e-02 1.28231306e+00] [-1.30576890e+00 -5.38964188e-01 1.31110424e-01 1.45180632e+00 -3.50945618e-01 -2.74151436e-01 -1.01303931e+00 -1.07883352e+00 2.51289204e-01 -6.07429407e-01 1.94365721e-01 -2.88463916e-01 -8.28129204e-01 1.55055151e-01 1.28618407e-01 -7.51208327e-01 -5.88566340e-02 -8.80124549e-02 -2.70386184e-01 6.73099222e-02 -3.30599765e-01 2.34785253e-01 2.04356632e-01 3.42687136e-01 3.75157001e-01 -2.81372935e-01 6.27055802e-02 3.85684253e-01 2.70104788e-01 -2.66381014e-01 -4.90956648e-01 -2.45607248e-01 1.20412043e-01 -1.55631362e-01 1.23070295e-01 1.93881787e-02 1.75469100e-01 -7.36672636e-02 -3.31112154e-02 3.88896906e-02 -6.98907877e-02 -9.07315719e-02 1.62365813e-02 4.32917245e-02 -1.29793506e-01 -1.51174043e-01 -5.37238444e-02 3.27988985e-01 1.61934107e-03 -6.32230354e-02 -9.03977904e-02 -1.41007790e-01 -1.20890848e-01 2.41315429e-01 9.37474585e-02 1.02785815e-01 1.48455023e-01 -1.26487465e-01 6.39170641e-02 3.19237640e-03 4.26666343e-02 -2.10277484e-01 8.99496377e-03 -2.64442825e-02 -5.57621335e-02 1.17964761e-02 -1.34700889e-02 5.02932129e-02 -2.52842658e-02 -7.72335679e-02 9.85545439e-02 1.51742079e-01 6.57555495e-02 -4.30089367e-02 -3.94387233e-02 8.37002993e-02 2.22470142e-02 -3.66661715e-02 -9.85167828e-02 -5.96211594e-02 1.56065808e-01 -1.08879299e-01 -1.17414990e-01 -5.16350032e-02 1.82690756e-02 1.29885237e-02 -4.58040806e-02 2.50447800e-02 -2.34057540e-02 -2.35293953e-02 4.93221952e-03 -5.01275375e-02 -3.36120790e-02 -8.09844933e-02 5.30705948e-02 5.73111157e-03 1.18357744e-02 9.12190249e-03 4.27269170e-02 -2.15149480e-02 1.28230333e+00] [-5.40261841e-01 1.02158335e-01 1.14313931e-01 2.91786960e-01 1.61415152e-01 -1.11324012e+00 -3.61694382e-01 -5.35031385e-01 1.36007295e+00 3.12656478e-02 -7.32548914e-01 -2.76274283e-01 -4.75872186e-02 -1.58142039e-01 -6.84763898e-03 4.62738639e-01 2.53757353e-01 -1.19479907e+00 -4.12559484e-01 -4.81440674e-01 3.93500283e-02 -2.03724357e-01 -1.26718461e-01 -2.16742290e-01 9.22770595e-02 -1.78484864e-01 3.88398225e-01 6.75439352e-02 -1.64348114e-01 3.39249351e-01 -1.89293164e-01 -1.93290141e-01 -3.12516256e-01 -4.11354756e-01 9.53983160e-02 -6.64026347e-02 1.72487653e-01 1.22584132e-02 5.69359529e-02 -9.08385481e-02 3.17709786e-01 -7.05092112e-02 2.62805606e-01 2.08699363e-01 7.66345276e-02 2.37388943e-01 2.36151524e-02 -1.42559818e-01 2.24378303e-01 2.27239039e-01 -1.47474977e-01 9.99877672e-02 1.77765047e-01 1.31089300e-01 1.81667988e-01 7.85458737e-02 -2.12741173e-01 1.06174591e-01 8.13766108e-02 -2.62302596e-01 -5.87985571e-02 -7.97311490e-02 1.64571323e-01 1.99721792e-01 1.53670099e-03 8.12070686e-02 6.86185585e-03 -9.75782304e-03 -4.35908454e-02 9.55609254e-03 -1.14349201e-01 -3.70303665e-02 5.40343891e-03 8.10039819e-02 -5.84688006e-02 6.86399442e-02 -1.24282585e-02 5.37200383e-02 1.10360663e-01 -2.35159285e-02 1.18523142e-01 -4.08719608e-02 8.60593956e-02 1.05485371e-02 8.70744893e-03 -1.13329344e-02 4.58502434e-02 3.02755456e-02 1.33386236e-02 6.04737123e-02 -2.94469544e-02 -3.60832944e-02 -1.14017563e-02 2.84535172e-02 -4.04422207e-02 -2.17376464e-02 -1.63598679e-02 7.31290652e-02 2.20734734e-02 -7.39749607e-02 1.28230544e+00] [-2.45478897e+00 1.76361302e+00 -1.36876451e+00 -3.15396667e+00 -1.93692302e+00 1.11209150e-01 -7.75010445e-01 1.03725248e+00 -2.68733286e-01 -7.42989137e-01 7.07420605e-02 -2.56040658e-01 2.89492172e-01 4.08595997e-02 -2.42686048e-01 -4.79918138e-02 3.31679445e-01 -1.47536990e-01 2.45470960e-01 3.21688987e-01 1.63768604e-01 1.74099455e-01 -8.32625406e-02 1.92451209e-01 1.19960755e-01 -3.33324072e-02 8.33694766e-02 3.58751885e-04 2.32443798e-02 -1.39414646e-02 -8.33333989e-02 -1.16792517e-01 1.22190609e-01 8.49390256e-02 3.64956478e-02 -2.16144923e-02 9.20784843e-02 1.22994123e-01 -6.30669008e-03 5.45434983e-03 5.41984827e-02 5.17779115e-03 6.91894164e-02 -7.02473580e-02 -4.60076237e-02 4.24343784e-02 1.20368441e-01 -4.23528588e-02 -1.21308789e-01 4.73142852e-02 -7.78274770e-02 2.41255679e-02 -4.09991172e-02 -8.60180674e-03 -3.39995386e-03 -3.09527340e-02 -1.81260030e-02 -1.98917292e-02 3.68429905e-02 3.28582837e-02 8.37604126e-02 1.12860391e-02 3.34422564e-02 4.63965641e-02 1.91137570e-02 1.72354935e-02 3.22273302e-02 -6.34201541e-02 -7.64316928e-02 3.19031694e-02 -2.85152621e-02 -6.82473374e-03 -4.61152032e-02 -2.51429387e-02 -2.59513773e-02 9.74393728e-03 3.92724073e-02 5.03542937e-02 -1.73151603e-02 6.38348560e-02 -1.55187032e-02 2.12355222e-02 1.62737324e-02 -1.39280678e-02 8.72181240e-03 6.44022092e-03 2.13289159e-03 -2.66198597e-02 -4.89825482e-02 1.17613642e-02 1.12883021e-02 -9.01426158e-03 -1.32374059e-02 7.54849796e-03 6.13205950e-03 -6.22374060e-03 -3.28824179e-02 1.35436734e-02 3.95233866e-03 1.04698335e-02 1.28230834e+00]]
In [59]:
import numpy as np
# 각 데이터셋의 마지막 컬럼 추출
ibks_last_column = nim_ibks_data[:, -1]
boks_last_column = nim_boks_data[:, -1]
news_last_column = nim_news_data[:, -1]
# 모든 인덱스의 마지막 컬럼이 동일한지 확인
all_equal = np.allclose(ibks_last_column, boks_last_column) and np.allclose(boks_last_column, news_last_column)
if all_equal:
print("모든 데이터셋의 마지막 컬럼 값이 인덱스별로 동일합니다.")
else:
print("데이터셋의 마지막 컬럼 값이 인덱스별로 다릅니다.")
# 각 인덱스별 값 출력
for i, (ibks_val, boks_val, news_val) in enumerate(zip(ibks_last_column, boks_last_column, news_last_column)):
if not (np.isclose(ibks_val, boks_val) and np.isclose(boks_val, news_val)):
print(f"인덱스 {i}: IBKS={ibks_val}, BOKS={boks_val}, NEWS={news_val}")
모든 데이터셋의 마지막 컬럼 값이 인덱스별로 동일합니다.
In [60]:
import numpy as np
# numpy 파일로 저장
print(f"nim_date.shape: {nim_date.shape}")
print(f"nim_date[:10]: {nim_date[:10]}")
np.save('data/numpy/nim_date.npy', nim_date, allow_pickle=True)
print(f"nim_date saved: data/numpy/nim_date.npy")
# numpy 파일에서 불러오기
nim_date = np.load('data/numpy/nim_date.npy', allow_pickle=True)
print(f"nim_date loaded: data/numpy/nim_date.npy")
print(f"nim_date[:10]: {nim_date[:10]}")
nim_date.shape: (39144,) nim_date[:10]: ['2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02'] nim_date saved: data/numpy/nim_date.npy nim_date loaded: data/numpy/nim_date.npy nim_date[:10]: ['2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02' '2018-01-02']
In [61]:
# import numpy as np
# # numpy 파일로 저장
# print(f"original_nim_values.shape: {original_nim_values.shape}")
# print(f"original_nim_values[:10]: {original_nim_values[:10]}")
# np.save('data/numpy/original_nim_values.npy', original_nim_values)
# print(f"original_nim_values saved: data/numpy/original_nim_values.npy")
# # numpy 파일에서 불러오기
# original_nim_values = np.load('data/numpy/original_nim_values.npy')
# print(f"original_nim_values loaded: data/numpy/original_nim_values.npy")
# print(f"original_nim_values[:10]: {original_nim_values[:10]}")
In [62]:
# import numpy as np
# # numpy 파일로 저장
# print(f"original_target_values.shape: {original_target_values.shape}")
# print(f"original_target_values[:10]: {original_target_values[:10]}")
# np.save('data/numpy/original_target_values.npy', original_target_values)
# print(f"original_target_values saved: data/numpy/original_target_values.npy")
# # numpy 파일에서 불러오기
# original_target_values = np.load('data/numpy/original_target_values.npy')
# print(f"original_target_values loaded: data/numpy/original_target_values.npy")
# print(f"original_target_values[:10]: {original_target_values[:10]}")
- 시계열에 제공할 target_values 생성(표준화)
In [63]:
import numpy as np
target_values = original_target_values
target_train_data = target_values[:train_size]
target_train_mean = np.mean(target_train_data)
target_train_std = np.std(target_train_data)
target_values = (target_values - target_train_mean) / target_train_std
print('-'*80)
print(f"target_train_mean: {target_train_mean:+.8f}")
print(f"target_train_std : {target_train_std:+.8f}")
print('-'*80)
# numpy 파일로 저장
print(f"target_values.shape: {target_values.shape}")
print(f"target_values[:10]: {target_values[:10]}")
np.save('data/numpy/target_values.npy', target_values)
print(f"target_values saved: data/numpy/target_values.npy")
# numpy 파일에서 불러오기
target_values = np.load('data/numpy/target_values.npy')
print(f"target_values loaded: data/numpy/target_values.npy")
print(f"target_values[:10]: {target_values[:10]}")
-------------------------------------------------------------------------------- target_train_mean: +0.00075180 target_train_std : +0.03153061 -------------------------------------------------------------------------------- target_values.shape: (39144,) target_values[:10]: [2.57046096 2.57046355 2.57043122 2.57040007 2.57052615 2.57042324 2.57044529 2.57043156 2.57044085 2.5704819 ] target_values saved: data/numpy/target_values.npy target_values loaded: data/numpy/target_values.npy target_values[:10]: [2.57046096 2.57046355 2.57043122 2.57040007 2.57052615 2.57042324 2.57044529 2.57043156 2.57044085 2.5704819 ]
In [64]:
import numpy as np
# 3일 후 차이를 계산할 간격 (일일 24개의 데이터이므로 5일 후는 5 * 24 = 120)
interval = 5 * 24
# 최근 365일(일일 24개씩)의 데이터 시작 인덱스
start_index = len(original_nim_values) - 365 * 24
# 5일 후의 절대 차이를 저장할 리스트
absolute_differences = []
# 24개 단위로 3일 후의 절대 차이를 계산
print("오늘 값과 3일 후 값, 날짜, 절대 차이:")
for i in range(start_index, len(original_nim_values) - interval, 24):
# 오늘 값과 3일 후 값
today_value = original_nim_values[i]
future_value = original_nim_values[i + interval]
# 오늘 날짜와 3일 후 날짜 (문자열 그대로 사용)
today_date = nim_date[i]
future_date = nim_date[i + interval] # 문자열 그대로 사용
# 3일 후 값과 오늘 값의 차이의 절대값 계산
absolute_difference = abs(future_value - today_value)
absolute_differences.append(absolute_difference)
# 출력
print(f"Index {i}: {today_date},[{today_value:.8f}] | {future_date},[{future_value:.8f}] Absolute Difference: {absolute_difference:.8f}")
# 절대 차이의 합계 계산
sum_absolute_difference = np.sum(absolute_differences)
# 절대 차이의 평균 계산
correct_threshold = np.mean(absolute_differences)
print(f"\n최근 365일 동안의 5일 후 절대 차이의 합계: {sum_absolute_difference:.8f}")
print(f"최근 365일 동안의 5일 후 절대 차이의 평균: {correct_threshold:.8f}")
오늘 값과 3일 후 값, 날짜, 절대 차이: Index 30384: 2023-02-24,[1.80270000] | 2023-03-06,[1.75590000] Absolute Difference: 0.04680000 Index 30408: 2023-02-27,[1.80220000] | 2023-03-07,[1.77850000] Absolute Difference: 0.02370000 Index 30432: 2023-02-28,[1.79380000] | 2023-03-08,[1.77600000] Absolute Difference: 0.01780000 Index 30456: 2023-03-02,[1.77290000] | 2023-03-09,[1.80440000] Absolute Difference: 0.03150000 Index 30480: 2023-03-03,[1.79230000] | 2023-03-10,[1.79330000] Absolute Difference: 0.00100000 Index 30504: 2023-03-06,[1.75590000] | 2023-03-13,[1.78590000] Absolute Difference: 0.03000000 Index 30528: 2023-03-07,[1.77850000] | 2023-03-14,[1.79720000] Absolute Difference: 0.01870000 Index 30552: 2023-03-08,[1.77600000] | 2023-03-15,[1.77120000] Absolute Difference: 0.00480000 Index 30576: 2023-03-09,[1.80440000] | 2023-03-16,[1.77420000] Absolute Difference: 0.03020000 Index 30600: 2023-03-10,[1.79330000] | 2023-03-17,[1.80020000] Absolute Difference: 0.00690000 Index 30624: 2023-03-13,[1.78590000] | 2023-03-20,[1.78220000] Absolute Difference: 0.00370000 Index 30648: 2023-03-14,[1.79720000] | 2023-03-21,[1.79980000] Absolute Difference: 0.00260000 Index 30672: 2023-03-15,[1.77120000] | 2023-03-22,[1.79130000] Absolute Difference: 0.02010000 Index 30696: 2023-03-16,[1.77420000] | 2023-03-23,[1.80780000] Absolute Difference: 0.03360000 Index 30720: 2023-03-17,[1.80020000] | 2023-03-24,[1.79500000] Absolute Difference: 0.00520000 Index 30744: 2023-03-20,[1.78220000] | 2023-03-27,[1.78190000] Absolute Difference: 0.00030000 Index 30768: 2023-03-21,[1.79980000] | 2023-03-28,[1.77430000] Absolute Difference: 0.02550000 Index 30792: 2023-03-22,[1.79130000] | 2023-03-29,[1.74990000] Absolute Difference: 0.04140000 Index 30816: 2023-03-23,[1.80780000] | 2023-03-30,[1.78220000] Absolute Difference: 0.02560000 Index 30840: 2023-03-24,[1.79500000] | 2023-03-31,[1.81390000] Absolute Difference: 0.01890000 Index 30864: 2023-03-27,[1.78190000] | 2023-04-03,[1.73390000] Absolute Difference: 0.04800000 Index 30888: 2023-03-28,[1.77430000] | 2023-04-04,[1.77570000] Absolute Difference: 0.00140000 Index 30912: 2023-03-29,[1.74990000] | 2023-04-05,[1.75950000] Absolute Difference: 0.00960000 Index 30936: 2023-03-30,[1.78220000] | 2023-04-06,[1.77310000] Absolute Difference: 0.00910000 Index 30960: 2023-03-31,[1.81390000] | 2023-04-07,[1.77430000] Absolute Difference: 0.03960000 Index 30984: 2023-04-03,[1.73390000] | 2023-04-10,[1.78610000] Absolute Difference: 0.05220000 Index 31008: 2023-04-04,[1.77570000] | 2023-04-11,[1.75210000] Absolute Difference: 0.02360000 Index 31032: 2023-04-05,[1.75950000] | 2023-04-12,[1.77770000] Absolute Difference: 0.01820000 Index 31056: 2023-04-06,[1.77310000] | 2023-04-13,[1.76300000] Absolute Difference: 0.01010000 Index 31080: 2023-04-07,[1.77430000] | 2023-04-14,[1.77060000] Absolute Difference: 0.00370000 Index 31104: 2023-04-10,[1.78610000] | 2023-04-17,[1.77180000] Absolute Difference: 0.01430000 Index 31128: 2023-04-11,[1.75210000] | 2023-04-18,[1.77470000] Absolute Difference: 0.02260000 Index 31152: 2023-04-12,[1.77770000] | 2023-04-19,[1.76060000] Absolute Difference: 0.01710000 Index 31176: 2023-04-13,[1.76300000] | 2023-04-20,[1.78060000] Absolute Difference: 0.01760000 Index 31200: 2023-04-14,[1.77060000] | 2023-04-21,[1.77760000] Absolute Difference: 0.00700000 Index 31224: 2023-04-17,[1.77180000] | 2023-04-24,[1.77280000] Absolute Difference: 0.00100000 Index 31248: 2023-04-18,[1.77470000] | 2023-04-25,[1.76870000] Absolute Difference: 0.00600000 Index 31272: 2023-04-19,[1.76060000] | 2023-04-26,[1.77910000] Absolute Difference: 0.01850000 Index 31296: 2023-04-20,[1.78060000] | 2023-04-27,[1.75150000] Absolute Difference: 0.02910000 Index 31320: 2023-04-21,[1.77760000] | 2023-04-28,[1.76000000] Absolute Difference: 0.01760000 Index 31344: 2023-04-24,[1.77280000] | 2023-05-02,[1.76660000] Absolute Difference: 0.00620000 Index 31368: 2023-04-25,[1.76870000] | 2023-05-03,[1.77370000] Absolute Difference: 0.00500000 Index 31392: 2023-04-26,[1.77910000] | 2023-05-04,[1.74710000] Absolute Difference: 0.03200000 Index 31416: 2023-04-27,[1.75150000] | 2023-05-08,[1.72470000] Absolute Difference: 0.02680000 Index 31440: 2023-04-28,[1.76000000] | 2023-05-09,[1.74230000] Absolute Difference: 0.01770000 Index 31464: 2023-05-02,[1.76660000] | 2023-05-10,[1.74170000] Absolute Difference: 0.02490000 Index 31488: 2023-05-03,[1.77370000] | 2023-05-11,[1.75030000] Absolute Difference: 0.02340000 Index 31512: 2023-05-04,[1.74710000] | 2023-05-12,[1.75120000] Absolute Difference: 0.00410000 Index 31536: 2023-05-08,[1.72470000] | 2023-05-15,[1.75870000] Absolute Difference: 0.03400000 Index 31560: 2023-05-09,[1.74230000] | 2023-05-16,[1.71730000] Absolute Difference: 0.02500000 Index 31584: 2023-05-10,[1.74170000] | 2023-05-17,[1.72450000] Absolute Difference: 0.01720000 Index 31608: 2023-05-11,[1.75030000] | 2023-05-18,[1.74110000] Absolute Difference: 0.00920000 Index 31632: 2023-05-12,[1.75120000] | 2023-05-19,[1.67500000] Absolute Difference: 0.07620000 Index 31656: 2023-05-15,[1.75870000] | 2023-05-22,[1.74410000] Absolute Difference: 0.01460000 Index 31680: 2023-05-16,[1.71730000] | 2023-05-23,[1.73280000] Absolute Difference: 0.01550000 Index 31704: 2023-05-17,[1.72450000] | 2023-05-24,[1.72300000] Absolute Difference: 0.00150000 Index 31728: 2023-05-18,[1.74110000] | 2023-05-25,[1.73100000] Absolute Difference: 0.01010000 Index 31752: 2023-05-19,[1.67500000] | 2023-05-26,[1.71140000] Absolute Difference: 0.03640000 Index 31776: 2023-05-22,[1.74410000] | 2023-05-30,[1.74200000] Absolute Difference: 0.00210000 Index 31800: 2023-05-23,[1.73280000] | 2023-05-31,[1.78070000] Absolute Difference: 0.04790000 Index 31824: 2023-05-24,[1.72300000] | 2023-06-01,[1.74310000] Absolute Difference: 0.02010000 Index 31848: 2023-05-25,[1.73100000] | 2023-06-02,[1.72470000] Absolute Difference: 0.00630000 Index 31872: 2023-05-26,[1.71140000] | 2023-06-05,[1.73910000] Absolute Difference: 0.02770000 Index 31896: 2023-05-30,[1.74200000] | 2023-06-07,[1.73800000] Absolute Difference: 0.00400000 Index 31920: 2023-05-31,[1.78070000] | 2023-06-08,[1.78380000] Absolute Difference: 0.00310000 Index 31944: 2023-06-01,[1.74310000] | 2023-06-09,[1.71370000] Absolute Difference: 0.02940000 Index 31968: 2023-06-02,[1.72470000] | 2023-06-12,[1.77160000] Absolute Difference: 0.04690000 Index 31992: 2023-06-05,[1.73910000] | 2023-06-13,[1.71580000] Absolute Difference: 0.02330000 Index 32016: 2023-06-07,[1.73800000] | 2023-06-14,[1.73840000] Absolute Difference: 0.00040000 Index 32040: 2023-06-08,[1.78380000] | 2023-06-15,[1.74820000] Absolute Difference: 0.03560000 Index 32064: 2023-06-09,[1.71370000] | 2023-06-16,[1.71190000] Absolute Difference: 0.00180000 Index 32088: 2023-06-12,[1.77160000] | 2023-06-19,[1.72880000] Absolute Difference: 0.04280000 Index 32112: 2023-06-13,[1.71580000] | 2023-06-20,[1.71850000] Absolute Difference: 0.00270000 Index 32136: 2023-06-14,[1.73840000] | 2023-06-21,[1.75140000] Absolute Difference: 0.01300000 Index 32160: 2023-06-15,[1.74820000] | 2023-06-22,[1.77770000] Absolute Difference: 0.02950000 Index 32184: 2023-06-16,[1.71190000] | 2023-06-23,[1.75260000] Absolute Difference: 0.04070000 Index 32208: 2023-06-19,[1.72880000] | 2023-06-26,[1.74480000] Absolute Difference: 0.01600000 Index 32232: 2023-06-20,[1.71850000] | 2023-06-27,[1.71430000] Absolute Difference: 0.00420000 Index 32256: 2023-06-21,[1.75140000] | 2023-06-28,[1.73520000] Absolute Difference: 0.01620000 Index 32280: 2023-06-22,[1.77770000] | 2023-06-29,[1.73690000] Absolute Difference: 0.04080000 Index 32304: 2023-06-23,[1.75260000] | 2023-06-30,[1.77150000] Absolute Difference: 0.01890000 Index 32328: 2023-06-26,[1.74480000] | 2023-07-03,[1.73380000] Absolute Difference: 0.01100000 Index 32352: 2023-06-27,[1.71430000] | 2023-07-04,[1.71620000] Absolute Difference: 0.00190000 Index 32376: 2023-06-28,[1.73520000] | 2023-07-05,[1.72140000] Absolute Difference: 0.01380000 Index 32400: 2023-06-29,[1.73690000] | 2023-07-06,[1.74460000] Absolute Difference: 0.00770000 Index 32424: 2023-06-30,[1.77150000] | 2023-07-07,[1.74850000] Absolute Difference: 0.02300000 Index 32448: 2023-07-03,[1.73380000] | 2023-07-10,[1.77430000] Absolute Difference: 0.04050000 Index 32472: 2023-07-04,[1.71620000] | 2023-07-11,[1.74060000] Absolute Difference: 0.02440000 Index 32496: 2023-07-05,[1.72140000] | 2023-07-12,[1.70650000] Absolute Difference: 0.01490000 Index 32520: 2023-07-06,[1.74460000] | 2023-07-13,[1.73630000] Absolute Difference: 0.00830000 Index 32544: 2023-07-07,[1.74850000] | 2023-07-14,[1.74020000] Absolute Difference: 0.00830000 Index 32568: 2023-07-10,[1.77430000] | 2023-07-17,[1.73990000] Absolute Difference: 0.03440000 Index 32592: 2023-07-11,[1.74060000] | 2023-07-18,[1.74410000] Absolute Difference: 0.00350000 Index 32616: 2023-07-12,[1.70650000] | 2023-07-19,[1.73440000] Absolute Difference: 0.02790000 Index 32640: 2023-07-13,[1.73630000] | 2023-07-20,[1.74950000] Absolute Difference: 0.01320000 Index 32664: 2023-07-14,[1.74020000] | 2023-07-21,[1.74420000] Absolute Difference: 0.00400000 Index 32688: 2023-07-17,[1.73990000] | 2023-07-24,[1.75010000] Absolute Difference: 0.01020000 Index 32712: 2023-07-18,[1.74410000] | 2023-07-25,[1.73930000] Absolute Difference: 0.00480000 Index 32736: 2023-07-19,[1.73440000] | 2023-07-26,[1.72940000] Absolute Difference: 0.00500000 Index 32760: 2023-07-20,[1.74950000] | 2023-07-27,[1.74670000] Absolute Difference: 0.00280000 Index 32784: 2023-07-21,[1.74420000] | 2023-07-28,[1.76600000] Absolute Difference: 0.02180000 Index 32808: 2023-07-24,[1.75010000] | 2023-07-31,[1.77970000] Absolute Difference: 0.02960000 Index 32832: 2023-07-25,[1.73930000] | 2023-08-01,[1.73920000] Absolute Difference: 0.00010000 Index 32856: 2023-07-26,[1.72940000] | 2023-08-02,[1.72790000] Absolute Difference: 0.00150000 Index 32880: 2023-07-27,[1.74670000] | 2023-08-03,[1.74520000] Absolute Difference: 0.00150000 Index 32904: 2023-07-28,[1.76600000] | 2023-08-04,[1.74420000] Absolute Difference: 0.02180000 Index 32928: 2023-07-31,[1.77970000] | 2023-08-07,[1.73250000] Absolute Difference: 0.04720000 Index 32952: 2023-08-01,[1.73920000] | 2023-08-08,[1.74110000] Absolute Difference: 0.00190000 Index 32976: 2023-08-02,[1.72790000] | 2023-08-09,[1.73010000] Absolute Difference: 0.00220000 Index 33000: 2023-08-03,[1.74520000] | 2023-08-10,[1.75090000] Absolute Difference: 0.00570000 Index 33024: 2023-08-04,[1.74420000] | 2023-08-11,[1.72820000] Absolute Difference: 0.01600000 Index 33048: 2023-08-07,[1.73250000] | 2023-08-14,[1.72220000] Absolute Difference: 0.01030000 Index 33072: 2023-08-08,[1.74110000] | 2023-08-16,[1.72200000] Absolute Difference: 0.01910000 Index 33096: 2023-08-09,[1.73010000] | 2023-08-17,[1.70310000] Absolute Difference: 0.02700000 Index 33120: 2023-08-10,[1.75090000] | 2023-08-18,[1.71920000] Absolute Difference: 0.03170000 Index 33144: 2023-08-11,[1.72820000] | 2023-08-21,[1.69710000] Absolute Difference: 0.03110000 Index 33168: 2023-08-14,[1.72220000] | 2023-08-22,[1.69920000] Absolute Difference: 0.02300000 Index 33192: 2023-08-16,[1.72200000] | 2023-08-23,[1.66800000] Absolute Difference: 0.05400000 Index 33216: 2023-08-17,[1.70310000] | 2023-08-24,[1.66140000] Absolute Difference: 0.04170000 Index 33240: 2023-08-18,[1.71920000] | 2023-08-25,[1.67620000] Absolute Difference: 0.04300000 Index 33264: 2023-08-21,[1.69710000] | 2023-08-28,[1.65230000] Absolute Difference: 0.04480000 Index 33288: 2023-08-22,[1.69920000] | 2023-08-29,[1.67200000] Absolute Difference: 0.02720000 Index 33312: 2023-08-23,[1.66800000] | 2023-08-30,[1.69150000] Absolute Difference: 0.02350000 Index 33336: 2023-08-24,[1.66140000] | 2023-08-31,[1.73240000] Absolute Difference: 0.07100000 Index 33360: 2023-08-25,[1.67620000] | 2023-09-01,[1.75400000] Absolute Difference: 0.07780000 Index 33384: 2023-08-28,[1.65230000] | 2023-09-04,[1.72810000] Absolute Difference: 0.07580000 Index 33408: 2023-08-29,[1.67200000] | 2023-09-05,[1.75320000] Absolute Difference: 0.08120000 Index 33432: 2023-08-30,[1.69150000] | 2023-09-06,[1.74290000] Absolute Difference: 0.05140000 Index 33456: 2023-08-31,[1.73240000] | 2023-09-07,[1.71830000] Absolute Difference: 0.01410000 Index 33480: 2023-09-01,[1.75400000] | 2023-09-08,[1.74360000] Absolute Difference: 0.01040000 Index 33504: 2023-09-04,[1.72810000] | 2023-09-11,[1.72250000] Absolute Difference: 0.00560000 Index 33528: 2023-09-05,[1.75320000] | 2023-09-12,[1.72120000] Absolute Difference: 0.03200000 Index 33552: 2023-09-06,[1.74290000] | 2023-09-13,[1.71280000] Absolute Difference: 0.03010000 Index 33576: 2023-09-07,[1.71830000] | 2023-09-14,[1.70950000] Absolute Difference: 0.00880000 Index 33600: 2023-09-08,[1.74360000] | 2023-09-15,[1.70560000] Absolute Difference: 0.03800000 Index 33624: 2023-09-11,[1.72250000] | 2023-09-18,[1.72160000] Absolute Difference: 0.00090000 Index 33648: 2023-09-12,[1.72120000] | 2023-09-19,[1.71690000] Absolute Difference: 0.00430000 Index 33672: 2023-09-13,[1.71280000] | 2023-09-20,[1.69190000] Absolute Difference: 0.02090000 Index 33696: 2023-09-14,[1.70950000] | 2023-09-21,[1.67720000] Absolute Difference: 0.03230000 Index 33720: 2023-09-15,[1.70560000] | 2023-09-22,[1.71260000] Absolute Difference: 0.00700000 Index 33744: 2023-09-18,[1.72160000] | 2023-09-25,[1.71770000] Absolute Difference: 0.00390000 Index 33768: 2023-09-19,[1.71690000] | 2023-09-26,[1.74590000] Absolute Difference: 0.02900000 Index 33792: 2023-09-20,[1.69190000] | 2023-09-27,[1.77390000] Absolute Difference: 0.08200000 Index 33816: 2023-09-21,[1.67720000] | 2023-10-04,[1.77340000] Absolute Difference: 0.09620000 Index 33840: 2023-09-22,[1.71260000] | 2023-10-05,[1.74830000] Absolute Difference: 0.03570000 Index 33864: 2023-09-25,[1.71770000] | 2023-10-06,[1.77050000] Absolute Difference: 0.05280000 Index 33888: 2023-09-26,[1.74590000] | 2023-10-10,[1.77220000] Absolute Difference: 0.02630000 Index 33912: 2023-09-27,[1.77390000] | 2023-10-11,[1.73940000] Absolute Difference: 0.03450000 Index 33936: 2023-10-04,[1.77340000] | 2023-10-12,[1.72040000] Absolute Difference: 0.05300000 Index 33960: 2023-10-05,[1.74830000] | 2023-10-13,[1.74080000] Absolute Difference: 0.00750000 Index 33984: 2023-10-06,[1.77050000] | 2023-10-16,[1.72440000] Absolute Difference: 0.04610000 Index 34008: 2023-10-10,[1.77220000] | 2023-10-17,[1.72820000] Absolute Difference: 0.04400000 Index 34032: 2023-10-11,[1.73940000] | 2023-10-18,[1.76080000] Absolute Difference: 0.02140000 Index 34056: 2023-10-12,[1.72040000] | 2023-10-19,[1.74730000] Absolute Difference: 0.02690000 Index 34080: 2023-10-13,[1.74080000] | 2023-10-20,[1.74750000] Absolute Difference: 0.00670000 Index 34104: 2023-10-16,[1.72440000] | 2023-10-23,[1.72080000] Absolute Difference: 0.00360000 Index 34128: 2023-10-17,[1.72820000] | 2023-10-24,[1.73230000] Absolute Difference: 0.00410000 Index 34152: 2023-10-18,[1.76080000] | 2023-10-25,[1.72820000] Absolute Difference: 0.03260000 Index 34176: 2023-10-19,[1.74730000] | 2023-10-26,[1.71280000] Absolute Difference: 0.03450000 Index 34200: 2023-10-20,[1.74750000] | 2023-10-27,[1.69890000] Absolute Difference: 0.04860000 Index 34224: 2023-10-23,[1.72080000] | 2023-10-30,[1.70900000] Absolute Difference: 0.01180000 Index 34248: 2023-10-24,[1.73230000] | 2023-10-31,[1.72690000] Absolute Difference: 0.00540000 Index 34272: 2023-10-25,[1.72820000] | 2023-11-01,[1.69440000] Absolute Difference: 0.03380000 Index 34296: 2023-10-26,[1.71280000] | 2023-11-02,[1.67570000] Absolute Difference: 0.03710000 Index 34320: 2023-10-27,[1.69890000] | 2023-11-03,[1.69740000] Absolute Difference: 0.00150000 Index 34344: 2023-10-30,[1.70900000] | 2023-11-06,[1.68570000] Absolute Difference: 0.02330000 Index 34368: 2023-10-31,[1.72690000] | 2023-11-07,[1.70330000] Absolute Difference: 0.02360000 Index 34392: 2023-11-01,[1.69440000] | 2023-11-08,[1.72480000] Absolute Difference: 0.03040000 Index 34416: 2023-11-02,[1.67570000] | 2023-11-09,[1.68480000] Absolute Difference: 0.00910000 Index 34440: 2023-11-03,[1.69740000] | 2023-11-10,[1.74590000] Absolute Difference: 0.04850000 Index 34464: 2023-11-06,[1.68570000] | 2023-11-13,[1.70650000] Absolute Difference: 0.02080000 Index 34488: 2023-11-07,[1.70330000] | 2023-11-14,[1.70760000] Absolute Difference: 0.00430000 Index 34512: 2023-11-08,[1.72480000] | 2023-11-15,[1.69820000] Absolute Difference: 0.02660000 Index 34536: 2023-11-09,[1.68480000] | 2023-11-16,[1.68820000] Absolute Difference: 0.00340000 Index 34560: 2023-11-10,[1.74590000] | 2023-11-17,[1.71700000] Absolute Difference: 0.02890000 Index 34584: 2023-11-13,[1.70650000] | 2023-11-20,[1.70580000] Absolute Difference: 0.00070000 Index 34608: 2023-11-14,[1.70760000] | 2023-11-21,[1.71030000] Absolute Difference: 0.00270000 Index 34632: 2023-11-15,[1.69820000] | 2023-11-22,[1.72350000] Absolute Difference: 0.02530000 Index 34656: 2023-11-16,[1.68820000] | 2023-11-23,[1.67600000] Absolute Difference: 0.01220000 Index 34680: 2023-11-17,[1.71700000] | 2023-11-24,[1.69710000] Absolute Difference: 0.01990000 Index 34704: 2023-11-20,[1.70580000] | 2023-11-27,[1.69520000] Absolute Difference: 0.01060000 Index 34728: 2023-11-21,[1.71030000] | 2023-11-28,[1.72830000] Absolute Difference: 0.01800000 Index 34752: 2023-11-22,[1.72350000] | 2023-11-29,[1.72770000] Absolute Difference: 0.00420000 Index 34776: 2023-11-23,[1.67600000] | 2023-11-30,[1.76120000] Absolute Difference: 0.08520000 Index 34800: 2023-11-24,[1.69710000] | 2023-12-01,[1.76900000] Absolute Difference: 0.07190000 Index 34824: 2023-11-27,[1.69520000] | 2023-12-04,[1.74100000] Absolute Difference: 0.04580000 Index 34848: 2023-11-28,[1.72830000] | 2023-12-05,[1.73580000] Absolute Difference: 0.00750000 Index 34872: 2023-11-29,[1.72770000] | 2023-12-06,[1.72190000] Absolute Difference: 0.00580000 Index 34896: 2023-11-30,[1.76120000] | 2023-12-07,[1.75160000] Absolute Difference: 0.00960000 Index 34920: 2023-12-01,[1.76900000] | 2023-12-08,[1.76300000] Absolute Difference: 0.00600000 Index 34944: 2023-12-04,[1.74100000] | 2023-12-11,[1.76400000] Absolute Difference: 0.02300000 Index 34968: 2023-12-05,[1.73580000] | 2023-12-12,[1.76090000] Absolute Difference: 0.02510000 Index 34992: 2023-12-06,[1.72190000] | 2023-12-13,[1.76430000] Absolute Difference: 0.04240000 Index 35016: 2023-12-07,[1.75160000] | 2023-12-14,[1.75290000] Absolute Difference: 0.00130000 Index 35040: 2023-12-08,[1.76300000] | 2023-12-15,[1.78590000] Absolute Difference: 0.02290000 Index 35064: 2023-12-11,[1.76400000] | 2023-12-18,[1.77390000] Absolute Difference: 0.00990000 Index 35088: 2023-12-12,[1.76090000] | 2023-12-19,[1.79100000] Absolute Difference: 0.03010000 Index 35112: 2023-12-13,[1.76430000] | 2023-12-20,[1.77980000] Absolute Difference: 0.01550000 Index 35136: 2023-12-14,[1.75290000] | 2023-12-21,[1.79250000] Absolute Difference: 0.03960000 Index 35160: 2023-12-15,[1.78590000] | 2023-12-22,[1.81110000] Absolute Difference: 0.02520000 Index 35184: 2023-12-18,[1.77390000] | 2023-12-26,[1.77550000] Absolute Difference: 0.00160000 Index 35208: 2023-12-19,[1.79100000] | 2023-12-27,[1.75510000] Absolute Difference: 0.03590000 Index 35232: 2023-12-20,[1.77980000] | 2023-12-28,[1.75240000] Absolute Difference: 0.02740000 Index 35256: 2023-12-21,[1.79250000] | 2024-01-02,[1.79800000] Absolute Difference: 0.00550000 Index 35280: 2023-12-22,[1.81110000] | 2024-01-03,[1.81320000] Absolute Difference: 0.00210000 Index 35304: 2023-12-26,[1.77550000] | 2024-01-04,[1.79410000] Absolute Difference: 0.01860000 Index 35328: 2023-12-27,[1.75510000] | 2024-01-05,[1.81170000] Absolute Difference: 0.05660000 Index 35352: 2023-12-28,[1.75240000] | 2024-01-08,[1.80360000] Absolute Difference: 0.05120000 Index 35376: 2024-01-02,[1.79800000] | 2024-01-09,[1.82330000] Absolute Difference: 0.02530000 Index 35400: 2024-01-03,[1.81320000] | 2024-01-10,[1.81200000] Absolute Difference: 0.00120000 Index 35424: 2024-01-04,[1.79410000] | 2024-01-11,[1.78010000] Absolute Difference: 0.01400000 Index 35448: 2024-01-05,[1.81170000] | 2024-01-12,[1.77690000] Absolute Difference: 0.03480000 Index 35472: 2024-01-08,[1.80360000] | 2024-01-15,[1.78520000] Absolute Difference: 0.01840000 Index 35496: 2024-01-09,[1.82330000] | 2024-01-16,[1.78860000] Absolute Difference: 0.03470000 Index 35520: 2024-01-10,[1.81200000] | 2024-01-17,[1.79390000] Absolute Difference: 0.01810000 Index 35544: 2024-01-11,[1.78010000] | 2024-01-18,[1.79450000] Absolute Difference: 0.01440000 Index 35568: 2024-01-12,[1.77690000] | 2024-01-19,[1.80000000] Absolute Difference: 0.02310000 Index 35592: 2024-01-15,[1.78520000] | 2024-01-22,[1.81160000] Absolute Difference: 0.02640000 Index 35616: 2024-01-16,[1.78860000] | 2024-01-23,[1.79820000] Absolute Difference: 0.00960000 Index 35640: 2024-01-17,[1.79390000] | 2024-01-24,[1.78550000] Absolute Difference: 0.00840000 Index 35664: 2024-01-18,[1.79450000] | 2024-01-25,[1.79480000] Absolute Difference: 0.00030000 Index 35688: 2024-01-19,[1.80000000] | 2024-01-26,[1.77530000] Absolute Difference: 0.02470000 Index 35712: 2024-01-22,[1.81160000] | 2024-01-29,[1.78180000] Absolute Difference: 0.02980000 Index 35736: 2024-01-23,[1.79820000] | 2024-01-30,[1.77610000] Absolute Difference: 0.02210000 Index 35760: 2024-01-24,[1.78550000] | 2024-01-31,[1.78310000] Absolute Difference: 0.00240000 Index 35784: 2024-01-25,[1.79480000] | 2024-02-01,[1.75780000] Absolute Difference: 0.03700000 Index 35808: 2024-01-26,[1.77530000] | 2024-02-02,[1.74500000] Absolute Difference: 0.03030000 Index 35832: 2024-01-29,[1.78180000] | 2024-02-05,[1.70980000] Absolute Difference: 0.07200000 Index 35856: 2024-01-30,[1.77610000] | 2024-02-06,[1.74700000] Absolute Difference: 0.02910000 Index 35880: 2024-01-31,[1.78310000] | 2024-02-07,[1.73160000] Absolute Difference: 0.05150000 Index 35904: 2024-02-01,[1.75780000] | 2024-02-08,[1.74450000] Absolute Difference: 0.01330000 Index 35928: 2024-02-02,[1.74500000] | 2024-02-13,[1.76230000] Absolute Difference: 0.01730000 Index 35952: 2024-02-05,[1.70980000] | 2024-02-14,[1.75350000] Absolute Difference: 0.04370000 Index 35976: 2024-02-06,[1.74700000] | 2024-02-15,[1.74790000] Absolute Difference: 0.00090000 Index 36000: 2024-02-07,[1.73160000] | 2024-02-16,[1.73090000] Absolute Difference: 0.00070000 Index 36024: 2024-02-08,[1.74450000] | 2024-02-19,[1.75720000] Absolute Difference: 0.01270000 Index 36048: 2024-02-13,[1.76230000] | 2024-02-20,[1.72570000] Absolute Difference: 0.03660000 Index 36072: 2024-02-14,[1.75350000] | 2024-02-21,[1.73900000] Absolute Difference: 0.01450000 Index 36096: 2024-02-15,[1.74790000] | 2024-02-22,[1.74670000] Absolute Difference: 0.00120000 Index 36120: 2024-02-16,[1.73090000] | 2024-02-23,[1.70700000] Absolute Difference: 0.02390000 Index 36144: 2024-02-19,[1.75720000] | 2024-02-26,[1.71930000] Absolute Difference: 0.03790000 Index 36168: 2024-02-20,[1.72570000] | 2024-02-27,[1.70950000] Absolute Difference: 0.01620000 Index 36192: 2024-02-21,[1.73900000] | 2024-02-28,[1.78510000] Absolute Difference: 0.04610000 Index 36216: 2024-02-22,[1.74670000] | 2024-02-29,[1.72370000] Absolute Difference: 0.02300000 Index 36240: 2024-02-23,[1.70700000] | 2024-03-04,[1.70140000] Absolute Difference: 0.00560000 Index 36264: 2024-02-26,[1.71930000] | 2024-03-05,[1.70360000] Absolute Difference: 0.01570000 Index 36288: 2024-02-27,[1.70950000] | 2024-03-06,[1.68420000] Absolute Difference: 0.02530000 Index 36312: 2024-02-28,[1.78510000] | 2024-03-07,[1.68670000] Absolute Difference: 0.09840000 Index 36336: 2024-02-29,[1.72370000] | 2024-03-08,[1.70560000] Absolute Difference: 0.01810000 Index 36360: 2024-03-04,[1.70140000] | 2024-03-11,[1.71890000] Absolute Difference: 0.01750000 Index 36384: 2024-03-05,[1.70360000] | 2024-03-12,[1.70280000] Absolute Difference: 0.00080000 Index 36408: 2024-03-06,[1.68420000] | 2024-03-13,[1.70920000] Absolute Difference: 0.02500000 Index 36432: 2024-03-07,[1.68670000] | 2024-03-14,[1.69110000] Absolute Difference: 0.00440000 Index 36456: 2024-03-08,[1.70560000] | 2024-03-15,[1.69240000] Absolute Difference: 0.01320000 Index 36480: 2024-03-11,[1.71890000] | 2024-03-18,[1.70350000] Absolute Difference: 0.01540000 Index 36504: 2024-03-12,[1.70280000] | 2024-03-19,[1.68930000] Absolute Difference: 0.01350000 Index 36528: 2024-03-13,[1.70920000] | 2024-03-20,[1.66100000] Absolute Difference: 0.04820000 Index 36552: 2024-03-14,[1.69110000] | 2024-03-21,[1.69600000] Absolute Difference: 0.00490000 Index 36576: 2024-03-15,[1.69240000] | 2024-03-22,[1.69510000] Absolute Difference: 0.00270000 Index 36600: 2024-03-18,[1.70350000] | 2024-03-25,[1.68840000] Absolute Difference: 0.01510000 Index 36624: 2024-03-19,[1.68930000] | 2024-03-26,[1.65260000] Absolute Difference: 0.03670000 Index 36648: 2024-03-20,[1.66100000] | 2024-03-27,[1.68510000] Absolute Difference: 0.02410000 Index 36672: 2024-03-21,[1.69600000] | 2024-03-28,[1.68180000] Absolute Difference: 0.01420000 Index 36696: 2024-03-22,[1.69510000] | 2024-03-29,[1.71380000] Absolute Difference: 0.01870000 Index 36720: 2024-03-25,[1.68840000] | 2024-04-01,[1.72570000] Absolute Difference: 0.03730000 Index 36744: 2024-03-26,[1.65260000] | 2024-04-02,[1.73360000] Absolute Difference: 0.08100000 Index 36768: 2024-03-27,[1.68510000] | 2024-04-03,[1.74240000] Absolute Difference: 0.05730000 Index 36792: 2024-03-28,[1.68180000] | 2024-04-04,[1.74830000] Absolute Difference: 0.06650000 Index 36816: 2024-03-29,[1.71380000] | 2024-04-05,[1.73720000] Absolute Difference: 0.02340000 Index 36840: 2024-04-01,[1.72570000] | 2024-04-08,[1.76400000] Absolute Difference: 0.03830000 Index 36864: 2024-04-02,[1.73360000] | 2024-04-09,[1.75500000] Absolute Difference: 0.02140000 Index 36888: 2024-04-03,[1.74240000] | 2024-04-11,[1.74670000] Absolute Difference: 0.00430000 Index 36912: 2024-04-04,[1.74830000] | 2024-04-12,[1.75620000] Absolute Difference: 0.00790000 Index 36936: 2024-04-05,[1.73720000] | 2024-04-15,[1.76610000] Absolute Difference: 0.02890000 Index 36960: 2024-04-08,[1.76400000] | 2024-04-16,[1.74100000] Absolute Difference: 0.02300000 Index 36984: 2024-04-09,[1.75500000] | 2024-04-17,[1.72680000] Absolute Difference: 0.02820000 Index 37008: 2024-04-11,[1.74670000] | 2024-04-18,[1.71390000] Absolute Difference: 0.03280000 Index 37032: 2024-04-12,[1.75620000] | 2024-04-19,[1.71100000] Absolute Difference: 0.04520000 Index 37056: 2024-04-15,[1.76610000] | 2024-04-22,[1.73260000] Absolute Difference: 0.03350000 Index 37080: 2024-04-16,[1.74100000] | 2024-04-23,[1.68840000] Absolute Difference: 0.05260000 Index 37104: 2024-04-17,[1.72680000] | 2024-04-24,[1.70490000] Absolute Difference: 0.02190000 Index 37128: 2024-04-18,[1.71390000] | 2024-04-25,[1.73770000] Absolute Difference: 0.02380000 Index 37152: 2024-04-19,[1.71100000] | 2024-04-26,[1.73170000] Absolute Difference: 0.02070000 Index 37176: 2024-04-22,[1.73260000] | 2024-04-29,[1.69590000] Absolute Difference: 0.03670000 Index 37200: 2024-04-23,[1.68840000] | 2024-04-30,[1.73280000] Absolute Difference: 0.04440000 Index 37224: 2024-04-24,[1.70490000] | 2024-05-02,[1.70210000] Absolute Difference: 0.00280000 Index 37248: 2024-04-25,[1.73770000] | 2024-05-03,[1.69390000] Absolute Difference: 0.04380000 Index 37272: 2024-04-26,[1.73170000] | 2024-05-07,[1.69410000] Absolute Difference: 0.03760000 Index 37296: 2024-04-29,[1.69590000] | 2024-05-08,[1.72890000] Absolute Difference: 0.03300000 Index 37320: 2024-04-30,[1.73280000] | 2024-05-09,[1.73310000] Absolute Difference: 0.00030000 Index 37344: 2024-05-02,[1.70210000] | 2024-05-10,[1.74350000] Absolute Difference: 0.04140000 Index 37368: 2024-05-03,[1.69390000] | 2024-05-13,[1.74790000] Absolute Difference: 0.05400000 Index 37392: 2024-05-07,[1.69410000] | 2024-05-14,[1.72950000] Absolute Difference: 0.03540000 Index 37416: 2024-05-08,[1.72890000] | 2024-05-16,[1.69370000] Absolute Difference: 0.03520000 Index 37440: 2024-05-09,[1.73310000] | 2024-05-17,[1.70140000] Absolute Difference: 0.03170000 Index 37464: 2024-05-10,[1.74350000] | 2024-05-20,[1.71160000] Absolute Difference: 0.03190000 Index 37488: 2024-05-13,[1.74790000] | 2024-05-21,[1.69590000] Absolute Difference: 0.05200000 Index 37512: 2024-05-14,[1.72950000] | 2024-05-22,[1.65380000] Absolute Difference: 0.07570000 Index 37536: 2024-05-16,[1.69370000] | 2024-05-23,[1.68870000] Absolute Difference: 0.00500000 Index 37560: 2024-05-17,[1.70140000] | 2024-05-24,[1.73330000] Absolute Difference: 0.03190000 Index 37584: 2024-05-20,[1.71160000] | 2024-05-27,[1.69180000] Absolute Difference: 0.01980000 Index 37608: 2024-05-21,[1.69590000] | 2024-05-28,[1.68820000] Absolute Difference: 0.00770000 Index 37632: 2024-05-22,[1.65380000] | 2024-05-29,[1.69720000] Absolute Difference: 0.04340000 Index 37656: 2024-05-23,[1.68870000] | 2024-05-30,[1.69520000] Absolute Difference: 0.00650000 Index 37680: 2024-05-24,[1.73330000] | 2024-05-31,[1.70330000] Absolute Difference: 0.03000000 Index 37704: 2024-05-27,[1.69180000] | 2024-06-03,[1.69990000] Absolute Difference: 0.00810000 Index 37728: 2024-05-28,[1.68820000] | 2024-06-04,[1.68940000] Absolute Difference: 0.00120000 Index 37752: 2024-05-29,[1.69720000] | 2024-06-05,[1.66300000] Absolute Difference: 0.03420000 Index 37776: 2024-05-30,[1.69520000] | 2024-06-07,[1.69730000] Absolute Difference: 0.00210000 Index 37800: 2024-05-31,[1.70330000] | 2024-06-10,[1.68830000] Absolute Difference: 0.01500000 Index 37824: 2024-06-03,[1.69990000] | 2024-06-11,[1.67940000] Absolute Difference: 0.02050000 Index 37848: 2024-06-04,[1.68940000] | 2024-06-12,[1.70270000] Absolute Difference: 0.01330000 Index 37872: 2024-06-05,[1.66300000] | 2024-06-13,[1.69240000] Absolute Difference: 0.02940000 Index 37896: 2024-06-07,[1.69730000] | 2024-06-14,[1.69690000] Absolute Difference: 0.00040000 Index 37920: 2024-06-10,[1.68830000] | 2024-06-17,[1.70930000] Absolute Difference: 0.02100000 Index 37944: 2024-06-11,[1.67940000] | 2024-06-18,[1.68280000] Absolute Difference: 0.00340000 Index 37968: 2024-06-12,[1.70270000] | 2024-06-19,[1.68840000] Absolute Difference: 0.01430000 Index 37992: 2024-06-13,[1.69240000] | 2024-06-20,[1.68620000] Absolute Difference: 0.00620000 Index 38016: 2024-06-14,[1.69690000] | 2024-06-21,[1.69280000] Absolute Difference: 0.00410000 Index 38040: 2024-06-17,[1.70930000] | 2024-06-24,[1.69010000] Absolute Difference: 0.01920000 Index 38064: 2024-06-18,[1.68280000] | 2024-06-25,[1.67480000] Absolute Difference: 0.00800000 Index 38088: 2024-06-19,[1.68840000] | 2024-06-26,[1.64200000] Absolute Difference: 0.04640000 Index 38112: 2024-06-20,[1.68620000] | 2024-06-27,[1.65840000] Absolute Difference: 0.02780000 Index 38136: 2024-06-21,[1.69280000] | 2024-06-28,[1.67180000] Absolute Difference: 0.02100000 Index 38160: 2024-06-24,[1.69010000] | 2024-07-01,[1.66670000] Absolute Difference: 0.02340000 Index 38184: 2024-06-25,[1.67480000] | 2024-07-02,[1.70790000] Absolute Difference: 0.03310000 Index 38208: 2024-06-26,[1.64200000] | 2024-07-03,[1.67920000] Absolute Difference: 0.03720000 Index 38232: 2024-06-27,[1.65840000] | 2024-07-04,[1.66320000] Absolute Difference: 0.00480000 Index 38256: 2024-06-28,[1.67180000] | 2024-07-05,[1.68950000] Absolute Difference: 0.01770000 Index 38280: 2024-07-01,[1.66670000] | 2024-07-08,[1.68270000] Absolute Difference: 0.01600000 Index 38304: 2024-07-02,[1.70790000] | 2024-07-09,[1.66710000] Absolute Difference: 0.04080000 Index 38328: 2024-07-03,[1.67920000] | 2024-07-10,[1.66650000] Absolute Difference: 0.01270000 Index 38352: 2024-07-04,[1.66320000] | 2024-07-11,[1.67980000] Absolute Difference: 0.01660000 Index 38376: 2024-07-05,[1.68950000] | 2024-07-12,[1.69300000] Absolute Difference: 0.00350000 Index 38400: 2024-07-08,[1.68270000] | 2024-07-15,[1.70350000] Absolute Difference: 0.02080000 Index 38424: 2024-07-09,[1.66710000] | 2024-07-16,[1.71720000] Absolute Difference: 0.05010000 Index 38448: 2024-07-10,[1.66650000] | 2024-07-17,[1.70250000] Absolute Difference: 0.03600000 Index 38472: 2024-07-11,[1.67980000] | 2024-07-18,[1.69070000] Absolute Difference: 0.01090000 Index 38496: 2024-07-12,[1.69300000] | 2024-07-19,[1.71280000] Absolute Difference: 0.01980000 Index 38520: 2024-07-15,[1.70350000] | 2024-07-22,[1.70410000] Absolute Difference: 0.00060000 Index 38544: 2024-07-16,[1.71720000] | 2024-07-23,[1.68380000] Absolute Difference: 0.03340000 Index 38568: 2024-07-17,[1.70250000] | 2024-07-24,[1.69280000] Absolute Difference: 0.00970000 Index 38592: 2024-07-18,[1.69070000] | 2024-07-25,[1.69160000] Absolute Difference: 0.00090000 Index 38616: 2024-07-19,[1.71280000] | 2024-07-26,[1.68510000] Absolute Difference: 0.02770000 Index 38640: 2024-07-22,[1.70410000] | 2024-07-29,[1.66730000] Absolute Difference: 0.03680000 Index 38664: 2024-07-23,[1.68380000] | 2024-07-30,[1.69490000] Absolute Difference: 0.01110000 Index 38688: 2024-07-24,[1.69280000] | 2024-07-31,[1.67300000] Absolute Difference: 0.01980000 Index 38712: 2024-07-25,[1.69160000] | 2024-08-01,[1.68480000] Absolute Difference: 0.00680000 Index 38736: 2024-07-26,[1.68510000] | 2024-08-02,[1.66320000] Absolute Difference: 0.02190000 Index 38760: 2024-07-29,[1.66730000] | 2024-08-05,[1.66740000] Absolute Difference: 0.00010000 Index 38784: 2024-07-30,[1.69490000] | 2024-08-06,[1.70100000] Absolute Difference: 0.00610000 Index 38808: 2024-07-31,[1.67300000] | 2024-08-07,[1.69790000] Absolute Difference: 0.02490000 Index 38832: 2024-08-01,[1.68480000] | 2024-08-08,[1.68880000] Absolute Difference: 0.00400000 Index 38856: 2024-08-02,[1.66320000] | 2024-08-09,[1.68240000] Absolute Difference: 0.01920000 Index 38880: 2024-08-05,[1.66740000] | 2024-08-12,[1.67190000] Absolute Difference: 0.00450000 Index 38904: 2024-08-06,[1.70100000] | 2024-08-13,[1.68080000] Absolute Difference: 0.02020000 Index 38928: 2024-08-07,[1.69790000] | 2024-08-14,[1.68760000] Absolute Difference: 0.01030000 Index 38952: 2024-08-08,[1.68880000] | 2024-08-16,[1.67480000] Absolute Difference: 0.01400000 Index 38976: 2024-08-09,[1.68240000] | 2024-08-19,[1.66380000] Absolute Difference: 0.01860000 Index 39000: 2024-08-12,[1.67190000] | 2024-08-20,[1.68720000] Absolute Difference: 0.01530000 최근 365일 동안의 5일 후 절대 차이의 합계: 8.23810000 최근 365일 동안의 5일 후 절대 차이의 평균: 0.02288361
In [65]:
import numpy as np
# numpy 파일로 저장
print(f"total_size: {total_size}, {total_size/24}일")
print(f"train_size: {train_size}, {train_size/24}일")
print(f"val_size : {val_size}, {val_size/24}일")
print(f"test_size : {test_size}, {test_size/24}일")
print(f"nim_train_mean: {nim_train_mean:+.6f}")
print(f"nim_train_std : {nim_train_std:+.6f}")
print(f"target_train_mean: {target_train_mean:+.6f}")
print(f"target_train_std : {target_train_std:+.6f}")
print(f"correct_threshold : {correct_threshold:+.6f}")
# 변수들을 numpy 배열로 저장
nim_variables = np.array([
total_size, train_size, val_size, test_size,
nim_train_mean, nim_train_std, target_train_mean, target_train_std, correct_threshold
])
# numpy 파일로 저장
np.save('data/numpy/nim_variables.npy', nim_variables)
print(f"nim_variables saved: data/numpy/nim_variables.npy")
# numpy 파일에서 불러오기
nim_variables = np.load('data/numpy/nim_variables.npy')
print(f"nim_variables loaded: data/numpy/nim_variables.npy")
total_size = int(nim_variables[0])
train_size = int(nim_variables[1])
val_size = int(nim_variables[2])
test_size = int(nim_variables[3])
nim_train_mean = nim_variables[4]
nim_train_std = nim_variables[5]
target_train_mean = nim_variables[6]
target_train_std = nim_variables[7]
correct_threshold = nim_variables[8]
print(f"total_size: {total_size}, {total_size/24}일")
print(f"train_size: {train_size}, {train_size/24}일")
print(f"val_size : {val_size}, {val_size/24}일")
print(f"test_size : {test_size}, {test_size/24}일")
print(f"nim_train_mean: {nim_train_mean:+.6f}")
print(f"nim_train_std : {nim_train_std:+.6f}")
print(f"target_train_mean: {target_train_mean:+.6f}")
print(f"target_train_std : {target_train_std:+.6f}")
print(f"correct_threshold : {correct_threshold:+.6f}")
total_size: 39144, 1631.0일 train_size: 27384, 1141.0일 val_size : 7824, 326.0일 test_size : 3936, 164.0일 nim_train_mean: +1.696047 nim_train_std : +0.188296 target_train_mean: +0.000752 target_train_std : +0.031531 correct_threshold : +0.022884 nim_variables saved: data/numpy/nim_variables.npy nim_variables loaded: data/numpy/nim_variables.npy total_size: 39144, 1631.0일 train_size: 27384, 1141.0일 val_size : 7824, 326.0일 test_size : 3936, 164.0일 nim_train_mean: +1.696047 nim_train_std : +0.188296 target_train_mean: +0.000752 target_train_std : +0.031531 correct_threshold : +0.022884
In [ ]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(25, 10))
# plt.plot(nim_ibks_data, label='IBK Data')
# plt.title('IBK Raw Data Over Time')
# plt.xlabel('Time')
# plt.ylabel('Value')
# plt.grid(True)
# plt.show()
In [ ]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(25, 10))
# # 'YlGnBu' 컬러맵에서 색상 리스트를 생성 (4개의 색상 추출)
# cmap = plt.get_cmap('YlGnBu')
# colors = cmap(np.linspace(0, 1, nim_ibks_data.shape[1])).tolist() # 열의 개수에 맞게 색상 생성
# sns.violinplot(data=nim_ibks_data, palette=colors) # 색상 목록 전달
# plt.title('Violin Plot of IBK Data (Multiple Variables)')
# plt.xlabel('Variables')
# plt.ylabel('Value')
# plt.grid(True)
# plt.show()
In [ ]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(25, 10))
# plt.plot(nim_boks_data, label='BOK Data')
# plt.title('BOK Raw Data Over Time')
# plt.xlabel('Time')
# plt.ylabel('Value')
# plt.grid(True)
# plt.show()
In [ ]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(25, 10))
# # 'YlGnBu' 컬러맵에서 색상 리스트를 생성 (4개의 색상 추출)
# cmap = plt.get_cmap('YlGn')
# colors = cmap(np.linspace(0, 1, nim_boks_data.shape[1])).tolist() # 열의 개수에 맞게 색상 생성
# sns.violinplot(data=nim_boks_data, palette=colors) # 색상 목록 전달
# plt.title('Violin Plot of BOK Data (Multiple Variables)')
# plt.xlabel('Variables')
# plt.ylabel('Value')
# plt.grid(True)
# plt.show()
In [ ]:
# from sklearn.preprocessing import StandardScaler
# from sklearn.manifold import TSNE
# # '뉴스_EMBEDDING' 컬럼을 추출해 (num_samples, embedding_dim) 형태로 변환
# embedded_vectors = np.vstack(news_raw_df['뉴스_EMBEDDING'].values)
# # 임베딩 벡터 정규화 (표준화)하여 t-SNE에 사용할 준비
# scaler = StandardScaler()
# embedded_vectors_scaled = scaler.fit_transform(embedded_vectors)
# # t-SNE로 차원 축소 (2차원으로 축소)
# tsne = TSNE(n_components=2, random_state=42)
# embedded_vectors_2d = tsne.fit_transform(embedded_vectors_scaled)
# # 시각화를 위해 NIM 값 가져오기
# nim_values = news_raw_df['NIM']
# # 길이 일치 확인 후, 작은 길이에 맞춰 슬라이싱
# min_length = min(len(embedded_vectors_2d), len(nim_values))
# embedded_vectors_2d = embedded_vectors_2d[:min_length]
# nim_values = nim_values.iloc[:min_length]
# # t-SNE 결과를 NIM 값에 따라 시각화
# plt.figure(figsize=(25, 10))
# scatter = plt.scatter(embedded_vectors_2d[:, 0], embedded_vectors_2d[:, 1], c=nim_values, cmap='coolwarm', s=50, alpha=0.7)
# # 색상바 추가
# plt.colorbar(scatter, label='NIM Value')
# plt.title('t-SNE Visualization of News Embeddings by NIM Value')
# plt.grid(True)
# plt.show()
In [ ]:
# from sklearn.cluster import KMeans
# import matplotlib.pyplot as plt
# from sklearn.manifold import TSNE
# from sklearn.preprocessing import StandardScaler
# import numpy as np
# # 1. 임베딩 벡터 준비
# # embedded_sequences를 (num_samples, max_len, embedding_dim)에서 (num_samples, embedding_dim)으로 변환
# embedded_vectors = np.vstack(news_raw_df['뉴스_EMBEDDING'].values)
# # 2. 임베딩 벡터를 표준화 (StandardScaler 사용)
# scaler = StandardScaler()
# embedded_vectors_scaled = scaler.fit_transform(embedded_vectors)
# # 3. KMeans 클러스터링 (10개의 클러스터)
# kmeans = KMeans(n_clusters=10, random_state=42)
# clusters = kmeans.fit_predict(embedded_vectors_scaled)
# # 4. t-SNE를 사용하여 차원 축소 (2차원)
# tsne = TSNE(n_components=2, random_state=42)
# embedded_vectors_2d = tsne.fit_transform(embedded_vectors_scaled)
# # 5. 시각화: 각 클러스터를 다른 색상으로 표시
# plt.figure(figsize=(25, 10))
# scatter = plt.scatter(embedded_vectors_2d[:, 0], embedded_vectors_2d[:, 1], c=clusters, cmap='tab10', s=50, alpha=0.7)
# # 그래프 제목 및 축 레이블 설정
# plt.title('t-SNE Visualization of News Embeddings with K-Means Clustering (10 Clusters)')
# plt.grid(True)
# plt.colorbar(scatter, label='Cluster') # 클러스터를 색상으로 표시하는 색상 바 추가
# plt.show()
In [82]:
import numpy as np
# 업로드된 파일 경로
file_path = 'data/numpy/nim_values.npy'
# 파일 읽기
nim_values = np.load(file_path)
# 각 일자의 첫 번째 값(0번째 값)만 추출
daily_first_values = nim_values[::24]
# 추출된 데이터에서 마지막 365일만 남기기
last_365_days = daily_first_values[-365:]
# 5일 간의 차이를 계산
five_day_differences = last_365_days[5:] - last_365_days[:-5]
# 5일 간 차이의 절대값 계산
five_day_absolute_differences = np.abs(five_day_differences)
# 평균 및 중위값 계산
mean_value = np.mean(five_day_absolute_differences)
median_value = np.median(five_day_absolute_differences)
# 출력: 평균과 중위값
print(f"Mean: {mean_value:.8f}")
print(f"Median: {median_value:.8f}")
# 루프를 돌면서 각 상위 퍼센트 값을 전역 변수로 할당
for p in range(100, 0, -5):
key = f"Top_{100-p}_percent"
value = np.percentile(five_day_absolute_differences, p)
globals()[key] = value # 전역 변수로 할당
# 모든 전역 변수 출력
for p in range(100, 0, -5):
key = f"Top_{100-p}_percent"
if key in globals():
print(f"{key}: {globals()[key]:.8f}")
Mean: 0.02290222 Median: 0.02030000 Top_0_percent: 0.09840000 Top_5_percent: 0.05305000 Top_10_percent: 0.04613000 Top_15_percent: 0.04071500 Top_20_percent: 0.03608000 Top_25_percent: 0.03302500 Top_30_percent: 0.03000000 Top_35_percent: 0.02707000 Top_40_percent: 0.02452000 Top_45_percent: 0.02300000 Top_50_percent: 0.02030000 Top_55_percent: 0.01810000 Top_60_percent: 0.01562000 Top_65_percent: 0.01326500 Top_70_percent: 0.00984000 Top_75_percent: 0.00750000 Top_80_percent: 0.00558000 Top_85_percent: 0.00410000 Top_90_percent: 0.00270000 Top_95_percent: 0.00120000
In [81]:
Top_25_percent
Out[81]:
0.03302500000000008