In [1]:
import setup_env
from setup_env import device
--------------------------------------------------------------------------------
=== Hardware Acceleration ===
PyTorch version: 2.10.0+cu128
Using NVIDIA GPU (CUDA)
CUDA version: 12.8
GPU name: NVIDIA GeForce RTX 5070 Ti
GPU count: 1
Total GPU memory: 15.92 GB
Allocated memory: 0.00 GB
Free memory: 15.92 GB
Device: cuda
=== Matplotlib Settings ===
✅ Font: NanumGothic
=== System Info ===
OS: Ubuntu 24.04.3 LTS (Noble Numbat)
Kernel: 6.6.87.2-microsoft-standard-WSL2
Architecture: x86_64
Python: 3.12.3
Working directory: /workspace/ai-deeplearning/tutorial
=== Library Versions ===
NumPy: 2.4.2
Pandas: 3.0.1
Matplotlib: 3.10.7
Scikit-learn: 1.7.2
OpenCV: Not installed → !pip install -q opencv-python
Pillow: 12.0.0
Seaborn: 0.13.2
TensorFlow: Not installed → !pip install -q tensorflow
Transformers: 5.2.0
TorchVision: 0.25.0+cu128
=== Environment setup completed ===
--------------------------------------------------------------------------------
=== Visualizing Test Plot (Wide View) ===
=== GPU Usage Code Snippet === Device set to: cuda ---------------------------------------- # 아래 코드를 복사해서 모델과 데이터를 GPU로 보내세요: model = YourModel().to(device) data = data.to(device) ---------------------------------------- === Environment setup completed === --------------------------------------------------------------------------------
EXAONE-3.5-7.8B-Instruct-GGUF¶
In [5]:
# Build & install llama-cpp-python with CUDA (cuBLAS) support.
# CMAKE_ARGS must be set as a shell env var, so `!` shell magic is used here
# (`%pip` cannot pass a per-invocation environment variable).
# NOTE(review): version is unpinned; the run below resolved to 0.3.16.
!CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir -q
import llama_cpp
print(f"llama-cpp-python: {llama_cpp.__version__}")
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. nvidia-resiliency-ext 0.4.1+cuda13 requires pynvml>=12.0.0, which is not installed. numba 0.62.1 requires numpy<2.4,>=1.22, but you have numpy 2.4.2 which is incompatible. WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning. llama-cpp-python: 0.3.16
In [4]:
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# GGUF quantized weights for EXAONE-3.5 7.8B Instruct (Q4_K_M, ~4.4 GiB).
filename = "EXAONE-3.5-7.8B-Instruct-Q4_K_M.gguf"
# BUG FIX: this was f"./models/(unknown)" — the {filename} placeholder was
# missing, so the literal path never existed and the cache check always failed.
local_path = f"./models/{filename}"

if not os.path.exists(local_path):
    # Download (or reuse the hub cache) into ./models.
    # NOTE(review): with cache_dir, hf_hub_download stores the file under
    # ./models/models--LGAI-EXAONE--.../snapshots/<hash>/, so local_path will
    # still not match on a later run; consider local_dir="./models" instead.
    model_path = hf_hub_download(
        repo_id="LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct-GGUF",
        filename=filename,
        cache_dir="./models",
    )
else:
    model_path = local_path

# n_gpu_layers=-1 offloads all layers to the GPU; n_ctx=2048 caps the context
# window (the model was trained with n_ctx_train=32768 — see load log below).
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=-1, verbose=True)
print("로드 완료")
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
Device 0: NVIDIA GeForce RTX 5070 Ti, compute capability 12.0, VMM: yes
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 5070 Ti) - 15009 MiB free
llama_model_loader: loaded meta data with 27 key-value pairs and 292 tensors from ./models/models--LGAI-EXAONE--EXAONE-3.5-7.8B-Instruct-GGUF/snapshots/c618bf67338171760c72c3f109f2900cb7d79855/EXAONE-3.5-7.8B-Instruct-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = exaone
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = EXAONE 3.5 7.8B 32K INST
llama_model_loader: - kv 3: general.finetune str = 32k-INST
llama_model_loader: - kv 4: general.basename str = EXAONE-3.5
llama_model_loader: - kv 5: general.size_label str = 7.8B
llama_model_loader: - kv 6: exaone.embedding_length u32 = 4096
llama_model_loader: - kv 7: exaone.attention.head_count u32 = 32
llama_model_loader: - kv 8: exaone.attention.head_count_kv u32 = 8
llama_model_loader: - kv 9: exaone.context_length u32 = 32768
llama_model_loader: - kv 10: exaone.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 11: exaone.feed_forward_length u32 = 14336
llama_model_loader: - kv 12: exaone.block_count u32 = 32
llama_model_loader: - kv 13: general.file_type u32 = 15
llama_model_loader: - kv 14: exaone.rope.freq_base f32 = 1000000.000000
llama_model_loader: - kv 15: exaone.rope.dimension_count u32 = 128
llama_model_loader: - kv 16: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 17: tokenizer.ggml.pre str = exaone
llama_model_loader: - kv 18: tokenizer.ggml.tokens arr[str,102400] = ["[PAD]", "[BOS]", "[EOS]", "[UNK]", ...
llama_model_loader: - kv 19: tokenizer.ggml.token_type arr[i32,102400] = [3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, ...
llama_model_loader: - kv 20: tokenizer.ggml.merges arr[str,101782] = ["t h", "Ġ a", "Ġ í", "i n", "Ġ t...
llama_model_loader: - kv 21: tokenizer.ggml.bos_token_id u32 = 1
llama_model_loader: - kv 22: tokenizer.ggml.eos_token_id u32 = 361
llama_model_loader: - kv 23: tokenizer.ggml.unknown_token_id u32 = 3
llama_model_loader: - kv 24: tokenizer.ggml.padding_token_id u32 = 0
llama_model_loader: - kv 25: tokenizer.chat_template str = {% for message in messages %}{% if lo...
llama_model_loader: - kv 26: general.quantization_version u32 = 2
llama_model_loader: - type f32: 66 tensors
llama_model_loader: - type q4_K: 193 tensors
llama_model_loader: - type q6_K: 33 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 4.44 GiB (4.88 BPW)
init_tokenizer: initializing tokenizer for type 2
load: control token: 182 '[extra_id_20]' is not marked as EOG
load: control token: 269 '[extra_id_107]' is not marked as EOG
load: control token: 212 '[extra_id_50]' is not marked as EOG
load: control token: 270 '[extra_id_108]' is not marked as EOG
load: control token: 217 '[extra_id_55]' is not marked as EOG
load: control token: 55 'PI:PHONE_NUM' is not marked as EOG
load: control token: 325 '[extra_id_163]' is not marked as EOG
load: control token: 310 '[extra_id_148]' is not marked as EOG
load: control token: 348 '[extra_id_186]' is not marked as EOG
load: control token: 285 '[extra_id_123]' is not marked as EOG
load: control token: 2 '[EOS]' is not marked as EOG
load: control token: 53 'PI:EMAIL' is not marked as EOG
load: control token: 132 '[unused70]' is not marked as EOG
load: control token: 124 '[unused62]' is not marked as EOG
load: control token: 80 '[unused18]' is not marked as EOG
load: control token: 98 '[unused36]' is not marked as EOG
load: control token: 63 '[unused1]' is not marked as EOG
load: control token: 114 '[unused52]' is not marked as EOG
load: control token: 336 '[extra_id_174]' is not marked as EOG
load: control token: 148 '[unused86]' is not marked as EOG
load: control token: 100 '[unused38]' is not marked as EOG
load: control token: 155 '[unused93]' is not marked as EOG
load: control token: 94 '[unused32]' is not marked as EOG
load: control token: 360 '[extra_id_198]' is not marked as EOG
load: control token: 203 '[extra_id_41]' is not marked as EOG
load: control token: 323 '[extra_id_161]' is not marked as EOG
load: control token: 344 '[extra_id_182]' is not marked as EOG
load: control token: 263 '[extra_id_101]' is not marked as EOG
load: control token: 161 '[unused99]' is not marked as EOG
load: control token: 266 '[extra_id_104]' is not marked as EOG
load: control token: 160 '[unused98]' is not marked as EOG
load: control token: 350 '[extra_id_188]' is not marked as EOG
load: control token: 349 '[extra_id_187]' is not marked as EOG
load: control token: 178 '[extra_id_16]' is not marked as EOG
load: control token: 44 '<|c++|>' is not marked as EOG
load: control token: 246 '[extra_id_84]' is not marked as EOG
load: control token: 71 '[unused9]' is not marked as EOG
load: control token: 104 '[unused42]' is not marked as EOG
load: control token: 176 '[extra_id_14]' is not marked as EOG
load: control token: 275 '[extra_id_113]' is not marked as EOG
load: control token: 135 '[unused73]' is not marked as EOG
load: control token: 69 '[unused7]' is not marked as EOG
load: control token: 249 '[extra_id_87]' is not marked as EOG
load: control token: 122 '[unused60]' is not marked as EOG
load: control token: 256 '[extra_id_94]' is not marked as EOG
load: control token: 271 '[extra_id_109]' is not marked as EOG
load: control token: 85 '[unused23]' is not marked as EOG
load: control token: 229 '[extra_id_67]' is not marked as EOG
load: control token: 45 '<|python|>' is not marked as EOG
load: control token: 78 '[unused16]' is not marked as EOG
load: control token: 185 '[extra_id_23]' is not marked as EOG
load: control token: 169 '[extra_id_7]' is not marked as EOG
load: control token: 322 '[extra_id_160]' is not marked as EOG
load: control token: 251 '[extra_id_89]' is not marked as EOG
load: control token: 77 '[unused15]' is not marked as EOG
load: control token: 282 '[extra_id_120]' is not marked as EOG
load: control token: 208 '[extra_id_46]' is not marked as EOG
load: control token: 206 '[extra_id_44]' is not marked as EOG
load: control token: 233 '[extra_id_71]' is not marked as EOG
load: control token: 59 'PI:ID' is not marked as EOG
load: control token: 81 '[unused19]' is not marked as EOG
load: control token: 74 '[unused12]' is not marked as EOG
load: control token: 138 '[unused76]' is not marked as EOG
load: control token: 164 '[extra_id_2]' is not marked as EOG
load: control token: 194 '[extra_id_32]' is not marked as EOG
load: control token: 140 '[unused78]' is not marked as EOG
load: control token: 313 '[extra_id_151]' is not marked as EOG
load: control token: 355 '[extra_id_193]' is not marked as EOG
load: control token: 91 '[unused29]' is not marked as EOG
load: control token: 180 '[extra_id_18]' is not marked as EOG
load: control token: 343 '[extra_id_181]' is not marked as EOG
load: control token: 328 '[extra_id_166]' is not marked as EOG
load: control token: 67 '[unused5]' is not marked as EOG
load: control token: 334 '[extra_id_172]' is not marked as EOG
load: control token: 82 '[unused20]' is not marked as EOG
load: control token: 197 '[extra_id_35]' is not marked as EOG
load: control token: 361 '[|endofturn|]' is not marked as EOG
load: control token: 324 '[extra_id_162]' is not marked as EOG
load: control token: 302 '[extra_id_140]' is not marked as EOG
load: control token: 272 '[extra_id_110]' is not marked as EOG
load: control token: 149 '[unused87]' is not marked as EOG
load: control token: 250 '[extra_id_88]' is not marked as EOG
load: control token: 153 '[unused91]' is not marked as EOG
load: control token: 60 'PI:IP_ADDRESS' is not marked as EOG
load: control token: 329 '[extra_id_167]' is not marked as EOG
load: control token: 49 '<|css|>' is not marked as EOG
load: control token: 314 '[extra_id_152]' is not marked as EOG
load: control token: 61 'PI:USER' is not marked as EOG
load: control token: 341 '[extra_id_179]' is not marked as EOG
load: control token: 299 '[extra_id_137]' is not marked as EOG
load: control token: 315 '[extra_id_153]' is not marked as EOG
load: control token: 166 '[extra_id_4]' is not marked as EOG
load: control token: 136 '[unused74]' is not marked as EOG
load: control token: 236 '[extra_id_74]' is not marked as EOG
load: control token: 207 '[extra_id_45]' is not marked as EOG
load: control token: 133 '[unused71]' is not marked as EOG
load: control token: 226 '[extra_id_64]' is not marked as EOG
load: control token: 340 '[extra_id_178]' is not marked as EOG
load: control token: 338 '[extra_id_176]' is not marked as EOG
load: control token: 332 '[extra_id_170]' is not marked as EOG
load: control token: 354 '[extra_id_192]' is not marked as EOG
load: control token: 103 '[unused41]' is not marked as EOG
load: control token: 145 '[unused83]' is not marked as EOG
load: control token: 307 '[extra_id_145]' is not marked as EOG
load: control token: 293 '[extra_id_131]' is not marked as EOG
load: control token: 196 '[extra_id_34]' is not marked as EOG
load: control token: 292 '[extra_id_130]' is not marked as EOG
load: control token: 219 '[extra_id_57]' is not marked as EOG
load: control token: 238 '[extra_id_76]' is not marked as EOG
load: control token: 280 '[extra_id_118]' is not marked as EOG
load: control token: 268 '[extra_id_106]' is not marked as EOG
load: control token: 125 '[unused63]' is not marked as EOG
load: control token: 356 '[extra_id_194]' is not marked as EOG
load: control token: 351 '[extra_id_189]' is not marked as EOG
load: control token: 119 '[unused57]' is not marked as EOG
load: control token: 95 '[unused33]' is not marked as EOG
load: control token: 231 '[extra_id_69]' is not marked as EOG
load: control token: 303 '[extra_id_141]' is not marked as EOG
load: control token: 320 '[extra_id_158]' is not marked as EOG
load: control token: 327 '[extra_id_165]' is not marked as EOG
load: control token: 262 '[extra_id_100]' is not marked as EOG
load: control token: 174 '[extra_id_12]' is not marked as EOG
load: control token: 309 '[extra_id_147]' is not marked as EOG
load: control token: 56 'PI:BUSINESS_NUM' is not marked as EOG
load: control token: 66 '[unused4]' is not marked as EOG
load: control token: 241 '[extra_id_79]' is not marked as EOG
load: control token: 111 '[unused49]' is not marked as EOG
load: control token: 167 '[extra_id_5]' is not marked as EOG
load: control token: 62 '[unused0]' is not marked as EOG
load: control token: 297 '[extra_id_135]' is not marked as EOG
load: control token: 316 '[extra_id_154]' is not marked as EOG
load: control token: 267 '[extra_id_105]' is not marked as EOG
load: control token: 112 '[unused50]' is not marked as EOG
load: control token: 216 '[extra_id_54]' is not marked as EOG
load: control token: 70 '[unused8]' is not marked as EOG
load: control token: 234 '[extra_id_72]' is not marked as EOG
load: control token: 106 '[unused44]' is not marked as EOG
load: control token: 220 '[extra_id_58]' is not marked as EOG
load: control token: 319 '[extra_id_157]' is not marked as EOG
load: control token: 255 '[extra_id_93]' is not marked as EOG
load: control token: 193 '[extra_id_31]' is not marked as EOG
load: control token: 141 '[unused79]' is not marked as EOG
load: control token: 286 '[extra_id_124]' is not marked as EOG
load: control token: 345 '[extra_id_183]' is not marked as EOG
load: control token: 222 '[extra_id_60]' is not marked as EOG
load: control token: 202 '[extra_id_40]' is not marked as EOG
load: control token: 265 '[extra_id_103]' is not marked as EOG
load: control token: 48 '<|html|>' is not marked as EOG
load: control token: 224 '[extra_id_62]' is not marked as EOG
load: control token: 93 '[unused31]' is not marked as EOG
load: control token: 191 '[extra_id_29]' is not marked as EOG
load: control token: 264 '[extra_id_102]' is not marked as EOG
load: control token: 295 '[extra_id_133]' is not marked as EOG
load: control token: 120 '[unused58]' is not marked as EOG
load: control token: 43 '<|c|>' is not marked as EOG
load: control token: 258 '[extra_id_96]' is not marked as EOG
load: control token: 276 '[extra_id_114]' is not marked as EOG
load: control token: 248 '[extra_id_86]' is not marked as EOG
load: control token: 52 'PI:URL' is not marked as EOG
load: control token: 131 '[unused69]' is not marked as EOG
load: control token: 201 '[extra_id_39]' is not marked as EOG
load: control token: 337 '[extra_id_175]' is not marked as EOG
load: control token: 137 '[unused75]' is not marked as EOG
load: control token: 158 '[unused96]' is not marked as EOG
load: control token: 347 '[extra_id_185]' is not marked as EOG
load: control token: 142 '[unused80]' is not marked as EOG
load: control token: 213 '[extra_id_51]' is not marked as EOG
load: control token: 151 '[unused89]' is not marked as EOG
load: control token: 242 '[extra_id_80]' is not marked as EOG
load: control token: 159 '[unused97]' is not marked as EOG
load: control token: 79 '[unused17]' is not marked as EOG
load: control token: 68 '[unused6]' is not marked as EOG
load: control token: 308 '[extra_id_146]' is not marked as EOG
load: control token: 168 '[extra_id_6]' is not marked as EOG
load: control token: 278 '[extra_id_116]' is not marked as EOG
load: control token: 171 '[extra_id_9]' is not marked as EOG
load: control token: 352 '[extra_id_190]' is not marked as EOG
load: control token: 284 '[extra_id_122]' is not marked as EOG
load: control token: 215 '[extra_id_53]' is not marked as EOG
load: control token: 281 '[extra_id_119]' is not marked as EOG
load: control token: 3 '[UNK]' is not marked as EOG
load: control token: 221 '[extra_id_59]' is not marked as EOG
load: control token: 90 '[unused28]' is not marked as EOG
load: control token: 254 '[extra_id_92]' is not marked as EOG
load: control token: 283 '[extra_id_121]' is not marked as EOG
load: control token: 64 '[unused2]' is not marked as EOG
load: control token: 204 '[extra_id_42]' is not marked as EOG
load: control token: 130 '[unused68]' is not marked as EOG
load: control token: 96 '[unused34]' is not marked as EOG
load: control token: 301 '[extra_id_139]' is not marked as EOG
load: control token: 54 'PI:ACCOUNT_NUM' is not marked as EOG
load: control token: 172 '[extra_id_10]' is not marked as EOG
load: control token: 126 '[unused64]' is not marked as EOG
load: control token: 50 '<|vue|>' is not marked as EOG
load: control token: 189 '[extra_id_27]' is not marked as EOG
load: control token: 181 '[extra_id_19]' is not marked as EOG
load: control token: 97 '[unused35]' is not marked as EOG
load: control token: 209 '[extra_id_47]' is not marked as EOG
load: control token: 139 '[unused77]' is not marked as EOG
load: control token: 190 '[extra_id_28]' is not marked as EOG
load: control token: 342 '[extra_id_180]' is not marked as EOG
load: control token: 247 '[extra_id_85]' is not marked as EOG
load: control token: 165 '[extra_id_3]' is not marked as EOG
load: control token: 240 '[extra_id_78]' is not marked as EOG
load: control token: 170 '[extra_id_8]' is not marked as EOG
load: control token: 46 '<|javascript|>' is not marked as EOG
load: control token: 123 '[unused61]' is not marked as EOG
load: control token: 291 '[extra_id_129]' is not marked as EOG
load: control token: 88 '[unused26]' is not marked as EOG
load: control token: 51 '<|java|>' is not marked as EOG
load: control token: 331 '[extra_id_169]' is not marked as EOG
load: control token: 239 '[extra_id_77]' is not marked as EOG
load: control token: 274 '[extra_id_112]' is not marked as EOG
load: control token: 113 '[unused51]' is not marked as EOG
load: control token: 107 '[unused45]' is not marked as EOG
load: control token: 252 '[extra_id_90]' is not marked as EOG
load: control token: 232 '[extra_id_70]' is not marked as EOG
load: control token: 65 '[unused3]' is not marked as EOG
load: control token: 288 '[extra_id_126]' is not marked as EOG
load: control token: 253 '[extra_id_91]' is not marked as EOG
load: control token: 192 '[extra_id_30]' is not marked as EOG
load: control token: 121 '[unused59]' is not marked as EOG
load: control token: 359 '[extra_id_197]' is not marked as EOG
load: control token: 89 '[unused27]' is not marked as EOG
load: control token: 311 '[extra_id_149]' is not marked as EOG
load: control token: 273 '[extra_id_111]' is not marked as EOG
load: control token: 154 '[unused92]' is not marked as EOG
load: control token: 115 '[unused53]' is not marked as EOG
load: control token: 333 '[extra_id_171]' is not marked as EOG
load: control token: 357 '[extra_id_195]' is not marked as EOG
load: control token: 1 '[BOS]' is not marked as EOG
load: control token: 306 '[extra_id_144]' is not marked as EOG
load: control token: 163 '[extra_id_1]' is not marked as EOG
load: control token: 118 '[unused56]' is not marked as EOG
load: control token: 99 '[unused37]' is not marked as EOG
load: control token: 244 '[extra_id_82]' is not marked as EOG
load: control token: 358 '[extra_id_196]' is not marked as EOG
load: control token: 289 '[extra_id_127]' is not marked as EOG
load: control token: 117 '[unused55]' is not marked as EOG
load: control token: 134 '[unused72]' is not marked as EOG
load: control token: 187 '[extra_id_25]' is not marked as EOG
load: control token: 162 '[extra_id_0]' is not marked as EOG
load: control token: 279 '[extra_id_117]' is not marked as EOG
load: control token: 211 '[extra_id_49]' is not marked as EOG
load: control token: 304 '[extra_id_142]' is not marked as EOG
load: control token: 296 '[extra_id_134]' is not marked as EOG
load: control token: 108 '[unused46]' is not marked as EOG
load: control token: 188 '[extra_id_26]' is not marked as EOG
load: control token: 287 '[extra_id_125]' is not marked as EOG
load: control token: 259 '[extra_id_97]' is not marked as EOG
load: control token: 227 '[extra_id_65]' is not marked as EOG
load: control token: 346 '[extra_id_184]' is not marked as EOG
load: control token: 223 '[extra_id_61]' is not marked as EOG
load: control token: 175 '[extra_id_13]' is not marked as EOG
load: control token: 186 '[extra_id_24]' is not marked as EOG
load: control token: 353 '[extra_id_191]' is not marked as EOG
load: control token: 205 '[extra_id_43]' is not marked as EOG
load: control token: 243 '[extra_id_81]' is not marked as EOG
load: control token: 84 '[unused22]' is not marked as EOG
load: control token: 277 '[extra_id_115]' is not marked as EOG
load: control token: 76 '[unused14]' is not marked as EOG
load: control token: 109 '[unused47]' is not marked as EOG
load: control token: 156 '[unused94]' is not marked as EOG
load: control token: 317 '[extra_id_155]' is not marked as EOG
load: control token: 127 '[unused65]' is not marked as EOG
load: control token: 235 '[extra_id_73]' is not marked as EOG
load: control token: 260 '[extra_id_98]' is not marked as EOG
load: control token: 298 '[extra_id_136]' is not marked as EOG
load: control token: 195 '[extra_id_33]' is not marked as EOG
load: control token: 300 '[extra_id_138]' is not marked as EOG
load: control token: 0 '[PAD]' is not marked as EOG
load: control token: 318 '[extra_id_156]' is not marked as EOG
load: control token: 128 '[unused66]' is not marked as EOG
load: control token: 47 '<|markdown|>' is not marked as EOG
load: control token: 330 '[extra_id_168]' is not marked as EOG
load: control token: 245 '[extra_id_83]' is not marked as EOG
load: control token: 294 '[extra_id_132]' is not marked as EOG
load: control token: 101 '[unused39]' is not marked as EOG
load: control token: 198 '[extra_id_36]' is not marked as EOG
load: control token: 177 '[extra_id_15]' is not marked as EOG
load: control token: 58 'PI:KEY' is not marked as EOG
load: control token: 261 '[extra_id_99]' is not marked as EOG
load: control token: 72 '[unused10]' is not marked as EOG
load: control token: 143 '[unused81]' is not marked as EOG
load: control token: 237 '[extra_id_75]' is not marked as EOG
load: control token: 183 '[extra_id_21]' is not marked as EOG
load: control token: 179 '[extra_id_17]' is not marked as EOG
load: control token: 83 '[unused21]' is not marked as EOG
load: control token: 116 '[unused54]' is not marked as EOG
load: control token: 105 '[unused43]' is not marked as EOG
load: control token: 146 '[unused84]' is not marked as EOG
load: control token: 57 'PI:ANNON' is not marked as EOG
load: control token: 87 '[unused25]' is not marked as EOG
load: control token: 92 '[unused30]' is not marked as EOG
load: control token: 290 '[extra_id_128]' is not marked as EOG
load: control token: 110 '[unused48]' is not marked as EOG
load: control token: 129 '[unused67]' is not marked as EOG
load: control token: 147 '[unused85]' is not marked as EOG
load: control token: 214 '[extra_id_52]' is not marked as EOG
load: control token: 75 '[unused13]' is not marked as EOG
load: control token: 144 '[unused82]' is not marked as EOG
load: control token: 150 '[unused88]' is not marked as EOG
load: control token: 312 '[extra_id_150]' is not marked as EOG
load: control token: 225 '[extra_id_63]' is not marked as EOG
load: control token: 102 '[unused40]' is not marked as EOG
load: control token: 305 '[extra_id_143]' is not marked as EOG
load: control token: 73 '[unused11]' is not marked as EOG
load: control token: 228 '[extra_id_66]' is not marked as EOG
load: control token: 200 '[extra_id_38]' is not marked as EOG
load: control token: 184 '[extra_id_22]' is not marked as EOG
load: control token: 321 '[extra_id_159]' is not marked as EOG
load: control token: 199 '[extra_id_37]' is not marked as EOG
load: control token: 326 '[extra_id_164]' is not marked as EOG
load: control token: 335 '[extra_id_173]' is not marked as EOG
load: control token: 86 '[unused24]' is not marked as EOG
load: control token: 152 '[unused90]' is not marked as EOG
load: control token: 339 '[extra_id_177]' is not marked as EOG
load: control token: 173 '[extra_id_11]' is not marked as EOG
load: control token: 218 '[extra_id_56]' is not marked as EOG
load: control token: 210 '[extra_id_48]' is not marked as EOG
load: control token: 230 '[extra_id_68]' is not marked as EOG
load: control token: 257 '[extra_id_95]' is not marked as EOG
load: control token: 157 '[unused95]' is not marked as EOG
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
load: printing all EOG tokens:
load: - 42 ('<|endoftext|>')
load: - 361 ('[|endofturn|]')
load: special tokens cache size = 362
load: token to piece cache size = 0.6622 MB
print_info: arch = exaone
print_info: vocab_only = 0
print_info: n_ctx_train = 32768
print_info: n_embd = 4096
print_info: n_layer = 32
print_info: n_head = 32
print_info: n_head_kv = 8
print_info: n_rot = 128
print_info: n_swa = 0
print_info: is_swa_any = 0
print_info: n_embd_head_k = 128
print_info: n_embd_head_v = 128
print_info: n_gqa = 4
print_info: n_embd_k_gqa = 1024
print_info: n_embd_v_gqa = 1024
print_info: f_norm_eps = 0.0e+00
print_info: f_norm_rms_eps = 1.0e-05
print_info: f_clamp_kqv = 0.0e+00
print_info: f_max_alibi_bias = 0.0e+00
print_info: f_logit_scale = 0.0e+00
print_info: f_attn_scale = 0.0e+00
print_info: n_ff = 14336
print_info: n_expert = 0
print_info: n_expert_used = 0
print_info: causal attn = 1
print_info: pooling type = 0
print_info: rope type = 2
print_info: rope scaling = linear
print_info: freq_base_train = 1000000.0
print_info: freq_scale_train = 1
print_info: n_ctx_orig_yarn = 32768
print_info: rope_finetuned = unknown
print_info: model type = 8B
print_info: model params = 7.82 B
print_info: general.name = EXAONE 3.5 7.8B 32K INST
print_info: vocab type = BPE
print_info: n_vocab = 102400
print_info: n_merges = 101782
print_info: BOS token = 1 '[BOS]'
print_info: EOS token = 361 '[|endofturn|]'
print_info: EOT token = 42 '<|endoftext|>'
print_info: UNK token = 3 '[UNK]'
print_info: PAD token = 0 '[PAD]'
print_info: LF token = 560 'Ċ'
print_info: EOG token = 42 '<|endoftext|>'
print_info: EOG token = 361 '[|endofturn|]'
print_info: max token length = 48
load_tensors: loading model tensors, this can take a while... (mmap = true)
load_tensors: layer 0 assigned to device CUDA0, is_swa = 0
load_tensors: layer 1 assigned to device CUDA0, is_swa = 0
load_tensors: layer 2 assigned to device CUDA0, is_swa = 0
load_tensors: layer 3 assigned to device CUDA0, is_swa = 0
load_tensors: layer 4 assigned to device CUDA0, is_swa = 0
load_tensors: layer 5 assigned to device CUDA0, is_swa = 0
load_tensors: layer 6 assigned to device CUDA0, is_swa = 0
load_tensors: layer 7 assigned to device CUDA0, is_swa = 0
load_tensors: layer 8 assigned to device CUDA0, is_swa = 0
load_tensors: layer 9 assigned to device CUDA0, is_swa = 0
load_tensors: layer 10 assigned to device CUDA0, is_swa = 0
load_tensors: layer 11 assigned to device CUDA0, is_swa = 0
load_tensors: layer 12 assigned to device CUDA0, is_swa = 0
load_tensors: layer 13 assigned to device CUDA0, is_swa = 0
load_tensors: layer 14 assigned to device CUDA0, is_swa = 0
load_tensors: layer 15 assigned to device CUDA0, is_swa = 0
load_tensors: layer 16 assigned to device CUDA0, is_swa = 0
load_tensors: layer 17 assigned to device CUDA0, is_swa = 0
load_tensors: layer 18 assigned to device CUDA0, is_swa = 0
load_tensors: layer 19 assigned to device CUDA0, is_swa = 0
load_tensors: layer 20 assigned to device CUDA0, is_swa = 0
load_tensors: layer 21 assigned to device CUDA0, is_swa = 0
load_tensors: layer 22 assigned to device CUDA0, is_swa = 0
load_tensors: layer 23 assigned to device CUDA0, is_swa = 0
load_tensors: layer 24 assigned to device CUDA0, is_swa = 0
load_tensors: layer 25 assigned to device CUDA0, is_swa = 0
load_tensors: layer 26 assigned to device CUDA0, is_swa = 0
load_tensors: layer 27 assigned to device CUDA0, is_swa = 0
load_tensors: layer 28 assigned to device CUDA0, is_swa = 0
load_tensors: layer 29 assigned to device CUDA0, is_swa = 0
load_tensors: layer 30 assigned to device CUDA0, is_swa = 0
load_tensors: layer 31 assigned to device CUDA0, is_swa = 0
load_tensors: layer 32 assigned to device CUDA0, is_swa = 0
load_tensors: tensor 'token_embd.weight' (q4_K) (and 0 others) cannot be used with preferred buffer type CUDA_Host, using CPU instead
load_tensors: offloading 32 repeating layers to GPU
load_tensors: offloading output layer to GPU
load_tensors: offloaded 33/33 layers to GPU
load_tensors: CUDA0 model buffer size = 4320.64 MiB
load_tensors: CPU_Mapped model buffer size = 225.00 MiB
..........................................................................................
llama_context: constructing llama_context
llama_context: n_seq_max = 1
llama_context: n_ctx = 2048
llama_context: n_ctx_per_seq = 2048
llama_context: n_batch = 512
llama_context: n_ubatch = 512
llama_context: causal_attn = 1
llama_context: flash_attn = 0
llama_context: kv_unified = false
llama_context: freq_base = 1000000.0
llama_context: freq_scale = 1
llama_context: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
set_abort_callback: call
llama_context: CUDA_Host output buffer size = 0.39 MiB
create_memory: n_ctx = 2048 (padded)
llama_kv_cache_unified: layer 0: dev = CUDA0
llama_kv_cache_unified: layer 1: dev = CUDA0
llama_kv_cache_unified: layer 2: dev = CUDA0
llama_kv_cache_unified: layer 3: dev = CUDA0
llama_kv_cache_unified: layer 4: dev = CUDA0
llama_kv_cache_unified: layer 5: dev = CUDA0
llama_kv_cache_unified: layer 6: dev = CUDA0
llama_kv_cache_unified: layer 7: dev = CUDA0
llama_kv_cache_unified: layer 8: dev = CUDA0
llama_kv_cache_unified: layer 9: dev = CUDA0
llama_kv_cache_unified: layer 10: dev = CUDA0
llama_kv_cache_unified: layer 11: dev = CUDA0
llama_kv_cache_unified: layer 12: dev = CUDA0
llama_kv_cache_unified: layer 13: dev = CUDA0
llama_kv_cache_unified: layer 14: dev = CUDA0
llama_kv_cache_unified: layer 15: dev = CUDA0
llama_kv_cache_unified: layer 16: dev = CUDA0
llama_kv_cache_unified: layer 17: dev = CUDA0
llama_kv_cache_unified: layer 18: dev = CUDA0
llama_kv_cache_unified: layer 19: dev = CUDA0
llama_kv_cache_unified: layer 20: dev = CUDA0
llama_kv_cache_unified: layer 21: dev = CUDA0
llama_kv_cache_unified: layer 22: dev = CUDA0
llama_kv_cache_unified: layer 23: dev = CUDA0
llama_kv_cache_unified: layer 24: dev = CUDA0
llama_kv_cache_unified: layer 25: dev = CUDA0
llama_kv_cache_unified: layer 26: dev = CUDA0
llama_kv_cache_unified: layer 27: dev = CUDA0
llama_kv_cache_unified: layer 28: dev = CUDA0
llama_kv_cache_unified: layer 29: dev = CUDA0
llama_kv_cache_unified: layer 30: dev = CUDA0
llama_kv_cache_unified: layer 31: dev = CUDA0
llama_kv_cache_unified: CUDA0 KV buffer size = 256.00 MiB
llama_kv_cache_unified: size = 256.00 MiB ( 2048 cells, 32 layers, 1/1 seqs), K (f16): 128.00 MiB, V (f16): 128.00 MiB
llama_context: enumerating backends
llama_context: backend_ptrs.size() = 2
llama_context: max_nodes = 2336
llama_context: worst-case: n_tokens = 512, n_seqs = 1, n_outputs = 0
graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512
graph_reserve: reserving a graph for ubatch with n_tokens = 1, n_seqs = 1, n_outputs = 1
graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512
llama_context: CUDA0 compute buffer size = 208.00 MiB
llama_context: CUDA_Host compute buffer size = 16.01 MiB
llama_context: graph nodes = 1126
llama_context: graph splits = 2
로드 완료
CUDA : ARCHS = 1200 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
Model metadata: {'tokenizer.chat_template': "{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{{ '[|system|][|endofturn|]\n' }}{% endif %}{{ '[|' + message['role'] + '|]' + message['content'] }}{% if message['role'] == 'user' %}{{ '\n' }}{% else %}{{ '[|endofturn|]\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[|assistant|]' }}{% endif %}", 'tokenizer.ggml.unknown_token_id': '3', 'tokenizer.ggml.eos_token_id': '361', 'tokenizer.ggml.bos_token_id': '1', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'gpt2', 'exaone.rope.dimension_count': '128', 'general.file_type': '15', 'general.architecture': 'exaone', 'exaone.attention.layer_norm_rms_epsilon': '0.000010', 'exaone.feed_forward_length': '14336', 'tokenizer.ggml.padding_token_id': '0', 'exaone.attention.head_count_kv': '8', 'general.basename': 'EXAONE-3.5', 'tokenizer.ggml.pre': 'exaone', 'general.name': 'EXAONE 3.5 7.8B 32K INST', 'general.finetune': '32k-INST', 'general.type': 'model', 'general.size_label': '7.8B', 'exaone.embedding_length': '4096', 'exaone.attention.head_count': '32', 'exaone.rope.freq_base': '1000000.000000', 'exaone.context_length': '32768', 'exaone.block_count': '32'}
Available chat formats from metadata: chat_template.default
Using gguf chat template: {% for message in messages %}{% if loop.first and message['role'] != 'system' %}{{ '[|system|][|endofturn|]
' }}{% endif %}{{ '[|' + message['role'] + '|]' + message['content'] }}{% if message['role'] == 'user' %}{{ '
' }}{% else %}{{ '[|endofturn|]
' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[|assistant|]' }}{% endif %}
Using chat eos_token: [|endofturn|]
Using chat bos_token: [BOS]
In [5]:
import subprocess

# Report current GPU memory usage (used / free / total) via nvidia-smi.
# --format=csv,noheader yields one comma-separated line per GPU.
result = subprocess.run(
    ["nvidia-smi", "--query-gpu=memory.used,memory.free,memory.total", "--format=csv,noheader"],
    capture_output=True,
    text=True,
)
if result.returncode != 0 or not result.stdout.strip():
    # nvidia-smi missing or driver error — report it instead of crashing
    # with a ValueError on the tuple unpack below.
    print(f"nvidia-smi 실행 실패: {result.stderr.strip() or 'no output'}")
else:
    # Parse only the first line so a multi-GPU machine doesn't break the
    # 3-way unpack; each GPU prints its own CSV line.
    used, free, total = result.stdout.strip().splitlines()[0].split(", ")
    print(f"GPU 메모리: {used} 사용중 / {free} 여유 / {total} 전체")
GPU 메모리: 10059 MiB 사용중 / 5937 MiB 여유 / 16303 MiB 전체
In [16]:
import chromadb

# Connection target for the locally running Chroma server.
CHROMA_HOST = "127.0.0.1"
CHROMA_PORT = 55533

# Connect over HTTP and fetch (or lazily create) the collection that
# holds the RAG reference documents.
client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
collection = client.get_or_create_collection(name="rag_collection")
def ask(question):
    """Answer `question` with the local LLM, grounded in retrieved context.

    Queries the module-level Chroma `collection` for the 3 most similar
    document chunks, injects them into a system prompt, and asks the
    module-level `llm` (llama-cpp model) for a chat completion.

    Returns the assistant's reply as a plain string.
    """
    # Retrieve the top-3 matching chunks; documents[0] is the result list
    # for the first (and only) query text.
    hits = collection.query(query_texts=[question], n_results=3)
    context = "\n".join(hits["documents"][0])

    # System prompt: prefer the retrieved context, fall back to model knowledge.
    system_prompt = f"""다음 참고 정보를 우선적으로 활용해서 답변해줘. 참고 정보에 없으면 네가 알고 있는 지식으로 답변해줘.
[참고 정보]
{context}"""

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]
    response = llm.create_chat_completion(messages=chat_messages, max_tokens=512)
    return response["choices"][0]["message"]["content"]
In [17]:
ask("카레집 이름이 뭐야?")
Llama.generate: 178 prefix-match hit, remaining 14 prompt tokens to eval llama_perf_context_print: load time = 236.17 ms llama_perf_context_print: prompt eval time = 152.94 ms / 14 tokens ( 10.92 ms per token, 91.54 tokens per second) llama_perf_context_print: eval time = 915.96 ms / 136 runs ( 6.74 ms per token, 148.48 tokens per second) llama_perf_context_print: total time = 1149.68 ms / 150 tokens llama_perf_context_print: graphs reused = 131
Out[17]:
'제공된 정보에는 특정 카레집의 이름이 명시적으로 언급되어 있지 않습니다. 연세대학교 정보대학원의 식사 이용 안내에 따르면 외부 식당을 이용해야 하며, 정확한 식당 리스트는 추후 업데이트 예정이라고 나와 있습니다. 따라서 현재로선 특정 카레집의 이름을 확인하기 어렵습니다. \n\n카레를 드시고 싶으시다면, 연세대학교 주변의 카레 전문점을 직접 확인해 보시는 것이 좋을 것 같습니다. 주변 지역의 인기 있는 카레집들을 찾아보시거나, 학교 측에 업데이트된 식당 리스트를 문의해 보시는 것도 좋은 방법일 것입니다.'
In [18]:
ask("결제는 어떻게해?")
Llama.generate: 178 prefix-match hit, remaining 12 prompt tokens to eval llama_perf_context_print: load time = 236.17 ms llama_perf_context_print: prompt eval time = 100.36 ms / 12 tokens ( 8.36 ms per token, 119.56 tokens per second) llama_perf_context_print: eval time = 2178.86 ms / 315 runs ( 6.92 ms per token, 144.57 tokens per second) llama_perf_context_print: total time = 2491.55 ms / 327 tokens llama_perf_context_print: graphs reused = 304
Out[18]:
'결제 방법에 대해 자세한 내용이 참고 정보에 명시되어 있지 않지만, 일반적인 외부 식당 이용 절차를 바탕으로 추정해 볼 수 있습니다:\n\n1. **주문 및 장부 기재**: 금요일 저녁이나 토요일 점심에 식당을 방문하신 후, 정보대학원 장부에 과정명과 성명을 기재하고 주문을 하실 것입니다.\n\n2. **결제 방식**:\n - **현금 결제**: 가장 기본적인 방법으로, 주문한 음식의 총 금액을 현금으로 직접 지불하실 수 있습니다.\n - **카드 결제**: 만약 식당에서 카드 결제를 지원한다면, 신용카드나 체크카드로 결제가 가능할 수 있습니다. 하지만 이 경우 카드 결제가 가능한지 미리 확인하시는 것이 좋습니다.\n - **개별 초과 금액 결제**: 주문 가능 금액이 정해져 있고 초과 시 개별 결제가 필요하다면, 주문 시 안내된 금액을 초과하는 부분은 별도로 현금이나 카드로 지불하시게 될 것입니다.\n\n**추천 사항**:\n- 결제 전에 식당 직원에게 결제 방법에 대해 확인하시는 것이 가장 정확한 정보를 얻는 방법입니다.\n- 만약 특정 카드나 모바일 결제 앱을 사용하고 싶으시다면, 미리 해당 앱이나 카드사에 문의해 보는 것도 좋습니다.\n\n정확한 절차를 위해서는 해당 식당이나 정보대학원에 직접 문의하시는 것이 가장 확실한 방법일 것입니다.'
In [19]:
ask("강남구에도 있어")
Llama.generate: 178 prefix-match hit, remaining 12 prompt tokens to eval llama_perf_context_print: load time = 236.17 ms llama_perf_context_print: prompt eval time = 125.83 ms / 12 tokens ( 10.49 ms per token, 95.37 tokens per second) llama_perf_context_print: eval time = 2092.68 ms / 304 runs ( 6.88 ms per token, 145.27 tokens per second) llama_perf_context_print: total time = 2424.55 ms / 316 tokens llama_perf_context_print: graphs reused = 293
Out[19]:
"강남구에 연세대학교와 직접 연관된 특정 식당이나 식사 시설에 대한 정보는 주어진 참고 정보에서 명시적으로 언급되어 있지 않습니다. 주어진 정보는 주로 연세대학교 교내 지침과 교내 외부 식당 이용에 초점을 맞추고 있습니다.\n\n만약 연세대학교 학생이나 교직원이 강남구 지역에서 식사를 해결해야 하는 상황이라면, 다음과 같은 일반적인 조언을 드릴 수 있습니다:\n\n1. **강남구 내 유명한 식당 및 카페**: 강남구에는 다양한 식당과 카페가 있습니다. 예를 들어, 강남역 주변에는 다양한 한식당, 중식당, 양식당, 그리고 카페들이 많이 있습니다. 구체적인 장소로는 '강남역 맛집 거리', '코엑스 주변', '가로수길' 등이 있습니다.\n\n2. **온라인 주문 시스템**: 배달 앱 (예: 배달의민족, 요기요, 쿠팡이츠 등)을 활용하면 강남구 내 다양한 식당에서 음식을 주문하고 배달받을 수 있습니다.\n\n3. **대학 연계 서비스**: 연세대학교가 운영하는 외부 식당이나 제휴 식당이 강남구에 위치해 있는지 확인해 보는 것도 좋습니다. 학교 측에 직접 문의하여 최신 정보를 얻는 것이 가장 정확할 것입니다.\n\n더 구체적인 정보나 특정 장소에 대한 추천이 필요하시다면, 연세대학교 학생처나 관련 부서에 직접 문의하시는 것을 권장드립니다."
In [20]:
ask("주식시장 몇시까지야?")
Llama.generate: 178 prefix-match hit, remaining 14 prompt tokens to eval llama_perf_context_print: load time = 236.17 ms llama_perf_context_print: prompt eval time = 225.30 ms / 14 tokens ( 16.09 ms per token, 62.14 tokens per second) llama_perf_context_print: eval time = 1110.46 ms / 161 runs ( 6.90 ms per token, 144.98 tokens per second) llama_perf_context_print: total time = 1433.98 ms / 175 tokens llama_perf_context_print: graphs reused = 155
Out[20]:
'주식시장의 운영 시간은 국가와 거래소에 따라 다릅니다. 한국의 경우, 주요 주식시장인 코스피와 코스닥 시장의 일반적인 운영 시간은 다음과 같습니다:\n\n- **코스피 시장**: 평일 오전 9시부터 오후 3시 30분까지 (주말 및 공휴일 휴장)\n- **코스닥 시장**: 코스피와 동일하게 평일 오전 9시부터 오후 3시 30분까지 (주말 및 공휴일 휴장)\n\n주말과 공휴일에는 대부분의 거래가 중단됩니다. 하지만 실시간 거래 관련 서비스나 일부 정보 제공은 운영 시간 외에도 가능할 수 있으니, 구체적인 시간이나 변경 사항은 한국거래소 웹사이트나 금융 앱을 통해 확인하시는 것이 가장 정확합니다.'
In [21]:
ask("캐나다 수도가 어디지?")
Llama.generate: 178 prefix-match hit, remaining 13 prompt tokens to eval llama_perf_context_print: load time = 236.17 ms llama_perf_context_print: prompt eval time = 210.80 ms / 13 tokens ( 16.22 ms per token, 61.67 tokens per second) llama_perf_context_print: eval time = 88.40 ms / 13 runs ( 6.80 ms per token, 147.07 tokens per second) llama_perf_context_print: total time = 307.08 ms / 26 tokens llama_perf_context_print: graphs reused = 11
Out[21]:
'캐나다의 수도는 오타와(Ottawa)입니다.'
In [23]:
ask("주진규 전화번호")
Llama.generate: 41 prefix-match hit, remaining 169 prompt tokens to eval llama_perf_context_print: load time = 236.17 ms llama_perf_context_print: prompt eval time = 233.24 ms / 169 tokens ( 1.38 ms per token, 724.58 tokens per second) llama_perf_context_print: eval time = 152.27 ms / 23 runs ( 6.62 ms per token, 151.05 tokens per second) llama_perf_context_print: total time = 398.65 ms / 192 tokens llama_perf_context_print: graphs reused = 21
Out[23]:
'주진규의 전화번호는 010-1234-5678입니다.'