Local Multimodal pipeline with OpenVINO¶
OpenVINO™ is an open-source toolkit for optimizing and deploying AI inference. The OpenVINO™ Runtime supports various hardware devices including x86 and ARM CPUs, and Intel GPUs. It can help to boost deep learning performance in Computer Vision, Automatic Speech Recognition, Natural Language Processing and other common tasks.
Hugging Face multimodal models can be run with OpenVINO through the OpenVINOMultiModal
class.
In [ ]:
Copied!
%pip install llama-index-multi-modal-llms-openvino -q
%pip install llama-index-multi-modal-llms-openvino -q
In [ ]:
Copied!
%pip install llama-index llama-index-readers-file -q
%pip install llama-index llama-index-readers-file -q
Export and compress multimodal model¶
It is possible to export your model to the OpenVINO IR format with the CLI, and then load it from a local folder.
In [ ]:
Copied!
from pathlib import Path
model_id = "llava-hf/llava-v1.6-mistral-7b-hf"
model_path = Path(model_id.split("/")[-1]) / "FP16"
if not model_path.exists():
!optimum-cli export openvino --model {model_id} --weight-format fp16 {model_path}
from pathlib import Path
model_id = "llava-hf/llava-v1.6-mistral-7b-hf"
model_path = Path(model_id.split("/")[-1]) / "FP16"
if not model_path.exists():
!optimum-cli export openvino --model {model_id} --weight-format fp16 {model_path}
In [ ]:
Copied!
import gc
import shutil

import nncf
import openvino as ov

core = ov.Core()

# INT4 symmetric weight compression: 60% of the weight layers go to 4-bit
# (group size 64); the remainder stay at 8-bit for accuracy.
compression_configuration = {
    "mode": nncf.CompressWeightsMode.INT4_SYM,
    "group_size": 64,
    "ratio": 0.6,
}

# Compressed IR lives next to the FP16 one, e.g. <model-name>/INT4.
compressed_model_path = model_path.parent / "INT4"
if not compressed_model_path.exists():
    # Ensure the target directory exists before saving/copying into it.
    compressed_model_path.mkdir(parents=True, exist_ok=True)

    ov_model = core.read_model(model_path / "openvino_language_model.xml")
    # BUG FIX: the original referenced the undefined name `compression_config`
    # (the dict above is `compression_configuration`) — a NameError at runtime.
    compressed_ov_model = nncf.compress_weights(ov_model, **compression_configuration)
    ov.save_model(
        compressed_ov_model,
        compressed_model_path / "openvino_language_model.xml",
    )

    # Free the (large) in-memory models before copying the remaining artifacts.
    del compressed_ov_model
    del ov_model
    gc.collect()

    # Copy every other artifact (configs, tokenizer, vision encoder, ...)
    # unchanged; only the language model was re-compressed.
    for file_name in model_path.glob("*"):
        if file_name.name in [
            "openvino_language_model.xml",
            "openvino_language_model.bin",
        ]:
            continue
        shutil.copy(file_name, compressed_model_path)
import gc
import shutil

import nncf
import openvino as ov

core = ov.Core()

# INT4 symmetric weight compression: 60% of the weight layers go to 4-bit
# (group size 64); the remainder stay at 8-bit for accuracy.
compression_configuration = {
    "mode": nncf.CompressWeightsMode.INT4_SYM,
    "group_size": 64,
    "ratio": 0.6,
}

# Compressed IR lives next to the FP16 one, e.g. <model-name>/INT4.
compressed_model_path = model_path.parent / "INT4"
if not compressed_model_path.exists():
    # Ensure the target directory exists before saving/copying into it.
    compressed_model_path.mkdir(parents=True, exist_ok=True)

    ov_model = core.read_model(model_path / "openvino_language_model.xml")
    # BUG FIX: the original referenced the undefined name `compression_config`
    # (the dict above is `compression_configuration`) — a NameError at runtime.
    compressed_ov_model = nncf.compress_weights(ov_model, **compression_configuration)
    ov.save_model(
        compressed_ov_model,
        compressed_model_path / "openvino_language_model.xml",
    )

    # Free the (large) in-memory models before copying the remaining artifacts.
    del compressed_ov_model
    del ov_model
    gc.collect()

    # Copy every other artifact (configs, tokenizer, vision encoder, ...)
    # unchanged; only the language model was re-compressed.
    for file_name in model_path.glob("*"):
        if file_name.name in [
            "openvino_language_model.xml",
            "openvino_language_model.bin",
        ]:
            continue
        shutil.copy(file_name, compressed_model_path)
INFO:nncf:Statistics of the bitwidth distribution: ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ │ 8 │ 2% (1 / 225) │ 0% (0 / 224) │ ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ │ 4 │ 98% (224 / 225) │ 100% (224 / 224) │ ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙
Output()
Prepare the input data¶
In [ ]:
Copied!
import os

# BUG FIX: `requests` and `Image` were used without being imported (NameError).
import requests
from PIL import Image

# Directory for local input images; exist_ok makes re-running the cell harmless.
os.makedirs("./input_images", exist_ok=True)

# Download the sample image and decode it straight from the HTTP stream
# (stream=True exposes the raw response as a file-like object).
url = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"
image = Image.open(requests.get(url, stream=True).raw)
image
import os

# BUG FIX: `requests` and `Image` were used without being imported (NameError).
import requests
from PIL import Image

# Directory for local input images; exist_ok makes re-running the cell harmless.
os.makedirs("./input_images", exist_ok=True)

# Download the sample image and decode it straight from the HTTP stream
# (stream=True exposes the raw response as a file-like object).
url = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"
image = Image.open(requests.get(url, stream=True).raw)
image
Out[ ]: