Skip to content

Commit

Permalink
Fix converting clip image f32 to OV tensor in LLaVA processing (openv…
Browse files Browse the repository at this point in the history
…inotoolkit#1124)

Ticket: CVS-154750
  • Loading branch information
yatarkan authored Nov 1, 2024
1 parent f3b9a68 commit c9cc4d9
Showing 1 changed file with 35 additions and 32 deletions.
67 changes: 35 additions & 32 deletions src/cpp/src/visual_language/vision_encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,36 @@
using namespace ov::genai;

namespace {
/**
* @brief Converts an OpenVINO image tensor (1HWC) to a clip_image_u8 structure.
*
* @param image_tensor An OpenVINO tensor (1HWC) containing the image data.
* @return A clip_image_u8 structure containing the image data.
*/
clip_image_u8 tensor_to_clip_image_u8(const ov::Tensor& image_tensor) {
clip_image_u8 image{
int(image_tensor.get_shape().at(2)),
int(image_tensor.get_shape().at(1)),
{image_tensor.data<uint8_t>(), image_tensor.data<uint8_t>() + image_tensor.get_size()}
};
return image;
}

/**
* @brief Converts a clip_image_f32 structure to an OpenVINO image tensor (1CHW).
*
* @param image A clip_image_f32 structure containing the image data.
* @return An OpenVINO tensor containing the image data (1CHW).
*/
ov::Tensor clip_image_f32_to_tensor(const clip_image_f32& image) {
ov::Tensor image_tensor{
ov::element::f32,
{1, 3, static_cast<size_t>(image.ny), static_cast<size_t>(image.nx)}
};
std::memcpy(image_tensor.data<float>(), image.buf.data(), image.buf.size() * sizeof(float));
return image_tensor;
}

int ensure_divide(int length, int patch_size) {
return std::max(static_cast<int>(std::round(static_cast<float>(length) / patch_size) * patch_size), patch_size);
}
Expand Down Expand Up @@ -275,11 +305,7 @@ ov::Tensor prepare_vis_position_ids(
}

EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const ov::Tensor& img, ov::InferRequest& encoder, int max_slice_nums, int scale_resolution, size_t patch_size, bool never_split) {
clip_image_u8 source{
int(img.get_shape().at(2)),
int(img.get_shape().at(1)),
{img.data<uint8_t>(), img.data<uint8_t>() + img.get_size()}
};
clip_image_u8 source = tensor_to_clip_image_u8(img);
std::vector<std::vector<clip_image_u8>> imgs = ::slice_image(source, max_slice_nums, scale_resolution, patch_size, never_split);
std::vector<std::vector<ov::Tensor>> results;
std::vector<std::vector<ImageSize>> sizes;
Expand Down Expand Up @@ -430,32 +456,13 @@ clip_image_f32 preprocess_clip_image_llava(const clip_image_u8& image, const Pro
}

ov::Tensor get_pixel_values_llava(const ov::Tensor& image, const ProcessorConfig& config) {
// ov::Tensor to clip_image_u8
clip_image_u8 input_image{
int(image.get_shape().at(2)),
int(image.get_shape().at(1)),
{image.data<uint8_t>(), image.data<uint8_t>() + image.get_size()}
};

clip_image_u8 input_image = tensor_to_clip_image_u8(image);
clip_image_f32 preprocessed_image = preprocess_clip_image_llava(input_image, config);

// Convert clip_image_f32 to ov::Tensor
ov::Tensor result(
ov::element::f32,
{1, 3, size_t(preprocessed_image.ny), size_t(preprocessed_image.nx)},
(void*)(preprocessed_image.buf.data())
);

return result;
return clip_image_f32_to_tensor(preprocessed_image);
}

ov::Tensor get_pixel_values_llava_next(const ov::Tensor& image, const ProcessorConfig& config) {
// ov::Tensor to clip_image_u8
clip_image_u8 input_image{
int(image.get_shape().at(2)),
int(image.get_shape().at(1)),
{image.data<uint8_t>(), image.data<uint8_t>() + image.get_size()}
};
clip_image_u8 input_image = tensor_to_clip_image_u8(image);

std::pair<int, int> size{config.size_shortest_edge, config.size_shortest_edge};
auto patch_size = config.crop_size_height;
Expand Down Expand Up @@ -578,11 +585,7 @@ std::vector<clip_image_u8> split_image_internvl(
}

ov::Tensor get_pixel_values_internvl(const ov::Tensor& image, const ProcessorConfig& config) {
clip_image_u8 input_image{
int(image.get_shape().at(2)),
int(image.get_shape().at(1)),
{image.data<uint8_t>(), image.data<uint8_t>() + image.get_size()}
};
clip_image_u8 input_image = tensor_to_clip_image_u8(image);

const size_t image_size = config.size_shortest_edge;

Expand Down

0 comments on commit c9cc4d9

Please sign in to comment.