Skip to content

Commit

Permalink
Support deserialize from file and mmap (#722)
Browse files Browse the repository at this point in the history
Signed-off-by: yah01 <yang.cen@zilliz.com>
  • Loading branch information
yah01 authored Mar 27, 2023
1 parent bf2a600 commit 7e52ec0
Show file tree
Hide file tree
Showing 15 changed files with 185 additions and 225 deletions.
5 changes: 5 additions & 0 deletions include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,11 @@ class BaseConfig : public Config {
}
};

// Options passed to the file-based deserialization entry points.
struct LoadConfig {
    // Request that the index be loaded with mmap. Off unless explicitly enabled.
    bool enable_mmap = false;
};

} // namespace knowhere

#endif /* CONFIG_H */
6 changes: 6 additions & 0 deletions include/knowhere/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#ifndef INDEX_H
#define INDEX_H

#include "knowhere/config.h"
#include "knowhere/index_node.h"
#include "knowhere/log.h"

Expand Down Expand Up @@ -248,6 +249,11 @@ class Index {
return this->node->Deserialize(binset);
}

// Loads the index from a file rather than from an in-memory BinarySet.
//
// filename: path of the file to load.
// config:   load options (defaults to a default-constructed LoadConfig).
//
// Forwards to the node's DeserializeFromFile() and returns its Status unchanged.
// The node interface has no Deserialize(filename, config) overload, so the
// file-based virtual must be called by its own name.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config = {}) {
    return this->node->DeserializeFromFile(filename, config);
}

int64_t
Dim() const {
return this->node->Dim();
Expand Down
3 changes: 3 additions & 0 deletions include/knowhere/index_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ class IndexNode : public Object {
// Pure virtual: each concrete index node supplies its own implementation.
// Takes a BinarySet and reports the outcome as a Status.
virtual Status
Deserialize(const BinarySet& binset) = 0;

// Pure virtual, file-based counterpart of Deserialize(): takes the path of the
// file to load plus a LoadConfig with load options, and reports the outcome as
// a Status. Index types without file loading return Status::not_implemented.
virtual Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) = 0;

// Pure virtual: returns a BaseConfig owned by the caller through std::unique_ptr.
// Concrete nodes return their own config subtype.
virtual std::unique_ptr<BaseConfig>
CreateConfig() const = 0;

Expand Down
5 changes: 5 additions & 0 deletions include/knowhere/index_node_thread_pool_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ class IndexNodeThreadPoolWrapper : public IndexNode {
return index_node_->Deserialize(binset);
}

// Pass-through: hands the file-based load request to the wrapped node with the
// same arguments and returns the status it reports.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) {
    Status load_status = index_node_->DeserializeFromFile(filename, config);
    return load_status;
}

std::unique_ptr<BaseConfig>
CreateConfig() const {
return index_node_->CreateConfig();
Expand Down
6 changes: 6 additions & 0 deletions src/index/annoy/annoy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "index/annoy/annoy_config.h"
#include "kissrandom.h"
#include "knowhere/comp/thread_pool.h"
#include "knowhere/expected.h"
#include "knowhere/factory.h"
namespace knowhere {

Expand Down Expand Up @@ -218,6 +219,11 @@ class AnnoyIndexNode : public IndexNode {
return Status::success;
}

// File-based loading is not provided by the Annoy index node: both arguments are
// ignored and the call always reports Status::not_implemented.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override {
    static_cast<void>(filename);
    static_cast<void>(config);
    return Status::not_implemented;
}

std::unique_ptr<BaseConfig>
CreateConfig() const override {
return std::make_unique<AnnoyConfig>();
Expand Down
5 changes: 5 additions & 0 deletions src/index/diskann/diskann.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ class DiskANNIndexNode : public IndexNode {
return Status::not_implemented;
}

// File-based loading is not provided by the DiskANN index node: both arguments
// are ignored and the call always reports Status::not_implemented.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override {
    static_cast<void>(filename);
    static_cast<void>(config);
    return Status::not_implemented;
}

std::unique_ptr<BaseConfig>
CreateConfig() const override {
return std::make_unique<DiskANNConfig>();
Expand Down
5 changes: 5 additions & 0 deletions src/index/flat/flat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,11 @@ class FlatIndexNode : public IndexNode {
return Status::success;
}

// File-based loading is not provided by the flat index node: both arguments are
// ignored and the call always reports Status::not_implemented.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override {
    static_cast<void>(filename);
    static_cast<void>(config);
    return Status::not_implemented;
}

std::unique_ptr<BaseConfig>
CreateConfig() const override {
return std::make_unique<FlatConfig>();
Expand Down
5 changes: 5 additions & 0 deletions src/index/flat_gpu/flat_gpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ class GpuFlatIndexNode : public IndexNode {
return Status::success;
}

// File-based loading is not provided by the GPU flat index node: both arguments
// are ignored and the call always reports Status::not_implemented.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override {
    static_cast<void>(filename);
    static_cast<void>(config);
    return Status::not_implemented;
}

virtual std::unique_ptr<BaseConfig>
CreateConfig() const override {
return std::make_unique<GpuFlatConfig>();
Expand Down
22 changes: 22 additions & 0 deletions src/index/hnsw/hnsw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@

#include <omp.h>

#include <exception>
#include <memory>
#include <new>

#include "common/range_util.h"
#include "hnswlib/hnswalg.h"
#include "hnswlib/hnswlib.h"
#include "index/hnsw/hnsw_config.h"
#include "knowhere/comp/index_param.h"
#include "knowhere/comp/thread_pool.h"
#include "knowhere/config.h"
#include "knowhere/expected.h"
#include "knowhere/factory.h"
#include "knowhere/log.h"

namespace knowhere {
class HnswIndexNode : public IndexNode {
Expand Down Expand Up @@ -321,6 +327,22 @@ class HnswIndexNode : public IndexNode {
return Status::success;
}

// Replaces the current index with one loaded from `filename`.
//
// filename: path of the serialized index to load.
// config:   load options, passed through to loadIndex().
//
// The previously held index (if any) is destroyed first. The replacement is built
// in a local owner and only published to index_ once loadIndex() has returned
// normally, so a failure leaves index_ null instead of dangling or pointing at a
// partially loaded index.
//
// Returns Status::success on success, or Status::hnsw_inner_error when the
// allocation fails or a std::exception escapes from construction or loading.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override {
    // Clear the pointer immediately after deleting: every early return below must
    // leave index_ in a state that is safe to delete again.
    delete index_;
    index_ = nullptr;
    try {
        hnswlib::SpaceInterface<float>* space = nullptr;
        std::unique_ptr<hnswlib::HierarchicalNSW<float>> loaded(
            new (std::nothrow) hnswlib::HierarchicalNSW<float>(space));
        if (!loaded) {
            // nothrow new signals allocation failure with nullptr instead of throwing.
            LOG_KNOWHERE_WARNING_ << "hnsw inner error, failed to allocate index";
            return Status::hnsw_inner_error;
        }
        loaded->loadIndex(filename, config);
        index_ = loaded.release();
    } catch (const std::exception& e) {
        LOG_KNOWHERE_WARNING_ << "hnsw inner error, " << e.what();
        return Status::hnsw_inner_error;
    }
    return Status::success;
}

std::unique_ptr<BaseConfig>
CreateConfig() const override {
return std::make_unique<HnswConfig>();
Expand Down
8 changes: 8 additions & 0 deletions src/index/ivf/ivf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class IvfIndexNode : public IndexNode {
Serialize(BinarySet& binset) const override;
// Overrides the base-class Deserialize(); declared here, defined out of line.
Status
Deserialize(const BinarySet& binset) override;
// Overrides the base-class file-based load; declared here, defined out of line.
// The generic definition reports Status::not_implemented.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override;
std::unique_ptr<BaseConfig>
CreateConfig() const override {
if constexpr (std::is_same<faiss::IndexIVFFlat, T>::value) {
Expand Down Expand Up @@ -605,6 +607,12 @@ IvfIndexNode<T>::Deserialize(const BinarySet& binset) {
return Status::success;
}

// Generic definition for every IvfIndexNode<T>: file-based loading is not
// provided, so both arguments are ignored and Status::not_implemented is returned.
template <typename T>
Status
IvfIndexNode<T>::DeserializeFromFile(const std::string& filename, const LoadConfig& config) {
    static_cast<void>(filename);
    static_cast<void>(config);
    return Status::not_implemented;
}

template <>
Status
IvfIndexNode<faiss::IndexIVFFlat>::Deserialize(const BinarySet& binset) {
Expand Down
5 changes: 5 additions & 0 deletions src/index/ivf_gpu/ivf_gpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ class GpuIvfIndexNode : public IndexNode {
return Status::success;
}

// File-based loading is not provided by the GPU IVF index node: both arguments
// are ignored and the call always reports Status::not_implemented.
Status
DeserializeFromFile(const std::string& filename, const LoadConfig& config) override {
    static_cast<void>(filename);
    static_cast<void>(config);
    return Status::not_implemented;
}

virtual std::unique_ptr<BaseConfig>
CreateConfig() const override {
return std::make_unique<typename KnowhereConfigType<T>::Type>();
Expand Down
62 changes: 62 additions & 0 deletions src/io/fileIO.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (C) 2019-2023 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#pragma once
#include <sys/fcntl.h>
#include <unistd.h>

#include <stdexcept>

namespace knowhere {
// Thin wrapper around a read-only POSIX file descriptor.
//
// The descriptor is NOT released automatically: there is no destructor, so the
// owner must call close() when finished. Copying a FileReader copies the raw
// descriptor number, so all copies refer to the same open file.
struct FileReader {
    int fd{-1};      // underlying descriptor; -1 when not open
    size_t size{0};  // file size in bytes, measured when the file was opened

    // Opens `filename` read-only and records its size.
    //
    // filename:    path of the file to open.
    // auto_remove: when true, the path is unlinked right after a successful open;
    //              the descriptor stays usable until close().
    //
    // Throws std::runtime_error if the file cannot be opened or its size cannot
    // be determined. No descriptor is leaked when the constructor throws.
    FileReader(const std::string& filename, bool auto_remove = false) {
        fd = open(filename.data(), O_RDONLY);
        if (fd < 0) {
            throw std::runtime_error("Cannot open file");
        }

        // Measure the file by seeking to its end, then rewind for reading.
        const off_t end_pos = lseek(fd, 0, SEEK_END);
        if (end_pos < 0 || lseek(fd, 0, SEEK_SET) < 0) {
            // The destructor-less struct cannot clean up after a throwing
            // constructor, so release the descriptor here.
            ::close(fd);
            fd = -1;
            throw std::runtime_error("Cannot determine file size");
        }
        size = static_cast<size_t>(end_pos);

        if (auto_remove) {
            unlink(filename.data());
        }
    }

    // Reads up to `n` bytes into `dst` from the current position.
    // Returns the result of ::read (bytes read, 0 at end of file, -1 on error).
    ssize_t
    read(char* dst, size_t n) {
        return ::read(fd, dst, n);
    }

    // Moves to absolute position `offset`. Returns the new position, or -1 on error.
    off_t
    seek(off_t offset) {
        return lseek(fd, offset, SEEK_SET);
    }

    // Moves by `offset` bytes relative to the current position.
    // Returns the new position, or -1 on error.
    off_t
    advance(off_t offset) {
        return lseek(fd, offset, SEEK_CUR);
    }

    // Returns the current position, or -1 on error.
    off_t
    offset() {
        return lseek(fd, 0, SEEK_CUR);
    }

    // Closes the descriptor and returns the result of ::close.
    // The stored descriptor is reset to -1 so a repeated close() fails cleanly
    // instead of closing an unrelated descriptor that reused the same number.
    int
    close() {
        const int rc = ::close(fd);
        fd = -1;
        return rc;
    }
};
} // namespace knowhere
140 changes: 0 additions & 140 deletions thirdparty/hnswlib/hnswlib/bruteforce.h

This file was deleted.

Loading

0 comments on commit 7e52ec0

Please sign in to comment.