Skip to content

Commit

Permalink
working dynamic vertex data
Browse files Browse the repository at this point in the history
  • Loading branch information
Aapo Kyrola committed Jan 23, 2013
1 parent d1d20dd commit 767474b
Show file tree
Hide file tree
Showing 8 changed files with 306 additions and 21 deletions.
14 changes: 12 additions & 2 deletions graphchi_xcode/graphchi_xcode.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@
5F359BF81596B76200AF7672 /* graphchi_program.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = graphchi_program.hpp; sourceTree = "<group>"; };
5F3AA7AC15A632A300B467C1 /* binary_adjacency_list.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = binary_adjacency_list.hpp; sourceTree = "<group>"; };
5F3B0889158AC5520058A8B1 /* graphchi_engine.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = graphchi_engine.hpp; sourceTree = "<group>"; };
5F3D4F10164DF5C2003A50A6 /* inmemconncomp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = inmemconncomp.cpp; sourceTree = "<group>"; };
5F54B84615FD4B2500B3842C /* chivector.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = chivector.hpp; sourceTree = "<group>"; };
5F54B84715FD4C1900B3842C /* dynamicdata_smoketest.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = dynamicdata_smoketest.cpp; sourceTree = "<group>"; };
5F54B85115FD4D9F00B3842C /* test_dynamicedata */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = test_dynamicedata; sourceTree = BUILT_PRODUCTS_DIR; };
Expand All @@ -217,6 +216,8 @@
5F56F703159B771900423ECE /* bulksync_functional_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bulksync_functional_test.cpp; sourceTree = "<group>"; };
5F56F713159B78BF00423ECE /* test_bulksync_functional */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = test_bulksync_functional; sourceTree = BUILT_PRODUCTS_DIR; };
5F56F71E159B918100423ECE /* basic_smoketest.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = basic_smoketest.cpp; sourceTree = "<group>"; };
5F67653F16AF2EAF00359562 /* inmemconncomps.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = inmemconncomps.cpp; sourceTree = "<group>"; };
5F67654116AF31C500359562 /* vertex_data_dynamic.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = vertex_data_dynamic.hpp; sourceTree = "<group>"; };
5F74B03715D349EF00ED3EA9 /* cgs_lda_vertexprogram.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = cgs_lda_vertexprogram.hpp; sourceTree = "<group>"; };
5F74B03815D3530A00ED3EA9 /* cgs_lda.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = cgs_lda.cpp; sourceTree = "<group>"; };
5F74B04215D3532600ED3EA9 /* graphlab_lda */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = graphlab_lda; sourceTree = BUILT_PRODUCTS_DIR; };
Expand Down Expand Up @@ -686,6 +687,14 @@
path = tests;
sourceTree = "<group>";
};
5F67654016AF31C500359562 /* dynamicdata */ = {
isa = PBXGroup;
children = (
5F67654116AF31C500359562 /* vertex_data_dynamic.hpp */,
);
path = dynamicdata;
sourceTree = "<group>";
};
5F74B03515D349EF00ED3EA9 /* graphlab_toolkit_ports */ = {
isa = PBXGroup;
children = (
Expand Down Expand Up @@ -775,6 +784,7 @@
5F7A10341589266800748D0D /* auxdata */ = {
isa = PBXGroup;
children = (
5F67654016AF31C500359562 /* dynamicdata */,
5F7A10351589266800748D0D /* degree_data.hpp */,
5F7A10361589266800748D0D /* vertex_data.hpp */,
);
Expand Down Expand Up @@ -893,11 +903,11 @@
5FCC1EBD1599F59A0003D0E9 /* application_template.cpp */,
5FCC1EBE1599F59A0003D0E9 /* communitydetection.cpp */,
5FCC1EBF1599F59A0003D0E9 /* connectedcomponents.cpp */,
5F67653F16AF2EAF00359562 /* inmemconncomps.cpp */,
5FCC1EC01599F59A0003D0E9 /* pagerank.cpp */,
5FCC1EC11599F59A0003D0E9 /* pagerank_functional.cpp */,
5FCC242B15A378DC0003D0E9 /* streaming_pagerank.cpp */,
5FCC2104159DEC0E0003D0E9 /* trianglecounting.cpp */,
5F3D4F10164DF5C2003A50A6 /* inmemconncomp.cpp */,
5F0A1FBC16A9FBA50066FB56 /* randomwalks.cpp */,
);
name = example_apps;
Expand Down
7 changes: 5 additions & 2 deletions src/api/chifilenames.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,11 @@ namespace graphchi {
intervalsF.close();
}

static size_t get_num_vertices(std::string basefilename);
static size_t get_num_vertices(std::string basefilename) {
/**
* Returns the number of vertices in a graph. The value is stored in a separate file <graphname>.numvertices
*/
static VARIABLE_IS_NOT_USED size_t get_num_vertices(std::string basefilename);
static VARIABLE_IS_NOT_USED size_t get_num_vertices(std::string basefilename) {
std::string numv_filename = basefilename + ".numvertices";
std::ifstream vfileF(numv_filename.c_str());
if (!vfileF.good()) {
Expand Down
6 changes: 2 additions & 4 deletions src/api/dynamicdata/chivector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class extension_pool {
template <typename T>
class chivector {

uint16_t origsize;
uint16_t nsize;
uint16_t ncapacity;
T * data;
Expand All @@ -65,8 +64,7 @@ class chivector {
}

chivector(uint16_t sz, uint16_t cap, T * dataptr) : data(dataptr) {
origsize = sz;
nsize = origsize;
nsize = sz;
ncapacity = cap;
assert(cap >= nsize);
extensions = NULL;
Expand Down Expand Up @@ -108,7 +106,7 @@ class chivector {

T get(int idx) {
if (idx >= ncapacity) {
return (* extensions)[idx - (int)origsize];
return (* extensions)[idx - (int)ncapacity];
} else {
return data[idx];
}
Expand Down
8 changes: 8 additions & 0 deletions src/api/graph_objects.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,17 @@ namespace graphchi {
/**
* Get the value of vertex
*/
#ifndef DYNAMICVERTEXDATA
VertexDataType get_data() {
return *(this->dataptr);
}
#else
// VertexDataType must be a chivector
VertexDataType * get_vector() {
this->modified = true; // Assume vector always modified... Temporaryh solution.
return this->dataptr;
}
#endif

/**
* Modify the vertex value. The new value will be
Expand Down
223 changes: 223 additions & 0 deletions src/engine/auxdata/dynamicdata/vertex_data_dynamic.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@


/**
* @file
* @author Aapo Kyrola <akyrola@cs.cmu.edu>
* @version 1.0
*
* @section LICENSE
*
* Copyright [2012] [Aapo Kyrola, Guy Blelloch, Carlos Guestrin / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @section DESCRIPTION
*
* The class manages vertex values (vertex data) when the
* vertex data is dynamic. That is, the vertex data type must
* be a chivector.
*
* To enable dynamically sized data, vertex data must be stored in
* small (1 million-vertex) blocks.
*/


#ifndef DYNAMICVERTEXDATA
ERROR(DYNAMICVERTEXDATA NEEDS TO BE DEFINED)
#endif

#ifndef DEF_GRAPHCHI_VERTEXDATA
#define DEF_GRAPHCHI_VERTEXDATA

#include <stdlib.h>
#include <string>
#include <fcntl.h>
#include <errno.h>
#include <sys/stat.h>
#include <assert.h>

#include "graphchi_types.hpp"
#include "api/chifilenames.hpp"
#include "io/stripedio.hpp"
#include "util/ioutil.hpp"
#include "api/dynamicdata/chivector.hpp"
#include "shards/dynamicdata/dynamicblock.hpp"

namespace graphchi {

template <typename VertexDataType>
struct vdblock_t {
int blockid;
int fd;
uint8_t* data;
dynamicdata_block<VertexDataType> * dblock;
vdblock_t(int bid) : blockid(bid), data(NULL), dblock(NULL) {}
};

template <typename VertexDataType>
class vertex_data_store {

typedef vdblock_t<VertexDataType> vdblock;
protected:

stripedio * iomgr;

/* Current range of vertices in memory */
vid_t vertex_st;
vid_t vertex_en;

std::string dirname;
size_t verticesperblock;

VertexDataType * loaded_chunk;
std::vector<vdblock> loadedblocks; // Blocks currently in memory


public:

vertex_data_store(std::string base_filename, size_t nvertices, stripedio * iomgr) : iomgr(iomgr), loaded_chunk(NULL){
vertex_st = vertex_en = 0;
verticesperblock = 1024 * 1024;

dirname = filename_vertex_data<VertexDataType>(base_filename) + ".dynamic_blockdir";
check_size(nvertices);
}

virtual ~vertex_data_store() {
iomgr->wait_for_writes();
releaseblocks();
}


void check_size(size_t nvertices) {
int nblocks = (nvertices - 1) / verticesperblock + 1;
for(int i=0; i < nblocks; i++) {
init_block(i);
}
}


private:
std::string blockfilename(int blockid) {
std::stringstream ss;
ss << dirname;
ss << "/";
ss << blockid;
return ss.str();
}

void releaseblocks() {
for(int i=0; i < loadedblocks.size(); i++) {
delete(loadedblocks[i].dblock);
iomgr->managed_release(loadedblocks[i].fd, &loadedblocks[i].data);
iomgr->close_session(loadedblocks[i].fd);
loadedblocks[i].data = NULL;
loadedblocks[i].dblock = NULL;
}
loadedblocks.clear();
}

void init_block(int blockid) {
std::string bfilename = blockfilename(blockid);
if (!file_exists(bfilename)) {
mkdir(dirname.c_str(), 0777);
size_t initsize = verticesperblock * sizeof(typename VertexDataType::sizeword_t);
int f = open(bfilename.c_str(), O_RDWR | O_CREAT, S_IROTH | S_IWOTH | S_IWUSR | S_IRUSR);
uint8_t * zeros = (uint8_t *) calloc(verticesperblock, sizeof(typename VertexDataType::sizeword_t));
write_compressed(f, zeros, initsize);
free(zeros);

write_block_uncompressed_size(bfilename, initsize);
close(f);
}
}

void clear(size_t nvertices) {
assert(false); // Not implemented
}

vdblock load_block(int blockid) {
vdblock db(blockid);

std::string blockfname = blockfilename(blockid);
db.fd = iomgr->open_session(blockfname, false, true);
int realsize = get_block_uncompressed_size(blockfname, -1);
assert(realsize > 0);

iomgr->managed_malloc(db.fd, &db.data, realsize, 0);
iomgr->managed_preada_now(db.fd, &db.data, realsize, 0);
db.dblock = new dynamicdata_block<VertexDataType>(verticesperblock, (uint8_t *)db.data, realsize);
return db;
}

void write_block(vdblock &block) {
int realsize;
uint8_t * outdata;
block.dblock->write(&outdata, realsize);
std::string blockfname = blockfilename(block.blockid);
iomgr->managed_pwritea_now(block.fd, &outdata, realsize, 0); /* Need to write whole block in the compressed regime */
write_block_uncompressed_size(blockfname, realsize);
}

public:

/**
* Loads a chunk of vertex values
* @param vertex_st first vertex id
* @param vertex_en last vertex id, inclusive
*/
virtual void load(vid_t _vertex_st, vid_t _vertex_en) {
assert(_vertex_en >= _vertex_st);
vertex_st = _vertex_st;
vertex_en = _vertex_en;

releaseblocks();

int min_blockid = vertex_st / verticesperblock;
int max_blockid = vertex_en / verticesperblock;
for(int i=min_blockid; i <= max_blockid; i++) {
loadedblocks.push_back(load_block(i));
}
}

/**
* Saves the current chunk of vertex values
*/
virtual void save(bool async=false) {
for(int i=0; i < loadedblocks.size(); i++) {
write_block(loadedblocks[i]);
}
}


/**
* Returns id of the first vertex currently in memory. Fails if nothing loaded yet.
*/
vid_t first_vertex_id() {
return vertex_st;
}


VertexDataType * vertex_data_ptr(vid_t vertexid) {
int blockid = vertexid / verticesperblock;
int firstloaded = loadedblocks[0].blockid;
dynamicdata_block<VertexDataType> * dynblock = loadedblocks[blockid - firstloaded].dblock;
return dynblock->edgevec(vertexid % verticesperblock);
}


};
}

#endif
5 changes: 5 additions & 0 deletions src/engine/auxdata/vertex_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
/* Note: This class shares a lot of code with the degree_data.hpp. It might be
useful to have a common base class "sequential-file". */

#ifdef DYNAMICVERTEXDATA
#include "auxdata/dynamicdata/vertex_data_dynamic.hpp"
#else

#ifndef DEF_GRAPHCHI_VERTEXDATA
#define DEF_GRAPHCHI_VERTEXDATA

Expand Down Expand Up @@ -147,4 +151,5 @@ namespace graphchi {
}

#endif
#endif

14 changes: 7 additions & 7 deletions src/shards/dynamicdata/dynamicblock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,16 @@ namespace graphchi {

template <typename ET>
struct dynamicdata_block {
int nedges;
int nitems;
uint8_t * data;
ET * chivecs;

dynamicdata_block() : data(NULL), chivecs(NULL) {}

dynamicdata_block(int nedges, uint8_t * data, int datasize) : nedges(nedges){
chivecs = new ET[nedges];
dynamicdata_block(int nitems, uint8_t * data, int datasize) : nitems(nitems){
chivecs = new ET[nitems];
uint8_t * ptr = data;
for(int i=0; i < nedges; i++) {
for(int i=0; i < nitems; i++) {
assert(ptr - data <= datasize);
typename ET::sizeword_t * sz = ((typename ET::sizeword_t *) ptr);
ptr += sizeof(typename ET::sizeword_t);
Expand All @@ -77,21 +77,21 @@ namespace graphchi {
}

ET * edgevec(int i) {
assert(i < nedges);
assert(i < nitems);
assert(chivecs != NULL);
return &chivecs[i];
}

void write(uint8_t ** outdata, int & size) {
// First compute size
size = 0;
for(int i=0; i < nedges; i++) {
for(int i=0; i < nitems; i++) {
size += chivecs[i].capacity() * sizeof(typename ET::element_type_t) + sizeof(typename ET::sizeword_t);
}

*outdata = (uint8_t *) malloc(size);
uint8_t * ptr = *outdata;
for(int i=0; i < nedges; i++) {
for(int i=0; i < nitems; i++) {
ET & vec = chivecs[i];
((uint16_t *) ptr)[0] = vec.size();
((uint16_t *) ptr)[1] = vec.capacity();
Expand Down
Loading

0 comments on commit 767474b

Please sign in to comment.