Skip to content

Commit

Permalink
...
Browse files (browse the repository at this point in the history)
  • Loading branch information
simongog committed Jun 7, 2013
1 parent 8f5de44 commit 91749ff
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 132 deletions.
2 changes: 1 addition & 1 deletion CPM2013/generate_figure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ pdflatex ${tmp_tex_file}
mv ${tmp_pdf_file} ..
cd ..

rm ${tmp_dir}/*
#rm ${tmp_dir}/*
rm ${basename}_rev
76 changes: 68 additions & 8 deletions query_performance/query_details.R
Original file line number Diff line number Diff line change
@@ -1,24 +1,84 @@
source("../src/basic_functions.R")
library("tikzDevice")

fn="web-256MB"



fn="web-4GB"
data <- data_frame_from_key_value_pairs( paste("query_performance_details_",fn,".txt", sep="" ) )

data['rtime_full'] <- data['rtime_full']/data['full_queries']
data['utime_full'] <- data['utime_full']/data['full_queries']

data <- subset(data, data[["pattern_file_name"]]=="../pattern/web-4GB.40.1000.0.75.125.pattern")
#data <- subset(data, data[["pattern_file_name"]]=="../pattern/web-4GB.40.1000.0.8.12.pattern")
data <- subset(data, data[["fac_dens"]] < 513)

d_mean <- aggregate(data[c('rtime_full','utime_full','fac_dens','disk_access_per_query','phase')] ,list(data[['fac_dens']],data[['phase']]),mean)

dd <- d_mean[order(d_mean['fac_dens'],d_mean['phase']),]
phases=6
tab <- dd[['rtime_full']]
dim(tab) <- c(phases, length(tab)/phases)
for(i in seq(2,phases)){
tab[i] <- tab[i]-tab[i-1]
for(i in seq(phases, 2)){
tab[i,] <- tab[i,]-tab[i-1,]
}

mycol=topo.colors(2*phases)[seq(1,2*phases,2)]
barplot(tab, names.arg=unique(dd[['fac_dens']]), ylab="elapsed time per query phase in [millisec]",col=mycol, xlab="K",
main=paste("Detailed rtime of a count query ", "l=40, k=100",sep=""))
legend("topleft", legend=rev(c("condensed BWT matching","load disk block","build block tree","block tree matching","load text","match pattern")),
fill=rev(mycol))
#mycol <- topo.colors(2*phases)[seq(1,2*phases,2)]
mycol <- terrain.colors(phases)

fac_denss <- unique(dd[['fac_dens']])
fac_dens_label <- paste(rep("$",length(fac_denss)), fac_denss,rep("$",length(fac_denss)),sep="")

tikz("/Users/sgog/Downloads/cpm/fig/detailed_query_time.tex", width="2.4", height="2.3")

par( oma=c(0.1,0.1,0.1,0.1) )
par( mar=c(1.5,1.5,0.1,0.1) )

barplot(tab, names.arg=rep("", length(fac_dens_label)), col=mycol, axes=F)

legend("topleft", legend=rev(c("internal matching","load disk block","build block tree","block tree matching","load text","match pattern")),
fill=rev(mycol), bty="o", box.lwd=0, bg="white", inset=c(0.01,-0.05), y.intersp=0.8 )

#grid(col="gray")

barp <- barplot(tab, ylab="Runtime per query phase",col=mycol, add=T, cex.axis=0.8, yaxt="n")
axis(1, at=barp, seq(1, length(fac_dens_label)), labels=fac_dens_label, cex.axis=0.9, lty=0, line=-0.6)
axis(2, line=-0.5, cex.axis=0.8, lty=0)
axis(2, line=-0.2, cex.axis=0.8, labels=F )

dev.off()



tikz("/Users/sgog/Downloads/cpm/fig/detailed_space.tex", width="2.4", height="2.3")

par( oma=c(0.1,0.1,0.1,0.1) )
par( mar=c(1.5,1.5,0.1,0.1) )



data <- data_frame_from_key_value_pairs("../space_usage/space_web-4GB.txt")
data <- data[order(data[['fac_dens']]),]

data <- subset(data, data[['fac_dens']] %in% c(0,1,4,16,64,256))

size <- c( data[['compact_text_in_megabyte']], data[['header_in_megabyte']], data[['bp_ct_in_megabyte']], data[['lcp_in_megabyte']], data[['sa_in_megabyte']] )

dim(size) <- c( nrow(data), 5)
size <- t(size)

rcol=terrain.colors(5)

barp <- barplot( size/(data[["n"]]/(1024*1024)), col=rcol,
ylab="external index size in percent of orig. text", ylim=c(0, 3.2), yaxt="n" )

axis(1, at=barp, labels = data[['fac_dens']], cex.axis=0.9, lty=0, line=-0.6)
axis(2, line=-0.5, cex.axis=0.8, lty=0)
axis(2, line=-0.2, cex.axis=0.8, labels=F)

legend("topright", legend=rev(c("text representation","header","topology","LCP","text pointer")), fill=rev(rcol),
inset=c(-0.1,-0.05),
box.lwd=0, y.intersp=0.8)

dev.off()
88 changes: 0 additions & 88 deletions query_performance/query_performance_web-256MB.R

This file was deleted.

49 changes: 15 additions & 34 deletions sdsl/examples/64bit_array2int_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ using namespace sdsl;
using namespace std;

int main(int argc, char *argv[]){
if ( argc < 2 ){
cout<<"Usage: "<<argv[0]<<" input_file [output_file] [ouput_bit_width]"<<endl;
return 1;
}
size_t x = util::get_file_size(argv[1]);
const int BPI=8;
cout<<"file size in bytes = "<<x<<endl;
Expand All @@ -33,39 +29,25 @@ int main(int argc, char *argv[]){
const size_t BUF_SIZE = 6400000; // BUF_SIZE has to be a multiple of 64
uint64_t *buf = (uint64_t*)malloc(BUF_SIZE*BPI);
uint64_t max=0;
uint8_t width=0;
if ( argc < 3 ){
size_t frac=0, old_frac=0;
for (size_t i=0, len=BUF_SIZE*BPI; i<x; i+=len){
// cout<<i<<endl;
frac = 100*i/x;
if(frac>old_frac){ cout<<"."; if(frac%10==0) cout<<frac; }
old_frac=frac;
len = BUF_SIZE*BPI;
if ( i+len > x ){
len = x-i;
}
fread((char*)buf, 1, len, f);
for (size_t j=0; j<len/BPI; ++j){
if ( buf[j] > max )
max = buf[j];
// cout<<" "<<buf[j]<<endl;
}
for (size_t i=0, len=BUF_SIZE*BPI; i<x; i+=len){
len = BUF_SIZE*BPI;
if ( i+len > x ){
len = x-i;
}
fread((char*)buf, 1, len, f);
for (size_t j=0; j<len/BPI; ++j){
if ( buf[j] > max )
max = buf[j];
// cout<<" "<<buf[j]<<endl;
}
cout<<"Max value: "<<max<<endl;
width = bit_magic::l1BP(max)+1;
}else{
width = atoi(argv[3]);
}

cout<<"Max value: "<<max<<endl;
uint8_t width = bit_magic::l1BP(max)+1;
cout<<"width="<<(int)width<<endl;

// (2) scan file, bit-compress values and write to outfile
rewind(f); // reset file pointer
string ofile = string(argv[1])+".int_vector";
if ( argc>2 ){
ofile = string(argv[2]);
}
FILE *of = fopen(ofile.c_str(),"wb"); // open output file
if ( of == NULL ){
cout<<"ERROR: could not open output file "<<argv[1]<<endl;
Expand All @@ -89,12 +71,11 @@ int main(int argc, char *argv[]){
free(buf);
fclose(f);
fclose(of);
util::load_from_file(v, ofile.c_str());
cout<<"v.size()="<<v.size()<<endl;
cout<<"v[0]="<<v[0]<<endl;
const bool do_check = false;
if ( do_check){
util::load_from_file(v, ofile.c_str());
cout<<"v.size()="<<v.size()<<endl;
cout<<"v[0]="<<v[0]<<endl;

int_vector<> check;
util::load_vector_from_file(check, argv[1], BPI);
if ( check.size() != v.size() ){
Expand Down
2 changes: 1 addition & 1 deletion src/rosa_tikz.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef ROSA_TIKZ
#define ROSA_TIRKZ
#define ROSA_TIKZ

#include <vector>
#include <utility> // for pair
Expand Down

0 comments on commit 91749ff

Please sign in to comment.