diff --git a/.travis.yml b/.travis.yml index e5c1e140..86bd9cc7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: ruby before_install: - gem update --system - gem install bundler + - gem install jbundler rvm: - jruby-9.1.9.0 jdk: diff --git a/Gemfile b/Gemfile index e3ce676f..164b183b 100644 --- a/Gemfile +++ b/Gemfile @@ -9,7 +9,7 @@ platform :jruby do gem "tilt", "~> 2.0.7" group :development do - gem 'jar-dependencies', '0.3.11' + gem 'jar-dependencies', '0.3.10' gem 'jbundler', '~> 0.9.3' gem "rake" gem "warbler", "~> 2.0.3" diff --git a/Gemfile.lock b/Gemfile.lock index 5307b796..aa8c6578 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -29,7 +29,7 @@ GEM equalizer (0.0.11) ffi (1.9.18-java) ice_nine (0.11.2) - jar-dependencies (0.3.11) + jar-dependencies (0.3.10) jbundler (0.9.3) bundler (~> 1.5) jar-dependencies (~> 0.3) @@ -70,7 +70,7 @@ DEPENDENCIES bootstrap-sass (~> 3.2.0) compass cuba (~> 3.8.1) - jar-dependencies (= 0.3.11) + jar-dependencies (= 0.3.10) jbundler (~> 0.9.3) jruby-jars (= 9.1.12.0) rack (~> 2.0.3) diff --git a/lib/tabula_job_executor/jobs/generate_document_data.rb b/lib/tabula_job_executor/jobs/generate_document_data.rb index 41774d41..fad263c6 100644 --- a/lib/tabula_job_executor/jobs/generate_document_data.rb +++ b/lib/tabula_job_executor/jobs/generate_document_data.rb @@ -23,11 +23,9 @@ def perform 'size' => File.size(filepath), 'thumbnail_sizes' => options[:thumbnail_sizes] } - at(5, 100, "analyzing PDF text...") extractor = Tabula::Extraction::PagesInfoExtractor.new(filepath) - page_data = extractor.pages.to_a doc['page_count'] = page_data.size unless page_data.any? { |pd| pd[:hasText] } @@ -36,7 +34,6 @@ def perform end Tabula::Workspace.instance.add_document(doc, page_data) - at(100, 100, "complete") extractor.close! return nil diff --git a/lib/tabula_workspace.rb b/lib/tabula_workspace.rb index dcceee18..f3917414 100644 --- a/lib/tabula_workspace.rb +++ b/lib/tabula_workspace.rb @@ -6,6 +6,9 @@ class Workspace include JRuby::Synchronized include Singleton + STARTING_VALUE = {"pdfs" => [], "templates" => [], "version" => 2} + + def initialize(data_dir=TabulaSettings.getDataDir) unless File.directory?(data_dir) raise "DOCUMENTS_BASEPATH does not exist or is not a directory." @@ -13,8 +16,7 @@ def initialize(data_dir=TabulaSettings.getDataDir) @data_dir = data_dir @workspace_path = File.join(@data_dir, "pdfs", "workspace.json") - @workspace = [] - + @workspace = STARTING_VALUE if !File.exists?(@workspace_path) FileUtils.mkdir_p(File.join(@data_dir, "pdfs")) end @@ -22,14 +24,14 @@ def initialize(data_dir=TabulaSettings.getDataDir) def add_document(document, pages) read_workspace! - @workspace.unshift(document) + @workspace["pdfs"].unshift(document) add_file(pages.to_json, document['id'], 'pages.json') flush_workspace! end def delete_document(document_id) read_workspace! - @workspace.delete_if { |d| d['id'] == document_id } + @workspace["pdfs"].delete_if { |d| d['id'] == document_id } flush_workspace! FileUtils.rm_rf(get_document_dir(document_id)) @@ -42,7 +44,7 @@ def delete_page(document_id, page_number) def get_document_metadata(document_id) read_workspace! - @workspace.find { |d| d['id'] == document_id } + @workspace["pdfs"].find { |d| d['id'] == document_id } end def get_document_pages(document_id) @@ -53,14 +55,13 @@ def get_document_path(document_id) File.join(get_document_dir(document_id), 'document.pdf') end - def get_document_dir(document_id) - p = File.join(@data_dir, 'pdfs', document_id) - if !File.directory?(p) - FileUtils.mkdir_p(p) - end - p + def list_documents + read_workspace! + @workspace["pdfs"] end + + def get_data_dir() @data_dir end @@ -77,12 +78,92 @@ def move_file(path, document_id, filename) FileUtils.mv(path, File.join(get_document_dir(document_id), filename)) end + + + def list_templates + read_workspace! + @workspace["templates"] + end + + def get_template_metadata(template_id) + read_workspace! + @workspace["templates"].find { |d| d['id'] == template_id } + end + def get_template_body(template_id) + puts File.join(get_templates_dir, "#{template_id}.tabula-template.json") + open(File.join(get_templates_dir, "#{template_id}.tabula-template.json"), 'r'){|f| f.read } + end + + def add_template(template_metadata) + read_workspace! + + # write template metadata to workspace + @workspace["templates"].insert(0,{ + "name" => template_metadata["name"].gsub(".tabula-template.json", ""), + "selection_count" => template_metadata["selection_count"], + "page_count" => template_metadata["page_count"], + "time" => template_metadata["time"], + "id" => template_metadata["id"] + }) + # write template file to disk + write_template_file(template_metadata) + flush_workspace! + end + + def replace_template_metadata(template_id, template_metadata) + read_workspace! + idx = @workspace["templates"].index{|t| t["id"] == template_id} + @workspace["templates"][idx] = template_metadata.select{|k,_| ["name", "selection_count", "page_count", "time", "id"].include?(k) } + flush_workspace! + end + + + + def delete_template(template_id) + read_workspace! + @workspace["templates"].delete_if { |t| t['id'] == template_id } + flush_workspace! + File.delete(File.join(get_templates_dir, "#{template_id}.tabula-template.json")) + end + + private + def write_template_file(template_metadata) + template_name = template_metadata["name"] + template_id = Digest::SHA1.hexdigest(Time.now.to_s + template_name) # just SHA1 of time isn't unique with multiple uploads + template_filename = template_id + ".tabula-template.json" + open(File.join(get_templates_dir, template_filename), 'w'){|f| f << JSON.dump(template_metadata["template"])} + end + + def get_templates_dir + p = File.join(@data_dir, 'templates') + if !File.directory?(p) + FileUtils.mkdir_p(p) + end + p + end + def get_document_dir(document_id) + p = File.join(@data_dir, 'pdfs', document_id) + if !File.directory?(p) + FileUtils.mkdir_p(p) + end + p + end + + def read_workspace! + return STARTING_VALUE unless File.exists?(@workspace_path) File.open(@workspace_path) do |f| @workspace = JSON.parse(f.read) end + # what if the already-existing workspace is v1? i.e. if it's just an array? + # then we'll make it the new kind, seamlessly. + if @workspace.is_a? Array + @workspace = {"pdfs" => @workspace, "templates" => [], "version" => 2} + flush_workspace! + end + @workspace end def flush_workspace! diff --git a/webapp/index.html b/webapp/index.html index 543a3428..86f5af0c 100644 --- a/webapp/index.html +++ b/webapp/index.html @@ -44,6 +44,7 @@