From 71647ef169266f37894ab77c1f3bfb8b7ad505ad Mon Sep 17 00:00:00 2001 From: Seamus Abshere Date: Thu, 18 Feb 2010 14:05:34 -0500 Subject: [PATCH] Get rid of association code. Focus on idempotency. Log runs. --- .gitignore | 1 + CHANGELOG | 5 + README.rdoc | 26 +- Rakefile | 9 +- lib/data_miner.rb | 77 ++- lib/data_miner/active_record_ext.rb | 25 - lib/data_miner/attribute.rb | 397 ++++++-------- lib/data_miner/attribute_collection.rb | 51 -- lib/data_miner/configuration.rb | 113 ++-- lib/data_miner/import.rb | 57 ++ lib/data_miner/process.rb | 21 + lib/data_miner/run.rb | 7 + lib/data_miner/step.rb | 64 --- lib/data_miner/step/associate.rb | 9 - lib/data_miner/step/await.rb | 35 -- lib/data_miner/step/callback.rb | 22 - lib/data_miner/step/derive.rb | 9 - lib/data_miner/step/import.rb | 57 -- lib/data_miner/target.rb | 7 + test/data_miner_test.rb | 692 +++++++++++++++++++++++-- test/test_helper.rb | 137 ++++- 21 files changed, 1159 insertions(+), 662 deletions(-) delete mode 100644 lib/data_miner/active_record_ext.rb delete mode 100644 lib/data_miner/attribute_collection.rb create mode 100644 lib/data_miner/import.rb create mode 100644 lib/data_miner/process.rb create mode 100644 lib/data_miner/run.rb delete mode 100644 lib/data_miner/step.rb delete mode 100644 lib/data_miner/step/associate.rb delete mode 100644 lib/data_miner/step/await.rb delete mode 100644 lib/data_miner/step/callback.rb delete mode 100644 lib/data_miner/step/derive.rb delete mode 100644 lib/data_miner/step/import.rb create mode 100644 lib/data_miner/target.rb diff --git a/.gitignore b/.gitignore index 5dcb581..4d7a1b3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ coverage rdoc pkg test/test.sqlite3 +data_miner.log diff --git a/CHANGELOG b/CHANGELOG index 32ca30f..57c1228 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,2 +1,7 @@ 0.2.6 * Upgrade to remote_table 0.1.6 to handle UTF-8 CSVs and long urls. +0.3.0 +* Removed association code... now data_miner focuses on just importing. +* New, simpler DSL +* Upgrade to remote_table 0.2.1 for row_hashes and better blank row handling +* Remove all association-related code diff --git a/README.rdoc b/README.rdoc index c70ac3f..6999d75 100644 --- a/README.rdoc +++ b/README.rdoc @@ -8,15 +8,15 @@ Put this in config/environment.rb: config.gem 'data_miner' -You need to define mine_data blocks in your ActiveRecord models. For example, in app/models/country.rb: +You need to define data_miner blocks in your ActiveRecord models. For example, in app/models/country.rb: class Country < ActiveRecord::Base - mine_data do |step| + data_miner do |step| # import country names and country codes step.import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr| - attr.key :iso_3166, :name_in_source => 'country code' - attr.store :iso_3166, :name_in_source => 'country code' - attr.store :name, :name_in_source => 'country' + attr.key :iso_3166, :field_name => 'country code' + attr.store :iso_3166, :field_name => 'country code' + attr.store :name, :field_name => 'country' end end end @@ -26,7 +26,7 @@ You need to define mine_data blocks in your ActiveRecord models. For ex class Airport < ActiveRecord::Base belongs_to :country - mine_data do |step| + data_miner do |step| # import airport iata_code, name, etc. step.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |attr| attr.key :iata_code, :field_number => 3 @@ -43,12 +43,8 @@ You need to define mine_data blocks in your ActiveRecord models. For ex Put this in lib/tasks/data_miner_tasks.rake: (unfortunately I don't know a way to automatically include gem tasks, so you have to do this manually for now) namespace :data_miner do - task :mine => :environment do - DataMiner.mine :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact - end - - task :map_to_attrs => :environment do - DataMiner.map_to_attrs ENV['METHOD'], :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact + task :run => :environment do + DataMiner.run :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact end end @@ -60,9 +56,9 @@ You need to specify what order to mine data. For example, in config/initiali # etc end -Once you have (1) set up the order of data mining and (2) defined mine_data blocks in your classes, you can: +Once you have (1) set up the order of data mining and (2) defined data_miner blocks in your classes, you can: - $ rake data_miner:mine + $ rake data_miner:run ==Complete example @@ -75,7 +71,7 @@ Once you have (1) set up the order of data mining and (2) defined mine_data< [...edit per quick start...] ~/testapp $ touch config/initializers/data_miner_config.rake [...edit per quick start...] - ~/testapp $ rake data_miner:mine + ~/testapp $ rake data_miner:run Now you should have diff --git a/Rakefile b/Rakefile index 72c1bb0..32d693c 100644 --- a/Rakefile +++ b/Rakefile @@ -10,8 +10,13 @@ begin gem.email = "seamus@abshere.net" gem.homepage = "http://github.com/seamusabshere/data_miner" gem.authors = ["Seamus Abshere", "Andy Rossmeissl"] - %w{ activerecord activesupport andand errata conversions }.each { |name| gem.add_dependency name } - gem.add_dependency 'remote_table', '0.1.6' + gem.add_dependency 'remote_table', '~>0.2.1' + gem.add_dependency 'activerecord', '~>2.3.4' + gem.add_dependency 'activesupport', '~>2.3.4' + gem.add_dependency 'andand', '~>1.3.1' + gem.add_dependency 'errata', '~>0.1.4' + gem.add_dependency 'conversions', '~>1.4.3' + gem.add_dependency 'blockenspiel', '~>0.3.2' gem.require_path = "lib" gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild gem.rdoc_options << '--line-numbers' << '--inline-source' diff --git a/lib/data_miner.rb b/lib/data_miner.rb index 8d660c0..9821a4b 100644 --- a/lib/data_miner.rb +++ b/lib/data_miner.rb @@ -1,43 +1,66 @@ -require 'rubygems' -require 'activesupport' -require 'activerecord' +require 'active_support' +require 'active_record' +require 'blockenspiel' require 'conversions' require 'remote_table' require 'errata' +require 'andand' +require 'log4r' -require 'data_miner/active_record_ext' require 'data_miner/attribute' -require 'data_miner/attribute_collection' require 'data_miner/configuration' require 'data_miner/dictionary' -require 'data_miner/step' -require 'data_miner/step/associate' -require 'data_miner/step/await' -require 'data_miner/step/callback' -require 'data_miner/step/derive' -require 'data_miner/step/import' -require 'data_miner/william_james_cartesian_product' # TODO: move to gem +require 'data_miner/import' +require 'data_miner/process' +require 'data_miner/target' +require 'data_miner/run' + +# TODO: move to gem +require 'data_miner/william_james_cartesian_product' module DataMiner - class << self - def mine(options = {}) - DataMiner::Configuration.mine options - end - - def map_to_attrs(method, options = {}) - puts DataMiner::Configuration.map_to_attrs(method, options) - end + class MissingHashColumn < RuntimeError; end + + include Log4r - def enqueue(&block) - DataMiner::Configuration.enqueue &block - end - - def classes - DataMiner::Configuration.classes + mattr_accessor :logger + + def self.start_logging + if defined?(Rails) + self.logger = Rails.logger + else + self.logger = Logger.new 'data_miner' + logger.outputters = FileOutputter.new 'f1', :filename => 'data_miner.log' end end + + def self.run(options = {}) + DataMiner::Configuration.run options + end + + def self.enqueue(&block) + DataMiner::Configuration.enqueue &block + end + + def self.classes + DataMiner::Configuration.classes + end + + def self.create_tables + DataMiner::Configuration.create_tables + end end ActiveRecord::Base.class_eval do - include DataMiner::ActiveRecordExt + def self.data_miner(&block) + # this is class_eval'ed here so that each ActiveRecord descendant has its own copy, or none at all + class_eval { cattr_accessor :data_miner_config } + self.data_miner_config = DataMiner::Configuration.new self + + data_miner_config.before_invoke + Blockenspiel.invoke block, data_miner_config + data_miner_config.after_invoke + end end + +DataMiner.start_logging diff --git a/lib/data_miner/active_record_ext.rb b/lib/data_miner/active_record_ext.rb deleted file mode 100644 index b91ee94..0000000 --- a/lib/data_miner/active_record_ext.rb +++ /dev/null @@ -1,25 +0,0 @@ -module DataMiner - module ActiveRecordExt - def self.included(klass) - klass.extend(ClassMethods) - end - - module ClassMethods - def mine_data(options = {}, &block) - if defined?(NO_DATA_MINER) and NO_DATA_MINER == true - class_eval do - class << self - def data_mine - raise "NO_DATA_MINER is set to true, so data_mine is not available" - end - end - end - else - class_eval { cattr_accessor :data_mine } - self.data_mine = Configuration.new(self) - yield data_mine - end - end - end - end -end diff --git a/lib/data_miner/attribute.rb b/lib/data_miner/attribute.rb index 23a94c9..4fbf40a 100644 --- a/lib/data_miner/attribute.rb +++ b/lib/data_miner/attribute.rb @@ -1,299 +1,216 @@ module DataMiner class Attribute - attr_accessor :klass, :name, :options_for_step, :affected_by_steps, :key_for_steps + attr_accessor :klass, :name, :options_for_import def initialize(klass, name) @klass = klass - @name = name.to_sym - @options_for_step = {} - @affected_by_steps = [] - @key_for_steps = [] + @name = name + @options_for_import = {} end - - # polling questions - def report_find_or_create(step) - "Creates parents: #{klass}##{name} is set with #{reflection_klass(step)}.find_or_create_by_#{foreign_key(step)}" if wants_create?(step) - end - - def report_unnatural_order(step) - if ( - (rk = klass.reflect_on_association(weighting_association(step)).andand.klass) or - (wants_inline_association? and rk = reflection_klass(step)) - ) and - step.configuration.classes.index(rk) > step.configuration.classes.index(klass) and - step.options[:awaiting].andand.klass != klass - "Unnatural order: #{klass} comes before #{rk}" - end - end - + def inspect - "Attribute(#{klass}.#{name})" + "Attribute(#{klass}##{name})" end - def affected_by!(step, options = {}) - self.options_for_step[step] = options - self.affected_by_steps << step - end - - def affected_by?(step) - affected_by_steps.include?(step) - end - - def key_for!(step, options = {}) - self.options_for_step[step] = options - self.key_for_steps << step - end - - def key_for?(step) - key_for_steps.include?(step) + def stored_by?(import) + options_for_import.has_key?(import) end - def value_in_dictionary(step, key) - return *dictionary(step).lookup(key) # strip the array wrapper if there's only one element + def value_in_dictionary(import, key) + return *dictionary(import).lookup(key) # strip the array wrapper if there's only one element end - def value_in_source(step, row) - if wants_static?(step) - value = static(step) - elsif field_number(step) - if field_number(step).is_a?(Range) - value = field_number(step).map { |n| row[n] }.join(delimiter(step)) + def value_in_source(import, row) + if wants_static?(import) + value = static(import) + elsif field_number(import) + if field_number(import).is_a?(Range) + value = field_number(import).map { |n| row[n] }.join(delimiter(import)) else - value = row[field_number(step)] + value = row[field_number(import)] end else - value = row[name_in_source(step)] + value = row[field_name(import)] end return nil if value.nil? return value if value.is_a?(ActiveRecord::Base) # escape valve for parsers that look up associations directly value = value.to_s - value = value[keep(step)] if wants_keep?(step) - value = do_split(step, value) if wants_split?(step) + value = value[chars(import)] if wants_chars?(import) + value = do_split(import, value) if wants_split?(import) # taken from old errata... maybe we want to do this here value.gsub!(/[ ]+/, ' ') # text.gsub!('- ', '-') value.gsub!(/([^\\])~/, '\1 ') value.strip! - value.upcase! if wants_upcase?(step) - value = do_convert(step, row, value) if wants_conversion?(step) - value = do_sprintf(step, value) if wants_sprintf?(step) + value.upcase! if wants_upcase?(import) + value = do_convert(import, row, value) if wants_conversion?(import) + value = do_sprintf(import, value) if wants_sprintf?(import) value end - def value_from_row(step, row) - value = value_in_source(step, row) + def value_from_row(import, row) + value = value_in_source(import, row) return value if value.is_a?(ActiveRecord::Base) # carry through trapdoor - value = value_in_dictionary(step, value) if wants_dictionary?(step) - value = value_as_association(step, value) if wants_inline_association? + value = value_in_dictionary(import, value) if wants_dictionary?(import) value end - - def value_as_association(step, value) - @_value_as_association ||= {} - @_value_as_association[step] ||= {} - if !@_value_as_association[step].has_key?(value) - dynamic_matcher = wants_create?(step) ? "find_or_create_by_#{foreign_key(step)}" : "find_by_#{foreign_key(step)}" - @_value_as_association[step][value] = reflection_klass(step).send(dynamic_matcher, value) - end - @_value_as_association[step][value] - end - - # this will overwrite nils, even if wants_overwriting?(step) is false - def set_record_from_row(step, record, row) - return if !wants_overwriting?(step) and !record.send(name).nil? - value = value_from_row(step, row) + + # this will overwrite nils, even if wants_overwriting?(import) is false + def set_record_from_row(import, record, row) + return if !wants_overwriting?(import) and !record.send(name).nil? + value = value_from_row(import, row) record.send "#{name}=", value - $stderr.puts("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil? - end - - def perform(step) - case step.variant - when :associate - perform_association(step) - when :derive - if wants_update_all?(step) - perform_update_all(step) - elsif wants_weighted_average?(step) - perform_weighted_average(step) - else - perform_callback(step) - end - when :import - raise "This shouldn't be called, the import step is special" - end - end - - def perform_association(step) - raise "dictionary and prefix don't mix" if wants_dictionary?(step) and wants_prefix?(step) - klass.update_all("#{reflection.primary_key_name} = NULL") if wants_nullification?(step) - if wants_create?(step) - klass.find_in_batches do |batch| - batch.each do |record| - if wants_prefix?(step) - sql = "SELECT reflection_table.id FROM #{reflection_klass(step).quoted_table_name} AS reflection_table INNER JOIN #{klass.quoted_table_name} AS klass_table ON LEFT(klass_table.#{key(step)}, LENGTH(reflection_table.#{foreign_key(step)})) = reflection_table.#{foreign_key(step)} WHERE klass_table.id = #{record.id} ORDER BY LENGTH(reflection_table.#{foreign_key(step)}) DESC" - associated_id = ActiveRecord::Base.connection.select_value(sql) - next if associated_id.blank? - record.send("#{reflection.primary_key_name}=", associated_id) - else - dynamic_finder_value = record.send(key(step)) - dynamic_finder_value = value_in_dictionary(step, dynamic_finder_value) if wants_dictionary?(step) - next if dynamic_finder_value.blank? - associated = reflection_klass(step).send("find_or_create_by_#{foreign_key(step)}", dynamic_finder_value) # TODO cache results - record.send("#{name}=", associated) - end - record.save - end - end - else - reflection_klass(step).find_in_batches do |batch| - batch.each do |reflection_record| - klass.update_all ["#{reflection.primary_key_name} = ?", reflection_record.id], ["#{key(step)} = ?", reflection_record.send(foreign_key(step))] - end - end - end + DataMiner.logger.info("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil? end - def perform_update_all(step) - klass.update_all("#{name} = #{set(step)}", conditions(step)) + def unit_from_source(import, row) + row[units_field_name(import)].to_s.strip.underscore.to_sym end - def perform_weighted_average(step) - # handle weighting by scopes instead of associations - if weighting_association(step) and !klass.reflect_on_association(weighting_association(step)) - klass.find_in_batches do |batch| - batch.each do |record| - record.send "#{name}=", record.send(weighting_association(step)).weighted_average(name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step)) - record.save - end - end - else # there's no weighting association OR there is one and it's a valid association - klass.update_all_weighted_averages name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step), :association => weighting_association(step) - end - end - - def perform_callback(step) - case klass.method(callback(step)).arity - when 0: - klass.send(callback(step)) - when 1: - klass.send(callback(step), name) - when 2: - klass.send(callback(step), name, options_for_step[step]) - end - end - - def unit_from_source(step, row) - row[unit_in_source(step)].to_s.strip.underscore.to_sym + def do_convert(import, row, value) + value.to_f.convert((from_units(import) || unit_from_source(import, row)), to_units(import)) end - def do_convert(step, row, value) - from_unit = from(step) || unit_from_source(step, row) - value.to_f.convert(from_unit, to(step)) - end - - def do_sprintf(step, value) - if /\%[0-9\.]*f/.match(sprintf(step)) + def do_sprintf(import, value) + if /\%[0-9\.]*f/.match(sprintf(import)) value = value.to_f - elsif /\%[0-9\.]*d/.match(sprintf(step)) + elsif /\%[0-9\.]*d/.match(sprintf(import)) value = value.to_i end - sprintf(step) % value + sprintf(import) % value end - def do_split(step, value) - pattern = split_options(step)[:pattern] || /\s+/ # default is split on whitespace - keep = split_options(step)[:keep] || 0 # default is keep first element + def do_split(import, value) + pattern = split_options(import)[:pattern] || /\s+/ # default is split on whitespace + keep = split_options(import)[:keep] || 0 # default is keep first element value.to_s.split(pattern)[keep].to_s end def column_type - @column_type ||= klass.columns_hash[name.to_s].type + klass.columns_hash[name.to_s].type + end + + def dictionary(import) + raise "shouldn't ask for this" unless wants_dictionary?(import) # don't try to initialize if there are no dictionary options + Dictionary.new dictionary_options(import) + end + + # { + # :static => 'options_for_import[import].has_key?(:static)', + # :chars => :chars, + # :upcase => :upcase, + # :conversion => '!from_units(import).nil? or !units_field_name(import).nil?', + # :sprintf => :sprintf, + # :dictionary => :dictionary_options, + # :split => :split_options, + # :nullification => 'nullify(import) != false', + # :overwriting => 'overwrite(import) != false', + # }.each do |name, condition| + # condition = "!#{condition}(import).nil?" if condition.is_a?(Symbol) + # puts <<-EOS + # def wants_#{name}?(import) + # #{condition} + # end + # EOS + # end + def wants_split?(import) + !split_options(import).nil? + end + def wants_sprintf?(import) + !sprintf(import).nil? + end + def wants_upcase?(import) + !upcase(import).nil? + end + def wants_static?(import) + options_for_import[import].has_key?(:static) + end + def wants_nullification?(import) + nullify(import) != false + end + def wants_chars?(import) + !chars(import).nil? + end + def wants_overwriting?(import) + overwrite(import) != false + end + def wants_conversion?(import) + !from_units(import).nil? or !units_field_name(import).nil? + end + def wants_dictionary?(import) + !dictionary_options(import).nil? + end + + # { + # :field_name => { :default => :name, :stringify => true }, + # :delimiter => { :default => '", "' } + # }.each do |name, options| + # puts <<-EOS + # def #{name}(import) + # (options_for_import[import][:#{name}] || #{options[:default]})#{'.to_s' if options[:stringify]} + # end + # EOS + # end + def field_name(import) + (options_for_import[import][:field_name] || name).to_s + end + def delimiter(import) + (options_for_import[import][:delimiter] || ", ") + end + + # %w(dictionary split).each do |name| + # puts <<-EOS + # def #{name}_options(import) + # options_for_import[import][:#{name}] + # end + # EOS + # end + def dictionary_options(import) + options_for_import[import][:dictionary] + end + def split_options(import) + options_for_import[import][:split] end - - { - :static => 'options_for_step[step].has_key?(:static)', - :prefix => :prefix, - :create => :create, - :keep => :keep, - :upcase => :upcase, - :conversion => '!from(step).nil? or !unit_in_source(step).nil?', - :sprintf => :sprintf, - :dictionary => :dictionary_options, - :split => :split_options, - :update_all => :set, - :nullification => 'nullify(step) != false', - :overwriting => 'overwrite(step) != false', - :weighted_average => '!weighting_association(step).nil? or !weighting_column(step).nil?' - }.each do |name, condition| - condition = "!#{condition}(step).nil?" if condition.is_a?(Symbol) - eval <<-EOS - def wants_#{name}?(step) - #{condition} - end - EOS + + # %w(from_units to_units conditions sprintf nullify overwrite upcase units_field_name field_number chars static).each do |name| + # puts <<-EOS + # def #{name}(import) + # options_for_import[import][:#{name}] + # end + # EOS + # end + def from_units(import) + options_for_import[import][:from_units] end - - { - :name_in_source => { :default => :name, :stringify => true }, - :key => { :default => :name, :stringify => true }, - :foreign_key => { :default => 'key(step)', :stringify => true }, - :delimiter => { :default => '", "' } - }.each do |name, options| - eval <<-EOS - def #{name}(step) - (options_for_step[step][:#{name}] || #{options[:default]})#{'.to_s' if options[:stringify]} - end - EOS + def to_units(import) + options_for_import[import][:to_units] end - - def reflection - if @_reflection.nil? - @_reflection = klass.reflect_on_association(name) || :missing - reflection - elsif @_reflection == :missing - nil - else - @_reflection - end + def conditions(import) + options_for_import[import][:conditions] end - - def reflection_klass(step) - return nil unless reflection - if reflection.options[:polymorphic] - polymorphic_type(step).andand.constantize - else - reflection.klass - end + def sprintf(import) + options_for_import[import][:sprintf] end - - def wants_inline_association? - reflection.present? + def nullify(import) + options_for_import[import][:nullify] end - - def callback(step) - (options_for_step[step][:callback] || "derive_#{name}").to_sym + def overwrite(import) + options_for_import[import][:overwrite] end - - def dictionary(step) - raise "shouldn't ask for this" unless wants_dictionary?(step) # don't try to initialize if there are no dictionary options - @dictionaries ||= {} - @dictionaries[step] ||= Dictionary.new(dictionary_options(step)) + def upcase(import) + options_for_import[import][:upcase] end - - %w(dictionary split).each do |name| - eval <<-EOS - def #{name}_options(step) - options_for_step[step][:#{name}] - end - EOS + def units_field_name(import) + options_for_import[import][:units_field_name] end - - %w(from to set conditions weighting_association weighting_column weighting_disaggregator sprintf nullify overwrite upcase prefix unit_in_source field_number keep create static polymorphic_type).each do |name| - eval <<-EOS - def #{name}(step) - options_for_step[step][:#{name}] - end - EOS + def field_number(import) + options_for_import[import][:field_number] + end + def chars(import) + options_for_import[import][:chars] + end + def static(import) + options_for_import[import][:static] end end end diff --git a/lib/data_miner/attribute_collection.rb b/lib/data_miner/attribute_collection.rb deleted file mode 100644 index 54631f6..0000000 --- a/lib/data_miner/attribute_collection.rb +++ /dev/null @@ -1,51 +0,0 @@ -module DataMiner - class AttributeCollection - attr_accessor :klass, :attributes - - def initialize(klass) - @klass = klass - @attributes = {} - end - - def key!(step, attr_name, attr_options = {}) - find_or_initialize(attr_name).key_for!(step, attr_options) - end - - def affect!(step, attr_name, attr_options = {}) - find_or_initialize(attr_name).affected_by!(step, attr_options) - end - - def affect_all_content_columns!(step, options = {}) - except = Array.wrap(options[:except]).map(&:to_sym) - step.klass.content_columns.map(&:name).reject { |content_column| except.include?(content_column.to_sym) }.each do |content_column| - find_or_initialize(content_column).affected_by!(step) - end - end - - def all_affected_by(step) - attributes.values.select { |attr| attr.affected_by?(step) } - end - - def all_keys_for(step) - attributes.values.select { |attr| attr.key_for?(step) } - end - - def all_for(step) - (all_affected_by(step) + all_keys_for(step)).uniq - end - - def has_keys_for?(step) - attributes.values.any? { |attr| attr.key_for?(step) } - end - - def has_conditional_writes_for?(step) - all_affected_by(step).any? { |attr| !attr.wants_overwriting?(step) } - end - - private - - def find_or_initialize(attr_name) - self.attributes[attr_name] ||= Attribute.new(klass, attr_name) - end - end -end diff --git a/lib/data_miner/configuration.rb b/lib/data_miner/configuration.rb index 9c123c0..4a060ea 100644 --- a/lib/data_miner/configuration.rb +++ b/lib/data_miner/configuration.rb @@ -1,61 +1,55 @@ module DataMiner class Configuration - attr_accessor :steps, :klass, :counter, :attributes, :awaiting + include Blockenspiel::DSL + + attr_accessor :klass, :runnables, :runnable_counter, :attributes, :unique_indices def initialize(klass) - @steps = [] + @runnables = Array.new + @unique_indices = Set.new @klass = klass - @counter = 0 - @attributes = AttributeCollection.new(klass) + @runnable_counter = 0 + @attributes = HashWithIndifferentAccess.new end - %w(import associate derive await).each do |method| - eval <<-EOS - def #{method}(*args, &block) - self.counter += 1 - if block_given? # FORM C - step_options = args[0] || {} - set_awaiting!(step_options) - self.steps << Step::#{method.camelcase}.new(self, counter, step_options, &block) - elsif args[0].is_a?(Hash) # FORM A - step_options = args[0] - set_awaiting!(step_options) - self.steps << Step::#{method.camelcase}.new(self, counter, step_options) - else # FORM B - attr_name = args[0] - attr_options = args[1] || {} - step_options = {} - set_awaiting!(step_options) - self.steps << Step::#{method.camelcase}.new(self, counter, step_options) do |attr| - attr.affect attr_name, attr_options - end - end - end - EOS + def unique_index(*args) + args.each { |arg| unique_indices.add arg } end - - def set_awaiting!(step_options) - step_options.merge!(:awaiting => awaiting) if !awaiting.nil? + + def process(callback) + self.runnable_counter += 1 + runnables << DataMiner::Process.new(self, runnable_counter, callback) end - def awaiting!(step) - self.awaiting = step + def import(options = {}, &block) + self.runnable_counter += 1 + runnables << DataMiner::Import.new(self, runnable_counter, options, &block) + end + + def before_invoke + self.class.create_tables end - def stop_awaiting! - self.awaiting = nil + def after_invoke + if unique_indices.empty? + raise(MissingHashColumn, "No unique_index defined for #{klass.name}, so you need a row_hash:string column.") unless klass.column_names.include?('row_hash') + unique_indices.add 'row_hash' + end + runnables.select { |runnable| runnable.is_a?(Import) }.each { |runnable| unique_indices.each { |unique_index| runnable.store(unique_index) unless runnable.stores?(unique_index) } } end # Mine data for this class. - def mine(options = {}) - steps.each { |step| step.perform options } + def run + target = DataMiner::Target.find_or_create_by_name klass.name + run = target.runs.create! :started_at => Time.now + begin + runnables.each(&:run) + ensure + run.update_attributes! :ended_at => Time.now + end + nil end - # Map method to attributes - def map_to_attrs(method) - steps.map { |step| step.map_to_attrs(method) }.compact - end - cattr_accessor :classes self.classes = [] class << self @@ -63,32 +57,41 @@ class << self # # Options # * :class_names: provide an array class names to mine - def mine(options = {}) + def run(options = {}) classes.each do |klass| if options[:class_names].blank? or options[:class_names].include?(klass.name) - klass.data_mine.mine options + klass.data_miner_config.run end end end - # Map a method to attrs. Defaults to all classes touched by DataMiner. - # - # Options - # * :class_names: provide an array class names to mine - def map_to_attrs(method, options = {}) - classes.map do |klass| - if options[:class_names].blank? or options[:class_names].include?(klass.name) - klass.data_mine.map_to_attrs method - end - end.flatten.compact - end - # Queue up all the ActiveRecord classes that DataMiner should touch. # # Generally done in config/initializers/data_miner_config.rb. def enqueue(&block) yield self.classes end + + def create_tables + c = ActiveRecord::Base.connection + unless c.table_exists?('data_miner_targets') + c.create_table 'data_miner_targets', :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string 'name' + t.datetime 'created_at' + t.datetime 'updated_at' + end + c.execute 'ALTER TABLE data_miner_targets ADD PRIMARY KEY (name);' + end + unless c.table_exists?('data_miner_runs') + c.create_table 'data_miner_runs', :options => 'ENGINE=InnoDB default charset=utf8' do |t| + t.string 'data_miner_target_id' + t.datetime 'started_at' + t.datetime 'ended_at' + t.datetime 'created_at' + t.datetime 'updated_at' + end + end + end end end end diff --git a/lib/data_miner/import.rb b/lib/data_miner/import.rb new file mode 100644 index 0000000..bf29b69 --- /dev/null +++ b/lib/data_miner/import.rb @@ -0,0 +1,57 @@ +module DataMiner + class Import + attr_accessor :configuration, :position_in_run, :options, :table, :errata + delegate :klass, :to => :configuration + delegate :unique_indices, :to => :configuration + + def initialize(configuration, position_in_run, options = {}, &block) + @configuration = configuration + @position_in_run = position_in_run + @options = options + yield self if block_given? # pull in attributes + @errata = Errata.new(:url => options[:errata], :klass => klass) if options[:errata] + @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop)) + end + + def inspect + "Import(#{klass}) position #{position_in_run}" + end + + def attributes + configuration.attributes.reject { |k, v| !v.stored_by? self } + end + + def stores?(attr_name) + configuration.attributes[attr_name].andand.stored_by? self + end + + def store(attr_name, attr_options = {}) + configuration.attributes[attr_name] ||= Attribute.new(klass, attr_name) + configuration.attributes[attr_name].options_for_import[self] = attr_options + end + + def run + table.each_row do |row| + if errata + next if errata.rejects?(row) + errata.correct!(row) + end + + unifying_values = unique_indices.map do |attr_name| + [ attributes[attr_name].value_from_row(self, row) ] + end + + record_set = WilliamJamesCartesianProduct.cart_prod(*unifying_values).map do |combination| + next if combination.include?(nil) + klass.send "find_or_initialize_by_#{unique_indices.to_a.join('_and_')}", *combination + end.flatten + + Array.wrap(record_set).each do |record| + attributes.values.each { |attr| attr.set_record_from_row(self, record, row) } + record.save! + end + end + DataMiner.logger.info "performed #{inspect}" + end + end +end diff --git a/lib/data_miner/process.rb b/lib/data_miner/process.rb new file mode 100644 index 0000000..9ae1ab2 --- /dev/null +++ b/lib/data_miner/process.rb @@ -0,0 +1,21 @@ +module DataMiner + class Process + attr_accessor :configuration, :position_in_run, :callback + delegate :klass, :to => :configuration + + def initialize(configuration, position_in_run, callback) + @configuration = configuration + @position_in_run = position_in_run + @callback = callback + end + + def inspect + "Process(#{klass}) position #{position_in_run}" + end + + def run + klass.send callback + DataMiner.logger.info "ran #{inspect}" + end + end +end diff --git a/lib/data_miner/run.rb b/lib/data_miner/run.rb new file mode 100644 index 0000000..3ef5b9c --- /dev/null +++ b/lib/data_miner/run.rb @@ -0,0 +1,7 @@ +module DataMiner + class Run < ActiveRecord::Base + set_table_name 'data_miner_runs' + default_scope :order => 'id ASC' + belongs_to :target + end +end diff --git a/lib/data_miner/step.rb b/lib/data_miner/step.rb deleted file mode 100644 index 9712d8c..0000000 --- a/lib/data_miner/step.rb +++ /dev/null @@ -1,64 +0,0 @@ -module DataMiner - class Step - attr_accessor :configuration, :number, :options - delegate :klass, :to => :configuration - delegate :attributes, :to => :configuration - - def initialize(configuration, number, options = {}, &block) - @configuration = configuration - @number = number - @options = options - yield self if block_given? # pull in attributes - attributes.affect_all_content_columns!(self, :except => options[:except]) if options[:affect_all] == :content_columns - affected_attributes.each { |attr| attr.options_for_step[self][:callback] = options[:callback] } if options[:callback] - all_attributes.each { |attr| attr.options_for_step[self][:name_in_source] = attr.name_in_source(self).upcase } if options[:headers] == :upcase # TODO remove - end - - def variant - self.class.name.demodulize.underscore.to_sym - end - - def awaiting? - !options[:awaiting].nil? - end - - def inspect - "Step(#{klass} #{variant.to_s.camelcase} #{number})" - end - - def signature - "#{klass} step #{number}: #{variant}" - end - - def perform(options = {}) - return if awaiting? and !options[:force] - affected_attributes.each { |attr| attr.perform self } - $stderr.puts "performed #{signature}" - end - - def affected_attributes - @affected_attributes ||= attributes.all_affected_by self - end - - def key_attributes - @key_attributes ||= attributes.all_keys_for self - end - - def all_attributes - @all_attributes ||= attributes.all_for self - end - - def key(attr_name, attr_options = {}) - attributes.key! self, attr_name, attr_options - end - - def affect(attr_name, attr_options = {}) - attributes.affect! self, attr_name, attr_options - end - alias_method :store, :affect - - def map_to_attrs(method) - affected_attributes.map { |attr| attr.send method, self }.compact - end - end -end diff --git a/lib/data_miner/step/associate.rb b/lib/data_miner/step/associate.rb deleted file mode 100644 index 9ff36f1..0000000 --- a/lib/data_miner/step/associate.rb +++ /dev/null @@ -1,9 +0,0 @@ -module DataMiner - class Step - class Associate < Step - def signature - "#{super} #{affected_attributes.first.name}" - end - end - end -end diff --git a/lib/data_miner/step/await.rb b/lib/data_miner/step/await.rb deleted file mode 100644 index e61f01b..0000000 --- a/lib/data_miner/step/await.rb +++ /dev/null @@ -1,35 +0,0 @@ -module DataMiner - class Step - class Await < Step - attr_accessor :other_class - - def initialize(configuration, number, options = {}, &block) - # doesn't call super - @configuration = configuration - @number = number - @options = options - @other_class = options.delete :other_class - configuration.awaiting! self - yield configuration # pull in steps - configuration.stop_awaiting! - end - - def perform(*args) - other_class.data_mine.steps << Step::Callback.new(other_class.data_mine, self) - $stderr.puts "added #{signature} to callbacks after #{other_class}" - end - - def callback - $stderr.puts "starting to perform deferred steps in #{signature}..." - all_awaiting.each { |step| step.perform :force => true } - $stderr.puts "...done" - end - - private - - def all_awaiting - configuration.steps.select { |step| step.options and step.options[:awaiting] == self } - end - end - end -end diff --git a/lib/data_miner/step/callback.rb b/lib/data_miner/step/callback.rb deleted file mode 100644 index fab8299..0000000 --- a/lib/data_miner/step/callback.rb +++ /dev/null @@ -1,22 +0,0 @@ -module DataMiner - class Step - class Callback < Step - attr_accessor :foreign_step - - def initialize(configuration, foreign_step) - @configuration = configuration - @foreign_step = foreign_step - @number = "(last)" - end - - def perform(*args) - foreign_step.callback - $stderr.puts "performed #{signature}" - end - - def signature - "#{super} (on behalf of #{foreign_step.signature})" - end - end - end -end diff --git a/lib/data_miner/step/derive.rb b/lib/data_miner/step/derive.rb deleted file mode 100644 index 68e5db5..0000000 --- a/lib/data_miner/step/derive.rb +++ /dev/null @@ -1,9 +0,0 @@ -module DataMiner - class Step - class Derive < Step - def signature - "#{super} #{affected_attributes.first.name}" - end - end - end -end diff --git a/lib/data_miner/step/import.rb b/lib/data_miner/step/import.rb deleted file mode 100644 index 955717f..0000000 --- a/lib/data_miner/step/import.rb +++ /dev/null @@ -1,57 +0,0 @@ -module DataMiner - class Step - class Import < Step - attr_accessor :table, :errata - - def initialize(configuration, number, options = {}, &block) - super - @errata = Errata.new(:url => options[:errata], :klass => klass) if options[:errata] - @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop)) - end - - def signature - "#{super} #{options[:url]}" - end - - def perform(*args) - ActiveRecord::Base.connection.execute("TRUNCATE #{klass.quoted_table_name}") if wants_truncate? - table.each_row do |row| - if errata - next if errata.rejects?(row) - errata.correct!(row) - end - if uses_existing_data? - key_values = key_attributes.map { |key_attr| [ key_attr.value_from_row(self, row) ] } - record_set = WilliamJamesCartesianProduct.cart_prod(*key_values).map do |combination| - next if combination.include?(nil) and !wants_nil_keys? - klass.send(dynamic_finder_name, *combination) - end.flatten - else - record_set = klass.new - end - Array.wrap(record_set).each do |record| - affected_attributes.each { |attr| attr.set_record_from_row(self, record, row) } - record.save - end - end - $stderr.puts "performed #{signature}" - end - - def wants_truncate? - options[:truncate] == true or (!(options[:truncate] == false) and !uses_existing_data?) - end - - def wants_nil_keys? - options[:allow_nil_keys] == true - end - - def uses_existing_data? - @uses_existing_data ||= attributes.has_keys_for?(self) or attributes.has_conditional_writes_for?(self) - end - - def dynamic_finder_name - "find_or_initialize_by_#{key_attributes.map(&:name).join('_and_')}".to_sym - end - end - end -end diff --git a/lib/data_miner/target.rb b/lib/data_miner/target.rb new file mode 100644 index 0000000..d003026 --- /dev/null +++ b/lib/data_miner/target.rb @@ -0,0 +1,7 @@ +module DataMiner + class Target < ActiveRecord::Base + set_table_name 'data_miner_targets' + set_primary_key :name + has_many :runs, :foreign_key => 'data_miner_target_id' + end +end diff --git a/test/data_miner_test.rb b/test/data_miner_test.rb index d113bad..c2529e8 100644 --- a/test/data_miner_test.rb +++ b/test/data_miner_test.rb @@ -1,47 +1,591 @@ require 'test_helper' -ActiveRecord::Schema.define(:version => 20090819143429) do - create_table "airports", :force => true do |t| - t.string "iata_code" - t.string "name" - t.string "city" - t.integer "country_id" - t.float "latitude" - t.float "longitude" - t.datetime "created_at" - t.datetime "updated_at" - end - create_table "countries", :force => true do |t| - t.string "iso_3166" - t.string "name" - t.datetime "created_at" - t.datetime "updated_at" +module FuelEconomyGuide + TRANSMISSIONS = { + 'A' => 'automatic', + 'M' => 'manual', + 'L' => 'automatic', # Lockup/automatic + 'S' => 'semiautomatic', # Semiautomatic + 'C' => 'manual' # TODO verify for VW Syncro + } + + ENGINE_TYPES = { + '(GUZZLER)' => nil, # "gas guzzler" + '(POLICE)' => nil, # police automobile_variant + '(MPFI)' => 'injection', + '(MPI*)' => 'injection', + '(SPFI)' => 'injection', + '(FFS)' => 'injection', + '(TURBO)' => 'turbo', + '(TRBO)' => 'turbo', + '(TC*)' => 'turbo', + '(FFS,TRBO)' => %w(injection turbo), + '(S-CHARGE)' => 'supercharger', + '(SC*)' => 'supercharger', + '(DIESEL)' => nil, # diesel + '(DSL)' => nil, # diesel + '(ROTARY)' => nil, # rotary + '(VARIABLE)' => nil, # variable displacement + '(NO-CAT)' => nil, # no catalytic converter + '(OHC)' => nil, # overhead camshaft + '(OHV)' => nil, # overhead valves + '(16-VALVE)' => nil, # 16V + '(305)' => nil, # 305 cubic inch displacement + '(307)' => nil, # 307 cubic inch displacement + '(M-ENG)' => nil, + '(W-ENG)' => nil, + '(GM-BUICK)' => nil, + '(GM-CHEV)' => nil, + '(GM-OLDS)' => nil, + '(GM-PONT)' => nil, + } + + class ParserB + attr_accessor :year + def initialize(options = {}) + @year = options[:year] + end + + def apply(row) + row.merge!({ + 'make' => row['carline_mfr_name'], # make it line up with the errata + 'model' => row['carline_name'], # ditto + 'transmission' => TRANSMISSIONS[row['model_trans'][0, 1]], + 'speeds' => (row['model_trans'][1, 1] == 'V') ? 'variable' : row['model_trans'][1, 1], + 'turbo' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('turbo'), + 'supercharger' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('supercharger'), + 'injection' => [ENGINE_TYPES[row['engine_desc1']], ENGINE_TYPES[row['engine_desc2']]].flatten.include?('injection'), + 'displacement' => _displacement(row['opt_disp']), + 'year' => year + }) + row + end + + def _displacement(str) + str = str.gsub(/[\(\)]/, '').strip + if str =~ /^(.+)L$/ + $1.to_f + elsif str =~ /^(.+)CC$/ + $1.to_f / 1000 + end + end + + def add_hints!(bus) + bus[:format] = :fixed_width + bus[:cut] = '13-' if year == 1995 + bus[:schema_name] = :fuel_economy_guide_b + bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' } + Slither.define :fuel_economy_guide_b do |d| + d.rows do |row| + row.trap { true } # there's only one section + row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR + row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA + row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE + row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE + row.column 'carline_name' , 28, :type => :string # CARLINE NAME + row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES + row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM. + row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE + row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS + row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND) + row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY) + row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE + row.column 'carline_code' , 5, :type => :integer # CARLINE CODE + row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX + row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME + row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS) + row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE + row.spacer 2 + row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE + row.spacer 2 + row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON + row.spacer 2 + row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON + row.spacer 2 + row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON + row.spacer 2 + row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON + row.spacer 2 + row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5 + row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT + row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1 + row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2 + row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3 + row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME. + row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME. + row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME. + row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY + row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE + row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL + row.column 'filler' , 1, :type => :string # NOT USED + row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL + row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS + end + end + end + end + class ParserC + attr_accessor :year + def initialize(options = {}) + @year = options[:year] + end + + def add_hints!(bus) + # File will decide format based on filename + end + + def apply(row) + row.merge!({ + 'make' => row['Manufacturer'], # make it line up with the errata + 'model' => row['carline name'], # ditto + 'drive' => row['drv'] + 'WD', + 'transmission' => TRANSMISSIONS[row['trans'][-3, 1]], + 'speeds' => (row['trans'][-2, 1] == 'V') ? 'variable' : row['trans'][-2, 1], + 'turbo' => row['T'] == 'T', + 'supercharger' => row['S'] == 'S', + 'injection' => true, + 'year' => year + }) + row + end + end + class ParserD + attr_accessor :year + def initialize(options = {}) + @year = options[:year] + end + + def add_hints!(bus) + bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007 + end + + def apply(row) + row.merge!({ + 'make' => row['MFR'], # make it line up with the errata + 'model' => row['CAR LINE'], # ditto + 'drive' => row['DRIVE SYS'] + 'WD', + 'transmission' => TRANSMISSIONS[row['TRANS'][-3, 1]], + 'speeds' => (row['TRANS'][-2, 1] == 'V') ? 'variable' : row['TRANS'][-2, 1], + 'turbo' => row['TURBO'] == 'T', + 'supercharger' => row['SPCHGR'] == 'S', + 'injection' => true, + 'year' => year + }) + row + end + end +end + +class AutomobileMakeYear < ActiveRecord::Base + set_primary_key :row_hash + + belongs_to :make, :class_name => 'AutomobileMake', :foreign_key => 'automobile_make_id' + belongs_to :model_year, :class_name => 'AutomobileModelYear', :foreign_key => 'automobile_model_year_id' + has_many :fleet_years, :class_name => 'AutomobileMakeFleetYear' + + data_miner do + process :derive_from_make_fleet_years + process :derive_association_to_make_fleet_years + process :derive_fuel_efficiency + process :derive_volume + end + + # validates_numericality_of :fuel_efficiency, :greater_than => 0, :allow_nil => true + + class << self + def derive_from_make_fleet_years + AutomobileMakeFleetYear.find_in_batches do |batch| + batch.each do |record| + #puts " * Considering AMFY #{record.inspect}" + if record.make and record.model_year + find_or_create_by_automobile_make_id_and_automobile_model_year_id record.make.id, record.model_year.id + end + end + end + end + + def derive_association_to_make_fleet_years + AutomobileMakeFleetYear.find_in_batches do |batch| + batch.each do |record| + if record.make and record.model_year + record.make_year = find_by_automobile_make_id_and_automobile_model_year_id record.make.id, record.model_year.id + record.save! if record.changed? + end + end + end + end + + def derive_fuel_efficiency + AutomobileMakeFleetYear.find_in_batches do |batch| + batch.each do |record| + if record.make and record.model_year + make_year = find_by_automobile_make_id_and_automobile_model_year_id record.make.id, record.model_year.id + # make_year.fuel_efficiency = make_year.fleet_years.weighted_average :fuel_efficiency, :by => :volume + make_year.save! + end + end + end + end + + def derive_volume + find_in_batches do |batch| + batch.each do |record| + record.volume = record.fleet_years.collect(&:volume).sum + record.save! + end + end + end + end +end + +class AutomobileMakeFleetYear < ActiveRecord::Base + set_primary_key :row_hash + belongs_to :make, :class_name => 'AutomobileMake', :foreign_key => 'automobile_make_id' + belongs_to :model_year, :class_name => 'AutomobileModelYear', :foreign_key => 'automobile_model_year_id' + belongs_to :make_year, :class_name => 'AutomobileMakeYear', :foreign_key => 'automobile_make_year_id' + + data_miner do + # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA + import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv', + :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv', + :select => lambda { |row| row['volume'].to_i > 0 } do |attr| + attr.store 'make_name', :field_name => 'manufacturer_name' # prefix + attr.store 'year', :field_name => 'year_content' + attr.store 'fleet', :chars => 2..3 + attr.store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'volume' + end + end +end + +class AutomobileModelYear < ActiveRecord::Base + set_primary_key :year + + has_many :make_years, :class_name => 'AutomobileMakeYear' + has_many :variants, :class_name => 'AutomobileVariant' + + data_miner do + unique_index 'year' + + # await :other_class => AutomobileMakeYear do |deferred| + # # deferred.derive :fuel_efficiency, :weighting_association => :make_years, :weighting_column => :volume + # end + end +end + +class AutomobileFuelType < ActiveRecord::Base + set_primary_key :code + + data_miner do + unique_index 'code' + + import(:url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', + :filename => 'Gd6-dsc.txt', + :format => :fixed_width, + :crop => 21..26, # inclusive + :cut => '2-', + :select => lambda { |row| /\A[A-Z]/.match row[:code] }, + :schema => [[ 'code', 2, { :type => :string } ], + [ 'spacer', 2 ], + [ 'name', 52, { :type => :string } ]]) do |attr| + attr.store 'name' + end + + import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/models_export/automobile_fuel_type.csv' do |attr| + attr.store 'name' + attr.store 'annual_distance' + attr.store 'emission_factor' + end + + # pull electricity emission factor from residential electricity + import(:url => 'http://spreadsheets.google.com/pub?key=rukxnmuhhsOsrztTrUaFCXQ', + :select => lambda { |row| row['code'] == 'El' }) do |attr| + attr.store 'name' + attr.store 'emission_factor' + end + + # still need distance estimate for electric cars + end + + CODES = { + :electricity => 'El', + :diesel => 'D' + } +end + +class AutomobileModel < ActiveRecord::Base + set_primary_key :row_hash + + has_many :variants, :class_name => 'AutomobileVariant' + belongs_to :make, :class_name => 'AutomobileMake', :foreign_key => 'automobile_make_id' + + data_miner do + # derived from FEG automobile variants + end +end + +class AutomobileMake < ActiveRecord::Base + set_primary_key :name + + has_many :make_years, :class_name => 'AutomobileMakeYear' + has_many :models, :class_name => 'AutomobileModel' + has_many :fleet_years, :class_name => 'AutomobileMakeFleetYear' + has_many :variants, :class_name => 'AutomobileVariant' + + data_miner do + unique_index 'name' + + import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/makes/make_importance.csv' do |attr| + attr.store 'major' + end + # await :other_class => AutomobileMakeYear do |deferred| + # deferred.derive :fuel_efficiency, :weighting_association => :make_years, :weighting_column => 'volume' + # end + end +end + +class AutomobileVariant < ActiveRecord::Base + set_primary_key :row_hash + + belongs_to :make, :class_name => 'AutomobileMake', :foreign_key => 'automobile_make_id' + belongs_to :model, :class_name => 'AutomobileModel', :foreign_key => 'automobile_model_id' + belongs_to :model_year, :class_name => 'AutomobileModelYear', :foreign_key => 'automobile_model_year_id' + belongs_to :fuel_type, :class_name => 'AutomobileFuelType', :foreign_key => 'automobile_fuel_type_id' + + data_miner do + # 1985---1997 + (85..97).each do |yy| + filename = (yy == 96) ? "#{yy}MFGUI.ASC" : "#{yy}MFGUI.DAT" + import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip", + :filename => filename, + :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i }, + :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv') do |attr| + attr.store 'make_name', :field_name => 'make' + attr.store 'model_name', :field_name => 'model' + attr.store 'year' + attr.store 'fuel_type_code', :field_name => 'fuel_type' + attr.store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'cylinders', :field_name => 'no_cyc' + attr.store 'drive', :field_name => 'drive_system' + attr.store 'carline_mfr_code' + attr.store 'vi_mfr_code' + attr.store 'carline_code' + attr.store 'carline_class_code', :field_name => 'carline_clss' + attr.store 'transmission' + attr.store 'speeds' + attr.store 'turbo' + attr.store 'supercharger' + attr.store 'injection' + attr.store 'displacement' + end + end + + # 1998--2005 + { + 1998 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv' }, + 1999 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/99guide.zip', :filename => '99guide6.csv' }, + 2000 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/00data.zip', :filename => 'G6080900.xls' }, + 2001 => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/01guide0918.csv' }, # parseexcel 0.5.2 can't read Excel 5.0 { :url => 'http://www.fueleconomy.gov/FEG/epadata/01data.zip', :filename => '01guide0918.xls' } + 2002 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/02data.zip', :filename => 'guide_jan28.xls' }, + 2003 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/03data.zip', :filename => 'guide_2003_feb04-03b.csv' }, + 2004 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/04data.zip', :filename => 'gd04-Feb1804-RelDtFeb20.csv' }, + 2005 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/05data.zip', :filename => 'guide2005-2004oct15.csv' } + }.sort { |a, b| a.first <=> b.first }.each do |year, options| + import options.merge(:transform => { :class => FuelEconomyGuide::ParserC, :year => year }, + :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv') do |attr| + attr.store 'make_name', :field_name => 'make' + attr.store 'model_name', :field_name => 'model' + attr.store 'fuel_type_code', :field_name => 'fl' + attr.store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'cylinders', :field_name => 'cyl' + attr.store 'displacement', :field_name => 'displ' + attr.store 'carline_class_code', :field_name => 'cls' if year >= 2000 + attr.store 'carline_class_name', :field_name => 'Class' + attr.store 'year' + attr.store 'transmission' + attr.store 'speeds' + attr.store 'turbo' + attr.store 'supercharger' + attr.store 'injection' + attr.store 'drive' + end + end + + # 2006--2010 + { + 2006 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/06data.zip', :filename => '2006_FE_Guide_14-Nov-2005_download.csv' }, + 2007 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/07data.zip', :filename => '2007_FE_guide_ALL_no_sales_May_01_2007.xls' }, + 2008 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv' }, + 2009 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/09data.zip', :filename => '2009_FE_guide for DOE_ALL-rel dates-no-sales-8-28-08download.csv' }, + # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' } + }.sort { |a, b| a.first <=> b.first }.each do |year, options| + import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year }, + :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv') do |attr| + attr.store 'make_name', :field_name => 'make' + attr.store 'model_name', :field_name => 'model' + attr.store 'fuel_type_code', :field_name => 'FUEL TYPE' + attr.store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre + attr.store 'cylinders', :field_name => 'NUMB CYL' + attr.store 'displacement', :field_name => 'DISPLACEMENT' + attr.store 'carline_class_code', :field_name => 'CLS' + attr.store 'carline_class_name', :field_name => 'CLASS' + attr.store 'year' + attr.store 'transmission' + attr.store 'speeds' + attr.store 'turbo' + attr.store 'supercharger' + attr.store 'injection' + attr.store 'drive' + end + end + + # associate :make, :key => :original_automobile_make_name, :foreign_key => :name + # derive :automobile_model_id # creates models by name + # associate :model_year, :key => :original_automobile_model_year_year, :foreign_key => :year + # associate :fuel_type, :key => :original_automobile_fuel_type_code, :foreign_key => :code + process :set_adjusted_fuel_economy + end + + def name + extra = [] + extra << "V#{cylinders}" if cylinders + extra << "#{displacement}L" if displacement + extra << "turbo" if turbo + extra << "FI" if injection + extra << "#{speeds}spd" if speeds.present? + extra << transmission if transmission.present? + extra << "(#{fuel_type.name})" if fuel_type + extra.join(' ') + end + + def fuel_economy_description + [ fuel_efficiency_city, fuel_efficiency_highway ].map { |f| f.kilometres_per_litre.to(:miles_per_gallon).round }.join('/') + end + + class << self + def set_adjusted_fuel_economy + update_all 'fuel_efficiency_city = 1 / ((0.003259 / 0.425143707) + (1.1805 / raw_fuel_efficiency_city))' + update_all 'fuel_efficiency_highway = 1 / ((0.001376 / 0.425143707) + (1.3466 / raw_fuel_efficiency_highway))' + end + + # the following matching methods are needed by the errata + # per https://brighterplanet.sifterapp.com/projects/30/issues/750/comments + + def transmission_is_blank?(row) + row['transmission'].blank? + end + + def is_a_2007_gmc_or_chevrolet?(row) + row['year'] == 2007 and %w(GMC CHEVROLET).include? row['MFR'].upcase + end + + def is_a_porsche?(row) + row['make'].upcase == 'PORSCHE' + end + + def is_not_a_porsche?(row) + !is_a_porsche? row + end + + def is_a_mercedes_benz?(row) + row['make'] =~ /MERCEDES/i + end + + def is_a_lexus?(row) + row['make'].upcase == 'LEXUS' + end + + def is_a_bmw?(row) + row['make'].upcase == 'BMW' + end + + def is_a_ford?(row) + row['make'].upcase == 'FORD' + end + + def is_a_rolls_royce_and_model_contains_bentley?(row) + is_a_rolls_royce?(row) and model_contains_bentley?(row) + end + + def is_a_bentley?(row) + row['make'].upcase == 'BENTLEY' + end + + def is_a_rolls_royce?(row) + row['make'] =~ /ROLLS/i + end + + def is_a_turbo_brooklands?(row) + row['model'] =~ /TURBO R\/RL BKLDS/i + end + + def model_contains_maybach?(row) + row['model'] =~ /MAYBACH/i + end + + def model_contains_bentley?(row) + row['model'] =~ /BENTLEY/i + end end end class Country < ActiveRecord::Base - mine_data do |step| - # import country names and country codes - step.import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr| - attr.key :iso_3166, :name_in_source => 'country code' - attr.store :iso_3166, :name_in_source => 'country code' - attr.store :name, :name_in_source => 'country' + set_primary_key :iso_3166 + + data_miner do + unique_index 'iso_3166' + + # get a complete list + import :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do |attr| + attr.store 'iso_3166', :field_number => 1 + attr.store 'name', :field_number => 0 + end + + # get nicer names + import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr| + attr.store 'iso_3166', :field_name => 'country code' + attr.store 'name', :field_name => 'country' end end end class Airport < ActiveRecord::Base + set_primary_key :iata_code belongs_to :country - mine_data do |step| + + data_miner do + unique_index 'iata_code' + # import airport iata_code, name, etc. - step.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |attr| - attr.key :iata_code, :field_number => 3 - attr.store :name, :field_number => 0 - attr.store :city, :field_number => 1 - attr.store :country, :field_number => 2, :foreign_key => :name # will use Country.find_by_name(X) - attr.store :iata_code, :field_number => 3 - attr.store :latitude, :field_number => 5 - attr.store :longitude, :field_number => 6 + import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? }) do |attr| + attr.store 'name', :field_number => 1 + attr.store 'city', :field_number => 2 + attr.store 'country_name', :field_number => 3 + attr.store 'iata_code', :field_number => 4 + attr.store 'latitude', :field_number => 6 + attr.store 'longitude', :field_number => 7 + end + end +end + +class CensusRegion < ActiveRecord::Base + set_primary_key :number + + data_miner do + unique_index 'number' + + import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do |attr| + attr.store 'name', :field_name => 'Name' + attr.store 'number', :field_name => 'Region' + end + + # pretend this is a different data source + import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do |attr| + attr.store 'name', :field_name => 'Name' + attr.store 'number', :field_name => 'Region' end end end @@ -49,30 +593,82 @@ class Airport < ActiveRecord::Base DataMiner.enqueue do |queue| queue << Country queue << Airport + queue << CensusRegion + queue << AutomobileFuelType # OK + queue << AutomobileModel # OK + queue << AutomobileMake # OK + queue << AutomobileModelYear # OK + queue << AutomobileVariant # OK + queue << AutomobileMakeFleetYear # OK; third-party data not yet hosted on third-party site + queue << AutomobileMakeYear # OK end -class DataMinerTest < Test::Unit::TestCase - def teardown - Airport.delete_all - Country.delete_all +class DataMinerTest < Test::Unit::TestCase + should "be idempotent" do + Country.data_miner_config.run + a = Country.count + Country.data_miner_config.run + b = Country.count + assert_equal a, b + + CensusRegion.data_miner_config.run + a = CensusRegion.count + CensusRegion.data_miner_config.run + b = CensusRegion.count + assert_equal a, b end + + should "assume that no unique indices means it wants a big hash" do + assert_raises DataMiner::MissingHashColumn do + class IncompleteCountry < ActiveRecord::Base + set_table_name 'countries' + + data_miner do + # no unique index + + # get a complete list + import :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do |attr| + attr.store 'iso_3166', :field_number => 1 + attr.store 'name', :field_number => 0 + end + + # get nicer names + import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr| + attr.store 'iso_3166', :field_name => 'country code' + attr.store 'name', :field_name => 'country' + end + end + end + end + end + + should "hash things if no unique index is listed" do + AutomobileVariant.data_miner_config.runnables[0].run + assert AutomobileVariant.first.row_hash.present? + end + + # should "mine multiple classes in the correct order" do + # DataMiner.run :class_names => DataMiner.classes.map(&:class_name) + # uy = Country.find_by_iso_3166('UY') + # assert_equal 'Uruguay', uy.name + # end - should "mine a single class" do - Country.data_mine.mine - assert_equal 'Uruguay', Country.find_by_iso_3166('UY').name - assert_equal 0, Airport.count + should "have a target record for every class that is mined" do + DataMiner.run :class_names => %w{ Country } + assert DataMiner::Target.exists?(:name => 'Country') + assert_equal 1, DataMiner::Target.count(:conditions => {:name => 'country'}) end - should "mine a single class using the API" do - DataMiner.mine :class_names => ['Country'] - assert_equal 'Uruguay', Country.find_by_iso_3166('UY').name - assert_equal 0, Airport.count + should "keep a log when it does a run" do + approx_started_at = Time.now + DataMiner.run :class_names => %w{ Country } + approx_ended_at = Time.now + target = DataMiner::Target.find_by_name('Country') + assert (target.runs.last.started_at - approx_started_at).abs < 5 # seconds + assert (target.runs.last.ended_at - approx_ended_at).abs < 5 # seconds end - should "mine all classes" do - DataMiner.mine - uy = Country.find_by_iso_3166('UY') - assert_equal 'Uruguay', uy.name - assert_equal uy, Airport.find_by_iata_code('MVD').country + should "remove rows that have disappeared from the external data source" do + flunk "not implemented yet" end end diff --git a/test/test_helper.rb b/test/test_helper.rb index a2d58a5..7ca72b9 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,16 +1,147 @@ require 'rubygems' require 'test/unit' require 'shoulda' -require 'sqlite3' +require 'ruby-debug' $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) $LOAD_PATH.unshift(File.dirname(__FILE__)) require 'data_miner' ActiveRecord::Base.establish_connection( - 'adapter' => 'sqlite3', - 'database' => 'test/test.sqlite3' + 'adapter' => 'mysql', + 'database' => 'data_miner_test', + 'username' => 'root', + 'password' => '' ) class Test::Unit::TestCase end + +ActiveRecord::Schema.define(:version => 20090819143429) do + create_table "airports", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "country_id" + + t.string "iata_code" + t.string "name" + t.string "city" + t.string "country_name" + t.float "latitude" + t.float "longitude" + t.datetime "created_at" + t.datetime "updated_at" + end + execute "ALTER TABLE airports ADD PRIMARY KEY (iata_code);" + + create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "iso_3166" + t.string "name" + t.datetime "created_at" + t.datetime "updated_at" + end + execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);" + + create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "name" + t.datetime "updated_at" + t.datetime "created_at" + t.integer "number" + end + execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);" + + create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "automobile_make_id" + t.string "automobile_model_id" + t.string "automobile_model_year_id" + t.string "automobile_fuel_type_id" + + t.float "fuel_efficiency_city" + t.float "fuel_efficiency_highway" + t.string "make_name" + t.string "model_name" + t.string "year" + t.string "fuel_type_code" + t.datetime "updated_at" + t.datetime "created_at" + t.string "transmission" + t.string "drive" + t.boolean "turbo" + t.boolean "supercharger" + t.integer "cylinders" + t.float "displacement" + t.float "raw_fuel_efficiency_city" + t.float "raw_fuel_efficiency_highway" + t.integer "carline_mfr_code" + t.integer "vi_mfr_code" + t.integer "carline_code" + t.integer "carline_class_code" + t.boolean "injection" + t.string "carline_class_name" + t.string "speeds" + t.string "row_hash" + end + execute "ALTER TABLE automobile_variants ADD PRIMARY KEY (row_hash);" + + create_table "automobile_fuel_types", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "name" + t.datetime "created_at" + t.datetime "updated_at" + t.float "emission_factor" + t.float "annual_distance" + t.string "code" + end + execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);" + + create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "automobile_make_id" + t.string "automobile_model_year_id" + t.integer "automobile_make_year_id" + + t.string "fleet" + t.string "make_name" + t.string "year" + t.float "fuel_efficiency" + t.integer "volume" + t.datetime "created_at" + t.datetime "updated_at" + + t.string "row_hash" + end + execute "ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (row_hash);" + + create_table "automobile_make_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.integer "automobile_make_id" # user-defined + t.integer "automobile_model_year_id" # user-defined + t.datetime "created_at" + t.datetime "updated_at" + t.float "fuel_efficiency" + t.integer "volume" + t.string "row_hash" + end + execute "ALTER TABLE automobile_make_years ADD PRIMARY KEY (row_hash);" + + create_table "automobile_makes", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "name" + t.datetime "updated_at" + t.datetime "created_at" + t.float "fuel_efficiency" + t.boolean "major" + end + execute "ALTER TABLE automobile_makes ADD PRIMARY KEY (name);" + + create_table "automobile_model_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.integer "year" + t.datetime "updated_at" + t.datetime "created_at" + t.float "fuel_efficiency" + end + execute "ALTER TABLE automobile_model_years ADD PRIMARY KEY (year);" + + create_table "automobile_models", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t| + t.string "name" + t.string "automobile_make_id" + t.datetime "updated_at" + t.datetime "created_at" + t.string "row_hash" + end + execute "ALTER TABLE automobile_models ADD PRIMARY KEY (row_hash);" +end