
Commit 9a38501

Update dependencies.
tomwhite committed Jan 11, 2012
1 parent c4655fb commit 9a38501
Showing 9 changed files with 306 additions and 81 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@
/lib
/out
/output*
ch15/maxwidget
snippet/actual
target
/target
16 changes: 8 additions & 8 deletions README
@@ -9,13 +9,13 @@ for the first edition at http://github.com/tomwhite/hadoop-book/tree/1e, and
for the second edition at http://github.com/tomwhite/hadoop-book/tree/2e.

This version of the code has been tested with:
* Hadoop 0.20.2
* Hadoop 1.0.0/0.22.0/0.23.0
* Avro 1.4.1
* Pig 0.8.3
* Hive 0.7.1-SNAPSHOT
* HBase 0.90.3
* ZooKeeper 3.3.3
* Sqoop 1.2.0-cdh3u0
* Pig 0.9.0
* Hive 0.8.0
* HBase 0.90.4
* ZooKeeper 3.4.2
* Sqoop 1.4.0-incubating

Before running the examples you need to install Hadoop, Pig, Hive, HBase,
ZooKeeper, and Sqoop (as appropriate) as explained in the book.
@@ -24,11 +24,11 @@ You also need to install Maven.

Then you can build the code with:

% mvn package -DskipTests -Papache-20
% mvn package -DskipTests -Phadoop.version=1.0.0

You should then be able to run the examples from the book.

Chapter names for "Hadoop: The Definitive Guide", Second Edition
Chapter names for "Hadoop: The Definitive Guide", Third Edition

ch01 - Meet Hadoop
ch02 - MapReduce
17 changes: 11 additions & 6 deletions avro/src/main/java/AvroGenericMaxTemperature.java
@@ -22,12 +22,14 @@

public class AvroGenericMaxTemperature extends Configured implements Tool {

private static final Schema SCHEMA = Schema.parse("{\"type\":\"record\", \"name\":\"WeatherRecord\", \"fields\":"
+ "[{\"name\":\"year\", \"type\":\"int\"}, " +
private static final Schema SCHEMA = Schema.parse(
"{\"type\":\"record\", \"name\":\"WeatherRecord\", \"fields\":" +
"[{\"name\":\"year\", \"type\":\"int\"}, " +
"{\"name\":\"temperature\", \"type\":\"int\", \"order\": \"ignore\"}, " +
"{\"name\":\"stationId\", \"type\":\"string\", \"order\": \"ignore\"}]}");

private static GenericRecord newWeatherRecord(int year, int temperature, String stationId) {
private static GenericRecord newWeatherRecord(int year, int temperature,
String stationId) {
GenericRecord value = new GenericData.Record(SCHEMA);
value.put("year", year);
value.put("temperature", temperature);
@@ -43,15 +45,17 @@ private static GenericRecord newWeatherRecord(GenericRecord other) {
return value;
}

public static class MaxTemperatureMapper extends AvroMapper<Utf8, Pair<Integer, GenericRecord>> {
public static class MaxTemperatureMapper extends AvroMapper<Utf8,
Pair<Integer, GenericRecord>> {
private NcdcRecordParser parser = new NcdcRecordParser();
@Override
public void map(Utf8 line,
AvroCollector<Pair<Integer, GenericRecord>> collector,
Reporter reporter) throws IOException {
parser.parse(line.toString());
if (parser.isValidTemperature()) {
GenericRecord record = newWeatherRecord(parser.getYearInt(), parser.getAirTemperature(), parser.getStationId());
GenericRecord record = newWeatherRecord(parser.getYearInt(),
parser.getAirTemperature(), parser.getStationId());
Pair<Integer, GenericRecord> pair =
new Pair<Integer, GenericRecord>(parser.getYearInt(), record);
collector.collect(pair);
@@ -64,7 +68,8 @@ public static class MaxTemperatureReducer extends

@Override
public void reduce(Integer key, Iterable<GenericRecord> values,
AvroCollector<GenericRecord> collector, Reporter reporter) throws IOException {
AvroCollector<GenericRecord> collector, Reporter reporter)
throws IOException {
GenericRecord max = null;
for (GenericRecord value : values) {
if (max == null) {
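
Aside (not part of the commit): the driver code for the generic-record job is not shown in this hunk. Below is a rough sketch, assuming the old org.apache.avro.mapred API; the schema string and the AvroJob schema/format calls mirror what the diff shows, while the job name, the class registration, and the driver class itself are illustrative assumptions.

// Minimal driver sketch, assuming AvroGenericMaxTemperature and its nested
// mapper/reducer classes (shown above) are on the classpath.
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroUtf8InputFormat;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class AvroGenericMaxTemperatureDriverSketch {

  // Same record schema as in the class above, repeated so the sketch is self-contained
  private static final Schema SCHEMA = Schema.parse(
      "{\"type\":\"record\", \"name\":\"WeatherRecord\", \"fields\":" +
      "[{\"name\":\"year\", \"type\":\"int\"}, " +
      "{\"name\":\"temperature\", \"type\":\"int\", \"order\": \"ignore\"}, " +
      "{\"name\":\"stationId\", \"type\":\"string\", \"order\": \"ignore\"}]}");

  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(AvroGenericMaxTemperatureDriverSketch.class);
    conf.setJobName("Max temperature (Avro generic)");

    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // Text input lines reach the mapper as Utf8 strings
    AvroJob.setInputSchema(conf, Schema.create(Schema.Type.STRING));
    // Intermediate data is a (year, WeatherRecord) pair; the reducer emits plain WeatherRecords
    AvroJob.setMapOutputSchema(conf,
        Pair.getPairSchema(Schema.create(Schema.Type.INT), SCHEMA));
    AvroJob.setOutputSchema(conf, SCHEMA);

    conf.setInputFormat(AvroUtf8InputFormat.class);
    AvroJob.setMapperClass(conf, AvroGenericMaxTemperature.MaxTemperatureMapper.class);
    AvroJob.setReducerClass(conf, AvroGenericMaxTemperature.MaxTemperatureReducer.class);

    JobClient.runJob(conf);
  }
}
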
10 changes: 6 additions & 4 deletions avro/src/main/java/AvroSpecificMaxTemperature.java
@@ -20,7 +20,8 @@

public class AvroSpecificMaxTemperature extends Configured implements Tool {

public static class MaxTemperatureMapper extends AvroMapper<Utf8, Pair<Integer, WeatherRecord>> {
public static class MaxTemperatureMapper
extends AvroMapper<Utf8, Pair<Integer, WeatherRecord>> {
private NcdcRecordParser parser = new NcdcRecordParser();
private WeatherRecord record = new WeatherRecord();
@Override
@@ -43,7 +44,8 @@ public static class MaxTemperatureReducer extends

@Override
public void reduce(Integer key, Iterable<WeatherRecord> values,
AvroCollector<WeatherRecord> collector, Reporter reporter) throws IOException {
AvroCollector<WeatherRecord> collector, Reporter reporter)
throws IOException {
WeatherRecord max = null;
for (WeatherRecord value : values) {
if (max == null || value.temperature > max.temperature) {
@@ -77,8 +79,8 @@ public int run(String[] args) throws Exception {
FileOutputFormat.setOutputPath(conf, new Path(args[1]));

AvroJob.setInputSchema(conf, Schema.create(Schema.Type.STRING));
AvroJob.setMapOutputSchema(conf,
Pair.getPairSchema(Schema.create(Schema.Type.INT), WeatherRecord.SCHEMA$));
AvroJob.setMapOutputSchema(conf, Pair.getPairSchema(
Schema.create(Schema.Type.INT), WeatherRecord.SCHEMA$));
AvroJob.setOutputSchema(conf, WeatherRecord.SCHEMA$);
conf.setInputFormat(AvroUtf8InputFormat.class);

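
Aside (not part of the commit): the output of either max-temperature job can be spot-checked by reading the Avro data files back with the generic API. A minimal sketch follows; the output file name is an assumption about how the old mapred API names reduce output.

import java.io.File;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class DumpWeatherRecords {
  public static void main(String[] args) throws Exception {
    // e.g. output/part-00000.avro -- the exact name is an assumption
    File avroFile = new File(args[0]);
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(
        avroFile, new GenericDatumReader<GenericRecord>());
    try {
      for (GenericRecord record : reader) {
        // Prints something like {"year": ..., "temperature": ..., "stationId": "..."}
        System.out.println(record);
      }
    } finally {
      reader.close();
    }
  }
}
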
68 changes: 58 additions & 10 deletions book/pom.xml
@@ -11,45 +11,85 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

<!-- Component versions are defined here -->
<!-- Note that Hadoop dependencies are defined in hadoop-meta -->
<avro.version>1.4.1</avro.version>
<pig.version>0.9.0</pig.version>
<hive.version>0.8.0</hive.version>
<hbase.version>0.90.4</hbase.version>
<zookeeper.version>3.4.2</zookeeper.version>
<sqoop.version>1.4.0-incubating</sqoop.version>
</properties>

<dependencyManagement>
<!-- Note that Hadoop dependencies are defined in hadoop-meta -->
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.4.1</version>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<version>0.90.3</version>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.7.1-SNAPSHOT</version>
<version>${hive.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.7.1-SNAPSHOT</version>
<version>${hive.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
</exclusion>
<exclusion>
<groupId>javax.jdo</groupId>
<artifactId>jdo2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-connectionpool</artifactId>
</exclusion>
<exclusion>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-enhancer</artifactId>
</exclusion>
<exclusion>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
<version>0.8.3</version>
<version>${pig.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.3.3</version>
<version>${zookeeper.version}</version>
</dependency>
<dependency>
<groupId>com.cloudera.sqoop</groupId>
<groupId>org.apache.sqoop</groupId>
<artifactId>sqoop</artifactId>
<version>1.2.0-cdh3u0</version>
<version>${sqoop.version}</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
@@ -187,6 +227,14 @@
</build>

<repositories>
<repository>
<id>apache.releases</id>
<name>Apache Snapshot Repository</name>
<url>https://repository.apache.org/content/repositories/releases/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<!-- For Hive snapshots until they get into Maven Central -->
<id>apache.snapshots</id>
@@ -197,7 +245,7 @@
</releases>
</repository>
<repository>
<!-- For Sqoop and CDH -->
<!-- For CDH -->
<id>cloudera</id>
<url>https://repository.cloudera.com/content/repositories/releases/</url>
<snapshots>
2 changes: 1 addition & 1 deletion ch15/pom.xml
@@ -19,7 +19,7 @@
<version>0.20.2-cdh3u2</version>
</dependency>
<dependency>
<groupId>com.cloudera.sqoop</groupId>
<groupId>org.apache.sqoop</groupId>
<artifactId>sqoop</artifactId>
<exclusions>
<exclusion>
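
Aside (not part of the commit): the groupId switch above follows Sqoop's move from Cloudera to the Apache Incubator, and its classes likewise live under the org.apache.sqoop package in 1.4.0-incubating. The sketch below shows how the tool might be driven programmatically; the runTool entry point, connection string, and table name are assumptions for illustration only.

import org.apache.sqoop.Sqoop;

public class SqoopImportSketch {
  public static void main(String[] args) {
    // Hypothetical import; adjust the connect string and table for your setup
    String[] sqoopArgs = {
        "import",
        "--connect", "jdbc:mysql://localhost/hadoopguide",
        "--table", "widgets",
        "-m", "1"
    };
    // Assumed to behave like the old com.cloudera.sqoop.Sqoop#runTool entry point
    int exitCode = Sqoop.runTool(sqoopArgs);
    System.exit(exitCode);
  }
}
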
(Diff for the remaining changed files not shown.)
