Skip to content

Commit

Permalink
[SPARK-12749][SQL] add json option to parse floating-point types as D…
Browse files Browse the repository at this point in the history
…ecimalType

I tried to add this via `USE_BIG_DECIMAL_FOR_FLOATS` option from Jackson with no success.

Added test for non-complex types. Should I add a test for complex types?

Author: Brandon Bradley <bradleytastic@gmail.com>

Closes apache#10936 from blbradley/spark-12749.
  • Loading branch information
blbradley authored and rxin committed Jan 28, 2016
1 parent abae889 commit 3a40c0e
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 2 deletions.
2 changes: 2 additions & 0 deletions python/pyspark/sql/readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def json(self, path, schema=None):
You can set the following JSON-specific options to deal with non-standard JSON files:
* ``primitivesAsString`` (default ``false``): infers all primitive values as a string \
type
* ``floatAsBigDecimal`` (default ``false``): infers all floating-point values as a decimal \
type
* ``allowComments`` (default ``false``): ignores Java/C++ style comment in JSON records
* ``allowUnquotedFieldNames`` (default ``false``): allows unquoted JSON field names
* ``allowSingleQuotes`` (default ``true``): allows single quotes in addition to double \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
*
* You can set the following JSON-specific options to deal with non-standard JSON files:
* <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
* <li>`floatAsBigDecimal` (default `false`): infers all floating-point values as a decimal
* type</li>
* <li>`allowComments` (default `false`): ignores Java/C++ style comment in JSON records</li>
* <li>`allowUnquotedFieldNames` (default `false`): allows unquoted JSON field names</li>
* <li>`allowSingleQuotes` (default `true`): allows single quotes in addition to double quotes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,12 @@ private[json] object InferSchema {
val v = parser.getDecimalValue
DecimalType(v.precision(), v.scale())
case FLOAT | DOUBLE =>
// TODO(davies): Should we use decimal if possible?
DoubleType
if (configOptions.floatAsBigDecimal) {
val v = parser.getDecimalValue
DecimalType(v.precision(), v.scale())
} else {
DoubleType
}
}

case VALUE_TRUE | VALUE_FALSE => BooleanType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ private[sql] class JSONOptions(
parameters.get("samplingRatio").map(_.toDouble).getOrElse(1.0)
val primitivesAsString =
parameters.get("primitivesAsString").map(_.toBoolean).getOrElse(false)
val floatAsBigDecimal =
parameters.get("floatAsBigDecimal").map(_.toBoolean).getOrElse(false)
val allowComments =
parameters.get("allowComments").map(_.toBoolean).getOrElse(false)
val allowUnquotedFieldNames =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,34 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
)
}

// Checks schema inference and row contents when the JSON reader is given the
// "floatAsBigDecimal" option set to "true": the floating-point field is expected to come back
// as a DecimalType (and its value as a BigDecimal) while the other fields keep their usual types.
test("Loading a JSON dataset floatAsBigDecimal returns schema with float types as BigDecimal") {
val jsonDF = sqlContext.read.option("floatAsBigDecimal", "true").json(primitiveFieldAndType)

// Expected inferred schema, listed in the same field order as the row asserted below.
val expectedSchema = StructType(
// 92233720368547758070 has 20 digits and no fractional part => DecimalType(20, 0).
StructField("bigInteger", DecimalType(20, 0), true) ::
StructField("boolean", BooleanType, true) ::
// 1.7976931348623157E308 has 17 significant digits and equals 17976931348623157 x 10^292,
// so its BigDecimal precision is 17 and its scale is -292 (a negative scale).
StructField("double", DecimalType(17, -292), true) ::
StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)

assert(expectedSchema === jsonDF.schema)

// Register the DataFrame under a temp table name so it can be queried through SQL below.
jsonDF.registerTempTable("jsonTable")

// The single expected row; the "double" column is compared as a BigDecimal.
checkAnswer(
sql("select * from jsonTable"),
Row(BigDecimal("92233720368547758070"),
true,
BigDecimal("1.7976931348623157E308"),
10,
21474836470L,
null,
"this is a simple string.")
)
}

test("Loading a JSON dataset from a text file with SQL") {
val dir = Utils.createTempDir()
dir.delete()
Expand Down

0 comments on commit 3a40c0e

Please sign in to comment.