Skip to content

Commit

Permalink
[SPARK-8248][SQL] string function: length
Browse files Browse the repository at this point in the history
Author: Cheng Hao <hao.cheng@intel.com>

Closes apache#6724 from chenghao-intel/length and squashes the following commits:

aaa3c31 [Cheng Hao] revert the additional change
97148a9 [Cheng Hao] remove the codegen testing temporally
ae08003 [Cheng Hao] update the comments
1eb1fd1 [Cheng Hao] simplify the code as commented
3e92d32 [Cheng Hao] use the selectExpr in unit test intead of SQLQuery
3c729aa [Cheng Hao] fix bug for constant null value in codegen
3641f06 [Cheng Hao] keep the length() method for registered function
8e30171 [Cheng Hao] update the code as comment
db604ae [Cheng Hao] Add code gen support
548d2ef [Cheng Hao] register the length()
09a0738 [Cheng Hao] add length support
  • Loading branch information
chenghao-intel authored and rxin committed Jun 11, 2015
1 parent 4e42842 commit 9fe3adc
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,10 @@ object FunctionRegistry {
expression[CreateArray]("array"),
expression[Coalesce]("coalesce"),
expression[Explode]("explode"),
expression[Lower]("lower"),
expression[Substring]("substr"),
expression[Substring]("substring"),
expression[Rand]("rand"),
expression[Randn]("randn"),
expression[CreateStruct]("struct"),
expression[Sqrt]("sqrt"),
expression[Upper]("upper"),

// Math functions
expression[Acos]("acos"),
Expand Down Expand Up @@ -132,7 +128,14 @@ object FunctionRegistry {
expression[Last]("last"),
expression[Max]("max"),
expression[Min]("min"),
expression[Sum]("sum")
expression[Sum]("sum"),

// string functions
expression[Lower]("lower"),
expression[StringLength]("length"),
expression[Substring]("substr"),
expression[Substring]("substring"),
expression[Upper]("upper")
)

val builtin: FunctionRegistry = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,9 @@ abstract class LeafExpression extends Expression with trees.LeafNode[Expression]
abstract class UnaryExpression extends Expression with trees.UnaryNode[Expression] {
self: Product =>

override def foldable: Boolean = child.foldable
override def nullable: Boolean = child.nullable

/**
* Called by unary expressions to generate a code block that returns null if its parent returns
* null, and if not not null, use `f` to generate the expression.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,3 +294,24 @@ object Substring {
apply(str, pos, Literal(Integer.MAX_VALUE))
}
}

/**
* A function that return the length of the given string expression.
*/
case class StringLength(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def expectedChildTypes: Seq[DataType] = Seq(StringType)

override def eval(input: Row): Any = {
val string = child.eval(input)
if (string == null) null else string.asInstanceOf[UTF8String].length
}

override def toString: String = s"length($child)"

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
defineCodeGen(ctx, ev, c => s"($c).length()")
}
}


Original file line number Diff line number Diff line change
Expand Up @@ -215,4 +215,16 @@ class StringFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
evaluate("abbbbc" rlike regEx, create_row("**"))
}
}

test("length for string") {
val regEx = 'a.string.at(0)
checkEvaluation(StringLength(Literal("abc")), 3, create_row("abdef"))
checkEvaluation(StringLength(regEx), 5, create_row("abdef"))
checkEvaluation(StringLength(regEx), 0, create_row(""))
checkEvaluation(StringLength(regEx), null, create_row(null))
// TODO currently bug in codegen, let's temporally disable this
// checkEvaluation(StringLength(Literal.create(null, StringType)), null, create_row("abdef"))
}


}
18 changes: 18 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import org.apache.spark.util.Utils
* @groupname normal_funcs Non-aggregate functions
* @groupname math_funcs Math functions
* @groupname window_funcs Window functions
* @groupname string_funcs String functions
* @groupname Ungrouped Support functions for DataFrames.
* @since 1.3.0
*/
Expand Down Expand Up @@ -1317,6 +1318,23 @@ object functions {
*/
def toRadians(columnName: String): Column = toRadians(Column(columnName))

//////////////////////////////////////////////////////////////////////////////////////////////
// String functions
//////////////////////////////////////////////////////////////////////////////////////////////

/**
* Computes the length of a given string value
* @group string_funcs
* @since 1.5.0
*/
def strlen(e: Column): Column = StringLength(e.expr)

/**
* Computes the length of a given string column
* @group string_funcs
* @since 1.5.0
*/
def strlen(columnName: String): Column = strlen(Column(columnName))

//////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,24 @@ class DataFrameFunctionsSuite extends QueryTest {
testData2.select(bitwiseNOT($"a")),
testData2.collect().toSeq.map(r => Row(~r.getInt(0))))
}

test("length") {
checkAnswer(
nullStrings.select(strlen($"s"), strlen("s")),
nullStrings.collect().toSeq.map { r =>
val v = r.getString(1)
val l = if (v == null) null else v.length
Row(l, l)
})

checkAnswer(
nullStrings.selectExpr("length(s)"),
nullStrings.collect().toSeq.map { r =>
val v = r.getString(1)
val l = if (v == null) null else v.length
Row(l)
})
}


}

0 comments on commit 9fe3adc

Please sign in to comment.