Skip to content

Commit

Permalink
finos#1353 improve in-memory typeahead's performance
Browse files Browse the repository at this point in the history
  • Loading branch information
junaidzm13 committed May 23, 2024
1 parent b41dc97 commit b647a3b
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,8 @@ package org.finos.vuu.core.table
import com.typesafe.scalalogging.StrictLogging

/**
 * Supplies the distinct values held in a table column, rendered as strings
 * (e.g. to back typeahead suggestions).
 */
trait ColumnValueProvider {

  // TODO: only the first 10 results are returned today, so callers cannot scroll through further values.
  //       Everything could be returned, leaving the UI to decide how many to display, but
  //       JSON-serialising a large dataset has a cost.
  // TODO: decide how nulls are handled for the different data types.
  // TODO: decide whether this should return null or rely on the JSON deserialiser's rules.

  /** Distinct values of the column named `columnName`. */
  def getUniqueValues(columnName: String): Array[String]

  /** Distinct values of the column named `columnName` that start with `starts`. */
  def getUniqueValuesStartingWith(columnName: String, starts: String): Array[String]

}

class EmptyColumnValueProvider extends ColumnValueProvider {
Expand All @@ -28,41 +21,38 @@ object InMemColumnValueProvider {
}
}
}

/**
 * [[ColumnValueProvider]] backed by an [[InMemDataTable]].
 *
 * Rows are scanned lazily through an iterator, so the scan stops as soon as enough distinct
 * values have been collected. Values are returned in row-iteration order (they are not sorted)
 * and at most 10 are returned per call.
 */
class InMemColumnValueProvider(dataTable: InMemDataTable) extends ColumnValueProvider with StrictLogging {
  private val get10DistinctValues = DistinctValuesGetter(10)

  /** Up to 10 distinct values of `columnName`; empty (and an error is logged) if the column is unknown. */
  override def getUniqueValues(columnName: String): Array[String] =
    withColumn(columnName)(column => get10DistinctValues(column))

  /**
   * Up to 10 distinct values of `columnName` that start with `starts` (case-sensitive);
   * empty (and an error is logged) if the column is unknown.
   */
  override def getUniqueValuesStartingWith(columnName: String, starts: String): Array[String] =
    withColumn(columnName)(column => get10DistinctValues(column, _.startsWith(starts)))

  /** Resolves `columnName` on the table and applies `collect` to it, or logs and returns an empty array. */
  private def withColumn(columnName: String)(collect: Column => Array[String]): Array[String] =
    Option(dataTable.columnForName(columnName)) match {
      case Some(column) => collect(column)
      case None =>
        logger.error(s"Column $columnName not found in table ${dataTable.name}")
        Array.empty[String]
    }

  /** Collects at most `n` distinct, non-null column values as strings. */
  private case class DistinctValuesGetter(n: Int) {

    def apply(c: Column, filter: String => Boolean = _ => true): Array[String] =
      getDistinctValues(c, filter).take(n).toArray

    private def getDistinctValues(c: Column, filter: String => Boolean): Iterator[String] =
      dataTable.primaryKeys
        .iterator
        .flatMap(key => valueToString(dataTable.pullRow(key).get(c)))
        .filter(filter)
        // De-duplicate last: the set kept by `distinct` then only holds matching strings, and
        // two raw values that render to the same string cannot both be returned.
        .distinct

    // Null cell values are dropped rather than rendered as "null".
    private def valueToString(value: Any): Option[String] = Option(value).map(_.toString)
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import org.finos.toolbox.jmx.{MetricsProvider, MetricsProviderImpl}
import org.finos.toolbox.lifecycle.LifecycleContainer
import org.finos.toolbox.time.{Clock, TestFriendlyClock}
import org.finos.vuu.api.TableDef
import org.finos.vuu.core.table.InMemColumnValueProviderTest.randomRic
import org.finos.vuu.provider.{JoinTableProviderImpl, MockProvider}
import org.scalatest.featurespec.AnyFeatureSpec
import org.scalatest.matchers.should.Matchers

import java.security.SecureRandom

class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers {

implicit val clock: Clock = new TestFriendlyClock(10001L)
Expand All @@ -16,45 +19,72 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers {

// Keyed by a synthetic "id" rather than "ric", so that several rows can carry the same ric value.
private val pricesDef: TableDef = TableDef(
  "prices",
  "id",
  Columns.fromNames("id:Long", "ric:String", "bid:Double", "ask:Double"),
)

Feature("InMemColumnValueProvider") {

  Scenario("Get all unique value of a given column") {
    val table = givenTable(pricesDef)
    val provider = new MockProvider(table)
    val columnValueProvider = new InMemColumnValueProvider(table)

    // Rows 1 and 3 share the same ric, so only two distinct values are expected.
    provider.tick("1", Map("id" -> "1", "ric" -> "VOD.L", "bid" -> 220, "ask" -> 223))
    provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
    provider.tick("3", Map("id" -> "3", "ric" -> "VOD.L", "bid" -> 240, "ask" -> 244))

    val uniqueValues = columnValueProvider.getUniqueValues("ric")

    // Compared as a set: the order of the returned values is not asserted.
    uniqueValues.toSet shouldBe Set("BT.L", "VOD.L")
  }

  Scenario("Get all unique value of a given column that starts with specified string") {
    val table = givenTable(pricesDef)
    val provider = new MockProvider(table)
    val columnValueProvider = new InMemColumnValueProvider(table)

    provider.tick("1", Map("id" -> "1", "ric" -> "VOA.L", "bid" -> 220, "ask" -> 223))
    provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
    provider.tick("3", Map("id" -> "3", "ric" -> "VOV.L", "bid" -> 240, "ask" -> 244))

    val uniqueValues = columnValueProvider.getUniqueValuesStartingWith("ric", "VO")

    // Compared as a set: the order of the returned values is not asserted.
    uniqueValues.toSet shouldBe Set("VOA.L", "VOV.L")
  }

  ignore("Performance test with 1 million rows") {
    val table = givenTable(pricesDef)
    val provider = new MockProvider(table)
    val columnValueProvider = new InMemColumnValueProvider(table)

    (1 to 1_000_000).foreach { id =>
      provider.tick(id.toString, Map("id" -> id, "ric" -> randomRic, "bid" -> 220, "ask" -> 223))
    }

    // nanoTime is monotonic, so the measurement is unaffected by wall-clock adjustments.
    val startNanos = System.nanoTime()
    val values = columnValueProvider.getUniqueValuesStartingWith("ric", "A")
    val timeTakenMs = (System.nanoTime() - startNanos) / 1_000_000L

    println(s"time-taken: $timeTakenMs | values: ${values.mkString("Array(", ", ", ")")}")

    timeTakenMs should be < 20L
  }

  // TODO: matching on the "starts with" string should not be case-sensitive.
}

/** Builds a new in-memory table for `tableDef`, backed by a freshly created join table provider. */
private def givenTable(tableDef: TableDef): InMemDataTable = {
  val joinProvider = JoinTableProviderImpl()
  new InMemDataTable(tableDef, joinProvider)
}
}

/** Test-data helpers: generates random RIC-like strings of the form `AAA.BB` (upper-case letters). */
private object InMemColumnValueProviderTest {
  // IndexedSeq rather than List: letters are picked by index, which is O(n) on a List.
  private val alphabets: IndexedSeq[String] = ('A' to 'Z').map(_.toString)

  // The default SecureRandom is sufficient for test data; getInstanceStrong may select a
  // blocking entropy source and stall the test run.
  private val secureRandom = new SecureRandom()

  private def randomAlphabet: String = alphabets(secureRandom.nextInt(alphabets.length))

  /** `count` random upper-case letters concatenated together. */
  private def randomLetters(count: Int): String = Iterator.fill(count)(randomAlphabet).mkString

  /** Three random letters, a dot, then two random letters. */
  private def randomRic: String = s"${randomLetters(3)}.${randomLetters(2)}"
}

0 comments on commit b647a3b

Please sign in to comment.