From b647a3ba613162decec5bf1b6fd24638e4fa5b97 Mon Sep 17 00:00:00 2001
From: "Malik, Junaid"
Date: Wed, 22 May 2024 10:43:08 +0800
Subject: [PATCH] #1353 improve in-memory typeahead's performance
---
.../vuu/core/table/ColumnValueProvider.scala | 54 +++++++---------
.../table/InMemColumnValueProviderTest.scala | 64 ++++++++++++++-----
2 files changed, 69 insertions(+), 49 deletions(-)
diff --git a/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala b/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala
index e57db5b6a..c23808d08 100644
--- a/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala
+++ b/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala
@@ -3,15 +3,8 @@ package org.finos.vuu.core.table
import com.typesafe.scalalogging.StrictLogging
trait ColumnValueProvider {
-
- //todo currently only returns first 10 results.. so can't scrolling through values
- //could return everything let ui decide how many results to display but there is cost to the json serialisig for large dataset
- //todo how to handle nulls - for different data types
- //todo should this be returning null or rely on json deserialiser rules?
-
def getUniqueValues(columnName:String):Array[String]
def getUniqueValuesStartingWith(columnName:String, starts: String):Array[String]
-
}
class EmptyColumnValueProvider extends ColumnValueProvider {
@@ -28,41 +21,38 @@ object InMemColumnValueProvider {
}
}
}
+
class InMemColumnValueProvider(dataTable: InMemDataTable) extends ColumnValueProvider with StrictLogging {
+ private val get10DistinctValues = DistinctValuesGetter(10) // every lookup below is capped at 10 values
override def getUniqueValues(columnName: String): Array[String] =
dataTable.columnForName(columnName) match {
- case c: Column =>
- dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).toArray.sorted.take(10)
- case null =>
- logger.error(s"Column $columnName not found in table ${dataTable.name}")
- Array()
+ case c: Column => get10DistinctValues(c) // no filter: the first 10 distinct values encountered
+ case null => logger.error(s"Column $columnName not found in table ${dataTable.name}"); Array.empty
}
override def getUniqueValuesStartingWith(columnName: String, starts: String): Array[String] =
dataTable.columnForName(columnName) match {
- case c: Column =>
- dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).filter(_.startsWith(starts)).toArray.sorted.take(10)
- case null =>
- logger.error(s"Column $columnName not found in table ${dataTable.name}")
- Array()
+ case c: Column => get10DistinctValues(c, _.startsWith(starts)) // case-sensitive prefix match
+ case null => logger.error(s"Column $columnName not found in table ${dataTable.name}"); Array.empty
}
- private def addUnique(dt: DataTable, c: Column, set: Set[String], key: String): Set[String] = {
- val row = dt.pullRow(key)
- row.get(c) match {
- case null =>
- Set()
- case x: String =>
- set.+(x)
- case x: Long =>
- set.+(x.toString)
- case x: Double =>
- set.+(x.toString)
- case x: Int =>
- set.+(x.toString)
- case x =>
- set.+(x.toString)
+ /** Collects up to `n` distinct, non-null values of a column, rendered as strings, by iterating `dataTable.primaryKeys`. */
+ private case class DistinctValuesGetter(n: Int) {
+ private type Filter = String => Boolean
+ /** Returns at most `n` distinct values of `c` accepted by `filter`; the iterator is lazy, so the scan stops once `n` are found. */
+ def apply(c: Column, filter: Filter = _ => true): Array[String] = getDistinctValues(c, filter).take(n).toArray
+ // Convert and filter first, then de-duplicate on the strings: equal-but-differently-rendered cells cannot hide a match.
+ private def getDistinctValues(c: Column, filter: Filter): Iterator[String] = {
+ dataTable.primaryKeys
+ .iterator
+ .map(dataTable.pullRow(_).get(c))
+ .flatMap(valueToString)
+ .filter(filter)
+ .distinct
}
+ // Null cells become None and are dropped by the flatMap above; any other value is rendered with toString.
+ private def valueToString(value: Any): Option[String] = Option(value).map(_.toString)
}
+
}
\ No newline at end of file
diff --git a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala
index 9b5f7522c..0f93e13e0 100644
--- a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala
+++ b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala
@@ -4,10 +4,13 @@ import org.finos.toolbox.jmx.{MetricsProvider, MetricsProviderImpl}
import org.finos.toolbox.lifecycle.LifecycleContainer
import org.finos.toolbox.time.{Clock, TestFriendlyClock}
import org.finos.vuu.api.TableDef
+import org.finos.vuu.core.table.InMemColumnValueProviderTest.randomRic
import org.finos.vuu.provider.{JoinTableProviderImpl, MockProvider}
import org.scalatest.featurespec.AnyFeatureSpec
import org.scalatest.matchers.should.Matchers
+import java.security.SecureRandom
+
class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers {
implicit val clock: Clock = new TestFriendlyClock(10001L)
@@ -16,45 +19,72 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers {
private val pricesDef: TableDef = TableDef(
"prices",
- "ric",
- Columns.fromNames("ric:String", "bid:Double", "ask:Double"),
+ "id",
+ Columns.fromNames("id:Long", "ric:String", "bid:Double", "ask:Double"),
)
Feature("InMemColumnValueProvider") {
Scenario("Get all unique value of a given column") {
-
- val joinProvider = JoinTableProviderImpl()
- val table = new InMemDataTable(pricesDef, joinProvider)
+ val table = givenTable(pricesDef)
val provider = new MockProvider(table)
val columnValueProvider = new InMemColumnValueProvider(table)
- provider.tick("VOD.L", Map("ric" -> "VOD.L", "bid" -> 220, "ask" -> 223))
- provider.tick("BT.L", Map("ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
- provider.tick("VOD.L", Map("ric" -> "VOD.L", "bid" -> 240, "ask" -> 244))
+ provider.tick("1", Map("id" -> "1", "ric" -> "VOD.L", "bid" -> 220, "ask" -> 223))
+ provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
+ provider.tick("3", Map("id" -> "3", "ric" -> "VOD.L", "bid" -> 240, "ask" -> 244)) // same ric as row 1: must be de-duplicated
val uniqueValues = columnValueProvider.getUniqueValues("ric")
- uniqueValues shouldBe Array("BT.L", "VOD.L")
+ uniqueValues.toSet shouldBe Set("BT.L", "VOD.L") // compared as a set: the provider no longer sorts its result
}
-
Scenario("Get all unique value of a given column that starts with specified string") {
-
- val joinProvider = JoinTableProviderImpl()
- val table = new InMemDataTable(pricesDef, joinProvider)
+ val table = givenTable(pricesDef)
val provider = new MockProvider(table)
val columnValueProvider = new InMemColumnValueProvider(table)
- provider.tick("VOA.L", Map("ric" -> "VOA.L", "bid" -> 220, "ask" -> 223))
- provider.tick("BT.L", Map("ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
- provider.tick("VOV.L", Map("ric" -> "VOV.L", "bid" -> 240, "ask" -> 244))
+ provider.tick("1", Map("id" -> "1", "ric" -> "VOA.L", "bid" -> 220, "ask" -> 223))
+ provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550)) // does not start with "VO": must be excluded
+ provider.tick("3", Map("id" -> "3", "ric" -> "VOV.L", "bid" -> 240, "ask" -> 244))
val uniqueValues = columnValueProvider.getUniqueValuesStartingWith("ric", "VO")
- uniqueValues shouldBe Array("VOA.L", "VOV.L")
+ uniqueValues.toSet shouldBe Set("VOA.L", "VOV.L") // compared as a set: the provider no longer sorts its result
+ }
+
+ ignore("Performance test with 1 million rows") {
+ val table = givenTable(pricesDef)
+ val provider = new MockProvider(table)
+ val columnValueProvider = new InMemColumnValueProvider(table)
+ // Populate one row per id, each carrying a randomly generated ric.
+ Range.inclusive(1, 1_000_000).foreach(id => {
+ provider.tick(id.toString, Map("id" -> id, "ric" -> randomRic, "bid" -> 220, "ask" -> 223))
+ })
+ // Time only the lookup, on the monotonic clock so wall-clock adjustments cannot skew the measurement.
+ val startTime = System.nanoTime()
+ val values = columnValueProvider.getUniqueValuesStartingWith("ric", "A")
+ val endTime = System.nanoTime()
+ val timeTakenMs = (endTime - startTime) / 1_000_000L
+
+ println(s"time-taken: $timeTakenMs | values: ${values.mkString("Array(", ", ", ")")}")
+
+ timeTakenMs should be < 20L
+ }
//todo match for start with string should not be case sensitive
}
+
+ private def givenTable(tableDef: TableDef): InMemDataTable = new InMemDataTable(tableDef, JoinTableProviderImpl()) // builds a new table (and join provider) on every call
+}
+
+private object InMemColumnValueProviderTest { // random test-data helpers for the performance scenario
+ private val alphabets = List("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z")
+ private val secureRandom = new SecureRandom() // default instance: getInstanceStrong may pick a blocking entropy source and stall the data setup
+ private def randomAlphabet: String = alphabets(secureRandom.nextInt(alphabets.length))
+ private def randomRic: String = { // three random letters, a dot, then two random letters, e.g. "ABC.DE"
+ val ricPrefix = Range.inclusive(1, 3).map(_ => randomAlphabet).mkString("")
+ val ricPostfix = Range.inclusive(1, 2).map(_ => randomAlphabet).mkString("")
+ s"$ricPrefix.$ricPostfix"
+ }
}