From b647a3ba613162decec5bf1b6fd24638e4fa5b97 Mon Sep 17 00:00:00 2001
From: "Malik, Junaid"
Date: Wed, 22 May 2024 10:43:08 +0800
Subject: [PATCH] #1353 improve in-memory typeahead's performance

Replace the eager fold that built a Set[String] over every primary key
(and then sorted and truncated it) with a lazy iterator pipeline that
stops once 10 distinct values have been produced.

Behaviour changes visible in this diff:
- results follow primary-key iteration order and are no longer sorted;
  the tests now compare the results as sets
- null cells are skipped; the removed addUnique returned an empty Set
  for a null cell, discarding the values accumulated so far
- values are de-duplicated before they are converted to String
---
Reviewer notes:
- The test data generator uses `new SecureRandom()` instead of
  `SecureRandom.getInstanceStrong`. The strong instance may block while
  waiting for entropy on some platforms, and the (ignored) performance
  test draws five random letters for each of its 1,000,000 rows.
- The redundant trailing `;` after `Array.empty` is dropped.
- Both edits keep the per-hunk line counts and the diffstat unchanged.
- Indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; verify them against the
  target files before applying.

 .../vuu/core/table/ColumnValueProvider.scala  | 54 +++++++---------
 .../table/InMemColumnValueProviderTest.scala  | 64 ++++++++++++++-----
 2 files changed, 69 insertions(+), 49 deletions(-)

diff --git a/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala b/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala
index e57db5b6a..c23808d08 100644
--- a/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala
+++ b/vuu/src/main/scala/org/finos/vuu/core/table/ColumnValueProvider.scala
@@ -3,15 +3,8 @@ package org.finos.vuu.core.table
 import com.typesafe.scalalogging.StrictLogging
 
 trait ColumnValueProvider {
-
-  //todo currently only returns first 10 results.. so can't scrolling through values
-  //could return everything let ui decide how many results to display but there is cost to the json serialisig for large dataset
-  //todo how to handle nulls - for different data types
-  //todo should this be returning null or rely on json deserialiser rules?
-
   def getUniqueValues(columnName:String):Array[String]
   def getUniqueValuesStartingWith(columnName:String, starts: String):Array[String]
-
 }
 
 class EmptyColumnValueProvider extends ColumnValueProvider {
@@ -28,41 +21,38 @@ object InMemColumnValueProvider {
     }
   }
 }
+
 class InMemColumnValueProvider(dataTable: InMemDataTable) extends ColumnValueProvider with StrictLogging {
+  private val get10DistinctValues = DistinctValuesGetter(10)
 
   override def getUniqueValues(columnName: String): Array[String] =
     dataTable.columnForName(columnName) match {
-      case c: Column =>
-        dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).toArray.sorted.take(10)
-      case null =>
-        logger.error(s"Column $columnName not found in table ${dataTable.name}")
-        Array()
+      case c: Column => get10DistinctValues(c)
+      case null => logger.error(s"Column $columnName not found in table ${dataTable.name}"); Array.empty
     }
 
   override def getUniqueValuesStartingWith(columnName: String, starts: String): Array[String] =
     dataTable.columnForName(columnName) match {
-      case c: Column =>
-        dataTable.primaryKeys.foldLeft(Set[String]())(addUnique(dataTable, c, _, _)).filter(_.startsWith(starts)).toArray.sorted.take(10)
-      case null =>
-        logger.error(s"Column $columnName not found in table ${dataTable.name}")
-        Array()
+      case c: Column => get10DistinctValues(c, _.startsWith(starts))
+      case null => logger.error(s"Column $columnName not found in table ${dataTable.name}"); Array.empty
     }
 
-  private def addUnique(dt: DataTable, c: Column, set: Set[String], key: String): Set[String] = {
-    val row = dt.pullRow(key)
-    row.get(c) match {
-      case null =>
-        Set()
-      case x: String =>
-        set.+(x)
-      case x: Long =>
-        set.+(x.toString)
-      case x: Double =>
-        set.+(x.toString)
-      case x: Int =>
-        set.+(x.toString)
-      case x =>
-        set.+(x.toString)
+
+  private case class DistinctValuesGetter(n: Int) {
+    private type Filter = String => Boolean
+
+    def apply(c: Column, filter: Filter = _ => true): Array[String] = getDistinctValues(c, filter).take(n).toArray
+
+    private def getDistinctValues(c: Column, filter: Filter): Iterator[String] = {
+      dataTable.primaryKeys
+        .iterator
+        .map(dataTable.pullRow(_).get(c))
+        .distinct
+        .flatMap(valueToString)
+        .filter(filter)
     }
+
+    private def valueToString(value: Any): Option[String] = Option(value).map(_.toString)
   }
+
 }
\ No newline at end of file
diff --git a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala
index 9b5f7522c..0f93e13e0 100644
--- a/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala
+++ b/vuu/src/test/scala/org/finos/vuu/core/table/InMemColumnValueProviderTest.scala
@@ -4,10 +4,13 @@ import org.finos.toolbox.jmx.{MetricsProvider, MetricsProviderImpl}
 import org.finos.toolbox.lifecycle.LifecycleContainer
 import org.finos.toolbox.time.{Clock, TestFriendlyClock}
 import org.finos.vuu.api.TableDef
+import org.finos.vuu.core.table.InMemColumnValueProviderTest.randomRic
 import org.finos.vuu.provider.{JoinTableProviderImpl, MockProvider}
 import org.scalatest.featurespec.AnyFeatureSpec
 import org.scalatest.matchers.should.Matchers
 
+import java.security.SecureRandom
+
 class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers {
 
   implicit val clock: Clock = new TestFriendlyClock(10001L)
@@ -16,45 +19,72 @@ class InMemColumnValueProviderTest extends AnyFeatureSpec with Matchers {
 
   private val pricesDef: TableDef = TableDef(
     "prices",
-    "ric",
-    Columns.fromNames("ric:String", "bid:Double", "ask:Double"),
+    "id",
+    Columns.fromNames("id:Long", "ric:String", "bid:Double", "ask:Double"),
   )
 
   Feature("InMemColumnValueProvider") {
 
     Scenario("Get all unique value of a given column") {
-
-      val joinProvider = JoinTableProviderImpl()
-      val table = new InMemDataTable(pricesDef, joinProvider)
+      val table = givenTable(pricesDef)
       val provider = new MockProvider(table)
       val columnValueProvider = new InMemColumnValueProvider(table)
 
-      provider.tick("VOD.L", Map("ric" -> "VOD.L", "bid" -> 220, "ask" -> 223))
-      provider.tick("BT.L", Map("ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
-      provider.tick("VOD.L", Map("ric" -> "VOD.L", "bid" -> 240, "ask" -> 244))
+      provider.tick("1", Map("id" -> "1", "ric" -> "VOD.L", "bid" -> 220, "ask" -> 223))
+      provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
+      provider.tick("3", Map("id" -> "3", "ric" -> "VOD.L", "bid" -> 240, "ask" -> 244))
 
       val uniqueValues = columnValueProvider.getUniqueValues("ric")
 
-      uniqueValues shouldBe Array("BT.L", "VOD.L")
+      uniqueValues.toSet shouldBe Set("BT.L", "VOD.L")
     }
-
 
     Scenario("Get all unique value of a given column that starts with specified string") {
-
-      val joinProvider = JoinTableProviderImpl()
-      val table = new InMemDataTable(pricesDef, joinProvider)
+      val table = givenTable(pricesDef)
       val provider = new MockProvider(table)
       val columnValueProvider = new InMemColumnValueProvider(table)
 
-      provider.tick("VOA.L", Map("ric" -> "VOA.L", "bid" -> 220, "ask" -> 223))
-      provider.tick("BT.L", Map("ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
-      provider.tick("VOV.L", Map("ric" -> "VOV.L", "bid" -> 240, "ask" -> 244))
+      provider.tick("1", Map("id" -> "1", "ric" -> "VOA.L", "bid" -> 220, "ask" -> 223))
+      provider.tick("2", Map("id" -> "2", "ric" -> "BT.L", "bid" -> 500, "ask" -> 550))
+      provider.tick("3", Map("id" -> "3", "ric" -> "VOV.L", "bid" -> 240, "ask" -> 244))
 
       val uniqueValues = columnValueProvider.getUniqueValuesStartingWith("ric", "VO")
 
-      uniqueValues shouldBe Array("VOA.L", "VOV.L")
+      uniqueValues.toSet shouldBe Set("VOA.L", "VOV.L")
+    }
+
+    ignore("Performance test with 1 million rows") {
+      val table = givenTable(pricesDef)
+      val provider = new MockProvider(table)
+      val columnValueProvider = new InMemColumnValueProvider(table)
+
+      Range.inclusive(1, 1_000_000).foreach(id => {
+        provider.tick(id.toString, Map("id" -> id, "ric" -> randomRic, "bid" -> 220, "ask" -> 223))
+      })
+
+      val startTime = System.currentTimeMillis()
+      val values = columnValueProvider.getUniqueValuesStartingWith("ric", "A")
+      val endTime = System.currentTimeMillis()
+      val timeTakenMs = endTime - startTime
+
+      println(s"time-taken: $timeTakenMs | values: ${values.mkString("Array(", ", ", ")")}")
+
+      timeTakenMs should be < 20L
     }
 
     //todo match for start with string should not be case sensitive
   }
+
+  private def givenTable(tableDef: TableDef): InMemDataTable = new InMemDataTable(tableDef, JoinTableProviderImpl())
+}
+
+private object InMemColumnValueProviderTest {
+  private val alphabets = List("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z")
+  private val secureRandom = new SecureRandom()
+  private def randomAlphabet: String = alphabets(secureRandom.nextInt(alphabets.length))
+  private def randomRic: String = {
+    val ricPrefix = Range.inclusive(1, 3).map(_ => randomAlphabet).mkString("")
+    val ricPostfix = Range.inclusive(1, 2).map(_ => randomAlphabet).mkString("")
+    s"$ricPrefix.$ricPostfix"
+  }
 }