diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100755
index 0000000..e0f497c
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Benchmark driver: sweeps Spark executor/core settings for the
+# LargeLSH classes. Individual run failures are tolerated so the
+# whole sweep always completes.
+
+set -x
+set -u -o pipefail
+
+readonly JAR=target/scala-2.11/LargeLSH-assembly-0.1.0-SNAPSHOT.jar
+readonly LOG4J="spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j-spark.properties"
+
+# submit CLASS NUM_EXECUTORS EXECUTOR_CORES [extra args...]
+# Times one spark-submit run; '|| true' keeps the sweep going on failure.
+submit() {
+  local class=$1 executors=$2 cores=$3
+  shift 3
+  /usr/bin/time spark-submit \
+    --num-executors "$executors" \
+    --executor-cores "$cores" \
+    --executor-memory 4g \
+    --conf "$LOG4J" \
+    --class "largelsh.$class" \
+    "$JAR" --dataset svhn "$@" || true
+}
+
+for class in RandomProjection RandomProjectionWithDistance; do
+  for i in {2..4}; do
+    for j in {1..4}; do
+      submit "$class" "$i" "$j" --k 3 --m 10
+      submit "$class" "$i" "$j" --k 5 --m 20
+      submit "$class" "$i" "$j" --k 7 --m 25
+      submit "$class" "$i" "$j" --k 5 --m 10
+    done
+  done
+done
+
+# (num-executors executor-cores) pairs for the SparkLSH evaluation runs.
+combos=(
+  "2 1" "2 2" "2 3" "2 4" "2 5"
+  "3 1" "3 2" "3 3" "3 4" "3 5"
+  "4 1" "4 2" "4 3" "4 4"
+  "5 3"
+)
+for combo in "${combos[@]}"; do
+  read -r i j <<<"$combo"
+  submit SparkLSH "$i" "$j" --mode eval
+done
diff --git a/src/main/scala/largelsh/SparkLSHSift.scala b/src/main/scala/largelsh/SparkLSHSift.scala
index 452793f..90bde0c 100644
--- a/src/main/scala/largelsh/SparkLSHSift.scala
+++ b/src/main/scala/largelsh/SparkLSHSift.scala
@@ -105,7 +105,7 @@ object SparkLSHSift {
     val pre_gt = prediction.join(groundtruth_id, "testID")
 
     val res = pre_gt.map{
-      case Row(testID: Int, pred: Array[Int], gts: Array[Int]) =>
+      case Row(testID: Int, pred: collection.mutable.WrappedArray[Int], gts: collection.mutable.WrappedArray[Int]) =>
         (gts intersect pred).size
     }.reduce(_+_)
 