From 2f53421a86386b9c460170b4bf9eb1c3a569441e Mon Sep 17 00:00:00 2001
From: vibhaa
Date: Sun, 13 Dec 2015 00:35:56 -0500
Subject: [PATCH] automation of data collection in progress

---
 code/AssymetricHashTableSimulation.java |  87 ++++++++++++---
 code/EvictingHashTableSimulation.java   |  37 +++++-
 code/FlowWithCount.java                 |   7 ++
 code/Sketch.java                        |  44 +++++---
 code/SmartEvictionHashTable.java        | 142 ++++++++++++++++++++++++
 5 files changed, 282 insertions(+), 35 deletions(-)
 create mode 100644 code/SmartEvictionHashTable.java

diff --git a/code/AssymetricHashTableSimulation.java b/code/AssymetricHashTableSimulation.java
index c21e8eb..a680a05 100644
--- a/code/AssymetricHashTableSimulation.java
+++ b/code/AssymetricHashTableSimulation.java
@@ -5,8 +5,11 @@ public static void main(String[] args){
 		int numberOfTrials = Integer.parseInt(args[0]);
 		int numberOfFlows = Integer.parseInt(args[1]);
 		int tableSize = Integer.parseInt(args[2]);
+		double threshold = Double.parseDouble(args[3]);
 		FlowWithCount[] buckets = new FlowWithCount[tableSize];
-		int lostPacketCount = 0;
+		int droppedPacketInfoCount = 0;
+		int cumDroppedPacketInfoCount = 0;
+		int totalNumberOfPackets = 0;
 		int D = 2;

 		// hardcoded values for the hash functions given that the number of flows is 100
@@ -14,10 +17,27 @@ public static void main(String[] args){
 		final int hashA[] = {421, 149};
 		final int hashB[] = {73, 109};

+		// create a set of lost packets which consists of i lost packets of flow i
+		ArrayList<Integer> packets = new ArrayList<Integer>();
+		// add i packets of flowid i
+		for (int i = 1; i <= numberOfFlows; i++)
+			for (int j = 0; j < i; j++)
+				packets.add(i);
+
+		// ideally the big losers should be the highest flow ids, down to where the loss falls below the threshold
+		HashSet<Integer> expectedLossyFlows = new HashSet<Integer>();
+		for (int i = numberOfFlows; i >= 0; i--){
+			// we know that there are i lost packets of flow i
+			if (i > (int) Math.floor(threshold*packets.size())) {
+				expectedLossyFlows.add(i);
+			}
+		}
+		//System.out.println("expected lossy flow size " + expectedLossyFlows.size());
+
 		// array that counts the number of ith packets lost across the trials
-		int flowsLostAtIndex[] = new int[numberOfFlows];
-		double observedProbFlowLostAtIndex[] = new double[numberOfFlows];
-		double expectedProbFlowLostAtIndex[] = new double[numberOfFlows];
+		int packetsInfoDroppedAtFlow[] = new int[numberOfFlows];
+		double observedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];
+		double expectedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];

 		// initialize all the flow tracking buckets to flows with id 0 and count 0
 		buckets = new FlowWithCount[tableSize];
@@ -25,16 +45,22 @@ public static void main(String[] args){
 			buckets[j] = new FlowWithCount(0, 0);
 		}

-		List packets = new ArrayList<>();
-		// add i packets of flowid i
-		for (int i = 1; i <= numberOfFlows; i++)
-			for (int j = 0; j < i; j++)
-				packets.add(i);
-
+		// Across many trials: count the total number of packets lost, tracking the flows they belong to in a d-left hash table.
+		// At the end of the hashing procedure, look through all the tracked flows and see which of them are the big losers.
+		// Compare this against the expected big losers and see if the two answers disagree on which flows are the big
+		// losers; if they do, find how much of the loss went unreported as a fraction of the total loss.
+		double cumErrorMargin = 0;
+		int errorInBinaryAnswer = 0;
 		for (int i = 0; i < numberOfTrials; i++){
 			Collections.shuffle(packets);
-			//lostFlowCount = 0;
+			FlowWithCount.reset(buckets);
+			droppedPacketInfoCount = 0;
+			totalNumberOfPackets = 0; // needed for the denominator to compute the threshold for the loss count
+
+			// data plane operation - as lost packets flow, hash them using d-left hashing to track the lossy flows
 			for (int j = 0; j < packets.size(); j++){
+				totalNumberOfPackets++;
+
 				/* uniform hashing into a chunk N/d and then dependent picking of the choice*/
 				int k = 0;
 				for (k = 0; k < D; k++){
@@ -56,12 +82,14 @@ public static void main(String[] args){

 				// none of the D locations were free
 				if (k == D) {
-					flowsLostAtIndex[packets.get(j) - 1]++;
-					lostPacketCount++;
+					packetsInfoDroppedAtFlow[packets.get(j) - 1]++;
+					droppedPacketInfoCount++;
 				}
 			}

-			/* print out the status of the hashtable - the buckets and the counts*/
+			cumDroppedPacketInfoCount += droppedPacketInfoCount;
+
+			/* print out the status of the hashtable - the buckets and the counts
 			int nonzero = 0;
 			for (int j = 0; j < tableSize; j++){
 				if (buckets[j].flowid != 0){
@@ -70,14 +98,41 @@ public static void main(String[] args){
 				}
 			}
 			System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
-			//System.out.println(lostPacketCount);
+
+			System.out.println(lostPacketCount);*/
+
+			// controller operation at regular intervals -
+			// go through all the entries in the hash table and check if any of them are above the loss threshold (a fraction of the total loss count)
+			HashSet<Integer> observedLossyFlows = new HashSet<Integer>();
+			for (FlowWithCount f : buckets){
+				if (f.count > threshold*totalNumberOfPackets)
+					observedLossyFlows.add(f.flowid);
+			}
+
+			// compare observed and expected lossy flows and compute the probability of error
+			int bigLoserPacketsLost = 0;
+			int flag = 0;
+			for (Integer flowid : expectedLossyFlows){
+				if (!observedLossyFlows.contains(flowid)){
+					if (flag != 1){
+						errorInBinaryAnswer++; // even one point of difference is an error in the binary yes/no answer about which flows contributed to the loss
+						flag = 1;
+					}
+					bigLoserPacketsLost += flowid; // as many packets as the flowid have been lost from the information gathered
+				}
+			}
+			double errorMargin = bigLoserPacketsLost/(double) totalNumberOfPackets;
+			cumErrorMargin += errorMargin;
 		}

 		/* compare the probabilities of losing the ith packet against the recursive formula */
 		/*for (int i = 0; i < numberOfPackets; i++){
 			observedProbFlowLostAtIndex[i] = (double) flowsLostAtIndex[i]/numberOfTrials;
 			System.out.println(observedProbFlowLostAtIndex[i]);
 		}*/
-		System.out.println(lostPacketCount/(double) numberOfTrials);
+		// output: chance of an error in the binary answer, average error margin, average dropped packet info, table size
+		System.out.print(errorInBinaryAnswer/(double) numberOfTrials + "," + cumErrorMargin/numberOfTrials + ",");
+		System.out.println(cumDroppedPacketInfoCount/(double) numberOfTrials + "," + tableSize);
 	}
 }
\ No newline at end of file
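For reference, the d-left insertion step that this simulation performs per lost packet can be isolated into a small helper. A minimal sketch assuming the repo's FlowWithCount class; the helper class and method names are illustrative, not part of the patch:

    // Minimal sketch of the d-left insertion used by the simulations (illustrative
    // helper, not part of the patch). The table is split into D chunks of tableSize/D
    // buckets; hash function k maps a flow into chunk k, and the first bucket that
    // already tracks the flow or is still free wins. Returns false when all D
    // candidates are taken, i.e. the packet's loss information is dropped.
    class DLeftInsertExample {
        static final int P = 1019;               // prime modulus, as in the patch
        static final int[] HASH_A = {421, 149};  // one (a, b) pair per chunk
        static final int[] HASH_B = {73, 109};

        static boolean insert(FlowWithCount[] buckets, int flowid, int tableSize, int D) {
            for (int k = 0; k < D; k++) {
                int index = ((HASH_A[k]*flowid + HASH_B[k]) % P) % (tableSize/D) + (k*tableSize/D);
                if (buckets[index].flowid == flowid) { buckets[index].count++; return true; } // seen before
                if (buckets[index].flowid == 0) {                                             // free bucket
                    buckets[index].flowid = flowid;
                    buckets[index].count = 1;
                    return true;
                }
            }
            return false; // none of the D locations were free
        }
    }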
diff --git a/code/EvictingHashTableSimulation.java b/code/EvictingHashTableSimulation.java
index 446f7cd..2d9e163 100644
--- a/code/EvictingHashTableSimulation.java
+++ b/code/EvictingHashTableSimulation.java
@@ -9,6 +9,9 @@ public static void main(String[] args){
 		int lostPacketCount = 0;
 		int D = 2;

+		/* sketch that maintains the loss of each flow */
+		Sketch countMinSketch = new Sketch(100, 3, numberOfFlows);
+
 		// hardcoded values for the hash functions given that the number of flows is 100
 		final int P = 1019;
 		final int hashA[] = {421, 149};
 		final int hashB[] = {73, 109};
@@ -34,11 +37,19 @@ public static void main(String[] args){
 		for (int i = 0; i < numberOfTrials; i++){
 			Collections.shuffle(packets);
 			//lostFlowCount = 0;
+
+			// reset counters
+			countMinSketch.reset();
+
+			// each packet comes in as a lost packet; put it in the count-min sketch and also in the hash table
 			for (int j = 0; j < packets.size(); j++){
+				// update the count-min sketch for this flowid
+				countMinSketch.updateCount(packets.get(j));
+
 				/* uniform hashing into a chunk N/d and then dependent picking of the choice*/
+				int index = 0;
 				int k = 0;
 				for (k = 0; k < D; k++){
-					int index = ((hashA[k]*packets.get(j) + hashB[k]) % P) % (tableSize/D) + (k*tableSize/D);
+					index = ((hashA[k]*packets.get(j) + hashB[k]) % P) % (tableSize/D) + (k*tableSize/D);
 					//int index = (int) ((packets.get(j)%(tableSize/D)) *(tableSize/D) + k*tableSize/D);
 					// this flow has been seen before
 					if (buckets[index].flowid == packets.get(j)) {
@@ -61,8 +72,15 @@ public static void main(String[] args){
 			// find a way of tracking the information of the incoming flow, because it isn't in the hash table,
 			// so we don't have information on what its loss count is, and the very first time it comes in, loss is 0
 				if (k == D) {
-					flowsLostAtIndex[packets.get(j) - 1]++;
-					lostPacketCount++;
+					if (countMinSketch.estimateLossCount(buckets[index].flowid) < countMinSketch.estimateLossCount(packets.get(j))){
+						flowsLostAtIndex[packets.get(j) - 1] = 0;
+						flowsLostAtIndex[buckets[index].flowid - 1] = buckets[index].count;
+						lostPacketCount = lostPacketCount + buckets[index].count - (int) countMinSketch.estimateLossCount(packets.get(j));
+					}
+					else{
+						flowsLostAtIndex[packets.get(j) - 1]++;
+						lostPacketCount++;
+					}
 				}
 			}

@@ -74,7 +92,7 @@ public static void main(String[] args){
 					nonzero++;
 				}
 			}
-			System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
+			//System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
 			//System.out.println(lostPacketCount);
 		}

@@ -84,5 +102,16 @@ public static void main(String[] args){
 			System.out.println(observedProbFlowLostAtIndex[i]);
 		}*/
 		System.out.println(lostPacketCount/(double) numberOfTrials);
+
+		for (int i = 1; i <= numberOfFlows; i++){
+			System.out.println(countMinSketch.estimateLossCount(i));
+		}
+
+		/*long[][] matrix = countMinSketch.getMatrix();
+		for (int i = 0; i < matrix.length; i++){
+			for (int j = 0; j < countMinSketch.getSize(); j++)
+				System.out.print(matrix[i][j] + " ");
+			System.out.println();
+		}*/
 	}
 }
\ No newline at end of file
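The new piece above is the collision branch. Pulled out as a helper it reads as below; this is a sketch with illustrative names (the patch inlines the logic), and note it only re-attributes the loss accounting: the incumbent bucket itself is not yet replaced in this commit.

    // Sketch of the count-min-guided accounting added above (illustrative helper).
    // When all D buckets are taken: if the sketch says the incumbent has lost fewer
    // packets than the newcomer, charge the incumbent's tracked count as untracked
    // loss and credit the newcomer back up to its sketch estimate; otherwise the
    // newcomer's packet info is dropped as before. Returns the updated lostPacketCount.
    class EvictionRuleExample {
        static int onCollision(FlowWithCount incumbent, int newFlowid, Sketch countMinSketch,
                               int[] flowsLostAtIndex, int lostPacketCount) {
            if (countMinSketch.estimateLossCount(incumbent.flowid) < countMinSketch.estimateLossCount(newFlowid)) {
                flowsLostAtIndex[newFlowid - 1] = 0;
                flowsLostAtIndex[incumbent.flowid - 1] = incumbent.count;
                lostPacketCount += incumbent.count - (int) countMinSketch.estimateLossCount(newFlowid);
            } else {
                flowsLostAtIndex[newFlowid - 1]++;
                lostPacketCount++;
            }
            return lostPacketCount;
        }
    }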
diff --git a/code/FlowWithCount.java b/code/FlowWithCount.java
index 664532d..333b40e 100644
--- a/code/FlowWithCount.java
+++ b/code/FlowWithCount.java
@@ -6,4 +6,11 @@ public FlowWithCount(int flowid ,int count){
 		this.count = count;
 		this.flowid = flowid;
 	}
+
+	public static void reset(FlowWithCount[] buckets){
+		for (int i = 0; i < buckets.length; i++){
+			buckets[i].flowid = 0;
+			buckets[i].count = 0;
+		}
+	}
 }
\ No newline at end of file
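Both the d-left tables and the sketch below draw their hash functions from the same family h(x) = ((a*x + b) mod p) mod m with p = 1019. A quick standalone illustration using the patch's seed pairs (the class name and the choice of m are illustrative):

    // Standalone illustration of the hash family used throughout:
    // h_k(x) = ((a_k*x + b_k) mod p) mod m, p = 1019 prime, one (a, b) pair per
    // table chunk or sketch row. Flow ids stay below 850, so a*x never overflows int.
    class HashFamilyExample {
        public static void main(String[] args) {
            final int p = 1019;
            final int m = 50;                 // e.g. tableSize/D, or the sketch width
            final int[] a = {421, 149};
            final int[] b = {73, 109};
            for (int flowid : new int[]{1, 42, 850}) {
                for (int k = 0; k < a.length; k++) {
                    int h = ((a[k]*flowid + b[k]) % p) % m;
                    System.out.println("h" + k + "(" + flowid + ") = " + h);
                }
            }
        }
    }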
diff --git a/code/Sketch.java b/code/Sketch.java
index 97e9b9c..d3c633f 100644
--- a/code/Sketch.java
+++ b/code/Sketch.java
@@ -14,7 +14,7 @@ public class Sketch{
 	// a and b to compute the hash functions needed; every ith index in the hashSeedA and hashSeedB arrays is
 	//used to form a linear combination to get a hash function of the form ((ax + b) %p) %size
 	private final long[] hashSeedA;
-	private final long[] hashSeedB;
+	private long[] hashSeedB;
 	private final long p;

 	public Sketch(int size, int numberOfHashFunctions, int totalNumberOfKeys){
@@ -25,21 +25,25 @@ public Sketch(int size, int numberOfHashFunctions, int totalNumberOfKeys){
 		hashMatrix = new long[numberOfHashFunctions][size];
 		this.totalNumberOfPackets = 0;

+		this.p = 1019L;
+
 		// a and b to compute the hash functions needed; every ith index in the hashSeedA and hashSeedB arrays is
 		//used to form a linear combination to get a hash function of the form ((ax + b) %p) %size
-		hashSeedA = { 59032440799460394L,\
-					1380096083914250750L,\
-					9216393848249138261L,\
-					1829347879307711444L,\
+		long[] hashSeedA = { 421, 149, 151, 59032440799460394L,
+					1380096083914250750L,
+					9216393848249138261L,
+					1829347879307711444L,
 					9218705108064111365L};
+
+		this.hashSeedA = hashSeedA;

-		hashSeedB = { 832108633134565846L,\
-					9207888196126356626L,\
-					1106582827276932161L,\
-					7850759173320174309L,\
+		long[] hashSeedB = {73L, 109L, 87L,
+					832108633134565846L,
+					9207888196126356626L,
+					1106582827276932161L,
+					7850759173320174309L,
 					8297516128533878091L};
-
-		p = 31;
+		this.hashSeedB = hashSeedB;
 	}

 	public int getSize(){
@@ -66,8 +70,7 @@ private int hash(long word, int hashFunctionIndex){

 	// update the sketch to reflect that a packet with the id has been received
 	// assume updateCount is called on a packet only once
-	public void updateCount(Packet p){
-		long flowid = p.getSrcIp();
+	public void updateCount(int flowid){
 		//String flowid = p.fivetuple();

 		/* mangle the ip
@@ -81,11 +84,15 @@ public void updateCount(Packet p){
 		int word4 = ip & 0xFF;

 		totalNumberOfPackets++;*/
+		//long flowid = p.getSrcIp();
+		//String flowid = p.fivetuple();
+
 		totalNumberOfPackets++;

 		// hash the ip and update the appropriate counters
 		for (int i = 0; i < numberOfHashFunctions; i++){
 			// hash word by word numberOfHashFunctions times independently
 			int hashbucket = hash(flowid, i);
+			//System.out.println(hashbucket + " " + flowid);
 			hashMatrix[i][hashbucket]++;
 		}
 	}
@@ -93,7 +100,7 @@ public void updateCount(Packet p){
 	// update the sketch to reflect that a packet with the id has been received
 	// assume updateCount is called on a packet only once
 	// return an estimate for the flowid associated with the packet p
-	public void updateCountInMinSketch(Packet p){
+	public void updateCountInSketch(Packet p){
 		long flowid = p.getSrcIp();
 		//String flowid = p.fivetuple();

@@ -124,7 +131,7 @@ public void subtract(Sketch otherTable) throws Exception{
 	// query an estimate for the loss of this flow identified by its flow id
 	// using the count-min approach
 	public long estimateLossCount(long flowid){
-		long min = hashMatrix[1][hash(flowid, 0)];
+		long min = hashMatrix[0][hash(flowid, 0)];
 		for (int i = 1; i < numberOfHashFunctions; i++){
 			int hashbucket = hash(flowid, i);
 			if (hashMatrix[i][hashbucket] < min)
@@ -133,4 +140,11 @@ public long estimateLossCount(long flowid){
 		return min;
 	}

+	// reset the sketch by setting all counters to 0
+	public void reset(){
+		for (int i = 0; i < numberOfHashFunctions; i++){
+			for (int j = 0; j < size; j++)
+				hashMatrix[i][j] = 0;
+		}
+	}
 }
\ No newline at end of file
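A minimal usage sketch for the Sketch API as patched, mirroring how EvictingHashTableSimulation drives it (the parameter values are illustrative):

    // Usage sketch for the patched Sketch: width 100, 3 hash rows, up to 850 flows.
    // updateCount() is called once per lost packet; estimateLossCount() takes the
    // minimum over the rows, so it can over-estimate under collisions but never
    // under-estimate.
    class SketchUsageExample {
        public static void main(String[] args) {
            Sketch countMinSketch = new Sketch(100, 3, 850);
            for (int j = 0; j < 7; j++)
                countMinSketch.updateCount(42);                    // flow 42 loses 7 packets
            countMinSketch.updateCount(7);                         // flow 7 loses 1 packet
            System.out.println(countMinSketch.estimateLossCount(42)); // prints >= 7
            countMinSketch.reset();                                // zero counters between trials
        }
    }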
diff --git a/code/SmartEvictionHashTable.java b/code/SmartEvictionHashTable.java
new file mode 100644
index 0000000..6407a4f
--- /dev/null
+++ b/code/SmartEvictionHashTable.java
@@ -0,0 +1,142 @@
+import java.util.*;
+
+public class SmartEvictionHashTable{
+	public static void main(String[] args){
+		int numberOfTrials = Integer.parseInt(args[0]);
+		int numberOfFlows = Integer.parseInt(args[1]);
+		int tableSize = Integer.parseInt(args[2]);
+		double threshold = Double.parseDouble(args[3]);
+		FlowWithCount[] buckets = new FlowWithCount[tableSize];
+		int droppedPacketInfoCount = 0;
+		int cumDroppedPacketInfoCount = 0;
+		int totalNumberOfPackets = 0;
+		int D = 2;
+
+		// hardcoded values for the hash functions given that the number of flows doesn't exceed 850
+		final int P = 1019;
+		final int hashA[] = {421, 149};
+		final int hashB[] = {73, 109};
+
+		/*final int numberOfFlows[] = {200, 300, 400, 500, 600, 700, 800, 850};
+		final int tableSize[] = {50, 100, 150, 200, 250, 300, 350, 400, 450, 500};
+		final double threshold[] = {0.008, 0.006, 0.0035, 0.0025, 0.001, 0.0008, 0.0006, 0.00035, 0.00025, 0.0001};*/
+
+		// create a set of lost packets which consists of i lost packets of flow i
+		ArrayList<Integer> packets = new ArrayList<Integer>();
+		// add i packets of flowid i
+		for (int i = 1; i <= numberOfFlows; i++)
+			for (int j = 0; j < i; j++)
+				packets.add(i);
+
+		// ideally the big losers should be the highest flow ids, down to where the loss falls below the threshold
+		HashSet<Integer> expectedLossyFlows = new HashSet<Integer>();
+		for (int i = numberOfFlows; i >= 0; i--){
+			// we know that there are i lost packets of flow i
+			if (i > (int) Math.floor(threshold*packets.size())) {
+				expectedLossyFlows.add(i);
+			}
+		}
+		System.out.println("expected lossy flow size " + expectedLossyFlows.size());
+
+		// array that counts the number of ith packets lost across the trials
+		int packetsInfoDroppedAtFlow[] = new int[numberOfFlows];
+		double observedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];
+		double expectedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];
+
+		// initialize all the flow tracking buckets to flows with id 0 and count 0
+		buckets = new FlowWithCount[tableSize];
+		for (int j = 0; j < tableSize; j++){
+			buckets[j] = new FlowWithCount(0, 0);
+		}
+
+		// Across many trials: count the total number of packets lost, tracking the flows they belong to in a d-left hash table.
+		// At the end of the hashing procedure, look through all the tracked flows and see which of them are the big losers.
+		// Compare this against the expected big losers and see if the two answers disagree on which flows are the big
+		// losers; if they do, find how much of the loss went unreported as a fraction of the total loss.
+		double cumErrorMargin = 0;
+		int errorInBinaryAnswer = 0;
+		for (int i = 0; i < numberOfTrials; i++){
+			Collections.shuffle(packets);
+			FlowWithCount.reset(buckets);
+			droppedPacketInfoCount = 0;
+			totalNumberOfPackets = 0; // needed for the denominator to compute the threshold for the loss count
+
+			// data plane operation - as lost packets flow, hash them using d-left hashing to track the lossy flows
+			for (int j = 0; j < packets.size(); j++){
+				totalNumberOfPackets++;
+
+				/* uniform hashing into a chunk N/d and then dependent picking of the choice*/
+				int k = 0;
+				for (k = 0; k < D; k++){
+					int index = ((hashA[k]*packets.get(j) + hashB[k]) % P) % (tableSize/D) + (k*tableSize/D);
+					//int index = (int) ((packets.get(j)%(tableSize/D)) *(tableSize/D) + k*tableSize/D);
+					// this flow has been seen before
+					if (buckets[index].flowid == packets.get(j)) {
+						buckets[index].count++;
+						break;
+					}
+
+					// new flow
+					if (buckets[index].flowid == 0) {
+						buckets[index].flowid = packets.get(j);
+						buckets[index].count = 1;
+						break;
+					}
+				}
+
+				// none of the D locations were free
+				if (k == D) {
+					packetsInfoDroppedAtFlow[packets.get(j) - 1]++;
+					droppedPacketInfoCount++;
+				}
+			}
+
+			cumDroppedPacketInfoCount += droppedPacketInfoCount;
+
+			/* print out the status of the hashtable - the buckets and the counts
+			int nonzero = 0;
+			for (int j = 0; j < tableSize; j++){
+				if (buckets[j].flowid != 0){
+					//System.out.println("index " + i + " has flow " + buckets[i].flowid + " with count " + buckets[i].count);
+					nonzero++;
+				}
+			}
+			System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
+
+			System.out.println(lostPacketCount);*/
+
+			// controller operation at regular intervals -
+			// go through all the entries in the hash table and check if any of them are above the loss threshold (a fraction of the total loss count)
+			HashSet<Integer> observedLossyFlows = new HashSet<Integer>();
+			for (FlowWithCount f : buckets){
+				if (f.count > threshold*totalNumberOfPackets)
+					observedLossyFlows.add(f.flowid);
+			}
+
+			// compare observed and expected lossy flows and compute the probability of error
+			int bigLoserPacketsLost = 0;
+			int flag = 0;
+			for (Integer flowid : expectedLossyFlows){
+				if (!observedLossyFlows.contains(flowid)){
+					if (flag != 1){
+						errorInBinaryAnswer++; // even one point of difference is an error in the binary yes/no answer about which flows contributed to the loss
+						flag = 1;
+					}
+					bigLoserPacketsLost += flowid; // as many packets as the flowid have been lost from the information gathered
+				}
+			}
+			double errorMargin = bigLoserPacketsLost/(double) totalNumberOfPackets;
+			cumErrorMargin += errorMargin;
+		}
+
+		/* compare the probabilities of losing the ith packet against the recursive formula */
+		/*for (int i = 0; i < numberOfPackets; i++){
+			observedProbFlowLostAtIndex[i] = (double) flowsLostAtIndex[i]/numberOfTrials;
+			System.out.println(observedProbFlowLostAtIndex[i]);
+		}*/
+		// output: chance of an error in the binary answer, average error margin, average dropped packet info, table size
+		System.out.print(errorInBinaryAnswer/(double) numberOfTrials + "," + cumErrorMargin/numberOfTrials + ",");
+		System.out.println(cumDroppedPacketInfoCount/(double) numberOfTrials + "," + tableSize);
+	}
+}
\ No newline at end of file
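The commented-out parameter arrays in SmartEvictionHashTable.java appear to be the sweep this commit is building toward. Until that loop lands, each data point comes from one run per parameter setting; the argument order is trials, flows, table size, threshold (values below are illustrative):

    javac code/*.java
    java -cp code AssymetricHashTableSimulation 1000 100 60 0.01

Each run prints one CSV record: the fraction of trials whose binary big-loser answer was wrong, the average unreported loss as a fraction of the total loss, the average number of packets whose info was dropped by the table, and the table size.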