From 2f53421a86386b9c460170b4bf9eb1c3a569441e Mon Sep 17 00:00:00 2001
From: vibhaa
Date: Sun, 13 Dec 2015 00:35:56 -0500
Subject: [PATCH] automation of data collection in progress

---
 code/AssymetricHashTableSimulation.java |  87 ++++++++++++---
 code/EvictingHashTableSimulation.java   |  37 +++++-
 code/FlowWithCount.java                 |   7 ++
 code/Sketch.java                        |  44 +++++---
 code/SmartEvictionHashTable.java        | 142 ++++++++++++++++++++++++
 5 files changed, 282 insertions(+), 35 deletions(-)
 create mode 100644 code/SmartEvictionHashTable.java

diff --git a/code/AssymetricHashTableSimulation.java b/code/AssymetricHashTableSimulation.java
index c21e8eb..a680a05 100644
--- a/code/AssymetricHashTableSimulation.java
+++ b/code/AssymetricHashTableSimulation.java
@@ -5,8 +5,11 @@ public static void main(String[] args){
 		int numberOfTrials = Integer.parseInt(args[0]);
 		int numberOfFlows = Integer.parseInt(args[1]);
 		int tableSize = Integer.parseInt(args[2]);
+		double threshold = Double.parseDouble(args[3]);
 		FlowWithCount[] buckets = new FlowWithCount[tableSize];
-		int lostPacketCount = 0;
+		int droppedPacketInfoCount = 0;
+		int cumDroppedPacketInfoCount = 0;
+		int totalNumberOfPackets = 0;
 		int D = 2;

 		// hardcoded values for the hash functions given that the number of flows is 100
@@ -14,10 +17,27 @@ public static void main(String[] args){
 		final int hashA[] = {421, 149};
 		final int hashB[] = {73, 109};

+		// create a set of lost packets which consists of i lost packets of flow i
+		ArrayList<Integer> packets = new ArrayList<Integer>();
+		// add i packets of flowid i
+		for (int i = 1; i <= numberOfFlows; i++)
+			for (int j = 0; j < i; j++)
+				packets.add(i);
+
+		// ideally the big losers should be the highest flow ids, down to where the loss falls below the threshold
+		HashSet<Integer> expectedLossyFlows = new HashSet<Integer>();
+		for (int i = numberOfFlows; i >= 0; i--){
+			// we know that there are i lost packets of flow i
+			if (i > (int) Math.floor(threshold*packets.size())) {
+				expectedLossyFlows.add(i);
+			}
+		}
+		//System.out.println("expected lossy flow size " + expectedLossyFlows.size());
+
 		// array that counts the number of ith packets lost across the trials
-		int flowsLostAtIndex[] = new int[numberOfFlows];
-		double observedProbFlowLostAtIndex[] = new double[numberOfFlows];
-		double expectedProbFlowLostAtIndex[] = new double[numberOfFlows];
+		int packetsInfoDroppedAtFlow[] = new int[numberOfFlows];
+		double observedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];
+		double expectedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];

 		// initialize all the flow tracking buckets to flows with id 0 and count 0
 		buckets = new FlowWithCount[tableSize];
@@ -25,16 +45,22 @@ public static void main(String[] args){
 			buckets[j] = new FlowWithCount(0, 0);
 		}

-		List packets = new ArrayList<>();
-		// add i packets of flowid i
-		for (int i = 1; i <= numberOfFlows; i++)
-			for (int j = 0; j < i; j++)
-				packets.add(i);
-
+		// Across many trials: count the total number of packets lost, tracking the flows they belong to in a d-left hash table.
+		// At the end of the hashing procedure, look through all the tracked flows and see which of them are the big losers.
+		// Compare this against the expected big losers and see if the two answers disagree on which flows are the big
+		// losers; if they do, find how much of the loss went unreported as a fraction of the total loss.
+		double cumErrorMargin = 0;
+		int errorInBinaryAnswer = 0;
 		for (int i = 0; i < numberOfTrials; i++){
 			Collections.shuffle(packets);
-			//lostFlowCount = 0;
+			FlowWithCount.reset(buckets);
+			droppedPacketInfoCount = 0;
+			totalNumberOfPackets = 0; // needed for the denominator to compute the threshold for the loss count
+
+			// data plane operation - as lost packets flow, hash them using d-left hashing to track the lossy flows
 			for (int j = 0; j < packets.size(); j++){
+				totalNumberOfPackets++;
+
 				/* uniform hashing into a chunk N/d and then dependent picking of the choice*/
 				int k = 0;
 				for (k = 0; k < D; k++){
@@ -56,12 +82,14 @@ public static void main(String[] args){

 				// none of the D locations were free
 				if (k == D) {
-					flowsLostAtIndex[packets.get(j) - 1]++;
-					lostPacketCount++;
+					packetsInfoDroppedAtFlow[packets.get(j) - 1]++;
+					droppedPacketInfoCount++;
 				}
 			}

-			/* print out the status of the hashtable - the buckets and the counts*/
+			cumDroppedPacketInfoCount += droppedPacketInfoCount;
+
+			/* print out the status of the hashtable - the buckets and the counts
 			int nonzero = 0;
 			for (int j = 0; j < tableSize; j++){
 				if (buckets[j].flowid != 0){
@@ -70,14 +98,41 @@ public static void main(String[] args){
 				}
 			}
 			System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
-			//System.out.println(lostPacketCount);
+
+			System.out.println(lostPacketCount);*/
+
+			// controller operation at regular intervals -
+			// go through all the entries in the hash table and check if any of them are above the loss threshold (a fraction of the total loss count)
+			HashSet<Integer> observedLossyFlows = new HashSet<Integer>();
+			for (FlowWithCount f : buckets){
+				if (f.count > threshold*totalNumberOfPackets)
+					observedLossyFlows.add(f.flowid);
+			}
+
+			// compare observed and expected lossy flows and compute the probability of error
+			int bigLoserPacketsLost = 0;
+			int flag = 0;
+			for (Integer flowid : expectedLossyFlows){
+				if (!observedLossyFlows.contains(flowid)){
+					if (flag != 1){
+						errorInBinaryAnswer++; // even one point of difference is an error in the binary yes/no answer about which flows contributed to the loss
+						flag = 1;
+					}
+					bigLoserPacketsLost += flowid; // as many packets as the flowid have been lost from the information gathered
+				}
+			}
+			double errorMargin = bigLoserPacketsLost/(double) totalNumberOfPackets;
+			cumErrorMargin += errorMargin;
 		}

 		/* compare the probabilities of losing the ith packet against the recursive formula */
 		/*for (int i = 0; i < numberOfPackets; i++){
 			observedProbFlowLostAtIndex[i] = (double) flowsLostAtIndex[i]/numberOfTrials;
 			System.out.println(observedProbFlowLostAtIndex[i]);
 		}*/
-		System.out.println(lostPacketCount/(double) numberOfTrials);
+		// output: chance of an error in the binary answer, average error margin, average dropped packet info, table size
+		System.out.print(errorInBinaryAnswer/(double) numberOfTrials + "," + cumErrorMargin/numberOfTrials + ",");
+		System.out.println(cumDroppedPacketInfoCount/(double) numberOfTrials + "," + tableSize);
 	}
 }
\ No newline at end of file
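For reference, the d-left insertion step that this simulation performs per lost packet can be isolated into a small helper. A minimal sketch assuming the repo's FlowWithCount class; the helper class and method names are illustrative, not part of the patch:

    // Minimal sketch of the d-left insertion used by the simulations (illustrative
    // helper, not part of the patch). The table is split into D chunks of tableSize/D
    // buckets; hash function k maps a flow into chunk k, and the first bucket that
    // already tracks the flow or is still free wins. Returns false when all D
    // candidates are taken, i.e. the packet's loss information is dropped.
    class DLeftInsertExample {
        static final int P = 1019;               // prime modulus, as in the patch
        static final int[] HASH_A = {421, 149};  // one (a, b) pair per chunk
        static final int[] HASH_B = {73, 109};

        static boolean insert(FlowWithCount[] buckets, int flowid, int tableSize, int D) {
            for (int k = 0; k < D; k++) {
                int index = ((HASH_A[k]*flowid + HASH_B[k]) % P) % (tableSize/D) + (k*tableSize/D);
                if (buckets[index].flowid == flowid) { buckets[index].count++; return true; } // seen before
                if (buckets[index].flowid == 0) {                                             // free bucket
                    buckets[index].flowid = flowid;
                    buckets[index].count = 1;
                    return true;
                }
            }
            return false; // none of the D locations were free
        }
    }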
diff --git a/code/EvictingHashTableSimulation.java b/code/EvictingHashTableSimulation.java
index 446f7cd..2d9e163 100644
--- a/code/EvictingHashTableSimulation.java
+++ b/code/EvictingHashTableSimulation.java
@@ -9,6 +9,9 @@ public static void main(String[] args){
 		int lostPacketCount = 0;
 		int D = 2;

+		/* sketch that maintains the loss of each flow */
+		Sketch countMinSketch = new Sketch(100, 3, numberOfFlows);
+
 		// hardcoded values for the hash functions given that the number of flows is 100
 		final int P = 1019;
 		final int hashA[] = {421, 149};
 		final int hashB[] = {73, 109};
@@ -34,11 +37,19 @@ public static void main(String[] args){
 		for (int i = 0; i < numberOfTrials; i++){
 			Collections.shuffle(packets);
 			//lostFlowCount = 0;
+
+			// reset counters
+			countMinSketch.reset();
+
+			// each packet comes in as a lost packet; put it in the count-min sketch and also in the hash table
 			for (int j = 0; j < packets.size(); j++){
+				// update the count-min sketch for this flowid
+				countMinSketch.updateCount(packets.get(j));
+
 				/* uniform hashing into a chunk N/d and then dependent picking of the choice*/
+				int index = 0;
 				int k = 0;
 				for (k = 0; k < D; k++){
-					int index = ((hashA[k]*packets.get(j) + hashB[k]) % P) % (tableSize/D) + (k*tableSize/D);
+					index = ((hashA[k]*packets.get(j) + hashB[k]) % P) % (tableSize/D) + (k*tableSize/D);
 					//int index = (int) ((packets.get(j)%(tableSize/D)) *(tableSize/D) + k*tableSize/D);
 					// this flow has been seen before
 					if (buckets[index].flowid == packets.get(j)) {
@@ -61,8 +72,15 @@ public static void main(String[] args){
 			// find a way of tracking the information of the incoming flow, because it isn't in the hash table,
 			// so we don't have information on what its loss count is, and the very first time it comes in, loss is 0
 				if (k == D) {
-					flowsLostAtIndex[packets.get(j) - 1]++;
-					lostPacketCount++;
+					if (countMinSketch.estimateLossCount(buckets[index].flowid) < countMinSketch.estimateLossCount(packets.get(j))){
+						flowsLostAtIndex[packets.get(j) - 1] = 0;
+						flowsLostAtIndex[buckets[index].flowid - 1] = buckets[index].count;
+						lostPacketCount = lostPacketCount + buckets[index].count - (int) countMinSketch.estimateLossCount(packets.get(j));
+					}
+					else{
+						flowsLostAtIndex[packets.get(j) - 1]++;
+						lostPacketCount++;
+					}
 				}
 			}

@@ -74,7 +92,7 @@ public static void main(String[] args){
 					nonzero++;
 				}
 			}
-			System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
+			//System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
 			//System.out.println(lostPacketCount);
 		}

@@ -84,5 +102,16 @@ public static void main(String[] args){
 			System.out.println(observedProbFlowLostAtIndex[i]);
 		}*/
 		System.out.println(lostPacketCount/(double) numberOfTrials);
+
+		for (int i = 1; i <= numberOfFlows; i++){
+			System.out.println(countMinSketch.estimateLossCount(i));
+		}
+
+		/*long[][] matrix = countMinSketch.getMatrix();
+		for (int i = 0; i < matrix.length; i++){
+			for (int j = 0; j < countMinSketch.getSize(); j++)
+				System.out.print(matrix[i][j] + " ");
+			System.out.println();
+		}*/
 	}
 }
\ No newline at end of file
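The new piece above is the collision branch. Pulled out as a helper it reads as below; this is a sketch with illustrative names (the patch inlines the logic), and note it only re-attributes the loss accounting: the incumbent bucket itself is not yet replaced in this commit.

    // Sketch of the count-min-guided accounting added above (illustrative helper).
    // When all D buckets are taken: if the sketch says the incumbent has lost fewer
    // packets than the newcomer, charge the incumbent's tracked count as untracked
    // loss and credit the newcomer back up to its sketch estimate; otherwise the
    // newcomer's packet info is dropped as before. Returns the updated lostPacketCount.
    class EvictionRuleExample {
        static int onCollision(FlowWithCount incumbent, int newFlowid, Sketch countMinSketch,
                               int[] flowsLostAtIndex, int lostPacketCount) {
            if (countMinSketch.estimateLossCount(incumbent.flowid) < countMinSketch.estimateLossCount(newFlowid)) {
                flowsLostAtIndex[newFlowid - 1] = 0;
                flowsLostAtIndex[incumbent.flowid - 1] = incumbent.count;
                lostPacketCount += incumbent.count - (int) countMinSketch.estimateLossCount(newFlowid);
            } else {
                flowsLostAtIndex[newFlowid - 1]++;
                lostPacketCount++;
            }
            return lostPacketCount;
        }
    }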
diff --git a/code/FlowWithCount.java b/code/FlowWithCount.java
index 664532d..333b40e 100644
--- a/code/FlowWithCount.java
+++ b/code/FlowWithCount.java
@@ -6,4 +6,11 @@ public FlowWithCount(int flowid ,int count){
 		this.count = count;
 		this.flowid = flowid;
 	}
+
+	public static void reset(FlowWithCount[] buckets){
+		for (int i = 0; i < buckets.length; i++){
+			buckets[i].flowid = 0;
+			buckets[i].count = 0;
+		}
+	}
 }
\ No newline at end of file
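Both the d-left tables and the sketch below draw their hash functions from the same family h(x) = ((a*x + b) mod p) mod m with p = 1019. A quick standalone illustration using the patch's seed pairs (the class name and the choice of m are illustrative):

    // Standalone illustration of the hash family used throughout:
    // h_k(x) = ((a_k*x + b_k) mod p) mod m, p = 1019 prime, one (a, b) pair per
    // table chunk or sketch row. Flow ids stay below 850, so a*x never overflows int.
    class HashFamilyExample {
        public static void main(String[] args) {
            final int p = 1019;
            final int m = 50;                 // e.g. tableSize/D, or the sketch width
            final int[] a = {421, 149};
            final int[] b = {73, 109};
            for (int flowid : new int[]{1, 42, 850}) {
                for (int k = 0; k < a.length; k++) {
                    int h = ((a[k]*flowid + b[k]) % p) % m;
                    System.out.println("h" + k + "(" + flowid + ") = " + h);
                }
            }
        }
    }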
diff --git a/code/Sketch.java b/code/Sketch.java
index 97e9b9c..d3c633f 100644
--- a/code/Sketch.java
+++ b/code/Sketch.java
@@ -14,7 +14,7 @@ public class Sketch{
 	// a and b to compute the hash functions needed; every ith index in the hashSeedA and hashSeedB arrays is
 	//used to form a linear combination to get a hash function of the form ((ax + b) %p) %size
 	private final long[] hashSeedA;
-	private final long[] hashSeedB;
+	private long[] hashSeedB;
 	private final long p;

 	public Sketch(int size, int numberOfHashFunctions, int totalNumberOfKeys){
@@ -25,21 +25,25 @@ public Sketch(int size, int numberOfHashFunctions, int totalNumberOfKeys){
 		hashMatrix = new long[numberOfHashFunctions][size];
 		this.totalNumberOfPackets = 0;

+		this.p = 1019L;
+
 		// a and b to compute the hash functions needed; every ith index in the hashSeedA and hashSeedB arrays is
 		//used to form a linear combination to get a hash function of the form ((ax + b) %p) %size
-		hashSeedA = { 59032440799460394L,\
-					1380096083914250750L,\
-					9216393848249138261L,\
-					1829347879307711444L,\
+		long[] hashSeedA = { 421, 149, 151, 59032440799460394L,
+					1380096083914250750L,
+					9216393848249138261L,
+					1829347879307711444L,
 					9218705108064111365L};
+
+		this.hashSeedA = hashSeedA;

-		hashSeedB = { 832108633134565846L,\
-					9207888196126356626L,\
-					1106582827276932161L,\
-					7850759173320174309L,\
+		long[] hashSeedB = {73L, 109L, 87L,
+					832108633134565846L,
+					9207888196126356626L,
+					1106582827276932161L,
+					7850759173320174309L,
 					8297516128533878091L};
-
-		p = 31;
+		this.hashSeedB = hashSeedB;
 	}

 	public int getSize(){
@@ -66,8 +70,7 @@ private int hash(long word, int hashFunctionIndex){

 	// update the sketch to reflect that a packet with the id has been received
 	// assume updateCount is called on a packet only once
-	public void updateCount(Packet p){
-		long flowid = p.getSrcIp();
+	public void updateCount(int flowid){
 		//String flowid = p.fivetuple();

 		/* mangle the ip
@@ -81,11 +84,15 @@ public void updateCount(Packet p){
 		int word4 = ip & 0xFF;

 		totalNumberOfPackets++;*/
+		//long flowid = p.getSrcIp();
+		//String flowid = p.fivetuple();
+
 		totalNumberOfPackets++;

 		// hash the ip and update the appropriate counters
 		for (int i = 0; i < numberOfHashFunctions; i++){
 			// hash word by word numberOfHashFunctions times independently
 			int hashbucket = hash(flowid, i);
+			//System.out.println(hashbucket + " " + flowid);
 			hashMatrix[i][hashbucket]++;
 		}
 	}
@@ -93,7 +100,7 @@ public void updateCount(Packet p){
 	// update the sketch to reflect that a packet with the id has been received
 	// assume updateCount is called on a packet only once
 	// return an estimate for the flowid associated with the packet p
-	public void updateCountInMinSketch(Packet p){
+	public void updateCountInSketch(Packet p){
 		long flowid = p.getSrcIp();
 		//String flowid = p.fivetuple();

@@ -124,7 +131,7 @@ public void subtract(Sketch otherTable) throws Exception{
 	// query an estimate for the loss of this flow identified by its flow id
 	// using the count-min approach
 	public long estimateLossCount(long flowid){
-		long min = hashMatrix[1][hash(flowid, 0)];
+		long min = hashMatrix[0][hash(flowid, 0)];
 		for (int i = 1; i < numberOfHashFunctions; i++){
 			int hashbucket = hash(flowid, i);
 			if (hashMatrix[i][hashbucket] < min)
@@ -133,4 +140,11 @@ public long estimateLossCount(long flowid){
 		return min;
 	}

+	// reset the sketch by setting all counters to 0
+	public void reset(){
+		for (int i = 0; i < numberOfHashFunctions; i++){
+			for (int j = 0; j < size; j++)
+				hashMatrix[i][j] = 0;
+		}
+	}
 }
\ No newline at end of file
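A minimal usage sketch for the Sketch API as patched, mirroring how EvictingHashTableSimulation drives it (the parameter values are illustrative):

    // Usage sketch for the patched Sketch: width 100, 3 hash rows, up to 850 flows.
    // updateCount() is called once per lost packet; estimateLossCount() takes the
    // minimum over the rows, so it can over-estimate under collisions but never
    // under-estimate.
    class SketchUsageExample {
        public static void main(String[] args) {
            Sketch countMinSketch = new Sketch(100, 3, 850);
            for (int j = 0; j < 7; j++)
                countMinSketch.updateCount(42);                    // flow 42 loses 7 packets
            countMinSketch.updateCount(7);                         // flow 7 loses 1 packet
            System.out.println(countMinSketch.estimateLossCount(42)); // prints >= 7
            countMinSketch.reset();                                // zero counters between trials
        }
    }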
diff --git a/code/SmartEvictionHashTable.java b/code/SmartEvictionHashTable.java
new file mode 100644
index 0000000..6407a4f
--- /dev/null
+++ b/code/SmartEvictionHashTable.java
@@ -0,0 +1,142 @@
+import java.util.*;
+
+public class SmartEvictionHashTable{
+	public static void main(String[] args){
+		int numberOfTrials = Integer.parseInt(args[0]);
+		int numberOfFlows = Integer.parseInt(args[1]);
+		int tableSize = Integer.parseInt(args[2]);
+		double threshold = Double.parseDouble(args[3]);
+		FlowWithCount[] buckets = new FlowWithCount[tableSize];
+		int droppedPacketInfoCount = 0;
+		int cumDroppedPacketInfoCount = 0;
+		int totalNumberOfPackets = 0;
+		int D = 2;
+
+		// hardcoded values for the hash functions given that the number of flows doesn't exceed 850
+		final int P = 1019;
+		final int hashA[] = {421, 149};
+		final int hashB[] = {73, 109};
+
+		/*final int numberOfFlows[] = {200, 300, 400, 500, 600, 700, 800, 850};
+		final int tableSize[] = {50, 100, 150, 200, 250, 300, 350, 400, 450, 500};
+		final double threshold[] = {0.008, 0.006, 0.0035, 0.0025, 0.001, 0.0008, 0.0006, 0.00035, 0.00025, 0.0001};*/
+
+		// create a set of lost packets which consists of i lost packets of flow i
+		ArrayList<Integer> packets = new ArrayList<Integer>();
+		// add i packets of flowid i
+		for (int i = 1; i <= numberOfFlows; i++)
+			for (int j = 0; j < i; j++)
+				packets.add(i);
+
+		// ideally the big losers should be the highest flow ids, down to where the loss falls below the threshold
+		HashSet<Integer> expectedLossyFlows = new HashSet<Integer>();
+		for (int i = numberOfFlows; i >= 0; i--){
+			// we know that there are i lost packets of flow i
+			if (i > (int) Math.floor(threshold*packets.size())) {
+				expectedLossyFlows.add(i);
+			}
+		}
+		System.out.println("expected lossy flow size " + expectedLossyFlows.size());
+
+		// array that counts the number of ith packets lost across the trials
+		int packetsInfoDroppedAtFlow[] = new int[numberOfFlows];
+		double observedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];
+		double expectedProbPacketsDroppedAtFlow[] = new double[numberOfFlows];
+
+		// initialize all the flow tracking buckets to flows with id 0 and count 0
+		buckets = new FlowWithCount[tableSize];
+		for (int j = 0; j < tableSize; j++){
+			buckets[j] = new FlowWithCount(0, 0);
+		}
+
+		// Across many trials: count the total number of packets lost, tracking the flows they belong to in a d-left hash table.
+		// At the end of the hashing procedure, look through all the tracked flows and see which of them are the big losers.
+		// Compare this against the expected big losers and see if the two answers disagree on which flows are the big
+		// losers; if they do, find how much of the loss went unreported as a fraction of the total loss.
+		double cumErrorMargin = 0;
+		int errorInBinaryAnswer = 0;
+		for (int i = 0; i < numberOfTrials; i++){
+			Collections.shuffle(packets);
+			FlowWithCount.reset(buckets);
+			droppedPacketInfoCount = 0;
+			totalNumberOfPackets = 0; // needed for the denominator to compute the threshold for the loss count
+
+			// data plane operation - as lost packets flow, hash them using d-left hashing to track the lossy flows
+			for (int j = 0; j < packets.size(); j++){
+				totalNumberOfPackets++;
+
+				/* uniform hashing into a chunk N/d and then dependent picking of the choice*/
+				int k = 0;
+				for (k = 0; k < D; k++){
+					int index = ((hashA[k]*packets.get(j) + hashB[k]) % P) % (tableSize/D) + (k*tableSize/D);
+					//int index = (int) ((packets.get(j)%(tableSize/D)) *(tableSize/D) + k*tableSize/D);
+					// this flow has been seen before
+					if (buckets[index].flowid == packets.get(j)) {
+						buckets[index].count++;
+						break;
+					}
+
+					// new flow
+					if (buckets[index].flowid == 0) {
+						buckets[index].flowid = packets.get(j);
+						buckets[index].count = 1;
+						break;
+					}
+				}
+
+				// none of the D locations were free
+				if (k == D) {
+					packetsInfoDroppedAtFlow[packets.get(j) - 1]++;
+					droppedPacketInfoCount++;
+				}
+			}
+
+			cumDroppedPacketInfoCount += droppedPacketInfoCount;
+
+			/* print out the status of the hashtable - the buckets and the counts
+			int nonzero = 0;
+			for (int j = 0; j < tableSize; j++){
+				if (buckets[j].flowid != 0){
+					//System.out.println("index " + i + " has flow " + buckets[i].flowid + " with count " + buckets[i].count);
+					nonzero++;
+				}
+			}
+			System.out.println("non-zero buckets " + nonzero + " lost flows " + lostPacketCount);
+
+			System.out.println(lostPacketCount);*/
+
+			// controller operation at regular intervals -
+			// go through all the entries in the hash table and check if any of them are above the loss threshold (a fraction of the total loss count)
+			HashSet<Integer> observedLossyFlows = new HashSet<Integer>();
+			for (FlowWithCount f : buckets){
+				if (f.count > threshold*totalNumberOfPackets)
+					observedLossyFlows.add(f.flowid);
+			}
+
+			// compare observed and expected lossy flows and compute the probability of error
+			int bigLoserPacketsLost = 0;
+			int flag = 0;
+			for (Integer flowid : expectedLossyFlows){
+				if (!observedLossyFlows.contains(flowid)){
+					if (flag != 1){
+						errorInBinaryAnswer++; // even one point of difference is an error in the binary yes/no answer about which flows contributed to the loss
+						flag = 1;
+					}
+					bigLoserPacketsLost += flowid; // as many packets as the flowid have been lost from the information gathered
+				}
+			}
+			double errorMargin = bigLoserPacketsLost/(double) totalNumberOfPackets;
+			cumErrorMargin += errorMargin;
+		}
+
+		/* compare the probabilities of losing the ith packet against the recursive formula */
+		/*for (int i = 0; i < numberOfPackets; i++){
+			observedProbFlowLostAtIndex[i] = (double) flowsLostAtIndex[i]/numberOfTrials;
+			System.out.println(observedProbFlowLostAtIndex[i]);
+		}*/
+		// output: chance of an error in the binary answer, average error margin, average dropped packet info, table size
+		System.out.print(errorInBinaryAnswer/(double) numberOfTrials + "," + cumErrorMargin/numberOfTrials + ",");
+		System.out.println(cumDroppedPacketInfoCount/(double) numberOfTrials + "," + tableSize);
+	}
+}
\ No newline at end of file
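The commented-out parameter arrays in SmartEvictionHashTable.java appear to be the sweep this commit is building toward. Until that loop lands, each data point comes from one run per parameter setting; the argument order is trials, flows, table size, threshold (values below are illustrative):

    javac code/*.java
    java -cp code AssymetricHashTableSimulation 1000 100 60 0.01

Each run prints one CSV record: the fraction of trials whose binary big-loser answer was wrong, the average unreported loss as a fraction of the total loss, the average number of packets whose info was dropped by the table, and the table size.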