-
Notifications
You must be signed in to change notification settings - Fork 106
/
twin_coding.py
215 lines (183 loc) · 8.01 KB
/
twin_coding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import numpy as np
import galois
from galois import FieldArray
from numpy._typing import NDArray
from encoding.fields import get_field
#
# Twin Coding is a hybrid encoding scheme that works with any two linear
# coding schemes and combines them to achieve a space-bandwidth tradeoff,
# minimizing the amount of data that must be transferred between storage
# nodes in order to recover a lost shard of data.
#
# Two copies of the data are encoded, one using each of the sub-schemes,
# and then split among corresponding sets of nodes. Unlike a traditional k of n
# coding scheme in which the reconstruction of a single lost shard requires
# gathering the full data from k nodes, Twin Coding allows nodes to cooperate
# by performing a computation and transferring a smaller amount of data,
# reducing the total transfer to exactly the size of the lost shard.
# Specifically, a message of k^2 symbols in length is encoded and split into
# k-length shards. Utilizing twin coding the recovery of a shard requires
# downloading only a single (calculated) symbol from any k nodes of the
# alternate node type. This is in contrast to the k^2 symbols that would
# be required for recovery with a single coding scheme.
#
# The message is reshaped to a k x k matrix and encoded using the supplied
# generator matrices of the code schemes into two, k x n, matrices. Each
# column of the encoded matrices is then assigned to a node of its respective
# type, resulting in two distinct sets of n nodes.
#
# message:
# The message to be encoded. This is an array of k^2 symbols length.
#
# C0, C1:
# The supplied pair of coding schemes can be any k of n linear codes (e.g.
# Reed-Solomon). They may be the same or different schemes. They should
# share k but may have different encoded lengths n.
#
# Return:
# This method returns the two array of column vectors to be stored at the
# respective node types. The two node sets will be different sizes if the codes
# have different n values.
#
# The original paper:
# https://www.cs.cmu.edu/~nihars/publications/repairAnyStorageCode_ISIT2011.pdf
#
def twin_code(message: np.ndarray, C0: 'Code', C1: 'Code') -> (NDArray[FieldArray], NDArray[FieldArray]):
assert C0.k == C1.k
# Reshape the message into k x k matrix
M0 = message.reshape((C0.k, C0.k))
M1 = M0.T # M1 is the transpose of M0
# Encode the messages using the respective coding schemes
E0 = M0 @ C0.G # size k x C0.n
E1 = M1 @ C1.G # size k x C1.n
# Nodes store k symbols corresponding to a column of the (k × n) encoded
# matrix of their type.
type_0_nodes = [E0[:, i] for i in range(C0.n)]
type_1_nodes = [E1[:, i] for i in range(C1.n)]
return np.array(type_0_nodes), np.array(type_1_nodes)
#
# Construct a Reed Solomon generator matrix for consecutive evaluation points
# of the specified Galois field. This is a k x n matrix where each column
# corresponds to an evaluation point and holds consecutive powers of
# that value (e.g. x^0, x^1, x^2, x^3, ...). Multiplying on the left by a
# row vector of k data symbols produces a polynomial of degree k-1 having
# coefficients of the k data points multiplying consecutive powers of the
# evaluation point, comprising an evaluation of the polynomial at that point.
# (e.g. k0 + k1*x + k2*x^2 + k3*x^3...) The end result is a row vector of n
# symbols, each corresponding to an evaluation of the polynomial at its
# column's eval point.
#
# Note: This is the same as the transpose of the Vandermonde matrix. However,
# we cannot use np.vander() here because the galois package does not override
# it to operate on GF elements.
#
# More information:
# https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction
#
def rs_generator_matrix(GF: galois.GF, k: int, n: int):
# produce n consecutive elements of GF
eval_points = [GF(i) for i in range(1, n + 1)]
matrix = GF(np.zeros(shape=(k, n), dtype=int))
for row in range(k):
for col in range(n):
matrix[row, col] = eval_points[col] ** row
return matrix
# The parameters of a linear coding scheme
class Code:
def __init__(self, GF: galois.GF, k: int, n: int, G: np.matrix):
self.GF = GF # The symbol space
self.k = k # Dimension of message
self.n = n # Dimension of the coded message
self.G = G # Generator matrix
#
# Tests.
#
if __name__ == "__main__":
# The symbol space
GF = get_field()
# The two coding schemes.
k = 3 # The schemes share k but may have different encoded lengths n
C0 = Code(k=k, n=5, GF=GF, G=rs_generator_matrix(GF, k=k, n=5))
print("initialized C0")
C1 = Code(k=k, n=7, GF=GF, G=rs_generator_matrix(GF, k=k, n=7))
print("initialized C1")
message = GF([1, 2, 3, 5, 8, 13, 21, 34, 55]) # k^2 = 9 symbols in GF(n)
print("message:", message)
# Twin code the message
nodes0, nodes1 = twin_code(message, C0, C1)
print(f"{len(nodes0)} type 0 nodes\n{len(nodes1)} type 1 nodes")
print(f"nodes0[0] = {nodes0[0]}, len(nodes0[0]) = {len(nodes0[0])}")
#
# Simulate regular data collection: Gather data from any k nodes and
# decode it.
#
print("\nSimulate data collection:")
collect_from_node_type = 0
nodes = nodes0 if collect_from_node_type == 0 else nodes1
G_mine = C0.G if collect_from_node_type == 0 else C1.G
# Download k columns from the set of nodes.
cols = nodes[:k].T
# Take the corresponding k columns of their generator matrix and invert it
g = G_mine[:, 0:k]
ginv = np.linalg.inv(g)
# Use it to decode the data
decoded = GF(cols) @ ginv
# (This works because if E = M * G, then M = E * Ginv)
print("decoded:", decoded.reshape(-1))
# Print the results
passed = (decoded.reshape(-1) == message).all()
print('Total symbols transferred = ', np.size(cols))
print("Test passed." if passed else "Test failed.")
#
# Simulate node recovery: Cooperate with k nodes of the opposite type to
# recover a lost node with minimal data transfer.
#
print("\nSimulate node recovery:")
failed_node_number = 1
failed_node_type = 0
# The k symbols of the lost node correspond to the i'th column of the
# encoded matrix of its type, where i is the failed node number.
# We contact k nodes of the opposite type and ask for their help.
my_nodes, helper_nodes = (nodes0, nodes1) if failed_node_type == 0 else (
nodes1, nodes0)
# Pick k random helper nodes to ask
nodes_to_ask = np.random.choice(helper_nodes.shape[0], k, replace=False)
print("Picking helper nodes:", nodes_to_ask)
# Use the encoding vector for the failed node, which is the i'th column
# of the generator matrix of its type.
G_mine, G_other = (C0.G, C1.G) if failed_node_type == 0 else (C1.G, C0.G)
encoding_vector = G_mine[:, failed_node_number]
# Each helper node calculates the inner product of its k encoded symbols
# and the encoding vector of the failed node.
node_responses = GF([encoding_vector @ node_vector
for node_vector in GF(helper_nodes[nodes_to_ask])])
print("node_responses:", node_responses)
# Now treat the responses as a vector and perform erasure decoding using the
# helper node type's encoding matrix.
g = G_other[:, nodes_to_ask]
ginv = np.linalg.inv(g)
recovered = node_responses @ ginv
# TODO: concise explanation of why this works.
# Print the results
original = my_nodes[failed_node_number]
print("Original:", original)
print("Recovered:", recovered)
passed = (recovered == original).all()
print('Total symbols transferred = ', np.size(node_responses))
print("Test passed." if passed else "Test failed.")
"""
message: [ 1 2 3 5 8 13 21 34 55]
5 type 0 nodes
7 type 1 nodes
Simulate data collection:
decoded: [ 1 2 3 5 8 13 21 34 55]
Total symbols transferred = 9
Test passed.
Simulate node recovery:
Picking helper nodes: [3 1 5]
node_responses: [181 69 249]
Original: [ 9 33 141]
Recovered: [ 9 33 141]
Total symbols transferred = 3
Test passed.
"""