Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit fb5ac1c

Browse files
committed
Streamlining rdp_accountant.
1 parent b88da6e commit fb5ac1c

File tree

2 files changed

+171
-304
lines changed

2 files changed

+171
-304
lines changed

research/differential_privacy/privacy_accountant/python/rdp_accountant.py

Lines changed: 72 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,22 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
# ==============================================================================
15-
"""RDP analysis of the Gaussian-with-sampling mechanism.
15+
"""RDP analysis of the Sampled Gaussian mechanism.
1616
17-
Functionality for computing Renyi differential privacy of an additive Gaussian
18-
mechanism with sampling. Its public interface consists of two methods:
19-
compute_rdp(q, sigma, T, orders) computes RDP with the sampling rate q,
20-
noise sigma, T steps at the list of orders.
17+
Functionality for computing Renyi differential privacy (RDP) of an additive
18+
Sampled Gaussian mechanism (SGM). Its public interface consists of two methods:
19+
compute_rdp(q, stddev_to_sensitivity_ratio, T, orders) computes RDP for
20+
SGM iterated T times.
2121
get_privacy_spent(orders, rdp, target_eps, target_delta) computes delta
2222
(or eps) given RDP at multiple orders and
2323
a target value for eps (or delta).
2424
2525
Example use:
2626
27-
Suppose that we have run an algorithm with parameters, an array of
28-
(q1, sigma1, T1) ... (qk, sigma_k, Tk), and we wish to compute eps for a given
29-
delta. The example code would be:
27+
Suppose that we have run an SGM applied to a function with l2-sensitivity 1.
28+
Its parameters are given as a list of tuples (q1, sigma1, T1), ...,
29+
(qk, sigma_k, Tk), and we wish to compute eps for a given delta.
30+
The example code would be:
3031
3132
max_order = 32
3233
orders = range(2, max_order + 1)
@@ -43,16 +44,13 @@
4344

4445
from absl import app
4546
from absl import flags
47+
4648
import math
49+
import sys
50+
4751
import numpy as np
4852
from scipy import special
4953

50-
FLAGS = flags.FLAGS
51-
flags.DEFINE_boolean("rdp_verbose", False,
52-
"Output intermediate results for RDP computation.")
53-
FLAGS(sys.argv) # Load the flags (including on import)
54-
55-
5654
########################
5755
# LOG-SPACE ARITHMETIC #
5856
########################
@@ -68,10 +66,13 @@ def _log_add(logx, logy):
6866

6967

7068
def _log_sub(logx, logy):
71-
"""Subtract two numbers in the log space. Answer must be positive."""
69+
"""Subtract two numbers in the log space. Answer must be non-negative."""
70+
if logx < logy:
71+
raise ValueError("The result of subtraction must be non-negative .")
7272
if logy == -np.inf: # subtracting 0
7373
return logx
74-
assert logx > logy
74+
if logx == logy:
75+
return -np.inf # 0 is represented as -np.inf in the log space.
7576

7677
try:
7778
# Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
@@ -89,84 +90,58 @@ def _log_print(logx):
8990

9091

9192
def _compute_log_a_int(q, sigma, alpha):
92-
"""Compute log(A_alpha) for integer alpha."""
93+
"""Compute log(A_alpha) for integer alpha. 0 < q < 1."""
9394
assert isinstance(alpha, (int, long))
9495

95-
# The first and second terms of A_alpha in the log space:
96-
log_a1, log_a2 = -np.inf, -np.inf
96+
# Initialize with 0 in the log space.
97+
log_a = -np.inf
9798

9899
for i in range(alpha + 1):
99-
# Compute in the log space. Extra care needed for q = 0 or 1.
100-
log_coef_i = math.log(special.binom(alpha, i))
101-
if q > 0:
102-
log_coef_i += i * math.log(q)
103-
elif i > 0:
104-
continue # The term is 0, skip the rest.
105-
106-
if q < 1.0:
107-
log_coef_i += (alpha - i) * math.log(1 - q)
108-
elif i < alpha:
109-
continue # The term is 0, skip the rest.
110-
111-
s1 = log_coef_i + (i * i - i) / (2.0 * (sigma ** 2))
112-
s2 = log_coef_i + (i * i + i) / (2.0 * (sigma ** 2))
113-
log_a1 = _log_add(log_a1, s1)
114-
log_a2 = _log_add(log_a2, s2)
115-
116-
log_a = _log_add(math.log(1 - q) + log_a1, math.log(q) + log_a2)
117-
if FLAGS.rdp_verbose:
118-
print("A: by binomial expansion {} = {} + {}".format(
119-
_log_print(log_a),
120-
_log_print(math.log(1 - q) + log_a1), _log_print(math.log(q) + log_a2)))
100+
log_coef_i = (
101+
math.log(special.binom(alpha, i)) + i * math.log(q) +
102+
(alpha - i) * math.log(1 - q))
103+
104+
s = log_coef_i + (i * i - i) / (2 * (sigma**2))
105+
log_a = _log_add(log_a, s)
106+
121107
return float(log_a)
122108

123109

124110
def _compute_log_a_frac(q, sigma, alpha):
125-
"""Compute log(A_alpha) for fractional alpha."""
126-
# The four parts of A_alpha in the log space:
127-
log_a11, log_a12 = -np.inf, -np.inf
128-
log_a21, log_a22 = -np.inf, -np.inf
111+
"""Compute log(A_alpha) for fractional alpha. 0 < q < 1."""
112+
# The two parts of A_alpha, integrals over (-inf,z0] and (z0, +inf), are
113+
# initialized to 0 in the log space:
114+
log_a0, log_a1 = -np.inf, -np.inf
129115
i = 0
130116

131-
z0, _ = _compute_zs(sigma, q)
117+
z0 = sigma**2 * math.log(1 / q - 1) + .5
132118

133119
while True: # do ... until loop
134120
coef = special.binom(alpha, i)
135121
log_coef = math.log(abs(coef))
136122
j = alpha - i
137123

138-
log_t1 = log_coef + i * math.log(q) + j * math.log(1 - q)
139-
log_t2 = log_coef + j * math.log(q) + i * math.log(1 - q)
124+
log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q)
125+
log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q)
140126

141-
log_e11 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
142-
log_e12 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
143-
log_e21 = math.log(.5) + _log_erfc((i - (z0 - 1)) / (math.sqrt(2) * sigma))
144-
log_e22 = math.log(.5) + _log_erfc((z0 - 1 - j) / (math.sqrt(2) * sigma))
127+
log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
128+
log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
145129

146-
log_s11 = log_t1 + (i * i - i) / (2 * (sigma ** 2)) + log_e11
147-
log_s12 = log_t2 + (j * j - j) / (2 * (sigma ** 2)) + log_e12
148-
log_s21 = log_t1 + (i * i + i) / (2 * (sigma ** 2)) + log_e21
149-
log_s22 = log_t2 + (j * j + j) / (2 * (sigma ** 2)) + log_e22
130+
log_s0 = log_t0 + (i * i - i) / (2 * (sigma**2)) + log_e0
131+
log_s1 = log_t1 + (j * j - j) / (2 * (sigma**2)) + log_e1
150132

151133
if coef > 0:
152-
log_a11 = _log_add(log_a11, log_s11)
153-
log_a12 = _log_add(log_a12, log_s12)
154-
log_a21 = _log_add(log_a21, log_s21)
155-
log_a22 = _log_add(log_a22, log_s22)
134+
log_a0 = _log_add(log_a0, log_s0)
135+
log_a1 = _log_add(log_a1, log_s1)
156136
else:
157-
log_a11 = _log_sub(log_a11, log_s11)
158-
log_a12 = _log_sub(log_a12, log_s12)
159-
log_a21 = _log_sub(log_a21, log_s21)
160-
log_a22 = _log_sub(log_a22, log_s22)
137+
log_a0 = _log_sub(log_a0, log_s0)
138+
log_a1 = _log_sub(log_a1, log_s1)
161139

162140
i += 1
163-
if max(log_s11, log_s21, log_s21, log_s22) < -30:
141+
if max(log_s0, log_s1) < -30:
164142
break
165143

166-
log_a = _log_add(
167-
math.log(1. - q) + _log_add(log_a11, log_a12),
168-
math.log(q) + _log_add(log_a21, log_a22))
169-
return log_a
144+
return _log_add(log_a0, log_a1)
170145

171146

172147
def _compute_log_a(q, sigma, alpha):
@@ -178,91 +153,20 @@ def _compute_log_a(q, sigma, alpha):
178153

179154

180155
def _log_erfc(x):
181-
# Can be replaced with a single call to log_ntdr if available:
182-
# return np.log(2.) + special.log_ntdr(-x * 2**.5)
183-
r = special.erfc(x)
184-
if r == 0.0:
185-
# Using the Laurent series at infinity for the tail of the erfc function:
186-
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
187-
# To verify in Mathematica:
188-
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
189-
return (-math.log(math.pi) / 2 - math.log(x) - x ** 2 - .5 * x ** -2 +
190-
.625 * x ** -4 - 37. / 24. * x ** -6 + 353. / 64. * x ** -8)
191-
else:
192-
return math.log(r)
193-
194-
195-
def _compute_zs(sigma, q):
196-
z0 = sigma ** 2 * math.log(1 / q - 1) + .5
197-
z1 = min(z0 - 2, z0 / 2)
198-
return z0, z1
199-
200-
201-
def _compute_log_b0(sigma, q, alpha, z1):
202-
"""Return an approximation to log(B0) or None if failed to converge."""
203-
z0, _ = _compute_zs(sigma, q)
204-
s, log_term, log_b0, k, sign, max_log_term = 0, 1., 0, 0, 1, -np.inf
205-
# Keep adding new terms until precision is no longer preserved.
206-
# Don't stop on the negative.
207-
while (k < alpha or (log_term > max_log_term - 36 and log_term > -30) or
208-
sign < 0.):
209-
log_b1 = k * (k - 2 * z0) / (2 * sigma ** 2)
210-
log_b2 = _log_erfc((k - z1) / (math.sqrt(2) * sigma))
211-
log_term = log_b0 + log_b1 + log_b2
212-
max_log_term = max(max_log_term, log_term)
213-
s += sign * math.exp(log_term)
214-
k += 1
215-
# Maintain invariant: sign * exp(log_b0) = {-alpha choose k}
216-
log_b0 += math.log(abs(-alpha - k + 1)) - math.log(k)
217-
sign *= -1
218-
219-
if s == 0: # May happen if all terms are < 1e-324.
220-
return -np.inf
221-
if s < 0 or math.log(s) < max_log_term - 25: # The series failed to converge.
222-
return None
223-
c = math.log(.5) - math.log(1 - q) * alpha
224-
return c + math.log(s)
225-
226-
227-
def _bound_log_b1(sigma, q, alpha, z1):
228-
log_c = _log_add(math.log(1 - q),
229-
math.log(q) + (2 * z1 - 1.) / (2 * sigma ** 2))
230-
return math.log(.5) - log_c * alpha + _log_erfc(z1 / (math.sqrt(2) * sigma))
231-
232-
233-
def _bound_log_b(q, sigma, alpha):
234-
"""Compute a numerically stable bound on log(B_alpha)."""
235-
if q == 1.: # If the sampling rate is 100%, A and B are symmetric.
236-
return _compute_log_a(q, sigma, alpha)
237-
238-
z0, z1 = _compute_zs(sigma, q)
239-
log_b_bound = np.inf
240-
241-
# Puts a lower bound on B1: it cannot be less than its value at z0.
242-
log_lb_b1 = _bound_log_b1(sigma, q, alpha, z0)
243-
244-
while z0 - z1 > 1e-3:
245-
m = (z0 + z1) / 2
246-
log_b0 = _compute_log_b0(sigma, q, alpha, m)
247-
if log_b0 is None:
248-
z0 = m
249-
continue
250-
log_b1 = _bound_log_b1(sigma, q, alpha, m)
251-
log_b_bound = min(log_b_bound, _log_add(log_b0, log_b1))
252-
log_b_min_bound = _log_add(log_b0, log_lb_b1)
253-
if (log_b_bound < 0 or
254-
log_b_min_bound < 0 or
255-
log_b_bound > log_b_min_bound + .01):
256-
# If the bound is likely to be too loose, move z1 closer to z0 and repeat.
257-
z1 = m
156+
try:
157+
return math.log(2) + special.log_ndtr(-x * 2**.5)
158+
except NameError:
159+
# If log_ndtr is not available, approximate as follows:
160+
r = special.erfc(x)
161+
if r == 0.0:
162+
# Using the Laurent series at infinity for the tail of the erfc function:
163+
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
164+
# To verify in Mathematica:
165+
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
166+
return (-math.log(math.pi) / 2 - math.log(x) - x**2 - .5 * x**-2 +
167+
.625 * x**-4 - 37. / 24. * x**-6 + 353. / 64. * x**-8)
258168
else:
259-
break
260-
261-
return log_b_bound
262-
263-
264-
def _log_bound_b_elementary(q, alpha):
265-
return -math.log(1 - q) * alpha
169+
return math.log(r)
266170

267171

268172
def _compute_delta(orders, rdp, eps):
@@ -319,7 +223,7 @@ def _compute_eps(orders, rdp, delta):
319223

320224

321225
def _compute_rdp(q, sigma, alpha):
322-
"""Compute RDP of the Gaussian mechanism with sampling at order alpha.
226+
"""Compute RDP of the Sampled Gaussian mechanism at order alpha.
323227
324228
Args:
325229
q: The sampling rate.
@@ -329,37 +233,25 @@ def _compute_rdp(q, sigma, alpha):
329233
Returns:
330234
RDP at alpha, can be np.inf.
331235
"""
332-
if np.isinf(alpha):
333-
return np.inf
334-
335-
log_moment_a = _compute_log_a(q, sigma, alpha - 1)
236+
if q == 0:
237+
return 0
336238

337-
log_bound_b = _log_bound_b_elementary(q, alpha - 1) # does not require sigma
239+
if q == 1.:
240+
return alpha / (2 * sigma**2)
338241

339-
if log_bound_b < log_moment_a:
340-
if FLAGS.rdp_verbose:
341-
print("Elementary bound suffices : {} < {}".format(
342-
_log_print(log_bound_b), _log_print(log_moment_a)))
343-
else:
344-
log_bound_b2 = _bound_log_b(q, sigma, alpha - 1)
345-
if math.isnan(log_bound_b2):
346-
if FLAGS.rdp_verbose:
347-
print("B bound failed to converge")
348-
else:
349-
if FLAGS.rdp_verbose and (log_bound_b2 < log_bound_b):
350-
print("Elementary bound is stronger: {} < {}".format(
351-
_log_print(log_bound_b2), _log_print(log_bound_b)))
352-
log_bound_b = min(log_bound_b, log_bound_b2)
242+
if np.isinf(alpha):
243+
return np.inf
353244

354-
return max(log_moment_a, log_bound_b) / (alpha - 1)
245+
return _compute_log_a(q, sigma, alpha) / (alpha - 1)
355246

356247

357-
def compute_rdp(q, sigma, steps, orders):
358-
"""Compute RDP of Gaussian mechanism with sampling for given parameters.
248+
def compute_rdp(q, stddev_to_sensitivity_ratio, steps, orders):
249+
"""Compute RDP of the Sampled Gaussian Mechanism for given parameters.
359250
360251
Args:
361252
q: The sampling rate.
362-
sigma: The std of the additive Gaussian noise.
253+
stddev_to_sensitivity_ratio: The ratio of std of the Gaussian noise to the
254+
l2-sensitivity of the function to which it is added.
363255
steps: The number of steps.
364256
orders: An array (or a scalar) of RDP orders.
365257
@@ -368,9 +260,10 @@ def compute_rdp(q, sigma, steps, orders):
368260
"""
369261

370262
if np.isscalar(orders):
371-
rdp = _compute_rdp(q, sigma, orders)
263+
rdp = _compute_rdp(q, stddev_to_sensitivity_ratio, orders)
372264
else:
373-
rdp = np.array([_compute_rdp(q, sigma, order) for order in orders])
265+
rdp = np.array([_compute_rdp(q, stddev_to_sensitivity_ratio, order)
266+
for order in orders])
374267

375268
return rdp * steps
376269

@@ -405,11 +298,3 @@ def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
405298
else:
406299
eps, opt_order = _compute_eps(orders, rdp, target_delta)
407300
return eps, target_delta, opt_order
408-
409-
410-
def main(_):
411-
pass
412-
413-
414-
if __name__ == "__main__":
415-
app.run(main)

0 commit comments

Comments
 (0)