First, a quick review of computing derivatives from the definition, the way it is done in high-school calculus.
import tensorflow as tf
from tensorflow import keras

# 1. Derivative of a single-variable function
def f(x):
    return 3. * x ** 2 + 2. * x - 1

def approximate_derivative(f, x, eps=1e-3):
    # Central difference: truncation error is O(eps^2)
    return (f(x + eps) - f(x - eps)) / (2. * eps)

print(approximate_derivative(f, 1.))  # 7.999999... (analytic: f'(x) = 6x + 2, so f'(1) = 8)
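For comparison, a one-sided (forward) difference is noticeably less accurate: its truncation error shrinks only linearly in eps, while the central difference above shrinks quadratically. A minimal sketch:

def forward_derivative(f, x, eps=1e-3):
    # One-sided difference: truncation error is O(eps), vs O(eps^2) above
    return (f(x + eps) - f(x)) / eps

print(forward_derivative(f, 1.))  # ~8.003, off by about 3e-3 instead of ~1e-12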
# 2. Partial derivatives
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

def approximate_gradient(g, x1, x2, eps=1e-3):
    # Hold one argument fixed and differentiate with respect to the other
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)
    return dg_x1, dg_x2

print(approximate_gradient(g, 2., 3.))
# (8.999999999993236, 41.999999999994486)
# Analytic check: dg/dx1 = x2^2 = 9, dg/dx2 = 2 * x2 * (x1 + 5) = 42
In TensorFlow, tf.GradientTape() lets you define the differentiation process yourself. Gradients with respect to variables can be taken directly, but to differentiate with respect to a constant you must first register it with tape.watch(). By default a tape supports only a single tape.gradient() call (which can still cover one or several parameters at once); setting persistent=True on tf.GradientTape() allows multiple tape.gradient() calls after the same with-block, at the cost of having to release the tape's resources manually with del tape.
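A minimal sketch of the two failure modes just described (assuming TensorFlow 2.x):

v = tf.Variable(3.0)
with tf.GradientTape() as tape:  # persistent defaults to False
    y = v ** 2
print(tape.gradient(y, v))  # tf.Tensor(6.0, shape=(), dtype=float32)
# tape.gradient(y, v)       # a second call raises RuntimeError: the
#                           # non-persistent tape was released by the first call

c = tf.constant(3.0)
with tf.GradientTape() as tape:
    y = c ** 2
print(tape.gradient(y, c))  # None: the constant was never watched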
# Differentiating with respect to multiple variables, method 1: a persistent tape
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)  # a second call is allowed because persistent=True
print(dz_x1, dz_x2)
del tape  # a persistent tape must be released manually
# tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)
# Differentiating with respect to multiple variables, method 2: one call, a list of sources
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
# [<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
# Differentiating with respect to constants: they must be watched explicitly
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)
# [<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]
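The inverse control also exists: by default a tape automatically watches every trainable tf.Variable it touches, and passing watch_accessed_variables=False turns that off, so even variables must then be watched explicitly. A small sketch:

x = tf.Variable(2.0)
with tf.GradientTape(watch_accessed_variables=False) as tape:
    y = x ** 2
print(tape.gradient(y, x))  # None: automatic watching was disabled

with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x)  # opt back in for just this variable
    y = x ** 2
print(tape.gradient(y, x))  # tf.Tensor(4.0, shape=(), dtype=float32)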
# Differentiating several targets at once
x = tf.Variable(5.0)
with tf.GradientTape(persistent=True) as tape:
    z1 = 3 * x
    z2 = x ** 2
print(tape.gradient([z1, z2], x))  # a list of targets yields the SUM of their gradients: 3 + 2x = 13
print(tape.gradient(z1, x))
print(tape.gradient(z2, x))
del tape
# tf.Tensor(13.0, shape=(), dtype=float32)
# tf.Tensor(3.0, shape=(), dtype=float32)
# tf.Tensor(10.0, shape=(), dtype=float32)
# Second derivatives via nested tapes
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # The first-order gradients must be computed inside the outer tape's
    # scope so that the outer tape records them
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2])
               for inner_grad in inner_grads]
print(outer_grads)
del inner_tape
del outer_tape
# The rows form the Hessian of z; None marks a structurally zero entry (d2z/dx1^2 = 0)
# [[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>],
#  [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]
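For a single variable the same nesting works with ordinary non-persistent tapes, since each tape's gradient() is then called only once; a minimal sketch:

x = tf.Variable(5.0)
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        y = x ** 3
    dy_dx = inner_tape.gradient(y, x)    # 3x^2 = 75
d2y_dx2 = outer_tape.gradient(dy_dx, x)  # 6x = 30
print(dy_dx, d2y_dx2)
# tf.Tensor(75.0, shape=(), dtype=float32) tf.Tensor(30.0, shape=(), dtype=float32)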
Once you are comfortable with TensorFlow's differentiation API, the gradient descent update is easy to implement: differentiate a loss function instead of an arbitrary formula, and subtract the scaled gradient from the variable on each step.
# Gradient descent
learning_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)  # in-place x -= learning_rate * dz_dx
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
# This matches the analytic minimum: f'(x) = 6x + 2 = 0 at x = -1/3
# Gradient descent with a keras optimizer
learning_rate = 0.1
x = tf.Variable(0.0)
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)  # the lr= alias is deprecated
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])  # a list of (gradient, variable) pairs
print(x)
# <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
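The loop can be written more compactly with optimizer.minimize(), which records the tape and applies the gradients in one step; a sketch assuming TF 2.x, where the loss is passed as a zero-argument callable:

x = tf.Variable(0.0)
optimizer = keras.optimizers.SGD(learning_rate=0.1)
for _ in range(100):
    # minimize() tapes the callable, differentiates it w.r.t. var_list,
    # and applies the update, replacing the explicit GradientTape above
    optimizer.minimize(lambda: f(x), var_list=[x])
print(x)  # converges to the same minimum, x ≈ -1/3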