tf.GradientTape() is TensorFlow's standard tool for automatic differentiation. Three common cases are covered below: first-order derivatives, higher-order derivatives, and taking multiple gradients from one tape.
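All of the snippets in this section assume TensorFlow 2.x and the following imports:
import time
import tensorflow as tf
from tensorflow import keras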
First-order derivatives:
If the target tensor is a constant (tf.constant), you must call watch() on it manually. A tf.Variable needs no such call: any variable with trainable=True is watched automatically, which is why a model's weights are tracked without extra code.
x = tf.constant(3.0)
with tf.GradientTape() as tape:
    # A constant must be watched explicitly
    tape.watch(x)
    y = x * x
dy_dx = tape.gradient(y, x)  # 6.0
x = tf.Variable(2.0)
with tf.GradientTape() as tape:
    # A trainable Variable is watched automatically
    z = x * x
dz_dx = tape.gradient(z, x)  # 4.0
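You can check what a tape is actually tracking with watched_variables(). A minimal sketch (the variable names are illustrative); note that a Variable created with trainable=False is not watched unless you watch() it yourself:
v = tf.Variable(1.0)                   # trainable by default, watched automatically
c = tf.Variable(2.0, trainable=False)  # non-trainable, NOT watched by default
with tf.GradientTape() as tape:
    y = v * c
print([w.name for w in tape.watched_variables()])  # only v is listed
print(tape.gradient(y, c))  # None: c was never watched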
Higher-order derivatives: nest one tf.GradientTape() inside another, and call watch() on each tape when differentiating a constant.
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    with tf.GradientTape() as gg:
        gg.watch(x)
        y = x * x
    dy_dx = gg.gradient(y, x)  # Will compute to 6.0
d2y_dx2 = g.gradient(dy_dx, x)  # Will compute to 2.0
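With a tf.Variable the same nesting works without any watch() calls; a small sketch:
x = tf.Variable(3.0)
with tf.GradientTape() as g:
    with tf.GradientTape() as gg:
        y = x * x * x
    dy_dx = gg.gradient(y, x)  # 3*x^2 = 27.0
d2y_dx2 = g.gradient(dy_dx, x)  # 6*x = 18.0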
Taking multiple gradients: by default a tape releases the resources it holds as soon as gradient() is called once. If you need to call gradient() several times on the same tape, create it with tf.GradientTape(persistent=True) and delete the tape manually when you are done.
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = x * x
    z = y * y
dz_dx = g.gradient(z, x)  # 108.0 (4*x^3 at x = 3)
dy_dx = g.gradient(y, x)  # 6.0
del g  # Drop the reference to the tape
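For contrast, calling gradient() a second time on a non-persistent tape fails; a minimal sketch:
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    y = x * x
    z = y * y
dz_dx = g.gradient(z, x)  # first call: fine
try:
    dy_dx = g.gradient(y, x)  # second call on a non-persistent tape
except RuntimeError as err:
    print(err)  # the tape's resources have already been released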
2 Custom model training
Custom training consists of four steps (a minimal scaffold for the model and data follows this list):
1. Define the metrics
2. Define the training step, including the forward and backward passes
3. Define the test step
4. Run the training loop
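The code below references model, train_dataset, and valid_dataset without defining them. A minimal sketch that fills those gaps, assuming Fashion-MNIST and a small dense network (all of these choices are placeholders, not part of the original text):
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # scale pixels to [0, 1]

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(64)
valid_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(64)

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),  # probabilities, matching the loss's default from_logits=False
])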
# 1. Define the metrics
# Loss function: takes a batch of predictions and a batch of ground-truth labels
loss_fn = keras.losses.SparseCategoricalCrossentropy(
    # sum the per-sample losses, then divide by the batch size
    reduction=keras.losses.Reduction.SUM_OVER_BATCH_SIZE
)
# Loss on the test set
test_loss = keras.metrics.Mean(name='test_loss')
# The training loss is tracked by hand instead of through a metric: train_step
# returns the per-batch loss, and the loop accumulates it to get the epoch loss.
# Accuracy metrics
train_accuracy = keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_accuracy = keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
# Optimizer
optimizer = keras.optimizers.SGD(learning_rate=0.01)
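A tiny hand-check of how these objects behave (the numbers are made up for illustration):
y_true = tf.constant([0, 1])
y_pred = tf.constant([[0.9, 0.1], [0.4, 0.6]])  # rows are per-class probabilities
print(loss_fn(y_true, y_pred))                  # mean of -log(0.9) and -log(0.6), ~0.308
test_accuracy.update_state(y_true, y_pred)
print(test_accuracy.result())                   # 1.0: both argmaxes match the labels
test_accuracy.reset_states()                    # clear the state before real use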
# 2. The training step
@tf.function
def train_step(inputs):
    '''
    One training step has four parts:
    forward pass, loss, gradients, parameter update.
    '''
    images, labels = inputs
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_fn(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_accuracy.update_state(labels, predictions)
    # Return the per-batch loss so the loop can compute the epoch loss
    return loss
# 3. The test step
@tf.function
def test_step(inputs):
    '''
    Forward pass, compute the loss, update the loss metric, update the accuracy metric.
    '''
    images, labels = inputs
    predictions = model(images, training=False)
    t_loss = loss_fn(labels, predictions)
    test_loss.update_state(t_loss)
    test_accuracy.update_state(labels, predictions)
# 4. The training loop
epochs = 3
for epoch in range(epochs):
    total_loss = 0.0
    num_batches = 0
    for batch_data in train_dataset:
        start_time = time.time()
        total_loss += train_step(batch_data)
        run_time = time.time() - start_time
        num_batches += 1
        # \r moves the cursor back to the start of the line; \n would start a new one
        print('\rtotal_loss:%3.3f,batch:%d,avg_loss:%3.3f,time:%3.3f'
              % (total_loss, num_batches, total_loss / num_batches, run_time),
              end='')
    train_loss = total_loss / num_batches
    for test_data_batch in valid_dataset:
        test_step(test_data_batch)
    print('\rEpoch:%d,Loss:%3.3f,Acc:%3.3f,Val_Loss:%3.3f,Val_Acc:%3.3f'
          % (epoch + 1, train_loss, train_accuracy.result(),
             test_loss.result(), test_accuracy.result()))
    # Reset each metric's state at the end of every epoch
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()