文章目录[隐藏]
缘由
自己安装好tensorflow-gpu版本后,就想测试一下自己的运行环境搭建得是否有问题,同时也好奇GPU的运算能力,但自己才开始接触这一方面,能力不足以写出来,就尝试去搜索,结果很多,很杂乱。后来自己整理了一下,我自己GPU占用率大约在19%左右(要知道我们平时除了打游戏外基本都为0),如上图。
# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time : 2019/5/17 17:03
# @Author : xhh
# @Desc : MNIST数据集下载
# @File : mnist_data_load.py
# @Software: PyCharm
##################
# 数据下载
#################
# ------------------------------------------------------------------
# Download the MNIST data set and preview one training image.
# NOTE(review): "MINST_daya" looks like a typo for "MNIST_data" —
# kept as-is so existing downloads in that directory are reused.
# ------------------------------------------------------------------
from tensorflow.examples.tutorials.mnist import input_data
import pylab

mnist = input_data.read_data_sets("MINST_daya/", one_hot=True)
print("输入数据:", mnist.train.images)
print("数据的shape:", mnist.train.images.shape)

# Take the second training sample and restore its 28x28 pixel layout.
sample = mnist.train.images[1]
pylab.imshow(sample.reshape(-1, 28))
pylab.show()
# ------------------------------------------------------------------
# Model definition: single-layer softmax regression on MNIST.
# ------------------------------------------------------------------
from tensorflow.examples.tutorials.mnist import input_data
import pylab
import tensorflow as tf
from datetime import datetime

mnist = input_data.read_data_sets("MINST_daya/", one_hot=True)
tf.reset_default_graph()

# Placeholders: flattened 28*28 = 784 pixel images and one-hot labels (10 classes).
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# Learnable parameters: weight matrix [784, 10] and a per-class bias.
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Forward pass: class probabilities via softmax over the linear projection.
pred = tf.nn.softmax(tf.matmul(x, W) + b)

# Cross-entropy loss. `pred` is clipped away from 0 so tf.log never yields
# -inf/NaN when the softmax saturates — the original -sum(y*log(pred))
# formulation was numerically unstable.
cost = tf.reduce_mean(
    -tf.reduce_sum(y * tf.log(tf.clip_by_value(pred, 1e-10, 1.0)),
                   reduction_indices=1))

# Plain gradient descent on the cross-entropy loss.
learning_rate = 0.001
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

startTime = datetime.now()

# Training configuration for the first run.
training_epochs = 100  # full passes over the training set
batch_size = 100       # samples drawn per gradient step
display_step = 1       # print progress every epoch
saver = tf.train.Saver()
model_path = "mnist/521model.ckpt"
# 开始训练
# First training run: fit the softmax model from scratch and checkpoint it.
with tf.Session() as sess:
    # Initialize every variable in the graph.
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # One pass over the whole training set, mini-batch by mini-batch.
        for _ in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run one optimizer step and fetch the batch loss.
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += c / total_batch  # running mean of batch losses

        # Report progress every `display_step` epochs.
        if (epoch + 1) % display_step == 0:
            print("Epoch:", "%04d" % (epoch + 1), "cost=", '{:.9f}'.format(avg_cost))

    print("训练成功!!")

    # Evaluate accuracy on the held-out test set.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("准确度:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Persist the trained weights for the restore runs below.
    save_path = saver.save(sess, model_path)
    print("模型文件在:%s" % save_path)
    print("Time taken:", datetime.now() - startTime)
# Verify the first training run by restoring its checkpoint.
startTime = datetime.now()
print("检验第一次训练的情况")
with tf.Session() as sess2:
    # Initialize, then immediately overwrite with the saved weights.
    sess2.run(tf.global_variables_initializer())
    saver.restore(sess2, model_path)

    # Test-set accuracy of the restored model.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("准确度:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Classify two training samples.
    # Bug fix: the original printed the `pred` tensor object instead of the
    # fetched probability values `predv`.
    output = tf.argmax(pred, 1)
    batch_xs, batch_ys = mnist.train.next_batch(2)
    outputval, predv = sess2.run([output, pred], feed_dict={x: batch_xs})
    print(outputval, predv, batch_ys)

    # Show the two samples that were just classified.
    im = batch_xs[0]
    pylab.imshow(im.reshape(-1, 28))
    pylab.show()
    im = batch_xs[1]
    pylab.imshow(im.reshape(-1, 28))
    pylab.show()

print("Time taken:", datetime.now() - startTime)
###################
# Continue training on top of the restored weights to check whether
# accuracy keeps improving from the existing checkpoint.
##################
training_epochs = 1000
batch_size = 100
display_step = 1
saver = tf.train.Saver()
model_path = "mnist/521model.ckpt"
startTime = datetime.now()
print("启动第二次session")
# Creates a session with log_device_placement set to True.
with tf.Session() as sess2:
    sess2.run(tf.global_variables_initializer())
    # Resume from the saved checkpoint instead of random weights.
    saver.restore(sess2, model_path)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Sweep the full training set once per epoch.
        for _ in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c = sess2.run([optimizer, cost],
                             feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += c / total_batch

        if (epoch + 1) % display_step == 0:
            print("Epoch:", "%04d" % (epoch + 1), "cost=", '{:.9f}'.format(avg_cost))

    print("训练成功!!")

    # Accuracy on the test set after the extra epochs.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("准确度:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Overwrite the checkpoint with the further-trained weights.
    save_path = saver.save(sess2, model_path)
    print("模型文件在:%s" % save_path)
    print("Time taken:", datetime.now() - startTime)
# Verify the second training run by restoring the updated checkpoint.
startTime = datetime.now()
print("测试第二次训练的情况")
with tf.Session() as sess3:
    # Initialize, then load the weights written by the second training run.
    sess3.run(tf.global_variables_initializer())
    saver.restore(sess3, model_path)

    # Test-set accuracy of the further-trained model.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("准确度:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Classify two training samples.
    # Bug fix: print the fetched probabilities `predv`, not the `pred`
    # tensor object as the original did.
    output = tf.argmax(pred, 1)
    batch_xs, batch_ys = mnist.train.next_batch(2)
    outputval, predv = sess3.run([output, pred], feed_dict={x: batch_xs})
    print(outputval, predv, batch_ys)

    # Show the two samples that were just classified.
    im = batch_xs[0]
    pylab.imshow(im.reshape(-1, 28))
    pylab.show()
    im = batch_xs[1]
    pylab.imshow(im.reshape(-1, 28))
    pylab.show()

print("Time taken:", datetime.now() - startTime)