如果能让程序写诗

2018/08/11 诗词

Posted by WangXiaoDong on August 11, 2018
    今天周六,准备在家休息,顺便思考了一下最近编程的内容,想把程序运用到古代诗词上,这样就可以用
深度学习的技术写诗了,最后根据读入的数据成功地写出了诗词,感觉挺好玩的!

使用神经网络训练写诗

上次已经把Keras使用神经网络的所有内容都介绍完了,这次我们就自己搭建一个网络来试试让程序写诗吧!

首先我们要导入可能需要用到的包:

from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Dropout
from keras.optimizers import RMSprop
from keras.utils import to_categorical
from keras.layers import LSTM,GRU,Bidirectional
from keras.layers.convolutional import Conv1D,Conv2D
from keras.layers.pooling import MaxPooling1D,AveragePooling1D,GlobalMaxPooling1D
from keras.callbacks import EarlyStopping
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, Dense, Permute, Reshape, Lambda, RepeatVector,merge,Embedding
import keras.backend as K
import keras
import read_utils
import os
import pickle
import numpy as np
import sys
import json

import dense_new

然后通过这里下载所需要的诗词,我为了简单测试,只使用了一个文件,格式是json的,因此需要进行数据读取:

def readJsonPoetry(fn, pl):
    '''
    Read poems from a JSON file and keep only lines of a fixed shape.

    fn: path to the JSON file (a list of dicts with a 'paragraphs' key).
    pl: number of characters in each half-line (before the comma).

    Returns (allX, allY, allData):
      allX    - accepted lines, each exactly pl*2+2 chars:
                pl chars + ',' + pl chars + '。'
      allY    - teacher-forcing targets: each line shifted left by one
                character with the end token 'e' appended
      allData - flat stream of every character of every accepted line,
                with 'e' appended after each line (used to build the vocab)
    '''
    with open(fn, encoding='UTF-8') as f:
        poems = json.loads(f.read())

    expected_len = pl * 2 + 2
    inputs, targets, char_stream = [], [], []
    for poem in poems:
        for line in poem['paragraphs']:
            # Keep only lines with the comma and full stop in the
            # expected positions for the chosen half-line length.
            well_formed = (len(line) == expected_len
                           and line[pl] == ','
                           and line[expected_len - 1] == '。')
            if not well_formed:
                continue
            inputs.append(line)
            targets.append(line[1:] + 'e')
            char_stream.extend(line)
            char_stream.append('e')
    return inputs, targets, char_stream

实现用于预测的类:

class UserRNNReader():
    """Wraps a trained Keras model for step-by-step next-character prediction."""

    def __init__(self, modelFile, converter, start, end, steps):
        # Load the trained model from disk; `converter` maps chars <-> ints.
        self.model = keras.models.load_model(modelFile)
        self.converter = converter
        self.startToken = start
        self.endToken = end
        self.steps = steps

    def prediction_next_n(self, prime, vocab_size, next_n=3, **k):
        """Return the `next_n` most probable next tokens after `prime`.

        prime:      list of integer-encoded characters generated so far.
        vocab_size: total number of token classes + 1 (unknown-token code).
        Returns a list of (token_index, probability) pairs, sorted by
        ascending probability (most probable last).
        """
        last_pos = len(prime) - 1  # position whose prediction we read out
        pad_value = self.converter.word_to_int_table[self.endToken]
        # Right-pad the prime sequence with the end token up to the fixed length.
        batch = keras.preprocessing.sequence.pad_sequences(
            [prime], maxlen=self.steps, dtype='int32',
            padding='post', value=pad_value)
        y = self.model.predict(batch)
        # Probability distribution over the vocabulary at the last real position.
        probs = np.squeeze(y[0, last_pos, :])
        # Indices of the next_n largest probabilities (mergesort keeps ties stable).
        top = np.argsort(probs, kind='mergesort')[-next_n:]
        return [(i, probs[i]) for i in top]

下面就是进行网络构建:

if __name__ == '__main__':
    batch_size = 100  # samples per training batch
    poetry_line_length = 12   # fixed sequence length, including one comma and one full stop
    
    # --- Data loading ---
    JsonFileName = 'p.json'
    allX,allY,allData = readJsonPoetry(JsonFileName,int(poetry_line_length/2-1))
    converter = read_utils.TextConverter(allData)
    # NOTE(review): getNumberData is not defined in this snippet — presumably it
    # lives in read_utils or an omitted helper; confirm before running.
    # Fixed an off-by-one: the test split previously started at index 1001,
    # silently dropping sample 1000.
    x_train,y_train = getNumberData(allX[:1000],allY[:1000],converter)
    x_test,y_test = getNumberData(allX[1000:],allY[1000:],converter)
    
    inp = Input(shape=(poetry_line_length,),name='Input')
    emb = Embedding(input_dim=converter.vocab_size, 
                    output_dim=64, 
                    input_length=poetry_line_length)(inp)
    # Plain recurrent branch (two stacked GRUs).
    rnn1 = GRU(128, input_shape=(poetry_line_length, converter.vocab_size),return_sequences=True,dropout=0.5,name='RNN')(emb)
    rnn2 = GRU(128,return_sequences=True,dropout=0.5,name='2RNN')(rnn1)
    # LSTM branch (built but currently unused: the Dense layer below reads from rnn2).
    lstm1 = LSTM(128, input_shape=(poetry_line_length, converter.vocab_size),return_sequences=True,dropout=0.5,name = 'LSTM')(emb)
    lstm2 = LSTM(128,return_sequences=True,dropout=0.5,name = '2LSTM')(lstm1)
    # Attention over the recurrent outputs (this variant is disabled):
#    attention_mul = attention_3d_block(lstm1,n_steps,True)
    # Final fully-connected output layer.
#    dense1 = Dense(128,name='Dense_hind')(emb)
    op_dense = Dense(converter.vocab_size,name='Dense')(rnn2)   # change the input here to switch branches
    # NOTE(review): attention_3d_block is not defined in this snippet — verify
    # where it is imported from before running.
    attention_mul = attention_3d_block(op_dense,poetry_line_length,True)
    op = Activation('softmax',name='Softmax_Activate')(attention_mul)
    model = Model(inputs=inp,outputs=op)
    # Print a summary of all layers and parameters.
    model.summary()   
#    read_activations.get_activations(model,)
    # Draw the network structure to an image file.
    plot_model(model, to_file='model.png')
#    sys.exit()   # early exit for testing

有了网络后就可以设置损失函数和相关的回调操作,然后进行训练和预测了:

 #设置优化器和损失函数
    optimizer = RMSprop(lr=0.01) #该优化器通常是面对递归神经网络时的一个良好选择
#    optimizer = keras.optimizers.Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,metrics=['accuracy'])
    #设置回调函数相关的操作
#    early_stopping_monitor = EarlyStopping(patience=5)  #早停
    tensor_board = keras.callbacks.TensorBoard()  #数据输出到TensorBoard
    #下面设置模型保存点,一步检测一次,只保存最好的模型
    check_point = keras.callbacks.ModelCheckpoint(
            filepath='logs\weights-{epoch:03d}-{val_acc:.4f}.hdf5',
            monitor='val_acc',
            period=1,
            save_best_only=True)  
    # histories = Histories()   #记录一些数据
    
    hsty = model.fit(x_train, y_train, batch_size=batch_size, epochs=100,validation_data=(x_test,y_test),
              callbacks=[tensor_board,check_point])  # 训练
#                  callbacks=[early_stopping_monitor,tensor_board,histories])  # 训练
    
    model.save('my_model.h5')    

这样就完成了模型的训练程序,进行训练后可以在本地保存最后一次训练完成后的模型,之后我们就可以读取模型并进行写诗:

import os
#import model
import  read_utils
import pickle
import tensorflow as tf
#from treelib import Tree
from graphviz import Digraph

from dense_new import Dense_New
import gen_poetry

import keras
import random

if __name__ == '__main__':
    startToken = 'start'
    endToken = 'e'
    poetry_line_length = 12  # must match the length the model was trained with
    n_steps = 100
    
    # --- Data loading (rebuilds the same vocabulary used at training time) ---
    JsonFileName = 'p.json'
    allX,allY,allData = gen_poetry.readJsonPoetry(JsonFileName,int(poetry_line_length/2-1))
    converter = read_utils.TextConverter(allData)
    
    from gen_poetry import UserRNNReader         
    # Register the custom layer so keras.models.load_model can deserialize it.
    with keras.utils.CustomObjectScope({'Dense_New': Dense_New}):
        model  = UserRNNReader('my_model.h5',converter,startToken,endToken,poetry_line_length)
        
    next_n = 3   # number of candidate next characters per prediction    
    # Seed the poem with the integer code of the character '爱'.
    before_list = [converter.word_to_int_table['爱']]
    pred = model.prediction_next_n(before_list,converter.vocab_size,next_n)
    # First half-line: append 4 characters, each picked uniformly at random
    # from the top-next_n candidates of the previous prediction.
    # NOTE(review): randint(0,2) hard-codes next_n-1 = 2; keep in sync with next_n.
    for i in range(4):
        num = pred[random.randint(0,2)][0]
        before_list.append(num)
        pred = model.prediction_next_n(before_list,converter.vocab_size,next_n)
    before_list.append(converter.word_to_int_table[','])
    # NOTE(review): pred is not recomputed after appending the comma, so the first
    # character of the second half-line is conditioned on the sequence WITHOUT the
    # comma — confirm whether that is intentional.
    for i in range(5):
        num = pred[random.randint(0,2)][0]
        before_list.append(num)
        pred = model.prediction_next_n(before_list,converter.vocab_size,next_n)
    before_list.append(converter.word_to_int_table['。'])
    
    # Convert the integer sequence back to text and print the finished line.
    w = converter.arr_to_text(before_list)
    print(w) 

上面的代码就是开头是“爱”字,写出的一首诗。

最后,本篇的全部代码在下面这个网页可以下载:

https://github.com/Dongzhixiao/Python_Exercise/tree/master/LSTM