tensorflow api를 사용한 예제로 설명해 보겠습니다. 차이점은 크게 아래와 같습니다.
- init_state를 placeholder로 선언하지 않아도 됨
- init state에 대한 batch_size를 placeholder로 선언해 주어서 유동적(e.g. Train/Test에 다른 batch_size)으로 바꾸어 줄 수 있음

또한 RNN from tensorflow에 있는 tf.nn.rnn_cell.BasicRNNCell을 tf.nn.rnn_cell.LSTMCell로 바꾼 내용입니다.
tf.nn.static_rnn은 시퀀스 길이가 고정되어 있다고 가정합니다.

시계열 데이터 분석
Google_Stock_Price 데이터를 사용하여 설명하고자 한다.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Default figure size for every plot created below (width, height in inches).
plt.rcParams.update({'figure.figsize': [20, 40]})

# Load the stock price table and preview its first rows.
data_set = pd.read_csv("../data/Google_Stock_Price_Train.csv")
print(data_set.head())
Date Open High Low Close Volume
0 1/3/2012 325.25 332.83 324.97 663.59 7,380,500
1 1/4/2012 331.27 333.87 329.08 666.45 5,749,400
2 1/5/2012 329.83 330.75 326.89 657.21 6,590,300
3 1/6/2012 328.34 328.77 323.68 648.24 5,405,900
4 1/9/2012 322.04 322.29 309.46 620.76 11,688,800
Data preprocessing
1) Open(시가)와 2) 새로 생성시킨 변수(High $-$ Low: 변동량) 2개의 변수를 사용할 때를 가정해보자.
data_set.shape
(1258, 6)
# Keep only the columns at positions 1, 2 and 3 (Open, High, Low) and drop
# down from a DataFrame to a plain NumPy array.
feature_positions = [1, 2, 3]
data_set = data_set.iloc[:, feature_positions].values
data_set
array([[325.25, 332.83, 324.97],
[331.27, 333.87, 329.08],
[329.83, 330.75, 326.89],
...,
[793.7 , 794.23, 783.2 ],
[783.33, 785.93, 778.92],
[782.75, 782.78, 770.41]])
# Two features per row: the Open price and the High - Low spread.
open_price = data_set[:, 0]
high_low_spread = data_set[:, 1] - data_set[:, 2]
data_set = np.column_stack((open_price, high_low_spread))
data_set
array([[325.25, 7.86],
[331.27, 4.79],
[329.83, 3.86],
...,
[793.7 , 11.03],
[783.33, 7.01],
[782.75, 12.37]])
RNN 입력 Tensor 생성
- target interval을 정의해야 한다. 여기서는 target interval을 $1$로 설정하였다.
- 입력 Tensor의 shape: (batch_size, seq_length, input_dim)
- target interval을 고려하여 데이터 분리
- seq_length를 구분하여 데이터 분리
X_data = data_set[0:1257]
# Target: column 0 (Open) shifted one row ahead of the inputs; the slice keeps
# it two-dimensional, which is the layout the scaler works on.
y_data = data_set[1:1258, :1]

# Scale inputs and targets separately (MinMaxScaler default range is 0,1).
# NOTE(review): both scalers are fit on the whole series, i.e. before the
# train/test split, so test-period min/max leak into training - confirm this
# is acceptable for the tutorial.
X_sc = MinMaxScaler()
X_sc.fit(X_data)
X_data = X_sc.transform(X_data)

y_sc = MinMaxScaler()
y_sc.fit(y_data)
y_data = y_sc.transform(y_data)
input_dim=2
X_data
array([[0.08581368, 0.11558367],
[0.09701243, 0.05673759],
[0.09433366, 0.03891125],
...,
[0.95163331, 0.16043703],
[0.95725128, 0.17634656],
[0.93796041, 0.09929078]])
y_data
array([[0.09701243],
[0.09433366],
[0.09156187],
...,
[0.95725128],
[0.93796041],
[0.93688146]])
# hyperparameters
seq_length = 7   # number of consecutive time steps in one input window
batch_size = 35  # mini-batch size
# `batch_size` is later rebound to a tf.placeholder when the graph inputs are
# declared, and the training/evaluation loop slices mini-batches with
# `b_size`. Keep the integer size under that name so the loop has it.
b_size = batch_size
state_size = 4  # hidden_node size
input_dim = X_data.shape[1]  # = 2
output_dim = y_data.shape[1]  # = 1
print('# of paired dataset', len(y_data)-seq_length)
# of paired dataset 1250
input: 7개의 step(0~6)을 보고 그 다음 시점(7)을 예측
data_X = []
data_y = []
# Slide a seq_length-wide window over the scaled series: each window of inputs
# is paired with the target located right after the window's last step.
n_windows = len(y_data) - seq_length
for first in range(n_windows):
    after_last = first + seq_length
    window = X_data[first:after_last]
    target = y_data[after_last]
    data_X.append(window)
    data_y.append(target)
    # Print an occasional sample pair as a sanity check.
    if first % 1000 == 0:
        print(window, "->", target)
[[0.08581368 0.11558367]
[0.09701243 0.05673759]
[0.09433366 0.03891125]
[0.09156187 0.06248802]
[0.07984225 0.21084915]
[0.0643277 0.12631781]
[0.0585423 0.04389496]] -> [0.06109085]
[[0.88241313 0.16062871]
[0.87512092 0.0555875 ]
[0.88138998 0.22311673]
[0.90700573 0.22465018]
[0.92544088 0.17002108]
[0.91223305 0.17883841]
[0.86293623 0.2102741 ]] -> [0.83875288]
Train/Test 데이터 분리
X_trn, X_tst, y_trn, y_tst = train_test_split(data_X, data_y,
test_size=0.3,
random_state=42,
shuffle=False
)
# Report how many samples ended up in each part of the split.
for label, part in (('X_train:', X_trn),
                    ('y_train:', y_trn),
                    ('X_test:', X_tst),
                    ('y_test:', y_tst)):
    print(label, len(part))
X_train: 875
y_train: 875
X_test: 375
y_test: 375
Graph로 통과시킬 변수 선언
# Graph inputs. The leading dimension is None so any number of samples can be
# fed per run.
X = tf.placeholder(dtype=tf.float32, shape=[None, seq_length, input_dim])
y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
lr = tf.placeholder(dtype=tf.float32)
# Scalar int placeholder fed with the number of samples in the current run.
# From here on the name `batch_size` refers to this tensor, not to an integer.
batch_size = tf.placeholder(dtype=tf.int32, shape=[])  # [] important
for graph_input in (X, y):
    print(graph_input)
Tensor("Placeholder:0", shape=(?, 7, 2), dtype=float32)
Tensor("Placeholder_1:0", shape=(?, 1), dtype=float32)
# Cut X along axis 1 (the time axis) into seq_length equal pieces, one per
# time step; each piece still carries a length-1 time dimension.
_inputs_series = tf.split(value=X, num_or_size_splits=seq_length, axis=1)
_inputs_series
[<tf.Tensor 'split:0' shape=(?, 1, 2) dtype=float32>,
<tf.Tensor 'split:1' shape=(?, 1, 2) dtype=float32>,
<tf.Tensor 'split:2' shape=(?, 1, 2) dtype=float32>,
<tf.Tensor 'split:3' shape=(?, 1, 2) dtype=float32>,
<tf.Tensor 'split:4' shape=(?, 1, 2) dtype=float32>,
<tf.Tensor 'split:5' shape=(?, 1, 2) dtype=float32>,
<tf.Tensor 'split:6' shape=(?, 1, 2) dtype=float32>]
# Drop the length-1 time axis from every piece, leaving a Python list with one
# 2-D tensor per time step; this list is what is handed to tf.nn.static_rnn.
inputs_series= [tf.squeeze(inputs,axis=1) for inputs in _inputs_series]
inputs_series
[<tf.Tensor 'Squeeze:0' shape=(?, 2) dtype=float32>,
<tf.Tensor 'Squeeze_1:0' shape=(?, 2) dtype=float32>,
<tf.Tensor 'Squeeze_2:0' shape=(?, 2) dtype=float32>,
<tf.Tensor 'Squeeze_3:0' shape=(?, 2) dtype=float32>,
<tf.Tensor 'Squeeze_4:0' shape=(?, 2) dtype=float32>,
<tf.Tensor 'Squeeze_5:0' shape=(?, 2) dtype=float32>,
<tf.Tensor 'Squeeze_6:0' shape=(?, 2) dtype=float32>]
# forward pass
# A single LSTM cell with state_size hidden units; the same cell object is
# used for the zero initial state and for the unrolled RNN below.
cell = tf.nn.rnn_cell.LSTMCell(num_units=state_size)
cell
<tensorflow.python.ops.rnn_cell_impl.LSTMCell at 0x10e89d7f0>
- cell을 활용해 hidden initial state를 생성하려면 batch size가 필요하게 됨
- placeholder를 이용하여 init state를 생성
batch_size
<tf.Tensor 'Placeholder_3:0' shape=() dtype=int32>
# All-zero initial state for the cell. Its leading dimension comes from the
# scalar `batch_size` placeholder, so it follows whatever size is fed per run.
init_state = cell.zero_state(batch_size, tf.float32)
init_state
LSTMStateTuple(c=<tf.Tensor 'LSTMCellZeroState/zeros:0' shape=(?, 4) dtype=float32>, h=<tf.Tensor 'LSTMCellZeroState/zeros_1:0' shape=(?, 4) dtype=float32>)
- states_series: 각 time step에서 계산된 hidden state들의 list로, input sequence와 동일한 길이를 가짐
- current_state: 마지막 time step의 state (LSTM의 경우 (c, h) 튜플)
- states_series[-1] = current_state.h
# Unroll the cell over the per-step inputs. The call returns one output tensor
# per step plus the state left after the last step.
states_series, current_state = tf.nn.static_rnn(
    cell,
    inputs_series,  # list of per-step tensors, each (batch x input) size
    initial_state=init_state,
)
states_series
[<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell/mul_2:0' shape=(?, 4) dtype=float32>,
<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_1/mul_2:0' shape=(?, 4) dtype=float32>,
<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_2/mul_2:0' shape=(?, 4) dtype=float32>,
<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_3/mul_2:0' shape=(?, 4) dtype=float32>,
<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_4/mul_2:0' shape=(?, 4) dtype=float32>,
<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_5/mul_2:0' shape=(?, 4) dtype=float32>,
<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_6/mul_2:0' shape=(?, 4) dtype=float32>]
# Give every per-step output a new axis 1 and join them along it, turning the
# Python list into one tensor with the time steps on axis 1. After this line
# `states_series` is a tensor, no longer a list.
states_series = tf.concat([tf.expand_dims(state,1) for state in states_series], axis=1)
states_series
<tf.Tensor 'concat:0' shape=(?, 7, 4) dtype=float32>
# last cell
current_state
LSTMStateTuple(c=<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_6/add_1:0' shape=(?, 4) dtype=float32>, h=<tf.Tensor 'rnn/rnn/lstm_cell/lstm_cell_6/mul_2:0' shape=(?, 4) dtype=float32>)
# This cell repeats the stacking step. When `states_series` has already been
# merged into a single tensor, looping over it again is an error in graph
# mode (a tensor with an unknown batch dimension is not iterable), so only
# stack while it is still the per-step list returned by tf.nn.static_rnn.
if isinstance(states_series, (list, tuple)):
    states_series = tf.concat([tf.expand_dims(state, 1) for state in states_series], axis=1)
states_series
<tf.Tensor 'concat:0' shape=(?, 7, 4) dtype=float32>
states_series[:, -1]은 current_state.h와 동일하다.
# last hidden state
states_series[:,-1]
<tf.Tensor 'strided_slice:0' shape=(?, 4) dtype=float32>
FCL(Fully Connected Layer) 생성
# with last hidden state
# Linear read-out (no activation) applied to the hidden state of the final
# time step only.
last_hidden = states_series[:, -1]
y_pred = tf.layers.dense(inputs=last_hidden, units=output_dim, activation=None)
y_pred
<tf.Tensor 'dense/BiasAdd:0' shape=(?, 1) dtype=float32>
# Mean squared error between targets and predictions, minimised with Adam.
# The learning rate is the `lr` placeholder, so it is supplied on every run.
loss = tf.losses.mean_squared_error(y, y_pred)
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.minimize(loss)

# Open a session and initialise all variables. The initializer op is created
# after the optimizer so the optimizer's own variables are covered as well.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Train with mini-batches and, every `eval_every` epochs, predict the whole
# test set and plot predictions against the true targets in the next subplot.
n_epochs = 7000
eval_every = 1000

ix = 1  # 1-based position of the next subplot in the 10-row figure
for i in range(n_epochs):
    for k in range(math.ceil(len(X_trn) / b_size)):
        start = k * b_size
        end = start + b_size
        X_batch = X_trn[start:end]
        # Feed the real slice length rather than b_size: the zero initial
        # state is sized from `batch_size`, so a shorter final slice must be
        # declared with its own length (as the evaluation loop below does).
        _, _loss = sess.run([train_op, loss],
                            feed_dict={lr: 0.01,
                                       X: X_batch,
                                       y: y_trn[start:end],
                                       batch_size: len(X_batch)})
    if i % eval_every == 0:
        # _loss is the loss of the last mini-batch of this epoch only.
        print('{}th loss: {}'.format(i, _loss))
        plt.subplot(10, 1, ix)
        total_y_pred = []
        for k in range(math.ceil(len(X_tst) / b_size)):
            start = k * b_size
            end = start + b_size
            X_batch = X_tst[start:end]
            _y_pred = sess.run(y_pred, feed_dict={X: X_batch,
                                                  batch_size: len(X_batch)})
            total_y_pred.extend(_y_pred)
        total_y_pred = np.array(total_y_pred)
        # Mean absolute error on the test set, still in scaled units.
        tst_loss = np.mean(np.abs(total_y_pred - y_tst))
        plt.plot(total_y_pred, label='pred')
        plt.plot(y_tst, label=' true')
        plt.legend()
        plt.title('epoch: {}'.format(i))
        ix += 1
0th loss: 0.0006607373361475766
1000th loss: 0.00027929761563427746
2000th loss: 0.001301404437981546
3000th loss: 0.00014712694974150509
4000th loss: 0.00025016447762027383
5000th loss: 0.00027353738551028073
6000th loss: 0.00020064653654117137
결과해석
Early stopping 또는 Dropout이 필요해 보인다.
y_sc.inverse_transform(_y_pred)[0:5]
array([[718.59576],
[769.2097 ],
[843.2715 ],
[811.21063],
[785.0935 ]], dtype=float32)
tst_loss
0.05923108957574122
# tst_loss is a mean absolute error in scaled units, so converting it to price
# units needs only the scale factor. inverse_transform() would also add the
# data minimum, which turns the error into a price level (the 310.96 it
# returned for this run) instead of an error magnitude; it also expects a 2-D
# array rather than a scalar. For this run the rescaled error is roughly 31.8.
tst_loss * y_sc.data_range_