I ran the code shared here, which does time-series prediction: based on the previous 49 values, it predicts the 50th. A simplified version is below. The MSE was 0.07.

I also created a baseline: a "predictor" that simply takes X_t to be X_{t-1}. So if energy use was 10 today, the prediction for the next day is also 10. That is the simplest predictor there is.

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Persistence baseline: predict X_t = X_{t-1} and report the mean squared error.
#
# Rows whose value is '?' are parsed as NaN and dropped, so the column is
# numeric from the start. This replaces the string comparison on a sliced
# frame, which made the later column assignments operate on a slice of the
# original frame.
df = pd.read_csv('household_power_consumption.txt', sep=';',
                 usecols=['Global_active_power'], na_values='?')
df = df.dropna().astype(float)

# G2 holds the previous observation, i.e. the baseline's prediction.
df['G2'] = df['Global_active_power'].shift(1)

# Squared prediction error per row.
df['err'] = np.power(df['G2'] - df['Global_active_power'], 2)

# The first row has no predecessor, so its error is NaN. mean() leaves it out
# of both the sum and the count; sum() / len(df) counted it in the denominator.
print(df['err'].mean())

I also get an MSE of 0.07 from this. In ML it always helps to compare a model against a baseline. This is not to say the LSTM code is doing nothing; perhaps the model can be improved, etc.

import matplotlib.pyplot as plt

import numpy as np, time, csv

from keras.layers.core import Dense, Activation, Dropout

from keras.layers.recurrent import LSTM

from keras.models import Sequential

# Fixed seed so the shuffle of the training windows is repeatable.
np.random.seed(1234)

# Window length: the first seq - 1 values are the input, the last is the target.
seq = 50


def data_power_consumption(path_to_dataset, sequence_length=seq, ratio=1.0):
    """Load the power readings and build train/test windows for the LSTM.

    The third ';'-separated field of every row is parsed as a float. Rows
    where that is not possible (non-numeric text such as a header or a '?'
    placeholder, or rows with fewer than three fields) are skipped.
    Consecutive readings are cut into overlapping windows of
    ``sequence_length`` values, the global mean is subtracted, and the
    windows are split 90/10 into train and test. Only the training windows
    are shuffled, using the global NumPy random state.

    Args:
        path_to_dataset: Path to the ';'-delimited text file.
        sequence_length: Number of readings per window. The first
            ``sequence_length - 1`` values are the input, the last one is
            the target.
        ratio: Fraction of 2049280 readings to load, counted from the start
            of the file.

    Returns:
        ``[X_train, y_train, X_test, y_test]``. The ``X`` arrays have shape
        ``(n, sequence_length - 1, 1)`` and the ``y`` arrays shape ``(n,)``.

    Raises:
        ValueError: If too few readings were loaded to build a single window.
    """
    # presumably 2049280 is the number of valid readings in the full file
    # (TODO confirm); ratio scales how many of them are read.
    max_values = ratio * 2049280

    power = []
    with open(path_to_dataset) as f:
        for line in csv.reader(f, delimiter=";"):
            try:
                power.append(float(line[2]))
            except (ValueError, IndexError):
                # Non-numeric field or a short/blank row: skip it instead of
                # aborting the whole load.
                pass
            if len(power) >= max_values:
                break

    print("Data loaded from csv. Formatting...")

    # NOTE: the window starting at len(power) - sequence_length is not
    # included; this keeps the original row count.
    n_windows = len(power) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            "need more than %d readings to build a window, got %d"
            % (sequence_length, len(power)))

    # One row per window: shape (n_windows, sequence_length).
    result = np.array(
        [power[start:start + sequence_length] for start in range(n_windows)])

    result_mean = result.mean()
    result -= result_mean
    print("Shift :  %s" % result_mean)
    print("Data :  %s" % (result.shape,))

    # round() returns a float on Python 2; slice bounds must be integers.
    row = int(round(0.9 * result.shape[0]))

    # `train` is a view, so the shuffle reorders the first `row` rows of
    # `result` in place; the test rows after it keep their order.
    train = result[:row, :]
    np.random.shuffle(train)

    X_train = train[:, :-1]
    y_train = train[:, -1]
    X_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add a trailing feature axis of size 1: (samples, timesteps, 1).
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    return [X_train, y_train, X_test, y_test]

# Module-level load of the data set, using half of the reading budget
# (ratio = 0.5) and windows of `seq` values.
path_to_dataset = 'household_power_consumption.txt'
ratio = 0.5
X_train, y_train, X_test, y_test = data_power_consumption(
    path_to_dataset, sequence_length=seq, ratio=ratio)

def build_model():
    """Create and compile the two-layer LSTM regression network.

    The stack is: an LSTM with ``seq`` units that returns the full sequence,
    dropout 0.2, an LSTM with 100 units that returns only its last output,
    dropout 0.2, and a single linear output unit. The model is compiled with
    mean squared error loss and the RMSprop optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Layers are created in the same order in which they are stacked.
    stack = (
        LSTM(input_dim=1, output_dim=seq, return_sequences=True),
        Dropout(0.2),
        LSTM(100, return_sequences=False),
        Dropout(0.2),
        Dense(output_dim=1),
        Activation("linear"),
    )

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(loss="mse", optimizer="rmsprop")
    return network

def run_network(model=None, data=None):
    """Train the LSTM for one epoch, predict the test set and plot the result.

    The first 100 true and predicted test values are plotted and saved to
    ``test_01.png``.

    Args:
        model: Model to train. If ``None``, a new one is created with
            ``build_model()``.
        data: ``(X_train, y_train, X_test, y_test)`` as returned by
            ``data_power_consumption``. If ``None``, the data set is loaded
            from ``household_power_consumption.txt``.

    Returns:
        ``(model, y_test, predicted)``, where ``predicted`` is flattened to
        one dimension.
    """
    ratio = 0.5
    sequence_length = seq
    path_to_dataset = 'household_power_consumption.txt'

    # Honour the arguments: only load the data or build a model when the
    # caller did not supply them.
    if data is None:
        print('Loading data... ')
        X_train, y_train, X_test, y_test = data_power_consumption(
            path_to_dataset, sequence_length, ratio)
    else:
        X_train, y_train, X_test, y_test = data
    print("%s %s" % (X_train.shape, X_test.shape))

    if model is None:
        model = build_model()

    model.fit(X_train, y_train, batch_size=512, nb_epoch=1,
              validation_split=0.05)

    predicted = model.predict(X_test)
    # Flatten the predictions to one dimension for plotting.
    predicted = np.reshape(predicted, (predicted.size,))

    # Draw both curves on the same axes.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(y_test[:100])
    ax.plot(predicted[:100])
    plt.savefig('test_01.png')

    return model, y_test, predicted

# Script entry point: load the data, train for one epoch and save the plot.
run_network()