1、请整理下面这段代码,并改用不依赖R语言(rpy2)的方法实现:
from rpy2 import robjects
from rpy2.robjects.packages import importr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
# Disabled code kept for reference (previously a bare string literal):
# r_file = 'auto_arima.R'
# robjects.r.source(r_file)

TRAIN_SIZE = 100      # leading observations used to fit the validation model
FORECAST_STEPS = 10   # number of future steps in the final forecast


def date_parse(dates):
    """Parse ``'%Y/%m/%d'`` date text into pandas datetimes.

    Accepts a single string or an array-like/Index of strings.  The previous
    implementation used ``pd.datetime.strptime``; the ``pd.datetime`` alias
    was removed in pandas 2.0, so it raised ``AttributeError`` there.
    """
    return pd.to_datetime(dates, format='%Y/%m/%d')


def future_daily_index(last_date, steps):
    """Return ``steps`` consecutive daily timestamps following ``last_date``."""
    return pd.date_range(start=last_date + pd.Timedelta(days=1),
                         periods=steps, freq='D')


# The pipeline runs when this code is executed as a script or notebook cell
# (where __name__ == '__main__'); the names assigned below are still module
# globals.  Importing the helpers above does not start R or read any file.
if __name__ == '__main__':
    # Make sure the required R packages are available.
    r_script = '''
if (!require('forecast'))
install.packages('forecast')
if (!require('Metrics'))
install.packages('Metrics')
'''
    robjects.r(r_script)

    # Import the R packages.
    forecast = importr('forecast')
    Metrics = importr('Metrics')
    stats = importr('stats')

    # Read the data.  The index is parsed after loading because the
    # ``date_parser`` argument of ``read_csv`` is deprecated since pandas 2.0.
    raw_data = pd.read_csv('time_series.txt', index_col='Date')
    raw_data.index = date_parse(raw_data.index)
    print(raw_data.head())

    # Pre-processing: split into training and test sets.
    train_ts = raw_data.iloc[:TRAIN_SIZE]
    test_ts = raw_data.iloc[TRAIN_SIZE:]

    # Convert to R objects.
    # NOTE(review): ``.values`` of a DataFrame is 2-D; confirm the file has a
    # single value column so that FloatVector receives one series.
    ts_data = robjects.FloatVector(raw_data.values)
    train_ts = robjects.FloatVector(train_ts.values)
    test_ts = robjects.FloatVector(test_ts.values)

    # Fit the model on the training set.
    model = forecast.auto_arima(train_ts, ic='aic')
    print(model)

    # Validate: predict the held-out part and report the RMSE (the value was
    # previously computed and then discarded).
    in_ts_predict = stats.predict(model, (len(raw_data) - TRAIN_SIZE))
    predict_pred = in_ts_predict.rx2('pred')  # predicted values
    rmse = Metrics.rmse(test_ts, predict_pred)
    print('Test RMSE:', rmse[0])

    # Refit on the full series and forecast ahead.
    model = forecast.auto_arima(ts_data, ic='aic')
    print(model)
    out_ts_predict = forecast.forecast(model, h=FORECAST_STEPS, level=0.95)

    # Collect the forecast mean and the bounds returned for level=0.95.
    out_ts_pre_pred = np.array(out_ts_predict.rx2('mean'))
    out_ts_pre_lower = np.array(out_ts_predict.rx2('lower'))
    out_ts_pre_upper = np.array(out_ts_predict.rx2('upper'))
    predict_plt = pd.DataFrame.from_dict({
        'mean': out_ts_pre_pred,
        'lower': [row[0] for row in out_ts_pre_lower],
        'upper': [row[0] for row in out_ts_pre_upper],
    })
    # Label the forecast rows with the days following the last observation.
    predict_plt.index = future_daily_index(raw_data.index.max(), FORECAST_STEPS)
    print(predict_plt.head())

    # Show the full time series: observations plus forecast mean.
    raw_data.plot()
    predict_plt['mean'].plot(label='out ts predicted data', style='--')
    plt.legend(loc='best')
    plt.title('time series predicted data')
    plt.show()
整理这个代码,不使用R语言的方法。运行时报错如下:
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[55], line 1
----> 1 from rpy2 import robjects
2 from rpy2.robjects.packages import importr
3 import pandas as pd
ModuleNotFoundError: No module named 'rpy2'
2、
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
TRAIN_SIZE = 100         # leading observations used to fit the validation model
FORECAST_STEPS = 10      # number of future steps in the final forecast
ARIMA_ORDER = (5, 1, 0)  # fixed (p, d, q) order passed to ARIMA


def date_parse(dates):
    """Parse ``'%Y/%m/%d'`` date text into pandas datetimes.

    Accepts a single string or an array-like/Index of strings.
    """
    return pd.to_datetime(dates, format='%Y/%m/%d')


def future_daily_index(last_date, steps):
    """Return ``steps`` consecutive daily timestamps following ``last_date``."""
    return pd.date_range(start=last_date + pd.Timedelta(days=1),
                         periods=steps, freq='D')


# The pipeline runs when this code is executed as a script or notebook cell
# (where __name__ == '__main__'); the names assigned below are still module
# globals.  Importing the helpers above does not read any file or fit a model.
if __name__ == '__main__':
    # Read the data.  The index is parsed after loading because the
    # ``date_parser`` argument of ``read_csv`` is deprecated since pandas 2.0.
    raw_data = pd.read_csv('time_series.txt', index_col='Date')
    raw_data.index = date_parse(raw_data.index)

    # Pre-processing: split into training and test sets.
    train_ts = raw_data.iloc[:TRAIN_SIZE]
    test_ts = raw_data.iloc[TRAIN_SIZE:]

    # Fit the model on the training set.
    model = ARIMA(train_ts, order=ARIMA_ORDER)
    fit_model = model.fit()

    # Validate: predict the held-out positions and report the RMSE.
    in_ts_predict = fit_model.predict(start=len(train_ts),
                                      end=len(raw_data) - 1,
                                      dynamic=False)
    rmse = np.sqrt(mean_squared_error(test_ts, in_ts_predict))
    print('Test RMSE:', rmse)

    # Refit on the full series and predict the next FORECAST_STEPS positions.
    model = ARIMA(raw_data, order=ARIMA_ORDER)
    fit_model = model.fit()
    out_ts_predict = fit_model.predict(start=len(raw_data),
                                       end=len(raw_data) + FORECAST_STEPS - 1)
    # The parsed index carries no declared frequency, so the prediction may
    # come back labelled by integer position.  Pin it to the days following
    # the last observation so it is drawn at the right place on the date axis.
    out_ts_predict = pd.Series(
        np.asarray(out_ts_predict),
        index=future_daily_index(raw_data.index.max(), FORECAST_STEPS),
        name='out ts predicted data',
    )

    # Show the full time series: observations plus forecast on the same axes.
    ax = raw_data.plot()
    out_ts_predict.plot(ax=ax, label='out ts predicted data', style='--')
    plt.legend(loc='best')
    plt.title('time series predicted data')
    plt.show()