疫情确诊数预测图表输出数据错误。

疫情确诊数预测图表输出数据错误。

问题描述:

python用sklearn机器学习新冠疫情确诊预测,
程序跑的没什么问题,光看图表多项式的几个趋势也比较正常,
四次项的更贴合真实数据所以想在图表上输出四次项这部分预测的数据,
但现在图表上和控制台输出的都是线性回归的预测数据,
请问应该怎么改成输出四次项的呀?

图片说明

import operator
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression,LogisticRegression

def init_data():
    # 原始数据
    countrydatahistorys=[{'date': '2020-08-03', 'confirmedNum': 18282208},\
                         {'date': '2020-08-02', 'confirmedNum': 18079506},\
                         {'date': '2020-08-01', 'confirmedNum': 17850469},\
                         {'date': '2020-07-31', 'confirmedNum': 17599836},\
                         {'date': '2020-07-30', 'confirmedNum': 17309805},\
                         {'date': '2020-07-29', 'confirmedNum': 17029155},\
                         {'date': '2020-07-28', 'confirmedNum': 16740006},\
                         {'date': '2020-07-27', 'confirmedNum': 16487669},\
                         {'date': '2020-07-26', 'confirmedNum': 16261215},\
                         {'date': '2020-07-25', 'confirmedNum': 16047935},\
                         {'date': '2020-07-24', 'confirmedNum': 15792390},\
                         {'date': '2020-07-23', 'confirmedNum': 15511225},\
                         {'date': '2020-07-22', 'confirmedNum': 15228469},\
                         {'date': '2020-07-21', 'confirmedNum': 14947990},\
                         {'date': '2020-07-20', 'confirmedNum': 14714367},\
                         {'date': '2020-07-19', 'confirmedNum': 14509589},\
                         {'date': '2020-07-18', 'confirmedNum': 14292942},\
                         {'date': '2020-07-17', 'confirmedNum': 14055307},\
                         {'date': '2020-07-16', 'confirmedNum': 13813400},\
                         {'date': '2020-07-15', 'confirmedNum': 13560727},\
                         {'date': '2020-07-14', 'confirmedNum': 13329608},\
                         {'date': '2020-07-13', 'confirmedNum': 13107969},\
                         {'date': '2020-07-12', 'confirmedNum': 12915161},\
                         {'date': '2020-07-11', 'confirmedNum': 12722506},\
                         {'date': '2020-07-10', 'confirmedNum': 12506381},\
                         {'date': '2020-07-09', 'confirmedNum': 12273804},\
                         {'date': '2020-07-08', 'confirmedNum': 12045577},\
                         {'date': '2020-07-07', 'confirmedNum': 11833775},\
                         {'date': '2020-07-06', 'confirmedNum': 11622931},\
                         {'date': '2020-07-05', 'confirmedNum': 11455588},\
                         {'date': '2020-07-04', 'confirmedNum': 11273007},\
                         {'date': '2020-07-03', 'confirmedNum': 11079326},\
                         {'date': '2020-07-02', 'confirmedNum': 10875759},\
                         {'date': '2020-07-01', 'confirmedNum': 10668014},\
                         {'date': '2020-06-30', 'confirmedNum': 10450359},\
                         {'date': '2020-06-29', 'confirmedNum': 10276540},\
                         {'date': '2020-06-28', 'confirmedNum': 10118389},]

    # 按照时间排序全国趋势数据  
    countrydatahistorys=sorted(countrydatahistorys, key=operator.itemgetter('date'))
    # 结构化全国各省图表所需数据
    xdata=list(range(len(countrydatahistorys)))
    xlabel=list(row['date'] for row in countrydatahistorys)

    # 追加预测未来一周的自变量X
    xdata.extend(list(range(37, 44)))
    from datetime import date, datetime, timedelta
    start_date = date(2020, 8, 4)
    xlabel.extend(list(str(start_date + timedelta(i)) for i in range(7)))

    # 生成累计确诊数据,即y值
    confirmedNum=list(row['confirmedNum'] for row in countrydatahistorys)

    #进行数据格式转换,生成训练集、测试集和预测集
    Xlabel=np.array(xlabel).reshape(-1, 1)
    X=np.array(xdata).reshape(-1, 1)
    y=np.array(confirmedNum).reshape(-1, 1)
    X_train=X[:33]
    X_test=X[33:37]
    X_predict=X[37:44]
    y_train=y[:33]
    y_test=y[33:37]
    return X_train,X_test,y_train,y_test,X_predict,X,y,xlabel

X_train,X_test,y_train,y_test,X_predict,X,y,Xlabel=init_data()
lr=LinearRegression().fit(X_train,y_train)
coef=lr.coef_
intercept=lr.intercept_
score_train=lr.score(X_train,y_train)
score_test=lr.score(X_test,y_test)
y_predict=lr.predict(X_predict)

#--------------------------------
lg=LogisticRegression(C=0.2)
lg.fit(X_train,y_train)
y_lg_predict=lg.predict(X)


#--------------------------------
poly1 =PolynomialFeatures(degree=1)
X_ploy =poly1.fit_transform(X_train)
l1=LinearRegression()
l1.fit(X_ploy,y_train)

#--------------------------------
poly2 =PolynomialFeatures(degree=2)
X_ploy =poly2.fit_transform(X_train)
l2=LinearRegression()
l2.fit(X_ploy,y_train)

#--------------------------------
poly3 =PolynomialFeatures(degree=3)
X_ploy =poly3.fit_transform(X_train)
l3=LinearRegression()
l3.fit(X_ploy,y_train)

#--------------------------------
poly4 =PolynomialFeatures(degree=4)
X_ploy =poly4.fit_transform(X_train)
l4=LinearRegression()
l4.fit(X_ploy,y_train)
print('y_l4_predict=',l4.predict)
#--------------------------------
poly5 =PolynomialFeatures(degree=5)
X_ploy =poly5.fit_transform(X_train)
l5=LinearRegression()
l5.fit(X_ploy,y_train)

#--------------------------------
poly6 =PolynomialFeatures(degree=6)
X_ploy =poly6.fit_transform(X_train)
l6=LinearRegression()
l6.fit(X_ploy,y_train)

fig=plt.figure(figsize=(10,5.5))
plt.rcParams['font.sans-serif']=['SimHei']
# 画出实际值,注意X和y不等,X训练集加测试集和实际y值相等
plt.plot(np.vstack((X_train,X_test)),y,color='black',marker='o',linestyle='-',label='累计确诊病例(官方统计)')
plt.plot(X,y_lg_predict,color='cyan',marker='*',linestyle=':',label='逻辑回归')
plt.plot(X,intercept+X*coef,color='red',marker='*',linestyle=':',label='线性预测')
plt.plot(X,l2.predict(poly2.fit_transform(X)),color='blue',marker='*',linestyle=':',label='2次多项式')
plt.plot(X,l3.predict(poly3.fit_transform(X)),color='yellow',marker='*',linestyle=':',label='3次多项式')
plt.plot(X,l4.predict(poly4.fit_transform(X)),color='green',marker='*',linestyle=':',label='4次多项式')
print("y",y)

# 设置x轴标签及其字号
plt.xlabel('日期',fontsize=14)
# 设置y轴标签及其字号
plt.ylabel('累计确诊病例数量',fontsize=14)
# 设置X轴序列标签值
plt.xticks(X-1,Xlabel,rotation=30,fontsize=10)

# 添加训练集、测试集、预测集分割垂直直线
plt.axvline(x=36.5,linestyle='--',c="green")
plt.axvline(x=45.5,linestyle='--',c="green")

# 添加测试集的预测结果数据标签
for x,y in zip(X_predict.tolist(), y_predict.tolist()):
    plt.text(x[0],y[0],'{:5.0f}'.format(y[0]), fontsize=15)
    print("x:",x,"y:",y)
# 显示图例
plt.legend()

plt.show()