# Section: drawing plots
import pandas as pd
# Load the CSV and turn the DATE column into real datetime values.
unrate = pd.read_csv('DATA.csv')
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.head(12))
# Plotting tool
import matplotlib.pyplot as plt
#%matplotlib inline
# pyplot lets us create, customize and display a plot:
#   plt.plot()  draws onto the current figure
#   plt.show()  displays it
# Only the first twelve rows are used by the three demos below.
first_twelve = unrate[0:12]
x_dates = first_twelve['DATE']
y_values = first_twelve['VALUE']

## Demo 1: plot arguments (x data, y data, line colour, legend label)
print('\n\n----plot(x,y,c)----------')
plt.plot(x_dates, y_values, color='red', label='test')
# The legend shows the label; loc='best' lets matplotlib pick its position.
plt.legend(loc='best')
plt.show()
# help(plt.plot) prints the full documentation of plt.plot.

## Demo 2: rotate the tick labels so they do not crowd together
print('\n\n------------x,y轴字体角度变化/防止字体过于拥挤在一起------')
plt.plot(x_dates, y_values)
plt.xticks(rotation=45)   # x tick labels turned by 45 degrees
plt.yticks(rotation=180)  # y tick labels turned by 180 degrees
plt.show()

## Demo 3: axis descriptions and a title
print('\n---------添加x,y轴说明-----------')
plt.plot(x_dates, y_values)
# Rotation 0 keeps the tick labels in their unrotated orientation.
plt.xticks(rotation=0)
plt.yticks(rotation=0)
plt.xlabel('MONTH')  # x-axis description
plt.ylabel('VALUE')  # y-axis description
plt.title('Monthly Unemployment Trends 1948')  # figure title
plt.show()
# Section: drawing subplots
## Subplots
import matplotlib.pyplot as plt
# An empty figure that will host the subplots.
fig = plt.figure()
# add_subplot(rows, columns, index): the figure is treated as a
# rows x columns grid and `index` selects the cell to draw in.
# Here a 4x3 grid is used and cells 1, 3 and 12 receive axes.
ax1, ax2, ax3 = (fig.add_subplot(4, 3, cell) for cell in (1, 3, 12))
plt.show()
# Section: setting the size of the drawing area
import matplotlib.pyplot as plt
import numpy as np
# plt.figure(figsize=(3, 3)) would set the size of the drawing area
# explicitly; here the default size is used.
fig = plt.figure()
# Two axes stacked vertically in a 2x1 grid.
ax1, ax2 = fig.add_subplot(2, 1, 1), fig.add_subplot(2, 1, 2)
# Upper axes: five random integers drawn by randint(1, 5, 5) against 0..4.
random_x = np.random.randint(1, 5, 5)
ax1.plot(random_x, np.arange(5))
# Lower axes: 0, 2, 4, ... on x against 0..9 on y.
steps = np.arange(10)
ax2.plot(steps * 2, steps)
plt.show()
# help(np.random.randint) and help(np.arange) print the documentation of
# the two helpers used above; print(np.arange(10)) shows the values 0..9.
# Section: drawing two lines in one figure
## Two lines in one figure
import matplotlib.pyplot as plt
import pandas as pd
Data = pd.read_csv('DATA.csv')
Data['DATE'] = pd.to_datetime(Data['DATE'])
print(Data.head(12))
print('------------------\n\n')
# Extract the month number of every date into its own column.
Data['MONTH'] = Data['DATE'].dt.month
plt.xlabel('MONTH')
plt.ylabel('VALUE')
plt.title('The Unployment In 1948')
# Rows 0-5 and rows 6-11 are drawn as separate lines, each with its own
# colour and legend label: plot(x, y, line colour, label).
for (row_from, row_to), shade, tag in (((0, 6), 'red', '0-6'),
                                       ((6, 12), 'blue', '6-12')):
    chunk = Data[row_from:row_to]
    plt.plot(chunk['MONTH'], chunk['VALUE'], c=shade, label=tag)
plt.legend(loc='best')
plt.show()
# Section: adding a legend label to every line in the figure
## Add a legend label to every line in the figure
import matplotlib.pyplot as plt
import pandas as pd
Data = pd.read_csv('DATA.csv')
Data['DATE'] = pd.to_datetime(Data['DATE'])
fig = plt.figure(figsize=(10,6))
colors = ['red','blue','green','orange','black']
# Draw one line per consecutive 12-row slice, one colour per slice.
# The loop body must be indented: without indentation the file does not
# even compile.
for i, color in enumerate(colors):
    start_index = i*12
    end_index = (i+1)*12
    subset = Data[start_index:end_index]
    # Slices are labelled with consecutive years, starting at 1948.
    label = str(1948 + i)
    plt.plot(subset['DATE'],subset['VALUE'],c=color,label=label)
# Called once after all lines exist so the legend lists every label.
plt.legend(loc='best')
# NOTE(review): the x-axis label reads 'Data' although the DATE column is
# plotted -- possibly meant to be 'Date'; confirm before changing.
plt.xlabel('Data')
plt.ylabel('VALUE')
plt.title('Monthly Unemployment Trends,1948-1952')
plt.show()
# Section: bar charts
## Bar charts
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
Mov = pd.read_csv('data1.csv')
cols = ['A','B','C','D','E','F']
norm_datas = Mov[cols]
print(norm_datas[:8])
print('\n\n---------\n\n')
# Values of the first row (index label 0), one per column in `cols`.
# (.loc replaces the old .ix indexer.)
bar_heights = norm_datas.loc[0,cols].values
print(bar_heights)
# Bar positions 1..len(cols); they correspond to A, B, C, D, E, F in order.
bar_postions = arange(len(cols)) + 1
print(bar_postions)

print('\n\n-----竖状图----\n\n')
# fig controls figure-level settings; ax is the axes that is drawn on.
fig, ax = plt.subplots()
# ax.bar(x, height, width): one height per position; 0.5 is the bar width.
ax.bar(bar_postions,bar_heights,0.5)
# Pin the ticks to the bar positions first, so that the labels set on the
# next line line up with the bars one-to-one.
ax.set_xticks(bar_postions)
# Pass the column names explicitly instead of the DataFrame itself; the
# DataFrame only worked because iterating it yields its column names.
ax.set_xticklabels(cols,rotation = -45)
ax.set_xlabel('Media ')  # x-axis description
ax.set_ylabel('Rating')  # y-axis description
ax.set_title('Average User Rating For 0')  # title
plt.show()

print('\n\n\n----------横状形图---------\n\n')
fig, ax = plt.subplots()
# ax.barh(y, width, height): same data drawn horizontally; 0.5 is the
# thickness of each bar.
ax.barh(bar_postions,bar_heights,0.5)
# Same pairing as above, but on the y axis.
ax.set_yticks(bar_postions)
ax.set_yticklabels(cols,rotation = 0)
ax.set_ylabel('Media ')  # y-axis description
ax.set_xlabel('Rating')  # x-axis description
ax.set_title('Average User Rating For 0')  # title
plt.show()
## Scatter plot
# NOTE: how the data should be arranged for a scatter plot is not yet fully understood.
## Scatter plots
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
Mov = pd.read_csv('data1.csv')
cols = ['A','B','C','D','E','F']
norm_datas = Mov[cols]
print(norm_datas[:8])
# One point per row: column A on the x axis, column B on the y axis.
fig,ax = plt.subplots()
ax.scatter(norm_datas['A'],norm_datas['B'])
ax.set_xlabel('A')
ax.set_ylabel('B')
plt.show()
# help(ax.scatter) prints the full documentation of the scatter method.

# Switching axes: the same pair of columns drawn twice, the second time
# with x and y exchanged.
fig = plt.figure(figsize=(5,10))
# Two subplots stacked vertically.
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)
ax1.scatter(norm_datas['A'],norm_datas['B'])
ax1.set_xlabel('A')
ax1.set_ylabel('B')
# Previously this repeated the A-vs-B plot unchanged, so nothing was
# actually switched; B now goes on x and A on y, with matching labels.
ax2.scatter(norm_datas['B'],norm_datas['A'])
ax2.set_xlabel('B')
ax2.set_ylabel('A')
plt.show()
# Section: counting values per interval (histogram)
## Histogram
# Counts how many values fall into each interval.
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
Mov = pd.read_csv('data1.csv')
cols = ['A','B','C','D','E','F']
norm_datas = Mov[cols]
print(norm_datas.head(8))
fig,ax = plt.subplots()
# No `bins` argument is given, so matplotlib chooses the bins itself.
column_c = norm_datas['C']
ax.hist(column_c)
# Variations to try:
#   ax.hist(norm_datas['C'], bins=10)
#       bins (integer or array-like, optional) sets how many bars the
#       histogram is divided into.
#   ax.hist(norm_datas['C'], range=(5, 7), bins=20)
#       range=(start, end) restricts the interval that is binned.
# help(ax.hist) prints the full documentation.
# NOTE: the original author marked `bins` as not yet fully understood.
plt.show()
# Section: histograms shown as subplots
## Histograms
# One histogram per column, shown as stacked subplots.
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
Mov = pd.read_csv('data1.csv')
cols = ['A','B','C','D','E','F']
norm_datas = Mov[cols]
print(norm_datas.head(8))
fig = plt.figure(figsize=(5,20))
# Four axes in a single column, created top to bottom.
ax1, ax2, ax3, ax4 = [fig.add_subplot(4, 1, row) for row in range(1, 5)]
# Every panel gets the same treatment: histogram over the interval 0..5,
# the column name as title, and a y axis fixed to 0..5.
for panel, column in zip((ax1, ax2, ax3, ax4), ('A', 'B', 'C', 'D')):
    panel.hist(norm_datas[column], range=(0, 5))
    panel.set_title(column)
    panel.set_ylim(0, 5)
plt.show()
# Section: box plot
## Box plot
# A box plot summarises the data by splitting it into four parts (quartiles).
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
Mov = pd.read_csv('data1.csv')
cols = ['A','B','C','D','E','F']
norm_datas = Mov[cols]
print(norm_datas.head(8))
fig,ax = plt.subplots()
# A single box for column A, labelled accordingly on the x axis.
column_a = norm_datas['A']
ax.boxplot(column_a)
ax.set_xticklabels(['A'])
# Fix the visible y range to 0..8.
ax.set_ylim(bottom=0, top=8)
plt.show()
# Section: several box plots in one figure
### Several box plots in one figure
import pandas as pd
import matplotlib.pyplot as plt
from numpy import arange
Mov = pd.read_csv('data1.csv')
cols = ['A','B','C','D','E','F']
norm_datas = Mov[cols]
print(norm_datas.head(8))
fig,ax = plt.subplots()
# Passing the 2-D value array draws one box per column.
all_values = norm_datas[cols].values
ax.boxplot(all_values)
# Label the boxes with the column names, unrotated.
ax.set_xticklabels(cols, rotation=0)
# Fix the visible y range to 0..17.
ax.set_ylim(bottom=0, top=17)
plt.show()