在数据分析中,数据的好坏往往只有数据分析师才能分析出来;而让普通人也能理解数据的含义、进而借助数据预测出公司即将发生的事件,才是最重要的,也是数据分析工作的核心所在。数据分析40%在于对数据的挖掘和处理,60%在于图形的展示和优化。数据分析中的图形展示方案有很多种,今天就以Matplotlib和plotly为例,来认识什么是数据分析中的图表吧!
对于Matplotlib和plotly这两种工具来说,要想在Python中使用它们是非常简单的,只需要使用pip安装就可以了。下面安装matplotlib、plotly和一些数据分析相关的库包,后续会时常使用它们,这里不再一一介绍它们的用途。
# Install the libraries used below (shell commands, run once):
#   pip install matplotlib
#   pip install plotly
#   pip install plotly_express
#   pip install pandas
#   pip install numpy

# ---------------------------------------------------------------------------
# Line plot
# ---------------------------------------------------------------------------

# matplotlib version:
import matplotlib.pyplot as plt

x = [1, 2, 3, 4, 5]
y = [2, 3, 5, 7, 11]

plt.plot(x, y)
# NOTE(review): the original also carried a commented-out Chinese title,
# plt.title("折线图"); presumably disabled because of CJK font rendering --
# confirm before re-enabling it.
plt.title("Line Plot")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.show()

# plotly version:
import plotly.graph_objects as go

x = [1, 2, 3, 4, 5]
y = [2, 3, 5, 7, 11]

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y, mode='lines+markers', name='数据线'))

# Update the layout: titles and hover behaviour.
fig.update_layout(
    title='Line Plot',
    xaxis_title='X-axis',
    yaxis_title='Y-axis',
    hovermode='closest',
)

# Display the chart.
fig.show()

# ---------------------------------------------------------------------------
# Scatter plot
# ---------------------------------------------------------------------------

# matplotlib version (reuses x, y and plt from the line plot example):
plt.scatter(x, y)
plt.title("Scatter Plot")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.show()

# plotly version:
import plotly.express as px
import pandas as pd

x = [1, 2, 3, 4, 5]
y = [2, 3, 5, 7, 11]

# Build sample data; 'category' drives the point colour.
df = pd.DataFrame({
    'x': x,
    'y': y,
    'category': ['A', 'B', 'A', 'B', 'A'],
})

# Create the interactive scatter plot.
fig = px.scatter(
    df,
    x='x',
    y='y',
    color='category',
    title='Jupyter Notebook中的交互式散点图',
)

# Display the chart in Jupyter Notebook.
fig.show()

# ---------------------------------------------------------------------------
# Bar chart
# ---------------------------------------------------------------------------

# matplotlib version:
categories = ['A', 'B', 'C', 'D']
values = [10, 20, 15, 30]

plt.bar(categories, values)
plt.title("Bar Chart")
plt.xlabel("Categories")
plt.ylabel("Values")
plt.show()

# plotly version (offline notebook API):
import plotly.graph_objs as go
import plotly.offline as py  # offline counterpart of plotly.plotly

# Offline plotting needs this extra initialisation step in a notebook.
py.init_notebook_mode(connected=True)

categories = ['A', 'B', 'C', 'D']
values = [10, 20, 15, 30]

trace = go.Bar(x=categories, y=values)
data = [trace]
py.iplot(data, filename='basic-bar')

# ---------------------------------------------------------------------------
# Histogram
# ---------------------------------------------------------------------------

# matplotlib version:
data = [1, 2, 2, 3, 4, 5, 5, 6, 7]

plt.hist(data)
plt.title("Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

# plotly version:
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import pandas as pd

# Named tip_df rather than `pandas` so the frame is not confused with the
# pandas library itself.
tip_df = pd.DataFrame(data={
    "tip": [1, 2, 2, 3, 4, 5, 5, 6, 7],
})

fig = px.histogram(tip_df, x="tip")
fig.show()

# ---------------------------------------------------------------------------
# Pie chart
# ---------------------------------------------------------------------------

# matplotlib version:
labels = ['A', 'B', 'C', 'D']
sizes = [15, 30, 45, 10]

plt.pie(sizes, labels=labels, autopct='%1.1f%%')
plt.title("Pie Chart")
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()

# plotly version:
import plotly as py
import plotly.graph_objs as go

labels = ['A', 'B', 'C', 'D']
values = [15, 30, 45, 10]

trace = [go.Pie(labels=labels, values=values)]
layout = go.Layout(
    title='Pie Chart',
)
fig = go.Figure(data=trace, layout=layout)
fig.show()

# ---------------------------------------------------------------------------
# Box plot
# ---------------------------------------------------------------------------

# matplotlib version:
data = [20, 35, 30, 35, 27, 40, 39]

plt.boxplot(data)
plt.title("Box Plot")
plt.ylabel("Value")
plt.show()

# plotly version:
import plotly_express as px
import plotly.graph_objects as go
import pandas as pd

tips = pd.DataFrame(data={
    "total_bill": [1, 2, 2, 3, 4, 5, 5, 6, 7],
    "day": ["Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun"],
})

# px.box draws the box plot this section is about; the original called
# px.strip, which produces a strip (jittered point) plot instead.
fig = px.box(
    tips,
    x='day',          # day of the week
    y='total_bill',   # total bill
)
fig.show()

# ---------------------------------------------------------------------------
# Heatmap
# ---------------------------------------------------------------------------

# matplotlib version:
import numpy as np

data = np.random.rand(10, 10)

plt.imshow(data, cmap='hot', interpolation='nearest')
plt.title("Heatmap")
plt.colorbar()
plt.show()

# plotly version:
import pandas as pd
import numpy as np
import plotly_express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff  # figure factory
from plotly.subplots import make_subplots  # subplot helper

# Data section.
# Alternative input kept from the original: a 2x3 RGB image as a uint8 array.
# rgb = np.array([[[105, 0, 0], [0, 255, 0], [0, 0, 55]],
#                 [[0, 250, 0], [0, 0, 205], [255, 0, 0]]],
#                dtype=np.uint8)
rgb = np.random.rand(10, 10)

# Render the matrix with Plotly Express.
fig = px.imshow(rgb)
fig.show()

# ---------------------------------------------------------------------------
# 3D plot
# ---------------------------------------------------------------------------

# matplotlib version:
# Axes3D is not referenced directly; presumably imported for its side effect
# of registering the '3d' projection on older matplotlib -- confirm.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

x = [1, 2, 3]
y = [4, 5, 6]
z = [7, 8, 9]

ax.plot(x, y, z)
plt.show()

# plotly version:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

iris = px.data.iris()
iris.head()  # result is discarded here; only useful as a notebook preview

fig = px.scatter_3d(
    iris,
    x="sepal_length",
    y="sepal_width",
    z="petal_width",
    color="species",
)
fig.show()