Data Analysis with Plotly
Plotly is an interactive graphing library that can be used from Python. Below I walk through an example data analysis using charts from the Plotly library.
Feel free to reuse these code snippets in your own work.
The following plots are covered:
- Line Plot
- Scatter Plot
- Bar Plot
- Pie Plot
- Bubble Plot
- Histograms Plot
- Cumulative Histograms Plot
- WordCloud
- Box Plot
- Scatter Plot Matrix
- Inset Plot
- 3D scatter plot
To analyze data, we first choose a dataset and examine what it contains. In this example, I will examine a ready-made dataset of wine reviews from around the world.
You can access such ready-made datasets at https://www.kaggle.com/.
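The 130k-row sample dataset I will examine contains the following columns: country, description, designation, points, price, province, region_1, region_2, taster_name, taster_twitter_handle, title, variety and winery, plus an unnamed first column.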
Let’s load the two CSV files and rename the unnamed first column to ID.
# pandas provides read_csv; it is imported here because this is the first
# snippet that needs it and no earlier import appears in the article.
import pandas as pd

# Load both wine-review CSV files. The column that pandas reads in as
# 'Unnamed: 0' is renamed to 'ID' so later snippets can refer to it by name.
winemag130_data = pd.read_csv("../input/winemag-data-130k-v2.csv").rename(
    columns={'Unnamed: 0': 'ID'}
)
winemag150_data = pd.read_csv("../input/winemag-data_first150k.csv").rename(
    columns={'Unnamed: 0': 'ID'}
)
Let’s learn about our dataset. For a good analysis, we need to have knowledge about our data.
# Print a summary of the 130k dataset: column names, non-null counts, dtypes.
winemag130_data.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 129971 entries, 0 to 129970
# Data columns (total 14 columns):
# ID 129971 non-null int64
# country 129908 non-null object
# description 129971 non-null object
# designation 92506 non-null object
# points 129971 non-null int64
# price 120975 non-null float64
# province 129908 non-null object
# region_1 108724 non-null object
# region_2 50511 non-null object
# taster_name 103727 non-null object
# taster_twitter_handle 98758 non-null object
# title 129971 non-null object
# variety 129970 non-null object
# winery 129971 non-null object
# dtypes: float64(1), int64(2), object(11)
# memory usage: 13.9+ MB
Let’s display the first 10 rows of the dataset.
# Preview the first ten rows of the 130k dataset.
winemag130_data.head(n=10)
Line Plot
The code below takes the first 100 records of the dataset.
It plots each wine's points and price against its ID so the two can be compared.
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# iplot is used by every plotting snippet but is not imported anywhere in the
# article, so bring it into scope here, at its first use. init_notebook_mode
# prepares inline rendering in a notebook (needed by older Plotly releases).
init_notebook_mode(connected=True)

# First 100 rows of the 130k dataset.
df = winemag130_data.iloc[:100, :]

# Trace 1: review points against wine ID, drawn as a plain line.
trace1 = go.Scatter(
    x=df.ID,
    y=df.points,
    mode="lines",
    name="points",
    marker=dict(color='rgba(16, 112, 2, 0.8)'),
    text=df.variety,  # hover text: grape variety of each wine
)

# Trace 2: price against wine ID, drawn as a line with a marker per wine.
trace2 = go.Scatter(
    x=df.ID,
    y=df.price,
    mode="lines+markers",
    name="price",
    marker=dict(color='rgba(80, 26, 80, 0.8)'),
    text=df.variety,
)

data = [trace1, trace2]
layout = dict(
    title='Points and Price vs ID of Top 100 Variety',
    xaxis=dict(title='ID', ticklen=5, zeroline=False),
)
fig = dict(data=data, layout=layout)
iplot(fig)
Next, I list the distinct values that occur in the points column.
# Distinct review scores present in the 130k dataset.
winemag130_data["points"].unique()
# Output:
# array([ 87, 86, 85, 88, 92, 91, 90, 89, 83, 82, 81, 80, 100,
#         98, 97, 96, 95, 93, 94, 84, 99])
Scatter Plot:
I compare prices across scores, taking the first 50 records for each of the scores 87, 90, 93, 96 and 99.
import plotly.graph_objs as go


def _score_trace(frame, score, color):
    """Build the marker trace for the wines in `frame`, all rated `score`.

    x, y and the hover text are all taken from the same frame, so each marker
    pairs one wine's own points, price and variety.
    """
    return go.Scatter(
        x=frame.points,
        y=frame.price,
        mode="markers",
        name=str(score),
        marker=dict(color=color),
        text=frame.variety,
    )


# First 50 wines for each of the five scores being compared.
df87 = winemag130_data[winemag130_data.points == 87].iloc[:50, :]
df90 = winemag130_data[winemag130_data.points == 90].iloc[:50, :]
df93 = winemag130_data[winemag130_data.points == 93].iloc[:50, :]
df96 = winemag130_data[winemag130_data.points == 96].iloc[:50, :]
df99 = winemag130_data[winemag130_data.points == 99].iloc[:50, :]

# One trace per score, each with its own colour so the legend entries differ.
trace1 = _score_trace(df87, 87, 'rgba(255, 128, 255, 0.8)')
trace2 = _score_trace(df90, 90, 'rgba(240, 128, 255, 0.8)')
trace3 = _score_trace(df93, 93, 'rgba(255, 128, 2, 0.8)')
trace4 = _score_trace(df96, 96, 'rgba(128, 64, 255, 0.8)')
trace5 = _score_trace(df99, 99, 'rgba(0, 255, 200, 0.8)')

data = [trace1, trace2, trace3, trace4, trace5]
layout = dict(
    title='Price vs points of the first 50 wines with 87, 90, 93, 96 and 99 points',
    # The x values are review points, not IDs.
    xaxis=dict(title='Points', ticklen=5, zeroline=False),
    yaxis=dict(title='Price', ticklen=5, zeroline=False),
)
fig = dict(data=data, layout=layout)
iplot(fig)
Bar Plot:
I plot the price and points of the first 50 records with 99 points, grouped by grape variety.
import plotly.graph_objs as go

# First 50 wines rated 99 points.
df99 = winemag130_data.loc[winemag130_data.points == 99].iloc[:50]

# Price per grape variety; hovering a bar shows the wine's region_1 value.
trace1 = go.Bar(
    x=df99["variety"],
    y=df99["price"],
    name="price",
    marker={
        'color': 'rgba(255, 174, 255, 0.5)',
        'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
    },
    text=df99["region_1"],
)

# Points per grape variety, drawn next to the price bars.
trace2 = go.Bar(
    x=df99["variety"],
    y=df99["points"],
    name="points",
    marker={
        'color': 'rgba(255, 255, 128, 0.5)',
        'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
    },
    text=df99["region_1"],
)

data = [trace1, trace2]
# "group" places the two series side by side for each variety.
layout = go.Layout(barmode="group")
fig = go.Figure(data=data, layout=layout)
iplot(fig)
# Plotly bar plot built from plain dicts instead of go.Bar objects.
import plotly.graph_objs as go

# First 10 wines rated 87 points.
df87 = winemag130_data[winemag130_data.points == 87].iloc[:10, :]

x = df87.variety

# Price series.
trace1 = {
    'x': x,
    'y': df87.price,
    'name': 'price',
    'type': 'bar',
}
# Points series.
trace2 = {
    'x': x,
    'y': df87.points,
    'name': 'points',
    'type': 'bar',
}
data = [trace1, trace2]
layout = {
    # The x axis shows grape varieties, so label it accordingly.
    'xaxis': {'title': 'Variety'},
    # "relative" stacks the two series on top of each other.
    'barmode': 'relative',
    'title': 'price and points of top 10 variety in 87',
}
fig = go.Figure(data=data, layout=layout)
iplot(fig)
Pie Plot:
I show each wine's share of the total price for the 2nd through 8th records with 87 points, labelled by wine title.
# Rows at positions 1-7 (the 2nd through 8th) of the wines rated 87 points.
df87 = winemag130_data.loc[winemag130_data.points == 87].iloc[1:8]
value = df87["price"]
labels = df87["title"]

fig = dict(
    data=[
        # Pie with a 30% hole, confined to the left half of the figure.
        dict(
            values=value,
            labels=labels,
            domain=dict(x=[0, .5]),
            name="Wine names by price",
            hoverinfo="label+percent+name",
            hole=.3,
            type="pie",
        ),
    ],
    layout=dict(
        title="Wine names by price",
        # Static caption placed at the top of the figure, without an arrow.
        annotations=[
            dict(
                font=dict(size=20),
                showarrow=False,
                text="Wine Reviews Title",
                x=0.20,
                y=1,
            ),
        ],
    ),
)
iplot(fig)
Bubble Plot:
I plot price against ID for the 2nd through 21st records with 87 points; both the size and the colour of each bubble encode the price.
# Rows at positions 1-20 (the 2nd through 21st) of the wines rated 87 points.
df87 = winemag130_data.loc[winemag130_data.points == 87].iloc[1:21]
# Replace missing values with 0 so every row has a usable price.
df = df87.fillna(0)
color = df["price"]

data = [
    dict(
        x=df["ID"],
        y=df["price"],
        mode='markers',
        # Price drives both the bubble size and its colour; showscale adds
        # the colour bar.
        marker=dict(color=color, size=color, showscale=True),
        text=df["variety"],
    )
]
iplot(data)
Histograms Plot:
I compare the price distributions of the first 50 records with 96 points and the first 50 records with 100 points.
import plotly.graph_objs as go

# First 50 wines rated 96 points and first 50 rated 100 points.
df96 = winemag130_data.loc[winemag130_data.points == 96].iloc[:50]
df100 = winemag130_data.loc[winemag130_data.points == 100].iloc[:50]

# One semi-transparent price histogram per score.
trace1 = go.Histogram(
    x=df96["price"],
    opacity=0.75,
    name="96 points",
    marker={'color': 'rgba(171, 50, 96, 0.6)'},
)
trace2 = go.Histogram(
    x=df100["price"],
    opacity=0.75,
    name="100 points",
    marker={'color': 'rgba(12, 50, 196, 0.6)'},
)

data = [trace1, trace2]
# "overlay" draws the two histograms on top of each other.
layout = go.Layout(
    barmode='overlay',
    title=' Wine Reviews price in 96 and 100 points',
    xaxis={'title': 'price'},
    yaxis={'title': 'Count'},
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
Cumulative Histograms Plot:
I plot the cumulative price distribution of the first 100 records with 87 points.
import plotly.graph_objs as go

# First 100 wines rated 87 points.
df87 = winemag130_data.loc[winemag130_data.points == 87].iloc[:100]

# Histogram of prices with cumulative counting switched on.
trace2 = go.Histogram(
    x=df87["price"],
    cumulative={'enabled': True},
)

data = [trace2]
layout = go.Layout(
    barmode='overlay',
    title=' Wine Reviews price in 87 points',
    xaxis={'title': 'price'},
    yaxis={'title': 'Count'},
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
WordCloud:
A word cloud of the producing countries of the first 160 records with 87 points; the more often a country appears, the larger it is drawn.
# NOTE(review): `plt` is presumably matplotlib.pyplot and `WordCloud` the class
# from the wordcloud package; neither import appears in the article, so both
# must be imported before running this snippet.

# First 160 wines rated 87 points.
df87 = winemag130_data[winemag130_data.points == 87].iloc[:160, :]

# Country of each selected wine. Rows without a country are dropped because
# str.join only accepts strings and would raise TypeError on a missing value.
df87_new = df87.country.dropna()

plt.subplots(figsize=(10, 10))
wordcloud = WordCloud(
    background_color='white',
    width=512,
    height=384,
).generate(" ".join(df87_new))
plt.imshow(wordcloud)
plt.axis('off')
# Save the rendered cloud to disk before displaying it.
plt.savefig('graph.png')
plt.show()
Box Plot:
I compare the price distributions (minimum, maximum, median and quartiles) of the first 100 records with 99 points and the first 100 records with 100 points.
# First 100 wines rated 99 points and first 100 rated 100 points.
df99 = winemag130_data[winemag130_data.points == 99].iloc[:100, :]
df100 = winemag130_data[winemag130_data.points == 100].iloc[:100, :]

# One box per score; the legend names say which score each box summarises.
trace0 = go.Box(
    y=df99.price,
    name='price of 99-point wines',
    marker=dict(color='rgb(12, 12, 140)'),
)
trace1 = go.Box(
    y=df100.price,
    name='price of 100-point wines',
    marker=dict(color='rgb(12, 128, 128)'),
)
data = [trace0, trace1]
iplot(data)
Scatter Plot Matrix:
I examine the first 100 records with 100 points using a matrix of pairwise scatter plots, with box plots on the diagonal.
# numpy is needed for arange below and is not imported anywhere else in the
# article, so import it alongside the figure factory.
import numpy as np
import plotly.figure_factory as ff

# First 100 wines rated 100 points in the 150k dataset.
df100 = winemag150_data[winemag150_data.points == 100].iloc[:100, :]

# prepare data: keep only the plotted columns. The explicit copy makes df100
# an independent frame, so adding a column below is not a write to a slice of
# winemag150_data.
df100 = df100.loc[:, ["points", "price", "ID"]].copy()
# 1-based running number, passed as the index column of the scatter matrix.
df100["index"] = np.arange(1, len(df100) + 1)

# scatter matrix with box plots on the diagonal
fig = ff.create_scatterplotmatrix(
    df100,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='cat',
    height=700,
    width=700,
)
iplot(fig)
Inset Plot:
I plot price against ID for the records with 100 points, with a smaller inset plot showing points against ID.
# All wines rated 100 points in the 150k dataset.
df100 = winemag150_data.loc[winemag150_data.points == 100]

# Main plot: price against ID on the default axes.
trace1 = go.Scatter(
    x=df100["ID"],
    y=df100["price"],
    name="price",
    marker={'color': 'rgba(16, 112, 2, 0.8)'},
)

# Inset plot: points against ID, bound to the second axis pair.
trace2 = go.Scatter(
    x=df100["ID"],
    y=df100["points"],
    xaxis='x2',
    yaxis='y2',
    name="points",
    marker={'color': 'rgba(160, 112, 20, 0.8)'},
)

data = [trace1, trace2]
# The second axis pair spans 0.6-0.95 of the figure in both directions.
layout = go.Layout(
    xaxis2={'domain': [0.6, 0.95], 'anchor': 'y2'},
    yaxis2={'domain': [0.6, 0.95], 'anchor': 'x2'},
    title='Points and Price vs ID of Wine Reviews',
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
3D scatter plot:
The price and score analysis of the data with 99 and 100 points is displayed in 3 dimensions.
# Wines rated 99 and 100 points in the 150k dataset.
df99 = winemag150_data.loc[winemag150_data.points == 99]
df100 = winemag150_data.loc[winemag150_data.points == 100]

# 99-point wines: ID on x, price on y, points on z, one fixed colour.
trace1 = go.Scatter3d(
    x=df99["ID"],
    y=df99["price"],
    z=df99["points"],
    mode='markers',
    marker={'size': 10, 'color': 'rgb(255, 0, 0)'},
)

# 100-point wines: same axes, one fixed colour.
trace2 = go.Scatter3d(
    x=df100["ID"],
    y=df100["price"],
    z=df100["points"],
    mode='markers',
    marker={'size': 10, 'color': 'rgb(127, 127, 127)'},
)

data = [trace1, trace2]
# All four figure margins are set to zero.
layout = go.Layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
fig = go.Figure(data=data, layout=layout)
iplot(fig)
Thanks for reading.