当前位置:网站首页>[note] e-commerce order data analysis practice
[note] e-commerce order data analysis practice
2022-07-01 05:50:00 【Sprite. Nym】
1. E-commerce order data analysis practice
1.1 Data cleaning
# %load prep.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
%config InlineBackend.figure_format = 'svg'
ec_df = pd.read_csv('datas/ecomm/ An e-commerce platform 2021 Annual order data .csv', index_col=0)
# transformation orderTime and payTime by datetime
ec_df['orderTime'] = pd.to_datetime(ec_df['orderTime'])
ec_df['payTime'] = pd.to_datetime(ec_df['payTime'])
ec_df
# Extract orderTime stay 2021 Years of data
ec_df = ec_df[ec_df['orderTime'].dt.year == 2021]
ec_df
# Keep only the rows that pass every payment sanity rule:
#   * payTime is not earlier than orderTime,
#   * payTime is at most 30 minutes (1800 seconds) after orderTime,
#   * payment and orderAmount are both non-negative.
# NOTE(review): the original note asked for amounts "greater than 0", but the
# code keeps zero amounts (>= 0) - confirm which one is intended.
pay_delay_seconds = (ec_df['payTime'] - ec_df['orderTime']).dt.total_seconds()
valid_rows = (
    (pay_delay_seconds <= 1800)
    & (ec_df['payTime'] >= ec_df['orderTime'])
    & (ec_df['payment'] >= 0)
    & (ec_df['orderAmount'] >= 0)
)
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of the unfiltered data (avoids SettingWithCopyWarning).
ec_df = ec_df[valid_rows].copy()
ec_df

# Fill missing chanelID values with the most frequent channel. mode() is empty
# when every value is missing; in that case there is nothing to fill with.
channel_mode = ec_df['chanelID'].mode()
if not channel_mode.empty:
    ec_df['chanelID'] = ec_df['chanelID'].fillna(channel_mode.iloc[0])
ec_df.info()
# Fix the misspelled names of the channel and platform columns.
column_fixes = {'chanelID': 'channelID', 'platfromType': 'platformType'}
ec_df = ec_df.rename(columns=column_fixes)
ec_df

# Normalise the platform labels: remove whitespace and '·' characters,
# title-case the text, then map the known variants onto one spelling each.
# NOTE(review): several keys below contain spaces although whitespace has
# already been removed from the values at this point - confirm they can match.
platform_aliases = {
    ' Weixin ': ' WeChat ',
    'Vx': ' WeChat ',
    ' Webpage ': 'Web',
    ' Website ': 'Web',
}
ser = ec_df['platformType'].replace(r'[\s·]', '', regex=True).str.title()
ec_df['platformType'] = ser.replace(platform_aliases)
# Repair rows whose payment exceeds the order amount.
# Rows with payment <= orderAmount are treated as consistent; their average
# payment / orderAmount ratio serves as the typical discount.
consistent = ec_df['payment'] <= ec_df['orderAmount']
temp_df = ec_df[consistent]
mean_discount = temp_df['payment'].div(temp_df['orderAmount']).mean()

# Rows that are not consistent get orderAmount * mean_discount instead, and
# every payment is then rounded to zero decimal places.
estimated_payment = ec_df['orderAmount'] * mean_discount
ec_df['payment'] = ec_df['payment'].where(consistent, estimated_payment).round()
1.2 Data analysis
1.2.1 Calculate the overall indicators
# GMV: sum of the order amount over all cleaned orders.
GMV = ec_df['orderAmount'].sum()
print(GMV)

# Total sales: sum of all payments, charged-back orders included.
all_payment = ec_df['payment'].sum()
print(all_payment)

# Actual sales: payments of the orders that were not charged back.
not_charged_back = ec_df['chargeback'].eq(False)
true_payment = ec_df.loc[not_charged_back, 'payment'].sum()
print(true_payment)

# Return rate: share of charged-back orders, as a percentage with two decimals.
# The value is computed first and formatted on a single line, because a line
# break inside an f-string replacement field is a syntax error before
# Python 3.12.
chargeback_pct = 100 * len(ec_df[ec_df['chargeback']]) / len(ec_df)
chargeback_ratio = f"{chargeback_pct:.2f}%"
print(chargeback_ratio)

# Customer unit price: actual sales divided by the number of distinct users.
ARPU = true_payment / ec_df['userID'].nunique()
print(ARPU)
1.2.2 Calculate monthly GMV and analyze the trend
# Add a month column derived from the order time.
ec_df['month'] = ec_df['orderTime'].dt.month
ec_df

# Add the actual payment column: the payment for orders that were not charged
# back, 0 for charged-back orders.
# NOTE: temp_df is a second name for ec_df (no copy is made), so the new
# column is added to ec_df as well.
temp_df = ec_df
temp_df['true_payment'] = temp_df['payment'].where(ec_df['chargeback'] == False, 0)

# Pivot table of monthly totals for the order amount (GMV) and actual payment.
monthly_gmv = pd.pivot_table(temp_df, index='month', values=['orderAmount', 'true_payment'], aggfunc='sum')
# Express the totals in units of 10,000, rounded to two decimals. Whole-frame
# arithmetic replaces the per-element applymap call, which is deprecated in
# recent pandas releases.
monthly_gmv = (monthly_gmv / 10000).round(2)

# Draw the line chart.
import pyecharts.options as opts
from pyecharts.charts import Line

gmv_line = Line()
# Months on the x axis.
gmv_line.add_xaxis(monthly_gmv.index.to_list())
# GMV series.
gmv_line.add_yaxis('GMV', monthly_gmv['orderAmount'].to_list())
# Actual sales series.
gmv_line.add_yaxis(' Actual sales ', monthly_gmv['true_payment'].to_list())
# Chart title and axis names.
gmv_line.set_global_opts(
    title_opts=opts.TitleOpts(title="2021 Annual monthly sales figures "),
    yaxis_opts=opts.AxisOpts(name='GMV( Company : Ten thousand yuan )'),
    xaxis_opts=opts.AxisOpts(name=' Time ( Company : month )')
)
gmv_line.render_notebook()

1.2.3 Break down the GMV share by traffic channel
# Pivot table of total order amount (GMV) per channel.
channel_gmv = pd.pivot_table(temp_df, index='channelID', values=['orderAmount'], aggfunc='sum')
# Express GMV in units of 10,000. Whole-frame division replaces the
# per-element applymap call, which is deprecated in recent pandas releases.
channel_gmv = channel_gmv / 10000
channel_gmv

# Draw the chart.
from pyecharts.charts import Pie

# Create the pie chart object.
channel_pie = Pie()
# Add the [channelID, GMV] pairs; the two radius values set the inner and
# outer ring sizes of the pie.
channel_pie.add(
    '',
    channel_gmv.reset_index().values.tolist(),
    radius=["50%", "75%"]
)
# Title and a vertical legend positioned near the top-left corner.
channel_pie.set_global_opts(
    title_opts=opts.TitleOpts(title=" Various channels GMV Proportion "),
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%")
)
# Show each label as "name: percentage".
channel_pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
channel_pie.render_notebook()

1.2.4 Count the orders by the day of the week
# Label every order with its day of the week. dt.weekday is 0 for Monday, so
# adding 1 yields labels numbered 1 (Monday) to 7 (Sunday).
# The f-string is kept on one line: a line break inside a replacement field is
# a syntax error before Python 3.12.
temp_df['weekday'] = temp_df['orderTime'].dt.weekday.map(lambda x: f' week {x + 1}')
temp_df

# Pivot table: number of distinct orders per day of the week.
weekday_count = pd.pivot_table(temp_df, index='weekday', values='orderID', aggfunc='nunique')
weekday_count

# Draw the chart.
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType

# Create the bar chart object with the LIGHT theme.
weekday_bar = Bar({"theme": ThemeType.LIGHT})
# x axis: weekday labels.
weekday_bar.add_xaxis(weekday_count.index.tolist())
# y axis: order counts.
weekday_bar.add_yaxis(" Order quantity ", weekday_count['orderID'].tolist())
# Chart title.
weekday_bar.set_global_opts(title_opts=opts.TitleOpts(title=" Order quantity per week "))
# Show value labels and add marker lines for the minimum and maximum.
weekday_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True),
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name=" minimum value "),
            opts.MarkLineItem(type_="max", name=" Maximum ")
        ]
    ),
)
weekday_bar.render_notebook()

1.2.5 Count the orders by order time slot (every 30 minutes)
# Assign every order to its 30-minute slot, formatted as HH:MM.
# '30min' is used instead of the '30T' alias, which recent pandas releases
# deprecate; '30min' is accepted by older releases too.
temp_df['hour'] = temp_df['orderTime'].dt.floor('30min').dt.strftime('%H:%M')
temp_df

# Pivot table: number of distinct orders per time slot.
time_count = pd.pivot_table(temp_df, index='hour', values='orderID', aggfunc='nunique')
time_count

# Create the bar chart object with the ESSOS theme.
# NOTE: the name weekday_bar is reused here for the time-slot chart.
weekday_bar = Bar({"theme": ThemeType.ESSOS})
# x axis: time slots.
weekday_bar.add_xaxis(time_count.index.tolist())
# y axis: order counts.
weekday_bar.add_yaxis(" Order quantity ", time_count['orderID'].tolist())
# Chart title plus a data-zoom control.
weekday_bar.set_global_opts(
    title_opts=opts.TitleOpts(title=" Order quantity in each period "),
    datazoom_opts=opts.DataZoomOpts()
)
# Show value labels and add marker lines for the minimum and maximum.
weekday_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True),
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name=" minimum value "),
            opts.MarkLineItem(type_="max", name=" Maximum ")
        ]
    ),
)
weekday_bar.render_notebook()

1.2.6 Calculate the repurchase rate by month
# Pivot table: distinct orderID count per userID (rows) and month (columns).
multiple_bought = pd.pivot_table(temp_df, index='userID', columns='month', values='orderID', aggfunc='nunique')

# Flag repeat buyers per month: 1.0 when the user has more than one order in
# the month, 0.0 otherwise, and NaN kept where the pivot table has no value.
# The whole-frame comparison replaces a per-element applymap lambda
# (applymap is deprecated in recent pandas releases).
has_orders = multiple_bought.notna()
multiple_bought = multiple_bought.gt(1).astype(float).where(has_orders)

# Monthly repurchase rate in percent: flagged users divided by the users that
# have a value in that month, rounded to two decimals.
np.round(100 * multiple_bought.sum() / multiple_bought.count(), 2)
# No chart for this one, haha
边栏推荐
猜你喜欢

Boot + jsp University Community Management System (with source Download Link)

MySQL数据迁移遇到的一些错误

Send you through the data cloud

HCM 初学 ( 一 ) - 简介

如何添加葫芦儿派盘

QT write custom control - self drawn battery

我从技术到产品经理的几点体会

穿越派·派盘 + 思源笔记 = 私人笔记本

linux 关闭redis 进程 systemd+

Continuous breakthrough and steady progress -- Review and Prospect of cross platform development technology of mobile terminal
随机推荐
srpingboot security demo
论文学习记录随笔 多标签之LSML
【知识点总结】卡方分布,t分布,F分布
restframework-simpleJWT重写认证机制
OpenGL ES: (5) OpenGL的基本概念、OpenGL ES 在屏幕产生图片的过程、OpenGL管线(pipeline)
MySQL converts milliseconds to time string
FPGA - 7系列 FPGA内部结构之Clocking -01- 时钟架构概述
TIDB数据库特性总结
excel可视化
Advanced drawing skills of Excel lecture 100 (1) - use Gantt chart to show the progress of the project
A little assistant for teenagers' physiological health knowledge based on wechat applet (free source code + project introduction + operation introduction + operation screenshot + Thesis)
Brief description of activation function
Seven major technical updates that developers should pay most attention to on build 2022
Fragment upload and breakpoint resume
Timer based on LabVIEW
QT write custom control - self drawn battery
tese_ Time_ 2h
Trust guessing numbers game
Data governance: data governance framework (Part I)
Boot + jsp University Community Management System (with source Download Link)