当前位置:网站首页>【笔记】电商订单数据分析实战
【笔记】电商订单数据分析实战
2022-07-01 05:41:00 【Sprite.Nym】
一、电商订单数据分析实战
1.1 数据清洗
# %load prep.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Select SimHei as the sans-serif font (presumably so that Chinese labels
# render in matplotlib figures).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Turn off the Unicode minus sign for axis tick labels
# (NOTE(review): usually needed because CJK fonts lack that glyph -- confirm).
plt.rcParams['axes.unicode_minus'] = False
%config InlineBackend.figure_format = 'svg'
# Load the order export; the first CSV column is used as the index.
ec_df = pd.read_csv('datas/ecomm/某电商平台2021年订单数据.csv', index_col=0)
# Parse both timestamp columns into datetime values.
for time_col in ('orderTime', 'payTime'):
    ec_df[time_col] = pd.to_datetime(ec_df[time_col])
ec_df
# Keep only the orders that were placed during 2021.
placed_in_2021 = ec_df['orderTime'].dt.year.eq(2021)
ec_df = ec_df[placed_in_2021]
ec_df
# Keep orders that were paid no earlier than they were placed and at most
# 30 minutes (1800 s) afterwards, with non-negative payment and order amounts.
# NOTE(review): the original note asked for amounts strictly greater than 0,
# but the filter has always kept zero-amount rows -- confirm which is intended.
pay_delay = (ec_df['payTime'] - ec_df['orderTime']).dt.total_seconds()
valid_rows = (
    pay_delay.between(0, 1800)
    & (ec_df['payment'] >= 0)
    & (ec_df['orderAmount'] >= 0)
)
# .copy() yields an independent frame, so later column assignments on ec_df
# do not trigger SettingWithCopyWarning on a filtered slice.
ec_df = ec_df[valid_rows].copy()
ec_df
# Fill missing channel IDs with the most frequent channel value.
most_common_channel = ec_df['chanelID'].mode()[0]
ec_df['chanelID'] = ec_df['chanelID'].fillna(most_common_channel)
ec_df.info()
# Correct the misspelt channel and platform column names.
column_fixes = {'chanelID': 'channelID', 'platfromType': 'platformType'}
ec_df = ec_df.rename(columns=column_fixes)
ec_df
# Clean up platform labels: strip whitespace and middle dots, title-case the
# result, then map the known misspellings/synonyms onto canonical names.
ser = ec_df['platformType'].replace(r'[\s·]', '', regex=True).str.title()
platform_aliases = {'薇信': '微信', 'Vx': '微信', '网页': 'Web', '网站': 'Web'}
ec_df['platformType'] = ser.replace(platform_aliases)
# Repair rows whose payment exceeds the order amount: replace the payment with
# the order amount scaled by the mean payment/orderAmount ratio observed on the
# rows where payment <= orderAmount, then round every payment to whole units.
payment_ok = ec_df['payment'] <= ec_df['orderAmount']
temp_df = ec_df[payment_ok]
mean_discount = (temp_df['payment'] / temp_df['orderAmount']).mean()
estimated_payment = ec_df['orderAmount'] * mean_discount
ec_df['payment'] = ec_df['payment'].where(payment_ok, estimated_payment).round()
1.2 数据分析
1.2.1 计算总体指标
# GMV: sum of the order amounts of all orders.
GMV = ec_df['orderAmount'].sum()
print(GMV)
# Total sales: sum of all payments, charged-back orders included.
all_payment = ec_df['payment'].sum()
print(all_payment)
# Net sales: payments of the orders that were not charged back.
not_charged_back = ec_df['chargeback'] == False
true_payment = ec_df.loc[not_charged_back, 'payment'].sum()
print(true_payment)
# Chargeback rate as a percentage string with two decimals.
# The f-string is kept on one line: a single-quoted f-string may not span
# lines before Python 3.12.
chargeback_count = len(ec_df[ec_df['chargeback']])
chargeback_ratio = f"{100 * chargeback_count / len(ec_df):.2f}%"
print(chargeback_ratio)
# Net sales divided by the number of distinct users.
ARPU = true_payment / ec_df['userID'].nunique()
print(ARPU)
1.2.2 计算每月GMV及趋势分析
# Add the calendar month of each order.
ec_df['month'] = ec_df['orderTime'].dt.month
ec_df
# Work on a copy so the helper columns added for charting do not leak back
# into ec_df (a plain assignment would only alias the same frame).
temp_df = ec_df.copy()
# Net payment per order: the payment, or 0 when the order was charged back.
temp_df['true_payment'] = temp_df['payment'].where(temp_df['chargeback'] == False, 0)
# Pivot: month -> summed order amount (GMV) and summed net payment.
monthly_gmv = pd.pivot_table(
    temp_df,
    index='month',
    values=['orderAmount', 'true_payment'],
    aggfunc='sum',
)
# Express both columns in units of 10,000 with two decimals
# (vectorized; DataFrame.applymap is deprecated).
monthly_gmv = (monthly_gmv / 10000).round(2)
# Draw the line chart.
import pyecharts.options as opts
from pyecharts.charts import Line
gmv_line = Line()
# X axis: months.
gmv_line.add_xaxis(monthly_gmv.index.to_list())
# Series 1: GMV.
gmv_line.add_yaxis('GMV', monthly_gmv['orderAmount'].to_list())
# Series 2: net sales.
gmv_line.add_yaxis('实际销售额', monthly_gmv['true_payment'].to_list())
# Chart title and axis names.
gmv_line.set_global_opts(
    title_opts=opts.TitleOpts(title="2021年按月销售额图"),
    yaxis_opts=opts.AxisOpts(name='GMV(单位:万元)'),
    xaxis_opts=opts.AxisOpts(name='时间(单位:月)')
)
gmv_line.render_notebook()

1.2.3 流量渠道来源拆解GMV占比
# Pivot: channel -> summed order amount (GMV).
channel_gmv = pd.pivot_table(temp_df, index='channelID', values=['orderAmount'], aggfunc='sum')
# Express GMV in units of 10,000 (vectorized; DataFrame.applymap is deprecated).
channel_gmv = channel_gmv / 10000
channel_gmv
# Draw the chart.
from pyecharts.charts import Pie
# Create the pie chart object.
channel_pie = Pie()
# Feed [channel, GMV] pairs; the two radii turn the pie into a ring.
channel_pie.add(
    '',
    channel_gmv.reset_index().values.tolist(),
    radius=["50%", "75%"]
)
# Title plus a vertical legend placed near the top-left corner.
channel_pie.set_global_opts(
    title_opts=opts.TitleOpts(title="各渠道GMV占比"),
    legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%")
)
# Label each slice with its name and percentage share.
channel_pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
channel_pie.render_notebook()

1.2.4 按星期几统计下单量
# Label each order with its weekday; dt.weekday is 0-based starting on Monday,
# so 1 is added to get labels 1..7.
# The f-string is kept on one line: a single-quoted f-string may not span
# lines before Python 3.12.
temp_df['weekday'] = temp_df['orderTime'].dt.weekday.map(lambda x: f'星期{x + 1}')
temp_df
# Pivot: weekday -> number of distinct orders.
weekday_count = pd.pivot_table(temp_df, index='weekday', values='orderID', aggfunc='nunique')
weekday_count
# Draw the chart.
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType
# Create the bar chart with the LIGHT theme.
weekday_bar = Bar({"theme": ThemeType.LIGHT})
# X axis: weekday labels.
weekday_bar.add_xaxis(weekday_count.index.tolist())
# Y axis: order counts.
weekday_bar.add_yaxis("下单量", weekday_count['orderID'].tolist())
# Chart title.
weekday_bar.set_global_opts(title_opts=opts.TitleOpts(title="各星期数下单量"))
# Show value labels and mark lines at the minimum and maximum.
weekday_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True),
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name="最小值"),
            opts.MarkLineItem(type_="max", name="最大值")
        ]
    ),
)
weekday_bar.render_notebook()

1.2.5 根据下单时段统计下单量(30分钟一段)
# Bucket each order into the half-hour slot it falls in, formatted as HH:MM.
# '30min' replaces the '30T' alias, which is deprecated in recent pandas.
# NOTE: the column keeps the name 'hour' although it holds 30-minute slots.
temp_df['hour'] = temp_df['orderTime'].dt.floor('30min').dt.strftime('%H:%M')
temp_df
# Pivot: time slot -> number of distinct orders.
time_count = pd.pivot_table(temp_df, index='hour', values='orderID', aggfunc='nunique')
time_count
# Create the bar chart with the ESSOS theme. It gets its own name instead of
# re-binding weekday_bar, so the weekday chart stays available.
time_bar = Bar({"theme": ThemeType.ESSOS})
# X axis: time slots.
time_bar.add_xaxis(time_count.index.tolist())
# Y axis: order counts.
time_bar.add_yaxis("下单量", time_count['orderID'].tolist())
# Chart title plus a data-zoom control.
time_bar.set_global_opts(
    title_opts=opts.TitleOpts(title="各时段下单量"),
    datazoom_opts=opts.DataZoomOpts()
)
# Show value labels and mark lines at the minimum and maximum.
time_bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True),
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_="min", name="最小值"),
            opts.MarkLineItem(type_="max", name="最大值")
        ]
    ),
)
time_bar.render_notebook()

1.2.6 按月统计复购率
# Pivot: user x month -> number of distinct orders; NaN where the user placed
# no order in that month.
multiple_bought = pd.pivot_table(temp_df, index='userID', columns='month', values='orderID', aggfunc='nunique')
# Flag repeat buyers: 1.0 when the user placed more than one order that month,
# 0.0 for exactly one, NaN kept for months without any order.
# (Vectorized; DataFrame.applymap is deprecated.)
multiple_bought = multiple_bought.gt(1).astype(float).where(multiple_bought.notna())
# Monthly repurchase rate in percent: repeat buyers / active buyers.
np.round(100 * multiple_bought.sum() / multiple_bought.count(), 2)
# No chart for this one.
边栏推荐
- 穿越派·派盘 + Mountain Duck = 数据本地管理
- Set set detailed explanation
- Continue to learn MySQL
- Boot + jsp University Community Management System (with source Download Link)
- Basic electrician knowledge 100 questions
- OneFlow源码解析:算子签名的自动推断
- C语言初阶——实现扫雷游戏
- [excel] column operation, which performs specific column for data in a cell, such as text division by comma, colon, space, etc
- Mongodb学习篇:安装后的入门第一课
- 芯片,建立在沙粒上的帝国!
猜你喜欢

Boot + jsp University Community Management System (with source Download Link)

【考研高数 自用】高数第一章基础阶段思维导图
Educational administration management system of SSM (free source code)

Cockroachdb: the resistant geo distributed SQL database paper reading notes

JDBC common interview questions

穿越派与贸大合作,为大学生增添效率

In depth understanding of condition source code interpretation and analysis of concurrent programming

Educational administration management system (free source code)

Brief description of activation function

Simple implementation of database connection pool
随机推荐
JDBC common interview questions
Using nocalhost to develop microservice application on rainbow
boot+jsp的高校社團管理系統(附源碼下載鏈接)
Printk debugging summary
【QT】qt加减乘除之后,保留小数点后两位
Flowable source code comment (XXXIX) task listener
vsCode函数注解/文件头部注解快捷键
2022.6.30-----leetcode.1175
In depth understanding of condition source code interpretation and analysis of concurrent programming
Advanced cross platform application development (III): online resource upgrade / hot update with uni app
Typeorm framework
Summary of common components of applet
数据治理:数据治理管理(第五篇)
QT等待框制作
Ssgssrcsr differences
MySQL数据迁移遇到的一些错误
Wild melon or split melon?
Daily code 300 lines learning notes day 11
从底层结构开始学习FPGA----RAM IP的定制与测试
bat操作ftp上传下载命令