import pandas as pd
import numpy as np
import os
os.getcwd()
'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据清洗之数据统计'
os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')
df = pd.read_csv('online_order.csv', encoding='gbk', dtype={'customer':str, 'order':str})
df.head(5)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type { vertical-align: middle;}.dataframe tbody tr th { vertical-align: top;}.dataframe thead th { text-align: right;}</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;"> <th></th> <th>customer</th> <th>order</th> <th>total_items</th> <th>discount%</th> <th>weekday</th> <th>hour</th> <th>Food%</th> <th>Fresh%</th> <th>Drinks%</th> <th>Home%</th> <th>Beauty%</th> <th>Health%</th> <th>Baby%</th> <th>Pets%</th></tr></thead>
<tbody>
<tr> <th>0</th> <td>0</td> <td>0</td> <td>45</td> <td>23.03</td> <td>4</td> <td>13</td> <td>9.46</td> <td>87.06</td> <td>3.48</td> <td>0.00</td> <td>0.00</td> <td>0.00</td> <td>0.0</td> <td>0.0</td></tr><tr> <th>1</th> <td>0</td> <td>1</td> <td>38</td> <td>1.22</td> <td>5</td> <td>13</td> <td>15.87</td> <td>75.80</td> <td>6.22</td> <td>2.12</td> <td>0.00</td> <td>0.00</td> <td>0.0</td> <td>0.0</td></tr><tr> <th>2</th> <td>0</td> <td>2</td> <td>51</td> <td>18.08</td> <td>4</td> <td>13</td> <td>16.88</td> <td>56.75</td> <td>3.37</td> <td>16.48</td> <td>6.53</td> <td>0.00</td> <td>0.0</td> <td>0.0</td></tr><tr> <th>3</th> <td>1</td> <td>3</td> <td>57</td> <td>16.51</td> <td>1</td> <td>12</td> <td>28.81</td> <td>35.99</td> <td>11.78</td> <td>4.62</td> <td>2.87</td> <td>15.92</td> <td>0.0</td> <td>0.0</td></tr><tr> <th>4</th> <td>1</td> <td>4</td> <td>53</td> <td>18.31</td> <td>2</td> <td>11</td> <td>24.13</td> <td>60.38</td> <td>7.78</td> <td>7.72</td> <td>0.00</td> <td>0.00</td> <td>0.0</td> <td>0.0</td></tr></tbody>
</table>
</div>
grouped = df.groupby('weekday')
type(grouped)
pandas.core.groupby.generic.DataFrameGroupBy
grouped.mean()
<div>
<style scoped>
.dataframe tbody tr th:only-of-type { vertical-align: middle;}.dataframe tbody tr th { vertical-align: top;}.dataframe thead th { text-align: right;}</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;"> <th></th> <th>total_items</th> <th>discount%</th> <th>hour</th> <th>Food%</th> <th>Fresh%</th> <th>Drinks%</th> <th>Home%</th> <th>Beauty%</th> <th>Health%</th> <th>Baby%</th> <th>Pets%</th></tr><tr> <th>weekday</th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th> <th></th></tr></thead>
<tbody>
<tr> <th>1</th> <td>30.662177</td> <td>8.580705</td> <td>14.693122</td> <td>22.690866</td> <td>20.000904</td> <td>22.522993</td> <td>13.932553</td> <td>6.972394</td> <td>1.152285</td> <td>11.592562</td> <td>1.007306</td></tr><tr> <th>2</th> <td>31.868612</td> <td>8.638014</td> <td>14.966197</td> <td>23.994915</td> <td>19.407738</td> <td>24.346459</td> <td>13.559191</td> <td>4.903366</td> <td>1.079423</td> <td>11.277284</td> <td>1.272638</td></tr><tr> <th>3</th> <td>31.869796</td> <td>7.794507</td> <td>15.059898</td> <td>24.309274</td> <td>19.957653</td> <td>23.822470</td> <td>13.282088</td> <td>6.702640</td> <td>1.156829</td> <td>9.591389</td> <td>0.937205</td></tr><tr> <th>4</th> <td>32.251899</td> <td>8.068155</td> <td>14.324185</td> <td>24.374364</td> <td>21.538027</td> <td>24.553266</td> <td>13.391946</td> <td>4.806528</td> <td>1.031490</td> <td>9.058201</td> <td>1.080473</td></tr><tr> <th>5</th> <td>31.406619</td> <td>9.159031</td> <td>13.386919</td> <td>24.602790</td> <td>20.549153</td> <td>24.976466</td> <td>12.485788</td> <td>5.431221</td> <td>1.248605</td> <td>9.655343</td> <td>0.908227</td></tr><tr> <th>6</th> <td>32.154814</td> <td>8.414258</td> <td>14.751084</td> <td>23.743196</td> <td>18.707788</td> <td>23.593699</td> <td>14.173291</td> <td>5.878647</td> <td>1.170585</td> <td>11.478343</td> <td>1.150980</td></tr><tr> <th>7</th> <td>32.373837</td> <td>8.710171</td> <td>16.989535</td> <td>22.271512</td> <td>21.020359</td> <td>21.093767</td> <td>13.632481</td> <td>5.895322</td> <td>1.145938</td> <td>13.844250</td> <td>0.950391</td></tr></tbody>
</table>
</div>
grouped.mean()['Food%']
weekday
1    22.690866
2    23.994915
3    24.309274
4    24.374364
5    24.602790
6    23.743196
7    22.271512
Name: Food%, dtype: float64
# 多个字段分组
grouped = df.groupby(by=['customer', 'weekday'])
grouped.sum()['total_items']
customer  weekday
0         4        96
          5        38
1         1       423
          2       127
          4        37
          5        36
10        1        23
          3        26
100       1        38
          2        78
          3        78
          7       135
1000      2         6
10000     6        30
10001     6        15
10002     3        11
          6        42
          7        48
10003     2         4
10004     2        28
          3       131
          4        93
10005     7        29
10006     2        20
          5        27
          7        26
10007     2         6
          6        15
10008     7       123
10009     1         2
                  ...
9984      6        40
          7        61
9985      6        11
9986      1        50
          6        49
          7        50
9987      1        23
9988      1        18
          4         1
9989      1        27
999       1       173
          2        45
          4        60
          5       137
          7       149
9990      7         8
9991      6        46
9992      1        13
          2        14
          5        25
          6        24
9993      6         8
9994      2        64
          3        57
9995      7        14
9996      7        14
9997      6         5
9998      1        28
          6        10
9999      6         4
Name: total_items, Length: 20777, dtype: int64
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。