!pip install --upgrade pandas
!pip install --upgrade seaborn
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
# Check the installed library versions.
print(np.__version__)
print(pd.__version__)
print(sb.__version__)

# Alternative remote copy of the dataset (kept for reference):
# train_data = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-29/nyc_squirrels.csv")
train_data = pd.read_csv("work/data.csv")
print(type(train_data))
print(train_data.head(4))  # first four rows only

# Output (first line; the rest of the output follows below):
# 1.19.5
# Output of the preceding cell, continued (pandas version, seaborn version,
# type(train_data), train_data.head(4)):
# 1.3.5
# 0.12.2
# <class 'pandas.core.frame.DataFrame'>
#         long        lat unique_squirrel_id hectare shift      date  \
# 0 -73.956134  40.794082     37F-PM-1014-03     37F    PM  10142018
# 1 -73.957044  40.794851     37E-PM-1006-03     37E    PM  10062018
# 2 -73.976831  40.766718      2E-AM-1010-03     02E    AM  10102018
# 3 -73.975725  40.769703      5D-PM-1018-05     05D    PM  10182018

# A 3x6 array holding the integers 0..17.
a = np.arange(18).reshape(3, 6)
print(a)

# The same values wrapped in a DataFrame with explicit labels.
data_frame = pd.DataFrame(
    np.arange(18).reshape(3, 6),
    index=["a", "b", "c"],  # row labels
    columns=["A", "B", "C", "D", "E", "F"],  # column labels
)
print(data_frame)
print(data_frame.index)
print(data_frame.columns)
print(data_frame.dtypes)

# Output:
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]]
#     A   B   C   D   E   F
# a   0   1   2   3   4   5
# b   6   7   8   9  10  11
# c  12  13  14  15  16  17
# Index(['a', 'b', 'c'], dtype='object')
# Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
# A    int64
# B    int64
# C    int64
# D    int64
# E    int64
# F    int64
# dtype: object
#
# NOTE(review): the captured output also contains two more printouts of the
# same frame; the statements that produced them are not present in the code.
#     A   B   C   D   E   F
# a   0   1   2   3   4   5
# b   6   7   8   9  10  11
# c  12  13  14  15  16  17
#     A   B   C   D   E   F
# a   0   1   2   3   4   5
# b   6   7   8   9  10  11
# c  12  13  14  15  16  17

# DataFrame: inspecting columns of a specific dtype
# A 3x3 frame of ones; np.ones yields floats, so every column is float64.
df = pd.DataFrame(np.ones((3, 3)), columns=["a", "b", "c"])
print(df)
print(df.dtypes)

# select_dtypes keeps or drops columns according to their dtype.
print(df.select_dtypes(include=["int64"]))  # integer columns only
print(df.select_dtypes(exclude=["float64"]))  # everything except float columns
print("\nexclude int:\n", df.select_dtypes(exclude=["int64"]))

# Output:
#      a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0
# a    float64
# b    float64
# c    float64
# dtype: object
# Empty DataFrame
# Columns: []
# Index: [0, 1, 2]
# Empty DataFrame
# Columns: []
# Index: [0, 1, 2]
#
# exclude int:
#       a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0

# Selecting specific samples (rows) and features (columns)
df = pd.DataFrame(np.ones((3, 3)), columns=["a", "b", "c"])
print(df, "\n----------------------------")

# Rows labelled 0 and 2, all columns.
print(df.loc[[0, 2], :], "\n-----------------------")
# The single value at row 0, column "b".
print(df.loc[0, "b"], "\n----------------------")
# All rows, columns "b" and "c".
print(df.loc[:, ["b", "c"]])

# Output:
#      a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0
# ----------------------------
#      a    b    c
# 0  1.0  1.0  1.0
# 2  1.0  1.0  1.0
# -----------------------
# 1.0
# ----------------------
#      b    c
# 0  1.0  1.0
# 1  1.0  1.0
# 2  1.0  1.0

# Truncation
# Keep only the labels between `before` and `after` (inclusive).
print(df.truncate(before=0, after=1))  # default direction is rows, axis="index"
print(df.truncate(before="b", after="c", axis="columns"))

# Drop certain features: pop removes column "b" from df itself.
df.pop("b")
print(df)

# Output:
#      a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
#      b    c
# 0  1.0  1.0
# 1  1.0  1.0
# 2  1.0  1.0
#      a    c
# 0  1.0  1.0
# 1  1.0  1.0
# 2  1.0  1.0

# Concatenating DataFrames: axis 0 => rows, axis 1 => columns; when the
# shapes differ, the missing cells are filled with NaN.
print(pd.concat([df, df], axis=0))  # stack along the row dimension
print(pd.concat([df, df], axis=1))  # stack along the column dimension

# Output:
#      a    c
# 0  1.0  1.0
# 1  1.0  1.0
# 2  1.0  1.0
# 0  1.0  1.0
# 1  1.0  1.0
# 2  1.0  1.0
#      a    c    a    c
# 0  1.0  1.0  1.0  1.0
# 1  1.0  1.0  1.0  1.0
# 2  1.0  1.0  1.0  1.0

# Arithmetic operations
df = pd.DataFrame(np.ones((3, 3)), columns=["a", "b", "c"])
print(df, "\n----------------------------")

# Method <-> operator: add +, sub -, mul *, div /, floordiv //, mod %, pow **
print(df.add(df))
print(df.sub(df))
print(df.mul(df))
print(df.div(df))

# Output:
#      a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0
# ----------------------------
#      a    b    c
# 0  2.0  2.0  2.0
# 1  2.0  2.0  2.0
# 2  2.0  2.0  2.0
#      a    b    c
# 0  0.0  0.0  0.0
# 1  0.0  0.0  0.0
# 2  0.0  0.0  0.0
#      a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0
#      a    b    c
# 0  1.0  1.0  1.0
# 1  1.0  1.0  1.0
# 2  1.0  1.0  1.0

# Counting how many values fall within each range (histogram)
# Histogram of a small hand-written sample.
x = [0, 2, 9, 2, 5, 6, 2]
plt.hist(x)
plt.show()

# hist() draws a histogram; here for one column of the loaded dataset.
plt.hist(train_data["hectare_squirrel_number"])
plt.show()

# Same column again via the pandas plotting wrapper, with titles and 30 bins.
plt.figure(0)
plt.title("hectare_squirrel_number")
plt.xlabel("squirrel")
plt.ylabel("hectare")
ax = train_data["hectare_squirrel_number"].hist(bins=30, grid=True, color="green")
print(ax.patches)  # the patches (drawn rectangles) produced by the plot

# Recolour every bar whose left edge is at x >= 10.
for rect in ax.patches:
    if rect.get_x() >= 10:
        rect.set_color("blue")
plt.show()
<Axes.ArtistList of 30 patches>
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。