数据分析的最后一部分是报告展示,也就是图表展现。如何将数据分析之后的结果通过图表又生动又形象又好理解的让看这份报告的人接受、认同并信服你的结论,这也是一门大学问。
市面上数据可视化的软件很多,tableau(要钱),powerbi(超卡),excel(不够好看)等等。所以斑点鱼今天就手把手教大家用R做数据可视化,一起来看一下吧~
1.散点图
#添加标签
#添加回归拟合线
#渐变色
#处理大面积重叠情况(调低透明度、热图展示)
1.1气泡图
#实际是三维散点图中的第三个变量颜色的设置变成了大小的设置
2.折线图
#不同的节点和线段会有不同的展示效果
#从上两幅图可以看出密集程度。
2.1 时序图
3.面积图
4.条形图
#添加标签
5.火柴图
6.直方图
#密度分布图
7.箱线图+小提琴图
8.二位密度图
9.玫瑰图
喜欢的朋友点个赞丫,赞个赏丫~~若需代码,请往后翻~~
一起学习的小伙伴如果有什么想法或者意见,欢迎沟通~
#附code:
install.packages("ggplot2")
install.packages("gcookbook")
install.packages("hexbin")
library(ggplot2)
library(gcookbook)
library(plyr)
library(hexbin)
####读取数据
data=read.csv("data.csv",header=FALSE,sep="\t")
#手动为列名赋值
names(data)=c("a","b","c")
#转换为因子
data$sex=factor(data$sex)
str(data)
install.packages("xlsx")
library(xlsx)
data=read.xlsx("data.xlsx",1)
data=read.xlsx("data.xlsx",sheetIndex=2)
data=read.xlsx("data.xlsx",sheetName="REVENUES")
install.packages("gdata")
library(gdata)
data=read.xls("data.xls",sheet=2)
install.packages("foreign")
library(foreign)
data=read.spss("data.sav")
####绘制散点图
qplot(mtcars$wt,mtcars$mpg)
qplot(wt,mpg,data=mtcars)
ggplot(mtcars,aes(x=wt,y=mpg))+geom_point()
heightweight[,c("ageYear","heightIn")]
ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point()
ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(shape=21)#空心圈代替实心圈
ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(size=3)
#使用点型和颜色属性,基于某变量对数据进行分组
heightweight[,c("sex","ageYear","heightIn")]
ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex))+geom_point()
ggplot(heightweight,aes(x=ageYear,y=heightIn,shape=sex))+geom_point()
ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex,shape=sex))+geom_point()
ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=sex,shape=sex))+geom_point()+
scale_shape_manual(values = c(3,2))+
scale_color_brewer(palette = "Set1")
#生成一个数据副本
hw=heightweight
##四分类:将数据按照是否大于100磅分为两组
hw$weightGroup=cut(hw$weightLb,breaks=c(-Inf,100,Inf),labels = c("100"))
#使用具有颜色和填充色的点型及对应空值和填充色的颜色
ggplot(hw,aes(x=ageYear,y=heightIn,shape=sex,fill=weightGroup))+geom_point()+
scale_shape_manual(values = c(21,24))+
scale_fill_manual(values=c(NA,"black"),guide=guide_legend(override.aes=list(shape=21)))
#将第三个连续型变量映射给颜色
ggplot(heightweight,aes(x=ageYear,y=heightIn,colour=weightLb))+geom_point()
ggplot(heightweight,aes(x=ageYear,y=heightIn,fill=weightLb))+geom_point(shape=21,size=2.5)+
scale_fill_gradient(low="black",high="white")
ggplot(heightweight,aes(x=ageYear,y=heightIn,size=weightLb,colour=sex))+geom_point(size=1.5)+
scale_size_area()+#使数据点面积正比于变量值
scale_color_brewer(palette="Set1")
#将第三个连续型变量映射给大小
ggplot(heightweight,aes(x=ageYear,y=heightIn,size=weightLb))+geom_point()
ggplot(heightweight,aes(x=ageYear,y=heightIn,fill=weightLb))+geom_point(shape=21,size=2.5)+
##处理图形重叠
sp=ggplot(diamonds,aes(x=carat,y=price))
sp+geom_point()
#增加透明度
sp+geom_point(alpha=.1)
sp+geom_point(alpha=.01)
#stat_bin2d()对数据进行分箱
sp+stat_bin2d()
sp+stat_bin2d(bins=50)+
scale_fill_gradient(low="lightblue",high="red",limits=c(0,6000))
#stat_binhex()对数据进行分箱
library(hexbin)
sp+stat_binhex()+scale_fill_gradient(low="lightblue",high="red",limits=c(0,8000))
sp+stat_binhex()+scale_fill_gradient(low="lightblue",high="red",breaks=c(0,500,1000,2000,4000,6000),limits=c(0,6000))
#position_jitter()给数据点增加随机扰动
sp1=ggplot(ChickWeight,aes(x=Time,y=weight))
sp1+geom_point()#变量x为离散型的数据集
sp1+geom_point(position="jitter")#添加随机扰动
sp1+geom_point(position=position_jitter(width=.5,height=0))#只在水平方向添加随机扰动
sp1+geom_boxplot(aes(group=Time))#当数据集对应于一个离散型数据轴和一个连续型数据轴时,箱线图可能是一种比较好的表达方式。
##添加回归拟合线
#基本绘图对象
sp=ggplot(heightweight,aes(x=ageYear,y=heightIn))
sp+geom_point()+stat_smooth(method = lm)
#99%置信域
sp+geom_point()+stat_smooth(method=lm,level=0.99)
#没有置信域
sp+geom_point()+stat_smooth(method = lm,se=FALSE)
#调整颜色
sp+geom_point(colour="grey60")+stat_smooth(method = lm,se=FALSE,colour="black")
#loess曲线:默认(局部加权多项式拟合线)
sp+geom_point(colour="grey60")+stat_smooth()
sp+geom_point(colour="grey60")+stat_smooth(method = loess)
#逻辑回归拟合线
library(MASS)
b=biopsy
b$classn[b$class=="benign"]=0
b$classn[b$class=="malignant"]=1
b
ggplot(b,aes(x=V1,y=classn))+
geom_point(position = position_jitter(width=0.3,height=0.06),alpha=0.4,shape=21,size=1.5)+
stat_smooth(method = glm,family=binomial)
##根据已有模型向散点图添加拟合线
model=lm(heightIn~ageYear+I(ageYear^2),heightweight)
model
#创建一个包含ageYear的列,并对其进行插值
xmin=min(heightweight$ageYear)
xmax=max(heightweight$ageYear)
predicted=data.frame(ageYear=seq(xmin,xmax,length.out = 100))
#计算变量heightIn的预测值
predicted$heightIn=predict(model,predicted)
predicted
#绘制图表
sp=ggplot(heightweight,aes(x=ageYear,y=heightIn))+geom_point(colour="grey40")
sp+geom_line(data=predicted,size=1)
############################################## P80-P86
#建立模型:predictvals的函数还未写
modlinear=lm(heightIn~ageYear,heightweight)
modloess=loess(heightIn~ageYear,heightweight)
lm_predicted=predictvals(modlinear,"ageYear","heightIn")
loess_predicted=predictvals(modloess,"ageYear","heightIn")
sp+geom_line(data=lm_predicted,colour="red",size=.8)+
geom_line(data=loess_predicted,colour="blue",size=.8)
#############################################################
##向散点图添加边际地毯
ggplot(faithful,aes(x=eruptions,y=waiting))+geom_point()+geom_rug()
ggplot(faithful,aes(x=eruptions,y=waiting))+geom_point()+geom_rug(position="jitter",size=.2)
##向散点图添加标签annotate()\geom_text()
subset(countries,Year==2009 & healthexp>2000)
sp=ggplot(subset(countries,Year==2009 & healthexp>2000),
aes(x=healthexp,y=infmortality))+
geom_point()
sp+annotate("text",x=4350,y=5.4,label="Canada")+
annotate("text",x=7400,y=6.8,label="USA")
sp+geom_text(aes(label=Name),size=4)
sp+geom_text(aes(label=Name),size=4,vjust=0)#vjust=0标签文本的基线会与数据点对齐,vjust=1标签文本的顶部会与数据点对齐
sp+geom_text(aes(y=infmortality+1.7,label=Name),size=4,vjust=0)
sp+geom_text(aes(label=Name),size=4,hjust=0)#hjust=0左对齐,hjust=1右对齐
sp+geom_text(aes(x=healthexp+400,label=Name),size=4,hjust=0)
cdat=subset(countries,Year== 2009 & healthexp >2000)
cdat$Name1=cdat$Name
idx=cdat$Name1 %in% c("Canada","Ireland","United Kindom","Switzerland")
idx
cdat$Name1[!idx]=NA
cdat
ggplot(cdat,aes(x=healthexp,y=infmortality))+geom_point()+
geom_text(aes(x=healthexp+100,label=Name1),size=4,hjust=0)+
xlim(2000,10000)
####绘制气泡图
cdat=subset(countries,Year==2009 & healthexp >5000)
p=ggplot(cdat,aes(x=healthexp,y=infmortality,size=GDP))+
geom_point(shape=21,colour="black",fill="cornsilk")
p
p+scale_size_area(max_size =15)
#当x轴,y轴皆为分类变量时,气泡图可以用来表示网格点上的变量值
#对男性组和女性组求和
hec=HairEyeColor[,,"Male"]+HairEyeColor[,,"Female"]
#转化为长格式
library(reshape2)
hec=melt(hec,value.name = "count")
ggplot(hec,aes(x=Eye,y=Hair))+
geom_point(aes(size=count),shape=21,colour="black",fill="cornsilk")+
scale_size_area(max_size = 20,guide=FALSE)+
geom_text(aes(y=as.numeric(Hair)-sqrt(count)/22,label=count),vjust=1,colour="grey60",size=4)
##绘制散点图矩阵
library(gcookbook)
c2009=subset(countries,Year==2009,
select=c(Name,GDP,laborrate,healthexp,infmortality))
c2009
pairs(c2009[,2:5])
####折线图
qplot(pressure$temperature,pressure$pressure,geom="line")
qplot(pressure$temperature,pressure$pressure,geom=c("line","point"))
qplot(temperature,pressure,data=pressure,geom=c("line","point"))
ggplot(pressure,aes(x=temperature,y=pressure))+geom_line()+geom_point()
BOD1=BOD#将数据复制一份
BOD1$Time=factor(BOD1$Time)
ggplot(BOD1,aes(x=Time,y=demand,group=1))+geom_line()
#改变y轴长度
ggplot(BOD,aes(x=Time,y=demand))+geom_line()+ylim(0,max(BOD$demand))
ggplot(BOD,aes(x=Time,y=demand))+geom_line()+expand_limits(y=0)
#像折线图添加数据标签
ggplot(BOD,aes(x=Time,y=demand))+geom_line()+geom_point()
ggplot(worldpop,aes(x=Year,y=Population))+geom_line()+geom_point()
ggplot(worldpop,aes(x=Year,y=Population))+geom_line()+geom_point()+scale_y_log10()#y轴取对数
##绘制多重折线图
#对ToothGrowth数据集进行汇总
tg=ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))
#将supp映射颜色colour
ggplot(tg,aes(x=dose,y=length,colour=supp))+geom_line()
#将supp映射给线型linetype
ggplot(tg,aes(x=dose,y=length,linetype=supp))+geom_line()
#颜色+线型
ggplot(tg,aes(x=dose,y=length,linetype=supp,colour=supp))+geom_line()
#dose变离散型后作图
ggplot(tg,aes(x=factor(dose),y=length,colour=supp,group=supp))+geom_line()
ggplot(tg,aes(x=dose,y=length))+geom_line()#不可缺少group语句
##添加数据点
ggplot(tg,aes(x=dose,y=length,shape=supp))+geom_line()+geom_point(size=4)
ggplot(tg,aes(x=dose,y=length,fill=supp))+geom_line()+geom_point(size=4,shape=21)
#避免重叠,左移或右移0.2
ggplot(tg,aes(x=dose,y=length,shape=supp))+geom_line(position = position_dodge(0.2))+
geom_point(position=position_dodge(0.2),size=4)
##修改线条样式:线型、宽度、颜色
ggplot(BOD,aes(x=Time,y=demand))+
geom_line(linetype="dashed",size=1,colour="blue")
tg=ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))#类似数据透视
ggplot(tg,aes(x=dose,y=length,colour=supp))+geom_line()+
scale_color_brewer(palette = "Set1")
##修改数据标记样式
ggplot(BOD,aes(x=Time,y=demand))+geom_line()+
geom_point(size=4,shape=22,colour="darkred",fill="pink")
ggplot(BOD,aes(x=Time,y=demand))+geom_line()+
geom_point(size=4,shape=21,colour="darkred",fill="white")
tg=ddply(ToothGrowth,c("supp","dose"),summarise,length=mean(len))#类似数据透视
ggplot(tg,aes(x=dose,y=length,fill=supp))+geom_line(position = position_dodge(0.2))+
geom_point(shape=21,size=3,position=position_dodge(0.2))+
scale_fill_manual(values = c('black','white'))
####绘制面积图
#将sunspot.year数据集转化为数据框
sunspotyear=data.frame(Year=as.numeric(time(sunspot.year)),
Sunspots=as.numeric(sunspot.year))
ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area()
ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area(colour="black",fill="blue",alpha=0.2)#alpha设置透明度
ggplot(sunspotyear,aes(x=Year,y=Sunspots))+geom_area(fill="blue",alpha=0.2)+geom_line()
##绘制堆积面积图
ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup))+geom_area()
ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup))+
geom_area(colour="black",size=0.2,alpha=0.4)+
scale_fill_brewer(palette = "Blues",breaks=rev(levels(uspopage$AgeGroup)))
ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup,order=desc(AgeGroup)))+
geom_area(colour=NA,alpha=0.4)+
scale_fill_brewer(palette = "Blues")+
geom_line(position = "stack",size=0.2)
#绘制百分比堆积面积图
uspopage_prop=ddply(uspopage,"Year",transform,Percent=Thousands/sum(Thousands)*100)
ggplot(uspopage_prop,aes(x=Year,y=Percent,fill=AgeGroup))+
geom_area(colour="black",size=0.2,alpha=0.4)+
scale_fill_brewer(palette="Blues",breaks=rev(levels(uspopage$AgeGroup)))
#####添加置信域
#抓取climate一个子集
clim=subset(climate,Source=="Berkeley",select = c("Year","Anomaly10y","Unc10y"))
clim
#将置信域绘制为阴影
ggplot(clim,aes(x=Year,y=Anomaly10y))+
geom_ribbon(aes(ymin=Anomaly10y-Unc10y,ymax=Anomaly10y+Unc10y),alpha=0.2)+#alpha=0.2将阴影部分的透明度设定为80%
geom_line()
#用虚线表示置信域的上下边界
ggplot(clim,aes(x=Year,y=Anomaly10y))+
geom_line(aes(y=Anomaly10y-Unc10y),colour="grey50",linetype="dotted")+
geom_line(aes(y=Anomaly10y+Unc10y),colour="grey50",linetype="dotted")+
geom_line()
####条形图
qplot(BOD$Time,BOD$demand,geom="bar",stat="identity")
'''Error: stat_count() must not be used with a y aesthetic.
In addition: Warning message:
`stat` is deprecated '''
#将x转化为因子变量,可视为离散值
qplot(factor(BOD$Time),BOD$demand,geom="bar",stat="identity")
ggplot(BOD,aes(x=Time,y=demand))+geom_bar(stat="identity")
ggplot(BOD,aes(x=factor(Time),y=demand))+geom_bar(stat="identity")
#添加颜色fill=和边框线colour=
ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity",fill="lightblue",colour="black")
##簇状条形图
cabbage_exp
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(position="dodge",stat="identity")
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(position="dodge",stat="identity",colour="black")+
scale_fill_brewer(palette="Pastell")#pastell是调色盘
ce=cabbage_exp[1:5,1:3]#复制删除了最后一行的数据集
ce
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(position="dodge",stat="identity",colour="black")+
scale_fill_brewer(palette="Pastell")
#频数条形图
qplot(mtcars$cyl)
qplot(factor(mtcars$cyl))
qplot(factor(cyl),data=mtcars)
ggplot(diamonds,aes(x=cut))+geom_bar()
#条形图着色
upc=subset(uspopchange,rank(Change)>40)
upc
ggplot(upc,aes(x=Abb,y=Change,fill=Region))+geom_bar(stat="identity")
#颜色的重新设定
ggplot(upc,aes(x=reorder(Abb,Change),y=Change,fill=Region))+
geom_bar(stat="identity")+scale_fill_manual(values=c("#669933","#FFCC66"))+xlab("State")
#对正负条形图进行调色
csub=subset(climate,Source=="Berkeley"&Year>=1900)#通过筛选条件取子集
csub$pos=csub$Anomaly10y>=0
csub
ggplot(csub,aes(x=Year,y=Anomaly10y,fill=pos))+
geom_bar(stat="identity",position = "identity")
ggplot(csub,aes(x=Year,y=Anomaly10y,fill=pos))+
geom_bar(stat="identity",position = "identity",colour="black",size=0.25)+
scale_fill_manual(values=c("#CCEEFF","#FFDDDD"),guide=FALSE)
#调整条形宽度和条形间距
ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity")
ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity",width=0.5)#窄
ggplot(pg_mean,aes(x=group,y=weight))+geom_bar(stat="identity",width=1)#宽
#更窄的簇状条形图
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(stat="identity",width=0.5,position="dodge")
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(stat="identity",width=0.5,position=position_dodge(0.7))#调组距
#堆积条形图
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat="identity")
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat="identity")+
guides(fill=guide_legend(reverse=TRUE))#图例颜色顺序改变
install.packages("plyr")#为了用desc
library(plyr)
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar,order=desc(Cultivar)))+geom_bar(stat="identity")
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+geom_bar(stat="identity",colour="blue")+
guides(fill=guide_legend(reverse=TRUE))+
scale_fill_brewer(palette = "Pastell")
#绘制百分比堆积条形图
#通过plyr中的ddply()\transform()函数将每组条形对应的数据标准化为100%
#以Date为切割变量()对每组数据进行transform()
ce=ddply(cabbage_exp,"Date",transform,percent_weight=Weight/sum(Weight)*100)#新增一个percent_weight变量
ggplot(ce,aes(x=Date,y=percent_weight,fill=Cultivar))+geom_bar(stat="identity")
ggplot(ce,aes(x=Date,y=percent_weight,fill=Cultivar))+geom_bar(stat="identity",colour="blue")+
guides(fill=guide_legend(reverse=TRUE))+
scale_fill_brewer(palette = "Pastell")
##添加数据标签
#在条形图顶端下方
ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+
geom_bar(stat = "identity")+
geom_text(aes(label=Weight),vjust=1.5,colour="white")
#在条形图顶端上方
ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+
geom_bar(stat = "identity")+
geom_text(aes(label=Weight),vjust=-0.2,colour="brown")
#将y轴上限变大
ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+
geom_bar(stat = "identity")+
geom_text(aes(label=Weight),vjust=-0.2)+
ylim(0,max(cabbage_exp$Weight)*1.25)
#设定标签的y轴位置使其略高于条形图顶端-y轴范围会自动调整
ggplot(cabbage_exp,aes(x=interaction(Date,Cultivar),y=Weight))+
geom_bar(stat = "identity")+
geom_text(aes(y=Weight+0.1,label=Weight),vjust=-0.2)#标签的位置上调0.1
#簇状条形图的标签
ggplot(cabbage_exp,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(stat = "identity",position = "dodge")+
geom_text(aes(label=Weight),vjust=1.5,colour="white",position = position_dodge(.9),size=5)#标签的位置上调0.1
#堆积条形图的数据标签
#根据日期和性别进行排序
ce=arrange(cabbage_exp,Date,desc(Cultivar))
#计算累积和
ce=ddply(ce,"Date",transform,label_y=cumsum(Weight))#label_y是位置信息
ce
ggplot(ce,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(stat = "identity")+
geom_text(aes(y=label_y,label=Weight),vjust=1.2,colour="white")
ce=arrange(cabbage_exp,Date,desc(Cultivar))
#计算y轴的位置,将数据标签置于条形中部
ce=ddply(ce,"Date",transform,label_y=cumsum(Weight)-0.5*Weight)
ggplot(ce,aes(x=Date,y=Weight,fill=Cultivar))+
geom_bar(stat = "identity")+
geom_text(aes(y=label_y,label=Weight),colour="white")
####绘制Cleveland点图(有时用来代替点图)
tophit=tophitters2001[1:25,]#取前25个数据
ggplot(tophit,aes(x=avg,y=name))+geom_point()
tophit[,c("name","lg","avg")]#取其中3列
ggplot(tophit,aes(x=avg,y=reorder(name,avg)))+
geom_point(size=3)+
theme_bw()+#删除背景,用虚线代替
#将点图的x轴和y轴互换
ggplot(tophit,aes(x=reorder(name,avg),y=avg))+
geom_point(size=3)+
theme_bw()+
theme(axis.text.x = element_text(angle=60,hjust=1),#横坐标轴的文字方向
#提取出name,依次根据lg,avg进行排序
nameorder=tophit$name[order(tophit$lg,tophit$avg)]
#将name转化为因子, 因子水平与nameorder保持一致
tophit$name=factor(tophit$name,levels=nameorder)
#借助geom_segment()用“以数据点为端点的线段”代替网格线
ggplot(tophit,aes(x=avg,y=name))+
geom_segment(aes(yend=name),xend=0,colour="grey50")+
geom_point(size=3,aes(colour=lg))+
theme_bw()+
legend.position = c(1,0.55),#将图例放置在绘图区中
legend.justification = c(1,0.5))
#以队为分组的火柴图
ggplot(tophit,aes(x=avg,y=name))+
geom_segment(aes(yend=name),xend=0,colour="grey50")+
geom_point(size=3,aes(colour=lg))+
scale_color_brewer(palette="Set1",limits=c("NL","AL"),guide=FALSE)+
theme_bw()+
facet_grid(lg~.,scales="free_y",space="free_y")#没有生效
####直方图
qplot(mtcars$mpg)
qplot(mpg,data=mtcars,binwidth=4)
ggplot(mtcars,aes(x=mpg))+geom_histogram(binwidth=6)#binwidth为组距
##简单直方图
ggplot(faithful,aes(x=waiting))+geom_histogram()#从坐标就是横坐标的计数
#将变量值保存为一个基本向量
w=faithful$waiting
ggplot(NULL,aes(x=w))+geom_histogram()
#设定组距为5
ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth=5,fill="white",colour="black")
#将x的取值切分成15组
binsize=diff(range(faithful$waiting))/15
ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth=binsize,fill="white",colour="black")
ggplot(faithful,aes(x=waiting))+geom_bar()
ggplot(faithful,aes(x=waiting))+geom_histogram(binwidth=binsize,fill="white",colour="black",boundary=25)
##基于分组数据绘制分组直方图
library(MASS)
#使用smoke作为分面变量
ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(smoke~.)
#修改分面标签
birthwt1=birthwt
#将smoke转化为因子
birthwt1$smoke=factor(birthwt1$smoke)
levels(birthwt1$smoke)
birthwt1$smoke=revalue(birthwt1$smoke,c("0"="no smoke","1"="smoke"))
birthwt1$smoke=revalue(birthwt1$smoke,c("no smoke"="不抽烟","smoke"="抽烟"))
ggplot(birthwt1,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(smoke~.)
#使用race作为分面变量
ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(race~.)
ggplot(birthwt,aes(x=bwt))+geom_histogram(fill="white",colour="black")+facet_grid(race~.,scales="free")
#用颜色来区别分组
#将smoke转化为因子
birthwt1$smoke=factor(birthwt1$smoke)
#将smoke映射给fill,取消条形堆叠,并使图形半透明
ggplot(birthwt1,aes(x=bwt,fill=smoke))+geom_histogram(position="identity",alpha=0.4)
###绘制密度曲线
ggplot(faithful,aes(x=waiting))+geom_density()
ggplot(faithful,aes(waiting))+geom_line(stat="density")+expand_limits(y=0)#扩大y轴范围以包含0点
#将变量值保存在一个简单向量里
w=faithful$waiting
ggplot(NULL,aes(x=w))+geom_density()
ggplot(faithful,aes(waiting))+geom_line(stat="density",adjust=.25,colour="red")+
geom_line(stat="density")+geom_line(stat="density",adjust=2,colour="blue")
ggplot(faithful,aes(waiting))+geom_density(fill="blue",colour=NA,alpha=.2)+
geom_line(stat="density")+xlim(35,105)
ggplot(faithful,aes(waiting,y=..density..))+geom_histogram(fill="cornsilk",colour="grey60",size=.2)+
geom_density()+xlim(35,105)
##基于分组数据绘制分组密度曲线
library(MASS)
birthwt1=birthwt
#将smoke转化为因子
birthwt1$smoke=factor(birthwt1$smoke)
#将smoke映射给colour
ggplot(birthwt1,aes(x=bwt,colour=smoke))+geom_density()
#将smoke映射给fill
ggplot(birthwt1,aes(x=bwt,fill=smoke))+geom_density(alpha=.3)
#研究下bwt和smoke之间的关系
ggplot(birthwt1,aes(x=bwt))+geom_density()+facet_grid(smoke~.)
ggplot(birthwt1,aes(x=bwt,y=..density..))+geom_histogram(binwidth=200,fill="cornsilk",colour="grey60",size=.2)+
geom_density()+facet_grid(smoke~.)
###绘制频数多边形
ggplot(faithful,aes(x=waiting))+geom_freqpoly()
ggplot(faithful,aes(x=waiting))+geom_freqpoly(binwidth=4)
#将组数设定为15
binsize=diff(range(faithful$waiting))/15
ggplot(faithful,aes(waiting))+geom_freqpoly(binwidth=binsize)
####箱线图
qplot(ToothGrowth$supp,ToothGrowth$len,geom="boxplot")
ggplot(ToothGrowth,aes(x=supp,y=len))+geom_boxplot()
qplot(interaction(ToothGrowth$supp,ToothGrowth$dose),ToothGrowth$len,geom="boxplot")
ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot()
ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot(width=.2)
ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot(outlier.size=3,outlier.shape=21)
#添加槽口
ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot(notch=TRUE)
#添加均值
library(MASS)
ggplot(birthwt,aes(x=factor(race),y=bwt))+geom_boxplot()+
stat_summary(fun.y="mean",geom="point",shape=23,size=3,fill="white")
###绘制小提琴图
ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin()
ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin()+
geom_boxplot(width=.1,fill="black",outlier.colour = NA)+
stat_summary(fun.y=median,geom="point",fill="white",shape=21,size=2.5)
ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin(trim=FALSE)#保留小提琴的尾部
ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin(scale="count")#校准小提琴图面积,使其与频数保持一致
ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin(adjust=2)#更平滑
ggplot(heightweight,aes(x=sex,y=heightIn))+geom_violin(adjust=.5)#欠平滑
####绘制winkinson点图
countries2009=subset(countries,Year==2009&healthexp>2000)
p=ggplot(countries2009,aes(x=infmortality))
p+geom_dotplot()
p+geom_dotplot(binwidth=.25)+geom_rug()+
scale_y_continuous(breaks=NULL)+#移除刻度线
theme(axis.title.y = element_blank())#移除y坐标轴标签
p+geom_dotplot(method="histodot",binwidth=.25)+geom_rug()+
scale_y_continuous(breaks=NULL)+#移除刻度线
theme(axis.title.y = element_blank())#移除y坐标轴标签
p+geom_dotplot(binwidth=.25,stackdir = "center")+geom_rug()+
scale_y_continuous(breaks=NULL)+#移除刻度线
theme(axis.title.y = element_blank())#移除y坐标轴标签
p+geom_dotplot(binwidth=.25,stackdir = "centerwhole")+geom_rug()+
scale_y_continuous(breaks=NULL)+#移除刻度线
theme(axis.title.y = element_blank())#移除y坐标轴标签
ggplot(heightweight,aes(x=sex,y=heightIn))+
geom_dotplot(binaxis = "y",binwidth=.5,stackdir = "center")
####绘制二维数据密度图
p
p + geom_point() + stat_density2d()
p + stat_density2d(aes(colour=..level..))
#将密度估计映射给填充色
p+stat_density2d(aes(fill=..density..),geom="raster",contour = FALSE)
#带数据点,并将密度估计映射给alpha的瓦片图
p+geom_point()+stat_density2d(aes(alpha=..density..),geom="tile",contour = FALSE)
p+stat_density2d(aes(fill=..density..),geom="raster",contour = FALSE,h=c(.5,5))
####注解
#添加文本注解
p
p + annotate("text", x=3, y=48, label="Group 1") +
annotate("text", x=4.5, y=66, label="Group 2")
p + annotate("text", x=3, y=48, label="Group 1", family="serif",
fontface="italic", colour="darkred", size=3) +
annotate("text", x=4.5, y=66, label="Group 2", family="serif",
fontface="italic", colour="darkred", size=3)
p + annotate("text", x=3, y=48, label="Group 1", alpha=.1) + # Normal
geom_text(x=4.5, y=66, label="Group 2", alpha=.1)
#添加边缘线注释
p + annotate("text", x=-Inf, y=Inf, label="Upper left", hjust=-.2, vjust=2) +
annotate("text", x=mean(range(faithful$eruptions)), y=-Inf, vjust=-0.4,
label="Bottom middle")
#添加公式注解
p
p + annotate("text", x=2, y=0.3, parse=TRUE,
label="frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}")
p + annotate("text", x=0, y=0.05, parse=TRUE, size=4,
label="'Function: ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")
#添加直线
p
#添加横线和竖线
p + geom_hline(yintercept=60) + geom_vline(xintercept=14)
pg
pg + geom_vline(xintercept = 2)
pg + geom_vline(xintercept = which(levels(PlantGrowth$group)=="ctrl"))
# 添加斜线
p + geom_abline(intercept=37.4, slope=1.75)
#添加平均线
library(plyr) # For the ddply() function
hw_means
hw_means
p + geom_hline(aes(yintercept=heightIn, colour=sex), data=hw_means,
linetype="dashed", size=1)
#添加线段和箭头
p
geom_line()
p + annotate("segment", x=1950, xend=1980, y=-.25, yend=-.25)#segment添加线段
library(grid)
p + annotate("segment", x=1850, xend=1820, y=-.8, yend=-.95, colour="blue",
size=2, arrow=arrow()) +
annotate("segment", x=1950, xend=1980, y=-.25, yend=-.25,
arrow=arrow(ends="both", angle=90, length=unit(.2,"cm")))
#添加矩阵阴影
p
p + annotate("rect", xmin=1950, xmax=1980, ymin=-1, ymax=1, alpha=.1,
fill="blue")
#高亮某一元素
pg
pg$hl
pg$hl[pg$group=="trt2"]
ggplot(pg, aes(x=group, y=weight, fill=hl)) + geom_boxplot() +
scale_fill_manual(values=c("grey85", "#FFDDCC"), guide=FALSE)
ggplot(PlantGrowth, aes(x=group, y=weight, fill=group)) + geom_boxplot() +
scale_fill_manual(values=c("grey85", "grey85", "#FFDDCC"), guide=FALSE)
#添加误差线
library(gcookbook) # For the data set
# Take a subset of the cabbage_exp data for this example
ce
# 为条形图添加误差线
ggplot(ce, aes(x=Date, y=Weight)) +
geom_bar(fill="white", colour="black") +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)#报错
# 为折线图添加误差线
ggplot(ce, aes(x=Date, y=Weight)) +
geom_line(aes(group=1)) +
geom_point(size=4) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se), width=.2)
# 反例:未指定并列宽度
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(position="dodge") +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
position="dodge", width=.2)#报错
# 正例:设定并列宽度与条形的相同
ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) +
geom_bar(position="dodge") +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
position=position_dodge(0.9), width=.2)#报错
pd
ggplot(cabbage_exp, aes(x=Date, y=Weight, colour=Cultivar, group=Cultivar)) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
width=.2, size=0.25, colour="black", position=pd) +
geom_line(position=pd) +
geom_point(position=pd, size=2.5)
##像独立分面添加注释
# The base plot
p
# A data frame with labels for each facet
f_labels
p + geom_text(x=6, y=40, aes(label=label), data=f_labels)
# If you use annotate(), the label will appear in all facets
p + annotate("text", x=6, y=42, label="label text")
lm_labels
mod
formula
round(coef(mod)[1], 2), round(coef(mod)[2], 2))
r
r2
data.frame(formula=formula, r2=r2, stringsAsFactors=FALSE)
}
library(plyr) # For the ddply() function
labels
labels
p + geom_smooth(method=lm, se=FALSE) +
geom_text(x=3, y=40, aes(label=formula), data=labels, parse=TRUE, hjust=0) +
geom_text(x=3, y=35, aes(label=r2), data=labels, parse=TRUE, hjust=0)
Figure
##坐标轴
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot()
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + coord_flip()#使横纵坐标互换
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + coord_flip() +
scale_x_discrete(limits=rev(levels(PlantGrowth$group)))#反转了x轴元素顺序
#设置连续型坐标轴值域
p=ggplot(PlantGrowth,aes(x=group,y=weight))+geom_boxplot()
p
p+ylim(0,max(PlantGrowth$weight))
ylim(0, 10)
scale_y_continuous(limits=c(0, 10))
p + ylim(0, 10) + scale_y_continuous(breaks=NULL)
p + scale_y_continuous(breaks=NULL) + ylim(0, 10)
p + scale_y_continuous(limits=c(0, 10), breaks=NULL)
p + scale_y_continuous(limits = c(5, 6.5)) # Same as using ylim()
p + coord_cartesian(ylim = c(5, 6.5))
p + expand_limits(y=0)
# y轴坐标翻转
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + scale_y_reverse()
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + ylim(6.5, 3.5)
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() +
scale_y_reverse(limits=c(8, 0))
#修改类别型x坐标轴上项目的顺序
p
p + scale_x_discrete(limits=c("trt1","ctrl","trt2"))
p + scale_x_discrete(limits=c("ctrl","trt1"))
p + scale_x_discrete(limits=rev(levels(PlantGrowth$group)))
#设置x轴,y轴的缩放比例
sp
sp + coord_fixed()
sp + coord_fixed() +
sp + coord_fixed(ratio=1/2) +
##设置刻度线的位置
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot()
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() +
scale_y_continuous(breaks=c(4, 4.25, 4.5, 5, 6, 8))
#为离散坐标轴同时设定breaks和limits
ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() +
scale_x_discrete(limits=c("trt2", "ctrl"), breaks="ctrl")
#移除刻度线和标签
p
p + theme(axis.text.y = element_blank())
p + theme(axis.ticks = element_blank(), axis.text.y = element_blank())
p + scale_y_continuous(breaks=NULL)
#修改刻度标签的文本
hwp
geom_point()
hwp
hwp + scale_y_continuous(breaks=c(50, 56, 60, 66, 72),
labels=c("Tiny", "Really\nshort", "Short",
"Medium", "Tallish"))#\n用来换行
footinch_formatter
foot
inch
return(paste(foot, "'", inch, "\"", sep=""))
}
footinch_formatter(56:64)
timeHMS_formatter
h
m
s
lab
lab
lab
return(lab)
}
timeHMS_formatter(c(.33, 50, 51.25, 59.32, 60, 60.1, 130.23))
#修改刻度标签的外观
bp
scale_x_discrete(breaks=c("ctrl", "trt1", "trt2"),
labels=c("Control", "Treatment 1", "Treatment 2"))
bp
bp + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=.5))
bp + theme(axis.text.x = element_text(angle=30, hjust=1, vjust=1))#调节文字角度
bp + theme(axis.text.x = element_text(family="Times", face="italic",
colour="darkred", size=rel(0.9)))#调节大小、样式、字体
#修改坐标轴标签的文本
library(gcookbook) # For the data set
hwp
geom_point()
hwp
# 设置坐标轴标签
hwp + xlab("Age in years") + ylab("Height in inches")
hwp + labs(x = "Age in years", y = "Height in inches")
hwp + scale_x_continuous(name="Age in years")
hwp + scale_x_continuous(name="Age\n(years)")
#移除坐标轴标签
p
p + theme(axis.title.x=element_blank())
#修改坐标轴标签的外观
library(gcookbook) # For the data set
hwp
hwp + theme(axis.title.x=element_text(face="italic", colour="darkred", size=14))
hwp + ylab("Height\n(inches)") +
theme(axis.title.y=element_text(angle=90, face="italic", colour="darkred",
size=14))
#沿坐标轴显示直线
library(gcookbook) # For the data set
p
p + theme(axis.line = element_line(colour="black"))
# 对于较粗的直线,只有一半重叠
p + theme_bw() +
theme(panel.border = element_blank(),
axis.line = element_line(colour="black", size=4))
# 完全重叠
p + theme_bw() +
theme(panel.border = element_blank(),
axis.line = element_line(colour="black", size=4, lineend="square"))
#使用对数坐标轴
library(MASS) # For the data set
p
geom_text(size=3)
p
# 使用对数x,y标度
p + scale_x_log10() + scale_y_log10()
p + scale_x_log10(breaks=10^(-1:5)) + scale_y_log10(breaks=10^(0:3))
library(scales)
p + scale_x_log10(breaks=10^(-1:5),
labels=trans_format("log10", math_format(10^.x))) +
scale_y_log10(breaks=10^(0:3),
labels=trans_format("log10", math_format(10^.x)))
ggplot(Animals, aes(x=log10(body), y=log10(brain), label=rownames(Animals))) +
geom_text(size=3)
library(scales)
# Use natural log on x, and log2 on y
p + scale_x_continuous(trans = log_trans(),
breaks = trans_breaks("log", function(x) exp(x)),
labels = trans_format("log", math_format(e^.x))) +
scale_y_continuous(trans = log2_trans(),
breaks = trans_breaks("log2", function(x) 2^x),
labels = trans_format("log2", math_format(2^.x)))
library(gcookbook) # For the data set
ggplot(aapl, aes(x=date,y=adj_price)) + geom_line()
ggplot(aapl, aes(x=date,y=adj_price)) + geom_line() +
scale_y_log10(breaks=c(2,10,50,250))
#为对数坐标轴添加刻度
library(MASS) # For the data set
library(scales) # For the trans and format functions
ggplot(Animals, aes(x=body, y=brain, label=rownames(Animals))) +
geom_text(size=3) +
annotation_logticks() +
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)))
ggplot(Animals, aes(x=body, y=brain, label=rownames(Animals))) +
geom_text(size=3) +
annotation_logticks() +
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)),
minor_breaks = log10(5) + -2:5) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)),
minor_breaks = log10(5) + -1:3) +
coord_fixed() +
theme_bw()
####绘制环状图形
ggplot(wind, aes(x=DirCat, fill=SpeedCat)) +
geom_histogram(binwidth=15, boundary=-7.5) +
coord_polar() +
scale_x_continuous(limits=c(0,360))
ggplot(wind, aes(x=DirCat, fill=SpeedCat)) +
geom_histogram(binwidth=15, boundary=-7.5, colour="black", size=.25) +
guides(fill=guide_legend(reverse=TRUE)) +
coord_polar() +
scale_x_continuous(limits=c(0,360), breaks=seq(0, 360, by=45),
minor_breaks=seq(0, 360, by=15)) +
scale_fill_brewer()
# 将mdeaths的时间序列数据放入一个数据框
md
month = as.numeric(cycle(mdeaths)))
library(plyr) # For the ddply() function
md
md
p
scale_x_continuous(breaks=1:12)
p + coord_polar()
p + coord_polar() + ylim(0, max(md$deaths))
p + coord_polar() + ylim(0, max(md$deaths)) + xlim(0, 12)
# Connect the lines by adding a value for 0 that is the same as 12
mdx
mdx$month
mdnew
# Make the same plot as before, but with the new data, by using %+%
p %+% mdnew + coord_polar() + ylim(0, max(md$deaths))
#在坐标轴上使用日期
str(economics)
ggplot(economics, aes(x=date, y=psavert)) + geom_line()
# Take a subset of economics
econ = as.Date("1992-05-01") &
date
# Base plot - without specifying breaks
p
p
# Specify breaks as a Date vector
datebreaks
# Use breaks, and rotate text labels
p + scale_x_date(breaks=datebreaks) +
theme(axis.text.x = element_text(angle=30, hjust=1))
library(scales)
p + scale_x_date(breaks=datebreaks, labels=date_format("%Y %b")) +
theme(axis.text.x = element_text(angle=30, hjust=1))
# Mac and Linux
Sys.setlocale("LC_TIME", "it_IT.UTF-8")
# Windows
Sys.setlocale("LC_TIME", "italian")
####函数图像
myfun=function(xvar){
1/(1+exp(-xvar+10))
}
qplot(c(0,20),fun=myfun,stat="function",geom="line")#报错
ggplot(data.frame(x=c(0,20)),aes=(x=x))+stat_function(fun=myfun,geom="line")
####玫瑰图
####玫瑰图
install.packages("rvest")
install.packages("xml2")
install.packages("dplyr")
install.packages("showtext")
install.packages("Cairo")
library(sysfonts)
library(showtextdb)
library(rvest)
library(xml2)
library(dplyr)
library(ggplot2)
library(grid)
library(showtext)
library(Cairo)
setwd("C:\\Users\\admin\\Desktop\\晶赞")
mydata=read.table("city.txt")
names(mydata)=c("city","rate")
#计算标签的旋转角度:
mydata$angle=c(rev(9*(1:10-1)+4.5),-(9*(1:10-1)+4.5))
mydata$angle[16:20]
mydata2=arrange(mydata,city,desc(rate))
#图形可视化过程:
ggplot(mydata2,aes(x=city,y=rate))+geom_bar(stat="identity")+
#geom_hline(yintercept =c(25,50,100,200,500),linetype=2,size=.25)+
#geom_text(aes(x=id,y=value+12,label=label_ff,angle=angle),family="myfont",size=3.5,lineheight=1)+#坐标轴放大一倍占位:
#geom_text(aes(x=id,y=value+12,label=label_ff,angle=angle),size=3.5,lineheight=1)+
#scale_x_continuous(limits=c(0,40),expand=c(0,0))+#Y延伸到负值突出圆心的空白
#scale_y_continuous(limits=c(0,600))+
#scale_fill_manual(values=c("#00643E","#207A57","#3D8C6D","#59A284","#76B69B","#95CBB3"),guide=FALSE)+
coord_polar(start=0)
theme_void()
#图形输出:
setwd("E:/数据可视化/R/R语言学习笔记/数据可视化/ggplot2/优秀R语言案例")
CairoPNG(file="polar_rose.png",width=2400,height=1800)
showtext.begin()
grid.newpage()
pushViewport(viewport(layout=grid.layout(6,8)))
vplayout
print(p,vp=vplayout(1:6,1:8))
showtext.end()
dev.off()
领取专属 10元无门槛券
私享最新 技术干货