向量——一维
表格——二维
1:矩阵matrix:只允许一种数据类型
2:数据框data.frame:每列只允许一种数据类型
列表——可装万物
1.数据框来源
(1)用代码新建
(2)由已有数据转换或处理得到
(3)读取表格文件
(4)R语言内置数据
> class(iris)
[1] "data.frame"
> class(volcano)
[1] "matrix" "array"
2.新建和读取数据框
# Build a small demo data frame: four genes, a direction label and a score.
df1 <- data.frame(
  gene = paste0("gene", seq_len(4)),
  change = c("up", "up", "down", "down"),
  score = c(5, 3, -2, -4)
)
df1
# Load a table from gene.csv; the path is relative to the working directory.
df2 <- read.csv(file = "gene.csv")
df2
3.数据框属性
# Dimensions of the data frame
dim(df1) ## dimensions of the data: number of rows, then number of columns
nrow(df1)
ncol(df1)
# Row names and column names
rownames(df1)
colnames(df1)
4.数据框取子集
# Extract one column as a vector. `$` supports tab completion: delete
# "score" and press Tab to try it.
df1$score
mean(df1$score)

## By position: [row, column]
df1[2, 2]
df1[2, ]
df1[, 2]
df1[c(1, 3), 1:2]

## By name
df1[, "gene"]
df1[, c("gene", "change")]

## Thinking in code
# How do we take the last column of a data frame?
df1[, 3]
df1[, ncol(df1)]
# How do we take every column except the last one?
df1[, -ncol(df1)]
5.数据框修改
# Change a single cell
df1[3, 3] <- 5
df1

# Replace a whole column
df1$score <- c(12, 23, 50, 2)
df1

# Add a new column
df1$p.value <- c(0.01, 0.02, 0.07, 0.05)
df1

# Replace all row names
rownames(df1) <- c("r1", "r2", "r3", "r4")

# Rename only one column (here the second one)
colnames(df1)[2] <- "CHANGE"
6.两个数据框的连接(merge函数)
# First table for the merge examples: one blood type per name.
test1 <- data.frame(
  name = c("jimmy", "nicker", "Damon", "Sophie"),
  blood_type = c("A", "B", "O", "AB")
)
test1
> test1
name blood_type
1 jimmy A
2 nicker B
3 Damon O
4 Sophie AB
# Second table for the merge examples: group and vision per name.
test2 <- data.frame(
  name = c("Damon", "jimmy", "nicker", "tony"),
  group = c("group1", "group1", "group2", "group2"),
  vision = c(4.2, 4.3, 4.9, 4.5)
)
test2
> test2
name group vision
1 Damon group1 4.2
2 jimmy group1 4.3
3 nicker group2 4.9
4 tony group2 4.5
# Third table: same people as test2, but the key column is spelled NAME.
test3 <- data.frame(
  NAME = c("Damon", "jimmy", "nicker", "tony"),
  weight = c(140, 145, 110, 138)
)
test3
> test3
NAME weight
1 Damon 140
2 jimmy 145
3 nicker 110
4 tony 138
# Join on the shared "name" column; names found in only one table are dropped.
merge(test1, test2, by = "name")
name blood_type group vision
1 Damon O group1 4.2
2 jimmy A group1 4.3
3 nicker B group2 4.9
# The key column is named differently in each table, so name it per side.
merge(test1, test3, by.x = "name", by.y = "NAME")
name blood_type weight
1 Damon O 140
2 jimmy A 145
3 nicker B 110
如何按照数据框的某一列,给整个数据框排序?
如何按照数据框的某一列,给整个数据框去除重复的行?
# A 3-row matrix holding 1..9.
m <- matrix(1:9, nrow = 3)
colnames(m) <- c("a", "b", "c") # add column names
m

# Subsetting works like a data frame: [row, column]
m[2, ]
m[, 1]
m[2, 3]
m[2:3, 1:2]

m
t(m) # transpose the matrix
as.data.frame(m) # convert to a data frame
## Draw a heatmap from the matrix `m`
# Install pheatmap only when it is missing, so re-running these notes does
# not trigger a fresh download every time.
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library(pheatmap)

# Heatmap with pheatmap's default settings.
pheatmap::pheatmap(m)

# Same heatmap with row and column clustering switched off.
# (The original line nested a second pheatmap() call inside the first, which
# passed a heatmap object as the second positional argument.)
pheatmap::pheatmap(m, cluster_rows = FALSE, cluster_cols = FALSE)
# A list can hold elements of different shapes: here two matrices.
x <- list(
  m1 = matrix(1:9, nrow = 3),
  m2 = matrix(2:9, nrow = 2)
)
x
> x
$m1
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
$m2
[,1] [,2] [,3] [,4]
[1,] 2 4 6 8
[2,] 3 5 7 9
### Ways to extract a single element from the list
x[[1]] # by position
x$m1 # by name
# A named numeric vector: one score per person.
scores <- c(100, 59, 73, 95, 45)
names(scores) <- c("jimmy", "nicker", "Damon", "Sophie", "tony")
scores

# Subset by name
scores["jimmy"]
scores[c("jimmy", "nicker")]

# Names of everyone scoring above 60
names(scores)[scores > 60]
## Subsetting by condition (logical vector)
# Rebuild the demo data frame so this section does not depend on earlier edits.
df1 <- data.frame(
  gene = paste0("gene", 1:4),
  change = rep(c("up", "down"), each = 2),
  score = c(5, 3, -2, -4)
)

# Rows whose score is greater than 0
df1[df1$score > 0, ]

# Genes whose score is greater than 0 (column 1 is `gene`)
df1[df1$score > 0, 1]
df1$gene[df1$score > 0] # same result, selecting the column by name
df1[df1$score > 0, 1] # same result, selecting the column by position

## Step by step: how the row filter works
df1$score # the score column
df1$score > 0 # logical mask: TRUE where score is positive
df1$score[df1$score > 0] # the mask applied to the vector itself
df1[df1$score > 0, ] # the mask applied to the rows of the data frame
rm(x)### remove a single object
rm(df1,df2)### remove several objects in one call
rm(list = ls()) ### remove every object that ls() lists; use with care
## Clear the console: Ctrl+L
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。