
# Required packages
library(tidyr)
library(dplyr)
library(stringr)
library(tibble)

# The stringr package ----
# Example sentence used by the stringr demos below.
x <- "The birch canoe slid on the smooth planks."
x
str_length(x) # number of characters in the string
length(x) # number of strings in the vector

### Character-only vector
# str_split() returns a list, one element per input string.
str_split(x, " ")
class(str_split(x, " "))
# Extract the first list element to get a plain character vector of words.
x2 <- str_split(x, " ")[[1]]
x2
# Numeric + character content: after splitting, every piece is still character
y <- c("jimmy 150", "nicker 140", "tony 152")
str_split(y, " ")
# simplify = TRUE returns a character matrix instead of a list.
y2 <- str_split(y, " ", simplify = TRUE)
str(y)
y2 <- as.data.frame(y2) # convert to a data frame
y2$V2 <- as.numeric(y2$V2) # convert the column from character to numeric
# Inspect the data types; prints:
# 'data.frame': 3 obs. (rows) of 2 variables: (columns)
str(y2)

str_sub(x, 5, 9) # extract the characters at positions 5 through 9
str_detect(x2, "h") # test whether each element contains the pattern
str_starts(x2, "T")
str_ends(x2, "e")

x2
str_replace(x2, "o", "A") # replaces only the first match in each string
str_replace_all(x2, "o", "A") # replaces every match in each string

x
str_remove(x, " ")
str_remove_all(x, " ")

# Small demo data frame: two rows taken from each of three row ranges of iris.
test <- iris[c(1:2, 51:52, 101:102), ]
rownames(test) <- NULL # drop the row names; NULL means "nothing at all"
test
# arrange: sort a data frame by one column
library(dplyr) # data-frame manipulation package
# Ascending order. Quoting the column name raises no error but does not sort.
arrange(test, Sepal.Length)
arrange(test, desc(Sepal.Length)) # descending order
# distinct: de-duplicate a data frame by one column
distinct(test, Species, .keep_all = TRUE)
?dplyr
# mutate: add a new column to a data frame
mutate(test, new = Sepal.Length * Sepal.Width)

# Drop the fifth column of iris; used as heatmap input below.
x1 <- select(iris, -5)
# The same heatmap written three ways.

# 1. Step by step with intermediate variables
x2 <- as.matrix(x1)
x3 <- head(x2, 50)
pheatmap::pheatmap(x3)

# 2. As one nested call
pheatmap::pheatmap(head(as.matrix(select(iris, -5)), 50))

# 3. As a pipeline
iris %>%
  select(-5) %>%
  as.matrix() %>%
  head(50) %>%
  pheatmap::pheatmap()

library(stringr)
# if(F){
# code that downloads the data
# }

# A single-branch if: the call runs only when the condition is TRUE.
i <- -1
if (i < 0) print("up")
if (i > 0) print("up")
# Understand the code below: install tidyr only when it cannot be loaded.
if (!require(tidyr)) install.packages("tidyr")

i <- 1
# if / else on a single value
if (i > 0) {
  print("+")
} else {
  print("-")
}

# ifelse() is the vectorised form: one result per element of the condition.
i <- 1
ifelse(i > 0, "+", "-")
x <- rnorm(3)
x
ifelse(x > 0, "+", "-")

# ifelse() + str_detect(): a very powerful combination
samples <- c("tumor1", "tumor2", "tumor3", "normal1", "normal2", "normal3")
# stringr function: TRUE at the positions in samples that contain "tumor"
k1 <- str_detect(samples, "tumor")
k1
ifelse(k1, "tumor", "normal")
k2 <- str_detect(samples, "normal")
k2
ifelse(k2, "normal", "tumor")

i <- 0
# Three-way branch with else if
if (i > 0) {
  print("+")
} else if (i < 0) {
  print("-")
} else {
  print("0")
}
# The same three-way decision written with nested ifelse() calls
ifelse(i > 0, "+", ifelse(i < 0, "-", "0"))

for (i in 1:4) { # the body runs four times in total
  print(i)
}
# Draw several plots in one go: a 2 x 2 panel layout, then one plot for each
# of columns 1 to 4 of iris, coloured by column 5.
par(mfrow = c(2, 2))
for (i in seq_len(4)) {
  plot(iris[, i], col = iris[, 5])
}
# Install several packages in one go
pks <- c("tidyr", "dplyr", "stringr") # names of the three packages
for (g in pks) {
  # character.only = TRUE makes require() read the package name from the
  # string stored in g instead of treating `g` itself as the name.
  if (!require(g, character.only = TRUE)) {
    # NOTE(review): ask/update look like BiocManager::install() arguments;
    # confirm install.packages() accepts them through `...`.
    install.packages(g, ask = FALSE, update = FALSE)
  }
}
# apply(X, MARGIN, FUN, ...)
# In apply(), X is the data frame or matrix;
# MARGIN = 1 means rows, MARGIN = 2 means columns; FUN is the function to apply
test <- iris[1:6, 1:4]
apply(test, 2, mean) # the result is a named vector
apply(test, 1, sum)
# lapply(): apply the same operation to every element of a list/vector
lapply(1:4, rnorm)
# Original-content notice: this article is published on the Tencent Cloud
# Developer Community with the author's authorization; reproduction without
# permission is prohibited.
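# In case of infringement, contact cloudcommunity@tencent.com for removal.
# Original-content notice: this article is published on the Tencent Cloud Developer Community with the author's authorization; reproduction without permission is prohibited.
# In case of infringement, contact cloudcommunity@tencent.com for removal.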