

# Start from a clean workspace. NOTE: this deletes every object in the global
# environment (attached packages stay attached), so run it only in a
# throwaway session.
rm(list = ls())

# Install stringr if it is missing, then attach it.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library(stringr)

x <- "The birch canoe slid on the smooth planks."
x
## [1] "The birch canoe slid on the smooth planks."

### 1. String length
# str_length() counts the characters in each string.
str_length(x)
## [1] 42
# length() counts vector elements: one quoted string is one element.
length(x)
## [1] 1

### 2. Splitting strings
# Split the sentence on single spaces.
str_split(x, " ")
## [[1]]
## [1] "The"     "birch"   "canoe"   "slid"    "on"
## [6] "the"     "smooth"  "planks."

# Without `simplify`, the result is a list.
class(str_split(x, " "))
## [1] "list"

# Extract the first list element with [[ to get a plain character vector.
x2 <- str_split(x, " ")[[1]]
x2
## [1] "The"     "birch"   "canoe"   "slid"    "on"
## [6] "the"     "smooth"  "planks."

# Splitting a vector of strings gives one list element per input string.
y <- c("jimmy 150", "nicker 140", "tony 152")
str_split(y, " ")
## [[1]]
## [1] "jimmy" "150"
##
## [[2]]
## [1] "nicker" "140"
##
## [[3]]
## [1] "tony" "152"

# simplify = TRUE collapses the pieces into a character matrix.
str_split(y, " ", simplify = TRUE)
##      [,1]     [,2]
## [1,] "jimmy"  "150"
## [2,] "nicker" "140"
## [3,] "tony"   "152"
# A matrix holds a single data type, so one column cannot be converted on its
# own; only a data frame lets you change the type of a single column.

### 3. Extracting substrings by position
# Take characters 5 through 9 of x.
str_sub(x, 5, 9)
## [1] "birch"

### 4. Detecting patterns
# Which elements contain "h"?
str_detect(x2, "h")
## [1]  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE
# Which elements start with "T"?
str_starts(x2, "T")
## [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Which elements end with "e"?
str_ends(x2, "e")
## [1]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE

### 5. Replacing text
x2
## [1] "The"     "birch"   "canoe"   "slid"    "on"
## [6] "the"     "smooth"  "planks."

# str_replace() changes only the first "o" in each element.
str_replace(x2, "o", "A")
## [1] "The"     "birch"   "canAe"   "slid"    "An"
## [6] "the"     "smAoth"  "planks."

# str_replace_all() changes every "o".
str_replace_all(x2, "o", "A")
## [1] "The"     "birch"   "canAe"   "slid"    "An"
## [6] "the"     "smAAth"  "planks."

# The pattern is a regular expression: "o|e" matches either "o" or "e".
str_replace_all(x2, "o|e", "A")
## [1] "ThA"     "birch"   "canAA"   "slid"    "An"
## [6] "thA"     "smAAth"  "planks."

### 6. Removing characters
x
## [1] "The birch canoe slid on the smooth planks."

# Remove only the first space.
str_remove(x, " ")
## [1] "Thebirch canoe slid on the smooth planks."
# Remove every space.
str_remove_all(x, " ")
## [1] "Thebirchcanoeslidonthesmoothplanks."

# Small demo data frame: rows 1-2, 51-52 and 101-102 of iris.
test <- iris[c(1:2, 51:52, 101:102), ]
# Drop the original row names; NULL means "nothing", so they reset to 1..n.
rownames(test) <- NULL
test
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1          5.1         3.5          1.4         0.2     setosa
## 2          4.9         3.0          1.4         0.2     setosa
## 3          7.0         3.2          4.7         1.4 versicolor
## 4          6.4         3.2          4.5         1.5 versicolor
## 5          6.3         3.3          6.0         2.5  virginica
## 6          5.8         2.7          5.1         1.9  virginica
# arrange(): sort the rows of a data frame by a column.
library(dplyr)

# Ascending order of Sepal.Length.
arrange(test, Sepal.Length)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1          4.9         3.0          1.4         0.2     setosa
## 2          5.1         3.5          1.4         0.2     setosa
## 3          5.8         2.7          5.1         1.9  virginica
## 4          6.3         3.3          6.0         2.5  virginica
## 5          6.4         3.2          4.5         1.5 versicolor
## 6          7.0         3.2          4.7         1.4 versicolor

# Descending order: wrap the column in desc().
arrange(test, desc(Sepal.Length))
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1          7.0         3.2          4.7         1.4 versicolor
## 2          6.4         3.2          4.5         1.5 versicolor
## 3          6.3         3.3          6.0         2.5  virginica
## 4          5.8         2.7          5.1         1.9  virginica
## 5          5.1         3.5          1.4         0.2     setosa
## 6          4.9         3.0          1.4         0.2     setosa

# distinct(): de-duplicate by a column. The first row of each Species is kept;
# .keep_all = TRUE retains the other columns as well.
distinct(test, Species, .keep_all = TRUE)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1          5.1         3.5          1.4         0.2     setosa
## 2          7.0         3.2          4.7         1.4 versicolor
## 3          6.3         3.3          6.0         2.5  virginica

# mutate(): add a new column computed from existing ones.
mutate(test, new = Sepal.Length * Sepal.Width)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species   new
## 1          5.1         3.5          1.4         0.2     setosa 17.85
## 2          4.9         3.0          1.4         0.2     setosa 14.70
## 3          7.0         3.2          4.7         1.4 versicolor 22.40
## 4          6.4         3.2          4.5         1.5 versicolor 20.48
## 5          6.3         3.3          6.0         2.5  virginica 20.79
## 6          5.8         2.7          5.1         1.9  virginica 15.66
# `test` itself is unchanged by the call above; assign the result
# (test <- mutate(...)) if the new column should be kept.
# Chaining several steps: three equivalent ways to draw the same heatmap.

# 1. Repeated assignment: every step leaves an intermediate variable behind.
x1 <- select(iris, -5)
x2 <- as.matrix(x1)
x3 <- head(x2, n = 50) # keep the first 50 rows
pheatmap::pheatmap(x3)

# 2. Nesting: no intermediates, but the call must be read inside-out.
pheatmap::pheatmap(
  head(
    as.matrix(select(iris, -5)),
    n = 50
  )
)

# 3. Piping: each step feeds the next, top to bottom.
#    (RStudio inserts %>% with Cmd+Shift+M.)
iris %>%
  select(-5) %>%
  as.matrix() %>%
  head(n = 50) %>%
  pheatmap::pheatmap()




# Reset the workspace before the control-flow examples. NOTE: this deletes
# every object in the global environment; attached packages stay attached.
rm(list = ls())

i <- -1
# A one-line `if` runs its statement only when the condition is TRUE.
if (i < 0) print("up")
## [1] "up"
# Here the condition is FALSE, so nothing is printed.
if (i > 0) print("up")
# Study the idiom below: require() returns TRUE or FALSE, so `if` can branch
# on it. When tidyr is missing it is installed and then attached, so the
# package ends up loaded on the first run as well as on later runs.
if (!require(tidyr)) {
  install.packages("tidyr")
  library(tidyr)
}
# Reading a file from a sub-folder:        read.csv("xiaolizhenbang/x.csv")
# Reading a file via the parent directory: read.csv("../dalizhenbang/x.csv")
i <- 1
# The parentheses after `if` must hold a single logical value.
if (i > 0) {
  print("+")
} else {
  print("-")
}
## [1] "+"

# ifelse(test, yes, no) is the function form of the same choice.
i <- 1
ifelse(i > 0, "+", "-")
## [1] "+"

# ifelse() is vectorised: it returns one result per element of the condition.
x <- rnorm(3)
x
## [1] -0.9283129 -0.1291495 -0.7929700   (random draw; values differ per run)
ifelse(x > 0, "+", "-")
## [1] "-" "-" "-"
# Derive a group label for each sample from its name.
samples <- c("tumor1", "tumor2", "tumor3", "normal1", "normal2", "normal3")

# Test for "tumor" and label everything else "normal" ...
k1 <- str_detect(samples, "tumor")
k1
## [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE
ifelse(k1, "tumor", "normal")
## [1] "tumor"  "tumor"  "tumor"  "normal" "normal" "normal"

# ... or test for "normal" and label everything else "tumor".
k2 <- str_detect(samples, "normal")
k2
## [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE
ifelse(k2, "normal", "tumor")
## [1] "tumor"  "tumor"  "tumor"  "normal" "normal" "normal"
# Three-way branch with `else if`.
i <- 0
if (i > 0) {
  print("+")
} else if (i == 0) {
  print("0")
} else if (i < 0) {
  print("-")
}
## [1] "0"

# The same three-way choice written as nested ifelse() calls.
ifelse(i > 0, "+", ifelse(i < 0, "-", "0"))
## [1] "0"

# A for loop runs its body once per element of the sequence.
for (i in 1:4) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4

# Batch plotting: draw the first four iris columns in a 2 x 2 grid,
# colouring the points by the fifth column (Species).
# par() returns the previous settings, which are restored afterwards so that
# later plots are not forced into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
for (i in 1:4) {
  # Label each panel with its column name; the default label would be the
  # literal expression "iris[, i]" on every panel.
  plot(iris[, i], col = iris[, 5], ylab = names(iris)[i])
}
par(old_par)

# Batch-install packages: install each package that is missing, then attach
# it, so every listed package ends up loaded whether or not it was already
# installed.
pks <- c("tidyr", "dplyr", "stringr")
for (g in pks) {
  # `ask` and `update` are BiocManager::install() arguments, not
  # install.packages() arguments, so they are not passed here.
  if (!requireNamespace(g, quietly = TRUE)) {
    install.packages(g)
  }
  library(g, character.only = TRUE)
}
# Original-content notice: this article was published in the Tencent Cloud
# Developer Community with the author's authorization and may not be
# reproduced without permission.
# If this content infringes your rights, please contact
# cloudcommunity@tencent.com to have it removed.