# Setup: make sure stringr is installed, then attach it.
#
# Deliberately no rm(list = ls()) here: wiping the global environment would
# destroy the caller's objects whenever this file is sourced, and nothing
# below relies on starting from an empty workspace.
#
# requireNamespace() only tests whether the package can be loaded; it does not
# attach it, so library() below is the single place where stringr is attached
# (and it fails loudly if the installation did not succeed).
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library(stringr)
# ---- stringr basics -------------------------------------------------------
# Lines starting with `##` show the console output of the preceding call.

x <- "The birch canoe slid on the smooth planks."
x
## [1] "The birch canoe slid on the smooth planks."

# str_length() counts the characters of the string, whereas length() counts
# the elements of the vector (x is a single string, hence 1).
str_length(x)
## [1] 42
length(x)
## [1] 1

# ---- Splitting ------------------------------------------------------------
# Split x on spaces: the result is a list holding one character vector.
str_split(x, " ")
## [[1]]
## [1] "The" "birch" "canoe" "slid" "on" "the" "smooth" "planks."
class(str_split(x, " "))
## [1] "list"

# Take the first (and only) list element to get a plain character vector.
x2 <- str_split(x, " ")[[1]]
x2
## [1] "The" "birch" "canoe" "slid" "on" "the" "smooth" "planks."

# Splitting several strings at once yields one list element per input string.
y <- c("jimmy 150", "nicker 140", "tony 152")
str_split(y, " ")
## [[1]]
## [1] "jimmy" "150"
##
## [[2]]
## [1] "nicker" "140"
##
## [[3]]
## [1] "tony" "152"

# simplify = TRUE returns a character matrix instead of a list; reshape it
# afterwards as needed. (TRUE is spelled out because T can be reassigned.)
str_split(y, " ", simplify = TRUE)
##      [,1]     [,2]
## [1,] "jimmy"  "150"
## [2,] "nicker" "140"
## [3,] "tony"   "152"

# ---- Substrings by position -----------------------------------------------
str_sub(x, 5, 9) # characters 5 through 9
## [1] "birch"
str_sub(x, 5, -2) # negative positions count from the end of the string
## [1] "birch canoe slid on the smooth planks"

# ---- Detecting patterns (one logical value per element of x2) --------------
str_detect(x2, "h")
## [1] TRUE TRUE FALSE FALSE FALSE TRUE TRUE FALSE
str_starts(x2, "T")
## [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
str_ends(x2, "e")
## [1] TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE

# ---- Replacing -------------------------------------------------------------
x2
## [1] "The" "birch" "canoe" "slid" "on" "the" "smooth" "planks."
# str_replace() replaces only the first match inside each string.
str_replace(x2, "o", "A")
## [1] "The" "birch" "canAe" "slid" "An" "the" "smAoth" "planks."
# str_replace_all() replaces every match.
str_replace_all(x2, "o", "A")
## [1] "The" "birch" "canAe" "slid" "An" "the" "smAAth" "planks."

# ---- Removing --------------------------------------------------------------
x
## [1] "The birch canoe slid on the smooth planks."
str_remove(x, " ") # removes only the first space
## [1] "Thebirch canoe slid on the smooth planks."
str_remove_all(x, " ") # removes every space
## [1] "Thebirchcanoeslidonthesmoothplanks."

# For more advanced string processing it is worth learning regular expressions.
# Exercise: read a table file, take its `title` column, extract every
# "Control" / "Vemurafenib" label and convert the labels to lower case.
library(rio)

# Keep the imported table under its own name instead of overwriting it with
# later intermediate results.
group_table <- import("group.csv")

# `[[` matches the column name exactly (`$` would partial-match on a
# data.frame); fail early with a clear error if the column is missing.
stopifnot("title" %in% names(group_table))
title <- group_table[["title"]]
title
## [1] "A375 cells 24h Control rep1" "A375 cells 24h Control rep2"
## [3] "A375 cells 24h Control rep3" "A375 cells 24h Vemurafenib rep1"
## [5] "A375 cells 24h Vemurafenib rep2" "A375 cells 24h Vemurafenib rep3"

# The label is always the fourth word, so split on spaces into a character
# matrix and take its fourth column.
title_parts <- str_split(title, " ", simplify = TRUE)
title_parts
##      [,1]   [,2]    [,3]  [,4]          [,5]
## [1,] "A375" "cells" "24h" "Control"     "rep1"
## [2,] "A375" "cells" "24h" "Control"     "rep2"
## [3,] "A375" "cells" "24h" "Control"     "rep3"
## [4,] "A375" "cells" "24h" "Vemurafenib" "rep1"
## [5,] "A375" "cells" "24h" "Vemurafenib" "rep2"
## [6,] "A375" "cells" "24h" "Vemurafenib" "rep3"

# Guard the column index: titles with fewer than four words would otherwise
# fail with a less helpful "subscript out of bounds" error.
stopifnot(ncol(title_parts) >= 4)
a <- title_parts[, 4]
a
## [1] "Control" "Control" "Control" "Vemurafenib" "Vemurafenib" "Vemurafenib"

b <- tolower(a) # upper case to lower case
b
## [1] "control" "control" "control" "vemurafenib" "vemurafenib" "vemurafenib"
# 也可以不用空格分开,直接按位置截取:str_sub(title, 16, -6)
# (注意:按上面打印的 title,第15位是空格,从第15位取会多出一个前导空格,所以应取第16位到倒数第6位)
# 引用自生信技能树
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。