# 生信技能树-数据挖掘课程笔记
library(stringr)
# Single example string used by the scalar demos below.
str <- "Hello world !"
# Count the characters in the string.
str_length(str)
# length() counts vector elements (1 here), not characters -- do not confuse
# it with str_length().
length(str)
# Split the string on a separator.
str_split(str, " ")
# str_split() returns a list; [[1]] pulls out the character vector of pieces.
str_split(str, " ")[[1]]
# str_split() also accepts a vector holding several strings.
# Apostrophes can appear unescaped inside double-quoted string literals.
strs <- c(
  "Joey doesn't share foods !",
  "How're you doing ?",
  "My eyes ! My eyes !"
)
str_split(strs, " ")
# Extract a substring by start and end position (characters 7 to 11).
str_sub(str, 7, 11)
# Test whether each string contains a pattern.
str_detect(strs, "foods")
# Test whether each string starts with a pattern.
str_starts(strs, "Joey")
# Test whether each string ends with a pattern; "?" is a regex metacharacter,
# so it is escaped to match a literal question mark.
str_ends(strs, "\\?")
# Replace only the first match.
str_replace(str, "o", "O")
# Replace every match.
str_replace_all(str, "o", "O")
# Remove only the first match.
str_remove(str, " ")
# Remove every match.
str_remove_all(str, " ")
# 输出结果:
library(dplyr)
# Preview the first five rows of the built-in iris data set.
head(iris, 5)
# Sort all rows by Sepal.Length in ascending order.
head(arrange(iris, Sepal.Length), 5)
# Sort all rows by Sepal.Length in descending order.
head(arrange(iris, desc(Sepal.Length)), 5)
# Drop rows with a repeated Species value. distinct() does the de-duplication;
# .keep_all = TRUE keeps the other columns of the first row seen per Species.
head(distinct(iris, Species, .keep_all = TRUE), 5)
# Keep only the rows whose Sepal.Width is greater than 3.
head(filter(iris, Sepal.Width > 3), 5)
# Keep only the Sepal.Length and Sepal.Width columns.
head(select(iris, Sepal.Length, Sepal.Width), 5)
# The pipe passes the result of the previous call as the first argument of
# the next call, so the chain has to start by piping `iris` itself.
iris %>%
  filter(Sepal.Width > 3) %>%
  select(Sepal.Length, Sepal.Width) %>%
  arrange(Sepal.Length) %>%
  head(3)
# 输出结果:
# Simulate an expression matrix: 18 random normal values laid out as
# 3 genes (rows) by 6 samples (columns), rounded to two decimals.
set.seed(1)
exp <- round(matrix(rnorm(18), ncol = 6), 2)
dimnames(exp) <- list(paste0("gene", 1:3), paste0("test", 1:6))
# Shift the first three samples up by 1.
exp[, 1:3] <- exp[, 1:3] + 1
exp
# Reshape the expression matrix into long format for plotting.
library(tidyr)
library(tibble)
library(dplyr)
library(ggplot2)
# Transpose so that samples become rows and genes become columns.
data <- exp %>%
  t() %>%
  as.data.frame()
data
# Move the sample names from the row names into a regular column.
data <- data %>% rownames_to_column()
data
# Label the first three samples "control" and the last three "treat".
data[["group"]] <- rep(c("control", "treat"), each = 3)
data
# One row per (sample, gene) pair: gene names go to `gene`, values to `count`.
pdata <- pivot_longer(
  data,
  cols = starts_with("gene"),
  names_to = "gene",
  values_to = "count"
)
pdata
# Box plot of count per gene, filled by group, one free-scaled panel per gene.
p <- ggplot(pdata, aes(gene, count)) +
  geom_boxplot(aes(fill = group)) +
  theme_bw() +
  facet_wrap(~gene, scales = "free")
p
# 输出结果:
# Joining data frames on a shared key column.
library(dplyr)
data1 <- data.frame(
  name = c("Joey", "Chandler", "Ross", "Monica", "Rachel", "Phoebe"),
  sex = c("Male", "Male", "Male", "Female", "Female", "Female")
)
data2 <- data.frame(
  name = c("Joey", "Gunther", "Ross", "Chandler"),
  job = c("Actor", "Waiter", "Paleontologist", "Don't Know")
)
# Rows whose name appears in both tables.
data1 %>% inner_join(data2, by = "name")
# Every row of data2, with matching columns from data1 where available.
data1 %>% right_join(data2, by = "name")
# Every row from either table.
data1 %>% full_join(data2, by = "name")
# Rows of data1 that have a match in data2 (data1 columns only).
data1 %>% semi_join(data2, by = "name")
# Rows of data1 that have no match in data2.
data1 %>% anti_join(data2, by = "name")
# First five rows and the four numeric columns of iris.
data <- iris[1:5, 1:4]
data
# MARGIN = 1 applies the function over rows: the sum of each row.
apply(data, 1, sum)
# MARGIN = 2 applies the function over columns: the mean of each column.
apply(data, 2, mean)
# 输出结果:
# A named list of three integer vectors.
test <- list(
  x = 36:33,
  y = 32:35,
  z = 30:27
)
test
# Mean of every list element, returned as a list.
lapply(test, mean)
# Same computation, simplified by sapply(); here the result is a named
# numeric vector with one value per list element.
sapply(test, mean)
library(stringr)
i <- 1
# Conditional statement: branches are tested top to bottom and only the first
# one whose condition is TRUE runs.
if (i > 0) {
  print("+")
} else if (i == 0) {
  # Zero is the only value left for an "equal" branch; anything matching
  # `i == 1` would already have been caught by `i > 0` above.
  print("=")
} else {
  print("-")
}
# Vectorised conditional: ifelse() tests every element of the vector and
# returns "+" where the value is positive and "-" otherwise.
i <- rnorm(3)
ifelse(
  i > 0,
  "+",
  "-"
)
# Combining ifelse() with str_detect(): derive a group label for each sample
# from its name.
samples <- c(
  "tumor1", "tumor2", "tumor3",
  "normal1", "normal2", "normal3"
)
# Logical vector: TRUE where the sample name contains "tumor".
k <- str_detect(samples, "tumor")
k
ifelse(k, "tumor", "normal")
# for loop over the elements themselves.
x <- c(2, 3, 4, 5)
for (i in x) {
  print(i + 1)
}
# for loop over the indices; seq_along() yields an empty sequence for an
# empty vector, whereas 1:length(x) would yield c(1, 0).
for (i in seq_along(x)) {
  print(x[[i]] + 1)
}
# Looping over indices makes it easy to store each result in a list.
# The list is created at its final length up front instead of being grown.
result <- vector("list", length(x))
for (i in seq_along(x)) {
  result[[i]] <- x[[i]] + 1
}
result
# 输出结果:
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。