
Tidyverse Tutorial

GeneID Sample1 Sample2
Gene1 473 526
Gene2 7203 6405
Gene3 59487 51467
GeneID Sample Count
Gene1 Sample1 473
Gene2 Sample1 7203
Gene3 Sample1 59487
Gene1 Sample2 526
Gene2 Sample2 6405
Gene3 Sample2 51467

The pipe operator (%>%):

# magrittr provides the %>% pipe ("Ceci n'est pas une pipe").
library(magrittr)

# Pipe ten standard-normal draws into mean(); same as mean(rnorm(10)).
rnorm(10) %>% mean()

Ceci n'est pas une pipe


# Base R: read the sample sheet with strings converted to factors. This was
# the read.delim() default before R 4.0.0; it is requested explicitly here so
# that the pitfall shown below also reproduces on current versions of R.
df <- read.delim("examples/SampleInfo.txt", stringsAsFactors = TRUE)

# Pitfall: as.numeric() on a factor returns the internal level codes, not the
# values shown when the factor is printed.
2 * as.numeric(df$ReadLength)
# [1] 6 6 6 6 6 6 6 8 4 6 6 4 2 4 4 2 2 6 4 2 2 4 4 2 4 4 2...

# Base R workaround: go through character first. Entries that are not valid
# numbers become NA (with a warning).
2 * as.numeric(as.character(df$ReadLength))
# [1] 152 152 152 152 152 152 152  NA 250 152 152 250 200

# readr: read_tsv() does not convert strings to factors, so a single
# as.numeric() gives the expected values directly.
# NOTE: the object is called `tibble`, the same name as the tibble() function;
# the name is kept because the rest of the tutorial refers to it.
tibble <- read_tsv("examples/SampleInfo.txt")
2 * as.numeric(tibble$ReadLength)
# [1] 152 152 152 152 152 152 152  NA 250 152 152 250 200


# A small "wide" table: one row per gene, one count column per sample.
wide <- tribble(
  ~Gene,          ~Sample1, ~Sample2,
  "Gene1_APOE1",       473,      526,
  "Gene2_SETD1A",     7203,     6405,
  "Gene3_TCF4",      59487,    51467
)

# Wide -> long: stack every column except Gene into Sample/Value pairs.
# (gather()/spread() still work; newer tidyr offers pivot_longer() and
# pivot_wider() as their replacements.)
long <- gather(wide, Sample, Value, -Gene)

# Long -> wide: turn each distinct Sample back into its own column.
wide <- spread(long, Sample, Value)

# Split Gene into two columns. separate() splits on runs of non-alphanumeric
# characters by default, which here is the underscore.
separate(wide, Gene, c("GeneID", "Gene_name"))


# Each task is shown in base R first, then with the dplyr equivalent.

# Select columns.
head(tibble[, c("Sample", "Sex")])
select(tibble, Sample, Sex) %>% head()

# Keep rows matching a condition.
head(subset(tibble, PCW < 14))
filter(tibble, PCW < 14) %>% head()

# Sort rows by RIN.
arrange(tibble, RIN) %>% head()

# Mean PCW per sex. aggregate() names the result column after the left-hand
# side of the formula (naming the value inside FUN has no effect), so the
# output is renamed with setNames() to match the dplyr version.
head(setNames(
  aggregate(PCW ~ Sex, data = tibble, FUN = mean),
  c("Sex", "av_age")
))
tibble %>% group_by(Sex) %>% summarise(av_age = mean(PCW)) %>% head()

# Several statistics per Sex/PCW combination. In base R the two statistics
# come back as a two-column matrix stored in the RIN column.
head(aggregate(
  RIN ~ Sex + PCW,
  data = tibble,
  FUN = function(x) c(mean = mean(x), var = var(x))
))
tibble %>%
  group_by(Sex, PCW) %>%
  summarise(mean = mean(RIN), var = var(RIN)) %>%
  head()

# Number of samples per sex.
head(setNames(
  aggregate(RIN ~ Sex, data = tibble, FUN = length),
  c("Sex", "num_samples")
))
tibble %>% group_by(Sex) %>% summarise(num_samples = n()) %>% head()

# Add a derived column. The base R form modifies `tibble` in place; mutate()
# returns a modified copy and leaves `tibble` untouched.
tibble$total_length <- 2 * as.numeric(tibble$ReadLength)
tibble %>% mutate(total_length = 2 * as.numeric(ReadLength)) %>% head()

# Plot 1: bar chart of mean PCW per sex with +/- one standard error bars.
# The summary table is piped straight into ggplot().
tibble %>%
  group_by(Sex) %>%
  summarise(mean = mean(PCW), se = sd(PCW) / sqrt(n())) %>%
  ggplot(aes(x = Sex, y = mean)) +
  geom_bar(stat = "identity", fill = "royalblue4", alpha = 0.5) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    colour = "royalblue4",
    alpha = 0.5
  )

# Plot 2: the same summary drawn as a point with error bars, on top of the
# individual samples. The jitter layer takes the full `tibble` as its own
# data, while the other layers use the piped-in summary.
tibble %>%
  group_by(Sex) %>%
  summarise(mean = mean(PCW), se = sd(PCW) / sqrt(n())) %>%
  ggplot(aes(x = Sex, y = mean)) +
  geom_jitter(
    aes(x = Sex, y = PCW),
    data = tibble,
    position = position_jitter(width = 0.2),
    colour = "royalblue4",
    alpha = 0.1
  ) +
  geom_point(
    stat = "identity",
    shape = 5,
    size = 2,
    colour = "royalblue4",
    alpha = 2 / 3
  ) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    colour = "royalblue4",
    alpha = 2 / 3
  )

# Grouped filter: within each PCW value keep the sample(s) with the highest
# RIN. min_rank() gives tied values the same rank, so ties are all kept.
tibble %>%
  group_by(PCW) %>%
  filter(min_rank(desc(RIN)) == 1)

# Grouped mutate: attach each sex's mean PCW to every row, then drop the
# grouping. The chain ends at ungroup(); a trailing pipe here would feed the
# result into whatever statement comes next in the file.
tibble %>%
  group_by(Sex) %>%
  mutate(mean = mean(PCW)) %>%
  ungroup()

# The four mutating joins, matching tibble$Sample against counts$SampleID.

# inner_join: keeps only rows common to both datasets.
inner_join(tibble, counts, by = c("Sample" = "SampleID"))

# left_join: keeps all rows in the left dataset, adding NA when a row is
# missing from the right dataset.
left_join(tibble, counts, by = c("Sample" = "SampleID"))

# right_join: keeps all rows in the right dataset, adding NA when a row is
# missing from the left dataset.
right_join(tibble, counts, by = c("Sample" = "SampleID"))

# full_join: keeps all rows in both datasets, adding NA when a row is missing
# from either one.
full_join(tibble, counts, by = c("Sample" = "SampleID"))


# Read the whole peak-calling log as one string and pull out the digits that
# follow "fragment size = " (look-behind, so the label itself is not
# captured). The result is a character string; wrap it in as.numeric() if a
# number is needed. The chain ends here: a trailing pipe would feed the value
# into whatever statement comes next in the file.
frag_size <- read_file("examples/peak_calling.txt") %>%
  str_extract("(?<=fragment size = )\\d+")


# Correlate RIN with each count statistic.
#   1. Join sample info to the counts table.
#   2. Stack columns 7 to 13 into stat/value pairs. The selection is
#      positional, so it depends on the column order of the joined table.
#   3. Nest so there is one row, holding one data frame, per statistic.
#   4. Run cor.test() within each nested data frame. RIN has to be taken from
#      the nested data (.x$RIN): after nest() it is no longer a top-level
#      column.
#   5. Pull the estimate (r) and p-value out of each test result by name
#      rather than by position, then drop the list columns.
inner_join(tibble, counts, by = c("Sample" = "SampleID")) %>%
  gather(stat, value, 7:13) %>%
  group_by(stat) %>%
  nest() %>%
  mutate(
    cor = map(data, ~ cor.test(.x$RIN, .x$value)),
    r = map_dbl(cor, "estimate"),
    p = map_dbl(cor, "p.value")
  ) %>%
  ungroup() %>%
  select(-data, -cor)

