example r plots
TRANSCRIPT
# Example plotting in R
# A.M. Larracuente for Bio472 Section 3
# Example scripts, data and plots are found in
# /scratch/bio472_2014/Example_scripts/R_examples/

# Read in example dataset of various covariates for Drosophila melanogaster.
# Data from: Larracuente, A.M., T.B. Sackton, A.J. Greenberg, A. Wong,
# N.D. Singh, D. Sturgill, Y. Zhang, B. Oliver, A.G. Clark. 2008.
# Evolution of protein-coding genes in Drosophila.
# Trends Genet. 24(3): 114-123.
# The first line of the file is used as the column names (header = TRUE).
cov <- read.table(
  "/scratch/bio472_2014/Example_scripts/R_examples/Larracuente_covariate_table.txt",
  header = TRUE
)

# eg 1
# Make a boxplot of Expression level (average whole body expression in
# FlyAtlas) by cellular location: one notched box per CellularLocation value.
boxplot(
  FlyAtlasWhole.New ~ CellularLocation,
  data = cov,
  notch = TRUE,
  ylab = "Expression rank"
)
# eg2
# Make a density plot of dS and dN.
# dN is drawn first and sets up the axes (x range fixed to 0-3); dS is then
# overlaid on the same axes as a dashed red line.
plot(
  density(cov$dN),
  xlim = c(0, 3),
  xlab = "substitutions per site",
  main = "dN and dS"
)
lines(density(cov$dS), col = "red", lty = 2)

# Legend entries are listed in the same order as the colours and line types
# used above (dN: black/solid, dS: red/dashed).
legend(
  "topright",
  inset = c(.01, .01),
  legend = c("dN", "dS"),
  col = c("black", "red"),
  lty = c(1, 2)
)
Extra Intra Mito none Nucl
0
1000
2000
3000
4000
5000
Expression rank
0.0 0.5 1.0 1.5 2.0 2.5 3.0
0
1
2
3
4
5
dN and dS
substitutions per site
Density
dN dS
# eg3
# Make a scatterplot of recombination rate and omega (dN/dS), fit a line and
# plot it.
plot(
  cov$Recomb.ACE,
  cov$w,
  xlab = "Recombination rate",
  ylab = "omega",
  pch = 20
)

# Correlation test between omega and recombination rate.
cor.test(cov$w, cov$Recomb.ACE)

# Linear model of omega on recombination rate; the fitted line is added to the
# existing scatterplot in grey.
z <- lm(w ~ Recomb.ACE, data = cov)
abline(z, col = "grey")
# eg4
# Make a histogram of intron number counts.
# main = "" suppresses the default plot title.
hist(
  cov$Intron.Num,
  col = "light blue",
  xlab = "Number of introns",
  ylab = "Count",
  main = ""
)
0 5 10 15
0.0
0.2
0.4
0.6
0.8
1.0
1.2
1.4
Recombination rate
omega
Number of introns
Count
0 5 10 15 20 25 30
0
1000
2000
3000
4000
# eg5
# Plot RPM according to genomic location with the sign corresponding to the
# strand (negative RPM for - strand and positive RPM for + strand).
# First read in RPM file called test_piRNA_RPM.txt and then load (first install
# if you don't have) ggplot2.
# Code modified from J. Vedanayagam.
# Also see useful plots here: http://www.ark-genomics.org/bioinformatics/virome

# The file is read without a header, so the columns get the default names
# V1, V2 and V3, which are mapped below to x, y and fill respectively.
pirna <- read.table(
  "/scratch/bio472_2014/Example_scripts/R_examples/test_piRNA_RPM.txt"
)
library(ggplot2)

# Columns are referenced by bare name inside aes(); ggplot looks them up in
# the data frame passed as the first argument.
ggplot(pirna, aes(x = V1, y = V2, fill = V3)) +
  # stat = "identity" plots the values as given; position = "identity" keeps
  # bars from being stacked.
  geom_bar(stat = "identity", position = "identity") +
  scale_y_continuous(limits = c(-15, 15)) +
  # guide = "none" hides the fill legend (replaces the deprecated guide = FALSE).
  scale_fill_manual(values = c("orange", "blue"), guide = "none") +
  xlab("Position (bp)") +
  ylab("RPM") +
  theme(panel.background = element_rect(colour = "black", fill = "white")) +
  theme(panel.grid.major = element_blank()) +
  theme(panel.grid.minor = element_blank()) +
  # Thin baseline at RPM = 0. `size` sets the line width here; ggplot2 >= 3.4.0
  # prefers the name `linewidth` for this.
  geom_hline(yintercept = 0, size = 0.15, colour = "light grey") +
  theme(axis.text = element_text(colour = "black")) +
  theme(axis.ticks = element_line(colour = "black"))
-10
0
10
0 2500 5000 7500 10000
Position (bp)
RPM
# eg6
# Make boxplots for multiple variables on one set of axes by overlaying them
# with add = TRUE. In this example, we'll grab subsets of the dataframe to plot
# separately.

# stringsAsFactors = TRUE makes TE a factor (this was the read.table default
# before R 4.0), so every Status subset below keeps the full set of TE levels
# and therefore the same number of x positions.
TEcount <- read.table(
  "/scratch/bio472_2014/Example_scripts/R_examples/TE.countsforR.txt",
  header = TRUE,
  stringsAsFactors = TRUE
)
shar <- TEcount[TEcount$Status == "Shared", ]
non <- TEcount[TEcount$Status == "Non-reference", ]
ref <- TEcount[TEcount$Status == "Reference", ]

# One x position per TE level, derived from the data rather than hard-coded.
te_pos <- seq_len(nlevels(TEcount$TE))

# The three groups are drawn side by side within each TE slot by shifting the
# box positions (-0.5, -0.3, -0.1). Only the last call draws the axes.
boxplot(
  Count ~ TE, data = shar,
  boxwex = .2, col = "red", at = te_pos - .5,
  axes = FALSE, border = "pink", outwex = .01
)
boxplot(
  Count ~ TE, data = non,
  boxwex = .2, col = "green", at = te_pos - .1,
  add = TRUE, axes = FALSE, border = "dark olivegreen2", outwex = .01
)
boxplot(
  Count ~ TE, data = ref,
  boxwex = .2, col = "blue", at = te_pos - .3,
  add = TRUE, ylab = "Counts", border = "light blue", outwex = .01, las = 2
)

# Legend colours match the fill colours used above:
# Reference = blue, Non-reference = green, Shared = red.
legend(
  "topright",
  inset = c(.01, .01),
  legend = c("Ref", "NonRef", "Shared"),
  fill = c("blue", "green", "red")
)
1360 17
1731 297
3S18 412
accord2
Bari1
blood
Burdock
copia
diver
diver2
Dm88Doc F FB flea
frogger G2
G5
G6
G7
gtwin
gypsy
gypsy4
gypsy7
gypsy8 HB
HMS
hobo
hopper
Idefix
invader1
invader2
invader3
invader4
invader6 Ivk
jockey
Juan
looper1
Max
McClintock
mdg1
mdg3
micropia
opus
pogo Q
Quasimodo
R1A1
roo
rover
Rt1b S
springer
Stalker
Stalker2
Stalker3T
Stalker4
Tabor
Tc1TE
Tirant
Tom1
Transpac
0
5
10
15
Counts
Ref NonRef Shared
# eg7
# Make stacked barplots. In this example, we'll flip the table so the rows
# become columns and vice versa (this is done with t() ).
TEbar <- read.table(
  "/scratch/bio472_2014/Example_scripts/R_examples/TE.countsforRbarplot.txt",
  header = TRUE
)

# Collect the six count columns into a matrix, one column per category.
TEcol <- cbind(
  TEbar$X2L_shared, TEbar$X2L_Ref, TEbar$X2L_Nonref,
  TEbar$X2R_shared, TEbar$X2R_Ref, TEbar$X2R_Nonref
)

# After t(), each column of the matrix is one row of TEbar, so barplot() draws
# one stacked bar per row, labelled with the first column of TEbar. The six
# categories form the stacked segments, in the same order as `col` and
# `legend.text`.
barplot(
  t(TEcol),
  names.arg = TEbar[, 1],
  col = c("red", "blue", "green", "orange", "light blue", "purple"),
  las = 2,
  cex.names = .8,
  legend.text = c(
    "2L_shared", "2L_Ref", "2L_Nonref",
    "2R_shared", "2R_Ref", "2R_Nonref"
  )
)
Tom1 S
Burdock
mdg3
looper1
Doc
gypsy4 G7
1360 297
opus
copia
G6
HB
gypsy
pogo
McClintock
accord2
Stalker2 Q
3S18
R1A1
Tc1
Transpac
1731
springer
Idefix
gypsy8
Stalker
invader1
frogger
gypsy7 rooTE FB 17
mdg1
Max G2
gtwin
invader2
micropiaIvk
invader3
rover
invader6
hopper
Quasimodo F
412
hobo
Tabor
Stalker3T
diver
Dm88
invader4
jockey G5
Stalker4
HMS
Rt1b
Tirant
flea
blood
diver2
Bari1
Juan
2R_Nonref 2R_Ref 2R_shared 2L_Nonref 2L_Ref 2L_shared
0
10
20
30
40