ICDS User Guide

1 Introduce

This vignette illustrates how to easily use the ICDS package. This package can identifying cancer risk subpathways which can explain the disturbed biological functions in more detail and accurately.

  • This package provides the getExpp,getMethp,getCnvpfunction to calculate p-values or corrected p-values for each gene.

  • This package provides the coverp2zscore,combinep_two,combinep_three function to Convert p-values or corrected p-values to z-scores.

  • This package provides the FindSubPath function to search for interested subpathways in each entire pathway.

  • This package provides the opt_subpath function to Optimize interested subpathways.

  • This package provides the Permutation function to to calculate statistical significance for these interested subpathways .

2 Example: Obtain p-values or corrected p-values for each gene

We can use function getExampleData to return example data and environment variables, such as the data of exp_data.

library(ICDS)
# obtain the expression profile data
exp_data<-GetExampleData("exp_data")
#view first six rows and six colmns of data
exp_data[1:6, 1:6]
##       TCGA.38.4632.11 TCGA.44.5645.11 TCGA.44.6144.11 TCGA.44.6145.11
## GALM      15.55599000      18.3477000      13.4976300      12.4355000
## HK3        9.65274100      12.4103200      12.3639500      17.5058100
## G6PC       0.06917292       0.1689286       0.1555786       0.1847281
## PGM1      24.33696000      69.3011200      68.6039000      48.9442400
## HKDC1      0.65847880       1.2588160       3.0050750       2.8253770
## GPI       73.56174000      58.7149700      62.7976900      58.1940700
##       TCGA.44.6146.11 TCGA.44.6147.11
## GALM       16.5563600       8.3897860
## HK3         6.8267430       8.2327620
## G6PC        0.1555417       0.1721295
## PGM1       32.4310600      43.4132300
## HKDC1       1.1371050       0.5703535
## GPI        53.6260600      41.9420700
#obtain the labels of the samples of the expression profile, the label vector is a vector of 0/1s,
# 0 represents the case sample and 1 represents the control sample
label1<-GetExampleData("label1")
#view first ten label
label1[1:10]
##  [1] 0 0 0 0 0 0 0 0 0 0

we used the Student’s t-test to calculate the p-value for expression level and methylation level of each gene in the tumor and normal samples.label,0 represents normal samples;1 represents cancer samples.

#calculate p-values or corrected p-values for each gene

exp.p<-getExpp(exp_data,label = label1,p.adjust = FALSE)
label2<-GetExampleData("label2")
meth_data<-GetExampleData("meth_data")
meth.p<-getMethp(meth_data,label = label2,p.adjust = FALSE)
exp.p[1:10,]
##            gene                    p
## GALM       GALM  0.00382104698965552
## HK3         HK3 1.22893635962516e-14
## G6PC       G6PC    0.309765961639907
## PGM1       PGM1 6.50659302913753e-05
## HKDC1     HKDC1 1.27479647059785e-05
## GPI         GPI 1.37454189773835e-11
## FBP1       FBP1 2.09855354282047e-12
## ALDOA     ALDOA 2.54043616919423e-14
## G6PC3     G6PC3 0.000265142879987233
## ALDH7A1 ALDH7A1  0.00626937915913354

For copy number data, we can calculate p-value through the following way:

#obtain Copy number variation data
cnv_data<-GetExampleData("cnv_data")
#obtion amplified genes
amp_gene<-GetExampleData("amp_gene")
#obtion deleted genes
del_gene<-GetExampleData(("del_gene"))
#calculate p-values or corrected p-values for each gene
cnv.p<-getCnvp(exp_data,cnv_data,amp_gene,del_gene,p.adjust=FALSE,method="fdr")
cnv.p[1:10,]
##            gene                   p
## ACSS1     ACSS1 0.00641326042511501
## ACSS2     ACSS2                   1
## ADH1A     ADH1A   0.454656092798328
## ADH1B     ADH1B   0.429492188406724
## ADH5       ADH5 0.00253178462835118
## AKR1A1   AKR1A1                   1
## ALDH1A3 ALDH1A3                   1
## ALDH1B1 ALDH1B1                   1
## ALDH2     ALDH2   0.145658828705581
## ALDH3B1 ALDH3B1                   1

3 Example: Combine P-values of different kinds of data

With the above data, we constructed an integrative gene z-score (Z) based three datasets.

  • Firstly,the gene score calculated through combined p-values of Fisher’s Inverse chi-square tests. This method computes a combined statistic S from the p-values of the difference coefficient obtained from three individual datasets .Usually,the statistic S follows a chi-square distribution with 2k degrees,we can calculate the null hypothesis p-value of the statistic S.

  • Secondly,we convert the p-value to z-score according to Gaussian distribution , which is taken as the gene z-score(Z) .

#obtain the p-values of expression profile data,methylation profile data and Copy number variation data
exp.p<-GetExampleData("exp.p")
meth.p<-GetExampleData("meth.p")
cnv.p<-GetExampleData("cnv.p")
#calculate z-scores for p-values of each kind of data
zexp<-coverp2zscore(exp.p)
zmeth<-coverp2zscore(meth.p)
zcnv<-coverp2zscore(cnv.p)
#combine two kinds of p-values,then,calculate z-score for them
zz<-combinep_two(exp.p,meth.p)
#combine three kinds of p-values,then,calculate z-score for them
zzz<-combinep_three(exp.p,meth.p,cnv.p)
zzz[1:6,]
##                       padjust          z_score                exp.p
## AACS     1.49856652283489e-12 6.97785849233781 4.27591802855694e-05
## AASDHPPT 3.59090577618182e-09 5.78661689489162 1.51758714809107e-05
## ACAA1    2.26766115435344e-06 4.58520914055579  0.00254706069215554
## ACACA    7.81825182198367e-05 3.78073660442176   0.0414461349050663
## ACACB    3.17474389450551e-24 10.0863369031681  1.0796730435591e-25
## ACAD8    8.80185318920442e-08 5.22301085773967 2.21022111058924e-05
##                        meth.p                cnv.p
## AACS     3.79351155555026e-07 0.000154005933715148
## AASDHPPT    0.138052336368975 4.95190895519194e-06
## ACAA1    4.80814465758352e-06                    1
## ACACA    1.62339209249856e-05                    1
## ACACB       0.152770581756488   0.0979253199784077
## ACAD8        0.19898879195769 7.67894501521561e-05

4 Example: Obtain subpathways

For a priori KEGG path, we perform a greedy algorithm to search for interested subpathways.We provide two statistical test methods of the subpathway, which one is whole gene-based perturbation, and the other is the local gene perturbation in a particular pathway.

#obtain z-score of each gene
zzz<-GetExampleData("zzz")
zzz[1:10,]
##                       padjust          z_score                exp.p
## AACS      1.4985665228349e-12 6.97785849233781 4.27591802855694e-05
## AASDHPPT 3.59090577618182e-09 5.78661689489162 1.51758714809107e-05
## ACAA1    2.26766115435345e-06 4.58520914055579  0.00254706069215554
## ACACA    7.81825182198368e-05 3.78073660442176   0.0414461349050663
## ACACB    3.17474389450549e-24 10.0863369031681  1.0796730435591e-25
## ACAD8    8.80185318920443e-08 5.22301085773967 2.21022111058924e-05
## ACADL    6.14820533862155e-54 15.4184802326598 2.63073053599382e-55
## ACAT1     1.1545332900424e-06 4.72430235586153 6.58517201812735e-06
## ACER1    2.96364608769913e-06  4.5289664035691                    1
## ACER3    1.63555761386473e-07 5.10711682637474  5.9116689567922e-07
##                        meth.p                cnv.p
## AACS     3.79351155555026e-07 0.000154005933715148
## AASDHPPT    0.138052336368975 4.95190895519194e-06
## ACAA1    4.80814465758352e-06                    1
## ACACA    1.62339209249856e-05                    1
## ACACB       0.152770581756488   0.0979253199784077
## ACAD8        0.19898879195769 7.67894501521561e-05
## ACADL     0.00265801159320894                    1
## ACAT1       0.287245454170909  0.00305344462196611
## ACER1    1.65137200042873e-08                    1
## ACER3     0.00112767915011995                    1
require(graphite)
zz<-GetExampleData("zzz")
#subpathdata<-FindSubPath(zz) #only show

Optimize interested subpathways.If the number of genes shared by the two pathways accounted for more than the Overlap ratio of each pathway genes,than combine two pathways.

subpathdata<-GetExampleData("subpathdata")
keysubpathways<-opt_subpath(subpathdata,zz,overlap=0.6)
head(keysubpathways)
##      SubpathwayID  pathway                       
## [1,] "path00010_1" "Glycolysis / Gluconeogenesis"
## [2,] "path00010_2" "Glycolysis / Gluconeogenesis"
## [3,] "path00010_3" "Glycolysis / Gluconeogenesis"
## [4,] "path00010_4" "Glycolysis / Gluconeogenesis"
## [5,] "path00010_5" "Glycolysis / Gluconeogenesis"
## [6,] "path00010_6" "Glycolysis / Gluconeogenesis"
##      Subgene                                                                
## [1,] "G6PC/HK3/PGM1/HKDC1/GPI/FBP1/ALDOA/G6PC2/GALM/G6PC3"                  
## [2,] "ADH5/ADH1B/ADH1A/ALDH3B2/ALDH2/AKR1A1/ALDH7A1/ALDH3B1/ALDH1B1/ALDH1A3"
## [3,] "PKLR/ENO1/LDHA/PGAM1/MINPP1/PCK1/LDHAL6A/LDHAL6B"                     
## [4,] "ACSS1/ALDH3B2/ADH1B/ADH1A/ALDH2/DLAT/ACSS2"                           
## [5,] "PFKP/ALDOA/GAPDH/FBP1/GPI/ALDOC/GAPDHS"                               
## [6,] "PGAM4/ENO1/PGAM1/MINPP1/PCK1/PGK2"                                    
##      Size SubpathwayZScore  
## [1,] "10" "20.8492758044976"
## [2,] "10" "20.9317587858665"
## [3,] "8"  "19.0899605586423"
## [4,] "7"  "19.7078248333379"
## [5,] "7"  "18.3020024323356"
## [6,] "6"  "16.7772761794305"

the perturbation test was used to calculate statistical significance for these interested subpathways.

keysubpathways<-Permutation(keysubpathways,zz,nperm1=100,method1=TRUE,nperm2=100,method2=FALSE)
head(keysubpathways)
##      SubpathwayID  pathway                       
## [1,] "path00010_1" "Glycolysis / Gluconeogenesis"
## [2,] "path00010_2" "Glycolysis / Gluconeogenesis"
## [3,] "path00010_3" "Glycolysis / Gluconeogenesis"
## [4,] "path00010_4" "Glycolysis / Gluconeogenesis"
## [5,] "path00010_5" "Glycolysis / Gluconeogenesis"
## [6,] "path00010_6" "Glycolysis / Gluconeogenesis"
##      Subgene                                                                
## [1,] "G6PC/HK3/PGM1/HKDC1/GPI/FBP1/ALDOA/G6PC2/GALM/G6PC3"                  
## [2,] "ADH5/ADH1B/ADH1A/ALDH3B2/ALDH2/AKR1A1/ALDH7A1/ALDH3B1/ALDH1B1/ALDH1A3"
## [3,] "PKLR/ENO1/LDHA/PGAM1/MINPP1/PCK1/LDHAL6A/LDHAL6B"                     
## [4,] "ACSS1/ALDH3B2/ADH1B/ADH1A/ALDH2/DLAT/ACSS2"                           
## [5,] "PFKP/ALDOA/GAPDH/FBP1/GPI/ALDOC/GAPDHS"                               
## [6,] "PGAM4/ENO1/PGAM1/MINPP1/PCK1/PGK2"                                    
##      Size SubpathwayZScore   p1     fdr1               
## [1,] "10" "20.8492758044976" "0.07" "0.442469135802469"
## [2,] "10" "20.9317587858665" "0.07" "0.442469135802469"
## [3,] "8"  "19.0899605586423" "0.03" "0.289811320754717"
## [4,] "7"  "19.7078248333379" "0.02" "0.238139534883721"
## [5,] "7"  "18.3020024323356" "0.05" "0.371014492753623"
## [6,] "6"  "16.7772761794305" "0.09" "0.490212765957447"

4 Example: plot a network graph when user input a list of gene

require(graphite)
require(org.Hs.eg.db)
subpID<-unlist(strsplit("ACSS1/ALDH3B2/ADH1B/ADH1A/ALDH2/DLAT/ACSS2","/"))
pathway.name="Glycolysis / Gluconeogenesis"
zzz<- GetExampleData("zzz")
PlotSubpathway(subpID=subpID,pathway.name=pathway.name,zz=zzz)