Match genes in a list-like object to a vector of gene symbols

matchGenes(list, object, ...)

# S4 method for GmtList,character
matchGenes(list, object)

# S4 method for GmtList,matrix
matchGenes(list, object)

# S4 method for GmtList,eSet
matchGenes(list, object, col = "GeneSymbol")

# S4 method for character,character
matchGenes(list, object)

# S4 method for character,matrix
matchGenes(list, object)

# S4 method for character,eSet
matchGenes(list, object)

# S4 method for character,DGEList
matchGenes(list, object, col = "GeneSymbol")

# S4 method for GmtList,DGEList
matchGenes(list, object, col = "GeneSymbol")

# S4 method for SignedGenesets,character
matchGenes(list, object)

# S4 method for SignedGenesets,matrix
matchGenes(list, object)

# S4 method for SignedGenesets,eSet
matchGenes(list, object, col = "GeneSymbol")

# S4 method for SignedGenesets,DGEList
matchGenes(list, object, col = "GeneSymbol")

Arguments

list	A GmtList, list, character or SignedGenesets object
object	Gene symbols to be matched; they can come from a vector of character strings, the row names of a matrix, a column in the fData of an `eSet` object, or a column in `genes` of a `DGEList` object.
...	additional arguments like `col`
col	Column name of `fData` in an `eSet` object, or of `genes` in a `DGEList` object, to specify where gene symbols are stored. The default value is "GeneSymbol". For an `eSet`, `col=NULL` forces matching against row names (see Examples).

Value

An IndexList object, which is essentially a list of the same length as input (length of 1 in case characters are used as input), with matching indices.

Examples

## test GmtList, character
testGenes <- sprintf("gene%d", 1:10)
testGeneSets <- GmtList(list(gs1=c("gene1", "gene2"), gs2=c("gene9", "gene10"), gs3=c("gene100")))
matchGenes(testGeneSets, testGenes)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

## test GmtList, matrix
testGenes <- sprintf("gene%d", 1:10)
testGeneSets <- GmtList(list(gs1=c("gene1", "gene2"), gs2=c("gene9", "gene10"), gs3=c("gene100")))
testGeneExprs <- matrix(rnorm(100), nrow=10, dimnames=list(testGenes, sprintf("sample%d", 1:10)))
matchGenes(testGeneSets, testGeneExprs)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

## test GmtList, eSet
testGenes <- sprintf("gene%d", 1:10)
testGeneSets <- GmtList(list(gs1=c("gene1", "gene2"), gs2=c("gene9", "gene10"), gs3=c("gene100")))
testGeneExprs <- matrix(rnorm(100), nrow=10, dimnames=list(testGenes, sprintf("sample%d", 1:10)))
testFeat <- data.frame(GeneSymbol=rownames(testGeneExprs), row.names=testGenes)
testPheno <- data.frame(SampleId=colnames(testGeneExprs), row.names=colnames(testGeneExprs))
testEset <- ExpressionSet(assayData=testGeneExprs,
    featureData=AnnotatedDataFrame(testFeat),
    phenoData=AnnotatedDataFrame(testPheno))
matchGenes(testGeneSets, testEset)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA
## force using row names
matchGenes(testGeneSets, testEset, col=NULL)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

 ## test GmtList, DGEList
 if(requireNamespace("edgeR")) {
    mat <- matrix(rnbinom(100, mu=5, size=2), ncol=10)
    rownames(mat) <- sprintf("gene%d", 1:nrow(mat))
    y <- edgeR::DGEList(counts=mat, group=rep(1:2, each=5))

    ## if genes are not set, row names of the count matrix will be used for lookup
    myGeneSet <- GmtList(list(gs1=rownames(mat)[1:2], gs2=rownames(mat)[9:10], gs3="gene100"))
    matchGenes(myGeneSet, y)

    matchGenes(c("gene1", "gene2"), y)
    ## alternatively, use 'col' parameter to specify the column in 'genes'
    y2 <- edgeR::DGEList(counts=mat,
      group=rep(1:2, each=5),
      genes=data.frame(GeneIdentifier=rownames(mat), row.names=rownames(mat)))
    matchGenes(myGeneSet, y2, col="GeneIdentifier")
 }
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

## test character, character
matchGenes(c("gene1", "gene2"), testGenes)
#> A list of 1 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   TempGeneSet (n=2): 1,2

## test character, matrix
matchGenes(c("gene1", "gene2"), testGeneExprs)
#> A list of 1 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   TempGeneSet (n=2): 1,2

## test character, eset
matchGenes(c("gene1", "gene2"), testEset)
#> A list of 1 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   TempGeneSet (n=2): 1,2