Map gene annotation — map_annotation • GeneDiscoveR

This function maps gene annotations to the filtered genes in a GeneDiscoveR object (or to the complete table when indexFilteredGenes is NULL). The annotation file should contain the gene IDs in the first column and the annotations in the subsequent column or columns. If the annotations are stored in a single column, the function splits them into multiple columns using the sep separator. If the annotations are already stored in multiple columns, the function assigns them to the genes based on the gene IDs.

map_annotation(
  GeneDiscoveRobject = NULL,
  indexFilteredGenes = NULL,
  oneColumn = TRUE,
  sep = ";",
  specieWithAnnotation = "MpTAKv6-Marchantia_polymorpha_rudelaris"
)

Arguments

GeneDiscoveRobject: A GeneDiscoveR object.
indexFilteredGenes: The index of the filtered genes in the GeneDiscoveR object. If NULL, the annotation is mapped to the complete table.
oneColumn: Logical value indicating whether the annotation is stored in a single column that should be split into multiple columns using sep (default is TRUE).
sep: The separator to use when splitting the annotation into multiple columns (default is ";").
specieWithAnnotation: The species whose annotation is to be mapped (default is "MpTAKv6-Marchantia_polymorpha_rudelaris").

Value

The updated GeneDiscoveR object with the mapped gene annotation.

Examples

# Create a GeneDiscoveR object
N0sDir <- system.file("extdata", "N0-1dot3-6", package = "GeneDiscoveR")
overallsDir <- system.file("extdata", "Comparatives-1dot3-6", package = "GeneDiscoveR")
dataFile <- system.file("extdata", "annotatedCDSs.tsv", package = "GeneDiscoveR")
minInflation <- 1.3
maxInflation <- 6
stepInflation <- 0.1

GeneDiscoveRobject <- GeneDiscoveR(overallsDir = overallsDir, N0sDir = N0sDir, dataFile = dataFile, minInflation = minInflation, maxInflation = maxInflation, stepInflation = stepInflation)

# Set active run
GeneDiscoveRobject <- set_run_active(GeneDiscoveRobject, InflationValue = 1.8)
#> -----------From OrthoFinder-----------
#> The process has been completed successfully

# Select species by phenotype
GeneDiscoveRobject <- select_species_by_phenotype(GeneDiscoveRobject = GeneDiscoveRobject, columnPhenotype = "Oil-body-type", columnID = "OrthofinderID", type = "one_in_specialized_cell")
GeneDiscoveRobject <- select_species_by_phenotype(GeneDiscoveRobject = GeneDiscoveRobject, columnPhenotype = "Oil-body-type", columnID = "OrthofinderID", type = "many_in_all_cells")

# Gene identification by phenotype
GeneDiscoveRobject <- gene_identification_by_phenotype(GeneDiscoveRobject = GeneDiscoveRobject, formula = as.formula("one_in_specialized_cell ~ many_in_all_cells"), statistic = "Fisher", name = "PerOBtype", cores = 8)

# Select genes by phenotype
GeneDiscoveRobject <- select_genes_by_phenotype(GeneDiscoveRobject, pvalue = 0.05, oddsRatio = 1, sign = ">=", name = "PerOBtype")

# Annotation file with annotations in a one-line format separated by ";"
annotationFile <- system.file("extdata", "MpTak_v6.1_func_annotation_1line.tsv", package = "GeneDiscoveR")
# Set the annotation file
GeneDiscoveRobject <- set_annotation_file(GeneDiscoveRobject, annotationFile = annotationFile)
# Obtain the index of the filtered genes
indexFilteredGenes <- select_filtered_gene_index(GeneDiscoveRobject, name = "PerOBtype", pvalue = 0.05, oddsRatio = 1, sign = ">=")

# Map gene annotation to the filtered genes
GeneDiscoveRobject <- map_annotation(GeneDiscoveRobject = GeneDiscoveRobject, indexFilteredGenes = indexFilteredGenes, specieWithAnnotation = "MpTAKv6-Marchantia_polymorpha_rudelaris", oneColumn = TRUE, sep = ";")
#> Rows: 23399 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (2): X1, X2
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Output: A GeneDiscoveR object with the mapped gene annotation

# Map gene annotation to the complete table
GeneDiscoveRobject <- map_annotation(GeneDiscoveRobject = GeneDiscoveRobject, specieWithAnnotation = "MpTAKv6-Marchantia_polymorpha_rudelaris", oneColumn = TRUE, sep = ";")
#> Rows: 23399 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (2): X1, X2
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Output: A GeneDiscoveR object with the mapped gene annotation

# Obtain the filtered genes table with annotation
filteredTable <- get_filtered_genes_table(GeneDiscoveRobject, name = "PerOBtype", pvalue = 0.05, oddsRatio = 1, sign = ">=")
# Obtain the complete table with annotation
completeTable <- get_complete_table(GeneDiscoveRobject)
# Retrieve the filtered genes table, which now includes the mapped gene annotation
filteredGenes <- get_filtered_genes_table(GeneDiscoveRobject, name = "PerOBtype", pvalue = 0.05, oddsRatio = 1, sign = ">=")
# Output: A table with the filtered genes and their annotations