This function retrieves the OG (Orthogroup) and Gene Tree Parent Clade for a given GeneID and SpeciesID. It takes a GeneDiscoveR object, a GeneID, and a SpeciesID as input and returns a data frame containing the OG, Gene Tree Parent Clade, and GeneID for the matching GeneID and SpeciesID. If no matching GeneID and SpeciesID are found, the function returns NA for the OG and Gene Tree Parent Clade.

obtain_OG_from_gene(GeneDiscoveRobject, GeneID = NULL, SpeciesID = NULL)

Arguments

GeneDiscoveRobject

The GeneDiscoveR object containing the complete table.

GeneID

A character vector of GeneIDs.

SpeciesID

A character vector of SpeciesIDs.

Value

A data frame containing the OG, Gene Tree Parent Clade, and GeneID for the matching GeneIDs and SpeciesIDs.

Examples

N0Dir <- system.file("extdata", "Brassicaceae", package = "GeneDiscoveR")
dataTSV <- system.file("extdata", "Brassicaceae", "table_traits_selfcomp.tsv", package = "GeneDiscoveR")

# Create a GeneDiscoveR object with Brassicaceae data from one execution of OrthoFinder
# Because there is only a single OrthoFinder execution, we use the uniqueInflation parameter, and GeneDiscoveR automatically sets the active run.
GeneDiscoveRobject <- GeneDiscoveR(
    N0sDir = N0Dir,
    dataFile = dataTSV,
    uniqueInflation = 1.5,
    orthologsTool = "OrthoFinder"
)
#> -----------From OrthoFinder-----------
#> The process has been completed successfully

# Select species by phenotype. You can perform this step with different phenotypes.
GeneDiscoveRobject <- select_species_by_phenotype(
    GeneDiscoveRobject = GeneDiscoveRobject,
    columnPhenotype = "Self-compatible",
    columnID = "OrthofinderID",
    type = "0"
)
GeneDiscoveRobject <- select_species_by_phenotype(
    GeneDiscoveRobject = GeneDiscoveRobject,
    columnPhenotype = "Self-compatible",
    columnID = "OrthofinderID",
    type = "1"
)

# Identify genes by phenotype. You can perform this step with different phenotypes.
GeneDiscoveRobject <- gene_identification_by_phenotype(
    GeneDiscoveRobject = GeneDiscoveRobject,
    formula = as.formula("1 ~ 0"),
    statistic = "Fisher",
    name = "Self-incompatible",
    cores = 8
)
GeneDiscoveRobject <- select_genes_by_phenotype(GeneDiscoveRobject,
    pvalue = 0.05,
    oddsRatio = 1,
    sign = ">",
    name = "Self-incompatible"
)

# Set annotation file
# Import Arabidopsis thaliana annotation file from TAIR10
annotationFile <- system.file("extdata", "Brassicaceae", "TAIR10_functional_descriptions", package = "GeneDiscoveR")

GeneDiscoveRobject <- set_annotation_file(GeneDiscoveRobject, annotationFile = annotationFile)
indexFilteredGenes <- select_filtered_gene_index(GeneDiscoveRobject, name = "Self-incompatible", pvalue = 0.05, oddsRatio = 1, sign = ">")

# Map annotation to the filtered genes. indexFilteredGenes is the index of the filtered genes; if it is NULL, the annotation is mapped to the complete table.
GeneDiscoveRobject <- map_annotation(
    GeneDiscoveRobject = GeneDiscoveRobject,
    indexFilteredGenes = indexFilteredGenes,
    specieWithAnnotation = "Athaliana_447_Araport11.protein_primaryTranscriptOnly",
    oneColumn = FALSE
)
#> Rows: 41672 Columns: 5
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (5): X1, X2, X3, X4, X5
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the filtered genes table
FilteredGenes <- get_filtered_genes_table(GeneDiscoveRobject, name = "Self-incompatible", pvalue = 0.05, oddsRatio = 1, sign = ">")

# OG of the gene AT5G44220.1
GeneID <- c("AT5G44220.1")
OrthoFinderID <- c("Athaliana_447_Araport11.protein_primaryTranscriptOnly")
AT5G44220_OG <- obtain_OG_from_gene(GeneDiscoveRobject, GeneID, OrthoFinderID)
AT5G44220_OG # Identify the Orthologous Group (OG) of the gene
#> # A tibble: 1 × 3
#> # Rowwise: 
#>   OG        `Gene Tree Parent Clade` GeneID     
#>   <chr>     <chr>                    <chr>      
#> 1 OG0000495 n0                       AT5G44220.1