Created
October 31, 2025 16:45
-
-
Save mikelove/9a16122e866777e4e61243ab46543312 to your computer and use it in GitHub Desktop.
Exploring alternative UTR code with GenomicFeatures and plyranges
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages ----
library(GenomicRanges)
library(GenomicFeatures)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)

# Annotation database used throughout this script
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# Per-transcript GRangesList objects: exons, CDS, 5' UTRs and 3' UTRs
ebt <- exonsBy(txdb, by = "tx")
cbt <- cdsBy(txdb, by = "tx")
fubt <- fiveUTRsByTranscript(txdb)
tubt <- threeUTRsByTranscript(txdb)
# Compare lengths ----
# How many transcripts does each per-transcript list contain?
length(ebt)
length(cbt)
length(fubt)
length(tubt)

# Peek at the list names before they are replaced further down
names(cbt) |> head()
names(fubt) |> head()
names(tubt) |> head()
# Rename ----
# Replace the names of every per-transcript list with transcript names.
txps <- transcripts(txdb)
tx_name <- txps$tx_name
names(txps) <- tx_name

# Look names up by transcript ID instead of relying on position: this does
# not assume that the lists are in the same order as `txps`, nor that a
# transcript ID equals its row index in `txps`.
# NOTE(review): assumes the list names are the `tx_id` values reported by
# transcripts() (its default metadata columns) -- confirm for other TxDb's.
id_to_name <- setNames(tx_name, txps$tx_id)
names(ebt) <- unname(id_to_name[names(ebt)])
names(cbt) <- unname(id_to_name[names(cbt)])
names(fubt) <- unname(id_to_name[names(fubt)])
names(tubt) <- unname(id_to_name[names(tubt)])
# CDS vs exons ----
# Exons of exactly those transcripts that have a CDS, in the same order as cbt
cbt_exons <- ebt[names(cbt)]

# Stack the CDS and exon ranges of the first such transcript; the `origin`
# column records which input each range came from.
# plyranges is attached here, after the packages loaded at the top.
library(plyranges)
bind_ranges(
  cds = cbt[[1]],
  exons = cbt_exons[[1]],
  .id = "origin"
)
# How fiveUTRsByTranscript works ----
# Exons and CDS for the transcripts that have a 5' UTR, in the order of fubt
fubt_exons <- ebt[names(fubt)]
fubt_cds <- cbt[names(fubt)]

# Stack 5' UTR, CDS and exon ranges of the first transcript, label the rows
# with the transcript name, and keep only the columns needed for comparison
first_utr_tx <- names(fubt)[1]
stacked <- bind_ranges(
  fivep_utr = fubt[[1]],
  cds = fubt_cds[[1]],
  exons = fubt_exons[[1]],
  .id = "origin"
)
stacked |>
  mutate(tx_id = first_utr_tx) |>
  select(exon_rank, origin, tx_id)
# Manual UTRs with plyranges ----
# we could do this manually with plyranges if we wanted to:
# exonic ranges minus CDS ranges leaves the UTR ranges (5' and 3' together).
# The directed variant is used so the result keeps the transcript's strand
# and can be compared directly with the stranded fubt / tubt output below
# (plain setdiff_ranges() ignores strand).
example_tx <- "ENST00000641515.2"
ebt[[example_tx]] |>
  setdiff_ranges_directed(cbt[[example_tx]])

# checking work
fubt[[example_tx]] |> select(exon_rank)
tubt[[example_tx]] |> select(exon_rank)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment