Plot specific transcripts
Hand-pick which transcripts to plot and plot them in a specific order
[1]:
import RNApysoforms as RNApy
import polars as pl
[2]:
## Input files: ENSEMBL GTF annotation, counts matrix, and sample metadata
ensembl_gtf_path = "../../tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
counts_matrix_path = "../../tests/test_data/counts_matrix_chr21_and_Y.tsv"
metadata_path = "../../tests/test_data/sample_metadata.tsv"

## Load the annotation from the GTF file
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)

## Load the counts matrix together with the sample metadata, requesting
## CPM normalization and relative abundance
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path,
    metadata_path=metadata_path,
    cpm_normalization=True,
    relative_abundance=True,
)

## Keep only the APP gene (no expression-based isoform filtering is requested);
## ordering by expression uses the "counts" column
app_annotation, app_expresison_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="APP",
    order_by_expression=True,
    order_by_expression_column="counts",
)

## Rescale introns
app_annotation = RNApy.shorten_gaps(app_annotation)
"""
Filter for the desired transcripts so that only they remain in `app_annotation`.
`make_traces()` only plots transcripts present in both the annotation and the
expression matrix when both are passed (it emits a warning when they do not match),
so only the transcripts you kept in the annotation will be plotted.
"""
## Transcripts to plot, listed in the desired order
transcript_to_keep = ["ENST00000348990", "ENST00000707133"]
app_annotation = app_annotation.filter(pl.col("transcript_id").is_in(transcript_to_keep))

## Order transcripts based on `transcript_to_keep` order.
## The sort key is a zero-padded *string* ("00", "01", ...) rather than an integer:
## `replace` may keep the string dtype of `transcript_id`, and unpadded string keys
## would sort lexicographically ("10" before "2") once more than 10 transcripts
## are listed. Equal-width, zero-padded keys sort the same way as the integers.
key_width = len(str(len(transcript_to_keep)))
sort_key_by_transcript = {
    transcript_id: f"{position:0{key_width}d}"
    for position, transcript_id in enumerate(transcript_to_keep)
}
app_annotation = (
    app_annotation
    .with_columns(
        pl.col("transcript_id")
        .cast(pl.Utf8)
        .replace(sort_key_by_transcript)
        .alias("sort_key")
    )
    ## Descending: the first transcript in `transcript_to_keep` ends up last in
    ## the annotation (presumably so it is drawn at the top of the plot; verify
    ## against `make_traces()` if the displayed order looks inverted).
    .sort("sort_key", descending=True)
    .drop("sort_key")
)
"""
Create traces for plotting. Make sure to set
`order_transcripts_by_expression_matrix` to False so
that the order of the annotation determines
the order in which the transcripts are plotted.
"""
## Build the traces for the transcript structure and the expression panels
traces = RNApy.make_traces(
    annotation=app_annotation,
    expression_matrix=app_expresison_matrix,
    ## Draw with the rescaled coordinates, but show the original ones on hover
    x_start="rescaled_start",
    x_end="rescaled_end",
    hover_start="start",
    hover_end="end",
    y='transcript_id',
    annotation_hue="transcript_biotype",
    ## One expression panel per column, colored by AD status
    expression_columns=["counts", "CPM", "relative_abundance"],
    expression_hue="AD status",
    marker_size=3,
    arrow_size=7,
    ## Order by annotation order instead of expression matrix
    order_transcripts_by_expression_matrix=False,
)

## Assemble the traces into a single figure
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=["Transcript Structure", "Counts", "CPM", "Relative Abundance"],
    width=1200,
    height=500,
    boxgap=0.1,
    boxgroupgap=0.5,
)

## Display the figure
fig.show()
C:\Users\local_bag222\OneDrive - University of Kentucky\grad_school\fall_2024\ebbert_lab\dash_apps\RNApysoforms\src\RNApysoforms\make_traces.py:293: UserWarning: 18 transcript(s) are present in the expression matrix but missing in the annotation. Missing transcripts: ENST00000346798, ENST00000354192, ENST00000357903, ENST00000358918, ENST00000359726, ENST00000415997, ENST00000439274, ENST00000440126, ENST00000448850, ENST00000462267, ENST00000463070, ENST00000464867, ENST00000466453, ENST00000474136, ENST00000491395, ENST00000548570, ENST00000707132, ENST00000707134. Only transcripts present in both will be used for making traces.
warnings.warn(
Notes:
You can click on the legend items to make figure elements appear and disappear.
The legend title will get grayed out when clicking on the first legend item. I could not find a workaround for that with the current plotly release (version 5).
The hovering for exons and CDS works best if you hover your mouse over the corners of the CDS/exon boxes.