05: Plot specific transcripts

Hand-pick which transcripts to plot and plot them in a specific order.

[3]:
import RNApysoforms as RNApy
import polars as pl
[4]:
## Input files: ENSEMBL GTF annotation, counts matrix, and sample metadata
ensembl_gtf_path = "../dash_apps/RNApysoforms/tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
counts_matrix_path = "../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv"
metadata_path = "../dash_apps/RNApysoforms/tests/test_data/sample_metadata.tsv"

## Load the transcript annotation from the GTF file
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)

## Load the counts matrix together with the sample metadata, with the
## CPM normalization and relative abundance options switched on
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path,
    metadata_path=metadata_path,
    cpm_normalization=True,
    relative_abundance=True,
)


## Keep only the APP gene in both the annotation and the expression matrix.
## No expression-based filtering of RNA isoforms is requested here.
## NOTE: the name `app_expresison_matrix` (sic) is kept as-is because the
## `make_traces()` call further down refers to it.
app_annotation, app_expresison_matrix = RNApy.gene_filtering(
    target_gene="APP",
    annotation=annotation,
    expression_matrix=counts_matrix,
    order_by_expression=True,
    order_by_expression_column="counts",
)

## Rescale introns (the plot below uses the resulting
## `rescaled_start` / `rescaled_end` columns as x coordinates)
app_annotation = RNApy.shorten_gaps(app_annotation)


"""
Filter the annotation so that only the desired transcripts are kept in `app_annotation`.
`make_traces()` only plots transcripts present in both the annotation and the
expression matrix when both are passed (it gives a warning when some are missing from one of them),
so only the transcripts you kept in the annotation will be plotted.
"""
## Transcripts to plot, listed in the order they should be arranged
transcript_to_keep = ["ENST00000348990", "ENST00000707133"]
app_annotation = app_annotation.filter(pl.col("transcript_id").is_in(transcript_to_keep))


## Order transcripts based on `transcript_to_keep` order.
## Each transcript ID is mapped to its position in the list. `replace()` on a
## string column produces string values, so the positions are written as
## strings and then cast to integers before sorting. Sorting the string keys
## directly would be lexicographic and would misplace entries once the list
## has 10 or more transcripts ("10" sorts before "2").
transcript_rank = {
    transcript_id: str(rank) for rank, transcript_id in enumerate(transcript_to_keep)
}
app_annotation = (
    app_annotation
    .with_columns(
        pl.col("transcript_id")
        .cast(pl.Utf8)
        .replace(transcript_rank)
        .cast(pl.Int64)
        .alias("sort_key")
    )
    ## Descending sort kept from the original: rows of the last listed
    ## transcript come first in the annotation.
    ## NOTE(review): presumably this puts the first listed transcript at the
    ## top of the plot -- confirm against `make_traces()`.
    .sort("sort_key", descending=True)
    .drop("sort_key")
)


"""
Create traces for plotting, make sure to set the
`order_transcripts_by_expression_matrix` to False so
that the order of the annotation is the one that determines
the order in which the transcripts are plotted.
"""
## Build the traces from the filtered annotation and the APP expression matrix.
## Arguments are grouped by role: data, coordinates, hover info, expression, styling.
traces = RNApy.make_traces(
    annotation=app_annotation,
    expression_matrix=app_expresison_matrix,
    y="transcript_id",
    x_start="rescaled_start",
    x_end="rescaled_end",
    hover_start="start",
    hover_end="end",
    annotation_hue="transcript_biotype",
    expression_columns=["counts", "CPM", "relative_abundance"],
    expression_hue="AD status",
    marker_size=3,
    arrow_size=7,
    ## Order by annotation order instead of expression matrix
    order_transcripts_by_expression_matrix=False,
)

## Assemble the traces into a figure with one titled subplot for the
## transcript structure and one per expression column
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=["Transcript Structure", "Counts", "CPM", "Relative Abundance"],
    width=1200,
    height=500,
    boxgap=0.1,
    boxgroupgap=0.5,
)

## Display the figure
fig.show()
C:\Users\local_bag222\Desktop\dash_apps\RNApysoforms\src\RNApysoforms\make_traces.py:294: UserWarning:

18 transcript(s) are present in the expression matrix but missing in the annotation. Missing transcripts: ENST00000346798, ENST00000354192, ENST00000357903, ENST00000358918, ENST00000359726, ENST00000415997, ENST00000439274, ENST00000440126, ENST00000448850, ENST00000462267, ENST00000463070, ENST00000464867, ENST00000466453, ENST00000474136, ENST00000491395, ENST00000548570, ENST00000707132, ENST00000707134. Only transcripts present in both will be used for making traces.

You can click on the legend items to make figure elements appear and disappear.

The legend title will get grayed out when clicking on the first legend item. I could not find a workaround for that with the current plotly release (version 5).

Hover information for exons and CDS works best when you place your mouse over the edges of the CDS/exon boxes.