08: Separate CDS interactivity

Toggle CDS regions in and out of the plot separately from the exons by clicking their legend entries.

[5]:
import RNApysoforms as RNApy
import polars as pl
[6]:
# Input files: ENSEMBL GTF annotation, counts matrix, and sample metadata
ensembl_gtf_path = "../dash_apps/RNApysoforms/tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
counts_matrix_path = "../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv"
metadata_path = "../dash_apps/RNApysoforms/tests/test_data/sample_metadata.tsv"

# Load the annotation, then the counts matrix joined with its metadata;
# CPM normalization and relative abundance are both requested here because
# they are plotted as expression columns further down.
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path,
    metadata_path=metadata_path,
    cpm_normalization=True,
    relative_abundance=True,
)

# Keep only the APP gene and its top 5 expressed transcripts,
# ordered by the "counts" column.
app_annotation, app_counts_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="APP",
    order_by_expression=True,
    keep_top_expressed_transcripts=5,
    order_by_expression_column="counts",
)

# Rescale introns
app_annotation = RNApy.shorten_gaps(app_annotation)

# Give CDS rows and protein-coding exon rows their own transcript biotype
# labels so each can be isolated independently when interacting with the
# plotly plots. Every other row keeps its existing transcript_biotype value.
is_cds_row = pl.col("type") == "CDS"
is_protein_coding_exon = (pl.col("type") == "exon") & (
    pl.col("transcript_biotype") == "protein_coding"
)

# The CDS check comes first, so a CDS row is always labelled as CDS.
biotype_label = (
    pl.when(is_cds_row)
    .then(pl.lit("protein_coding - CDS"))
    .when(is_protein_coding_exon)
    .then(pl.lit("protein_coding - Exon"))
    .otherwise(pl.col("transcript_biotype"))
)

app_annotation = app_annotation.with_columns(
    biotype_label.alias("transcript_biotype")
)



# Color for each transcript_biotype label used as the annotation hue.
# The exon and CDS labels of protein-coding transcripts share one color.
biotype_colors = {
    "protein_coding - Exon": "#F8766D",
    "protein_coding - CDS": "#F8766D",
    "protein_coding_CDS_not_defined": "#00BFC4",
}

# Color for each value of the "AD status" column of the expression matrix,
# which is used as the expression hue.
ad_status_colors = {
    "AD": "#7CAE00",
    "Control": "#C77CFF",
}



# Create the traces. `annotation_color_map` and `expression_color_map` set
# the colors for the values found in the `annotation_hue` column of the
# annotation and the `expression_hue` column of the expression matrix.
# Passing them ensures the plot always uses the same color for the same
# `annotation_hue` and `expression_hue` values.
traces = RNApy.make_traces(
    annotation=app_annotation,
    expression_matrix=app_counts_matrix,
    x_start="rescaled_start",
    x_end="rescaled_end",
    y='transcript_id',
    annotation_hue="transcript_biotype",
    hover_start="start",
    hover_end="end",
    expression_columns=["counts", "CPM", "relative_abundance"],
    expression_hue="AD status",
    marker_size=3,
    arrow_size=7,
    annotation_color_map=biotype_colors,
    expression_color_map=ad_status_colors,
)

# Assemble the traces into one figure: a transcript structure panel
# followed by one panel per expression column.
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=[
        "Transcript Structure",
        "Counts",
        "CPM",
        "Relative Abundance",
    ],
    width=1200,
    height=500,
)

# Display the figure
fig.show()

You can click on the legend items to make figure elements appear and disappear.

Clicking the first legend item also grays out the legend title. I could not find a workaround for this in the current plotly release (version 5).

The hovering for exons and CDS works best if you hover your mouse over the edges of the CDS/exon boxes.