08: Separate CDS interactivity

Make it possible to toggle CDS regions on and off in the plot separately from the exons.

[5]:
import RNApysoforms as RNApy
import polars as pl
[6]:
## Locations of the ENSEMBL GTF annotation, the counts matrix, and the sample metadata
ensembl_gtf_path = "../dash_apps/RNApysoforms/tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
counts_matrix_path = "../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv"
metadata_path = "../dash_apps/RNApysoforms/tests/test_data/sample_metadata.tsv"

## Load the annotation from the ENSEMBL GTF
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)

## Load the counts matrix together with the sample metadata, with CPM
## normalization and relative abundance both switched on
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path,
    metadata_path=metadata_path,
    cpm_normalization=True,
    relative_abundance=True,
)


## Keep only the APP gene, order its transcripts by the "counts" column, and
## keep the top 5 expressed transcripts
app_annotation, app_counts_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="APP",
    order_by_expression=True,
    keep_top_expressed_transcripts=5,
    order_by_expression_column="counts",
)


# Rescale introns
# NOTE(review): presumably this is what provides the rescaled_start / rescaled_end
# columns passed to make_traces later on — confirm against the shorten_gaps docs.
app_annotation = RNApy.shorten_gaps(app_annotation)

"""
Add separate transcript biotype label to CDS regions and exon regions in
protein coding transcripts so that they can be isolated when interacting with the
plotly plots.
"""
# Both branches are restricted to protein_coding transcripts. Without the biotype
# guard on the CDS branch, a CDS row belonging to any other transcript biotype
# would be relabelled "protein_coding - CDS" as well.
app_annotation = app_annotation.with_columns(
    pl.when((pl.col("type") == "CDS") & (pl.col("transcript_biotype") == "protein_coding"))
    .then(pl.lit("protein_coding - CDS"))
    .when((pl.col("type") == "exon") & (pl.col("transcript_biotype") == "protein_coding"))
    .then(pl.lit("protein_coding - Exon"))
    # Every remaining row keeps its original transcript_biotype value
    .otherwise(pl.col("transcript_biotype"))
    .alias("transcript_biotype")
)



# Color assigned to each transcript_biotype label (CDS and exon labels share one color)
biotype_colors = {
    "protein_coding - Exon": "#F8766D",
    "protein_coding - CDS": "#F8766D",
    "protein_coding_CDS_not_defined": "#00BFC4",
}

# Color assigned to each value of the "AD status" column used as `expression_hue`
ad_status_colors = {
    "AD": "#7CAE00",
    "Control": "#C77CFF",
}



"""
Create traces, notice the `annotation_color_map` and the
`expression_color_map` being passed to define the color maps for the different
values contained in the `expression_hue` column from the expression matrix and
the `annotation_hue` column from the annotation. This can be applied to ensure
that your plot always has the same colors associated with the same
`annotation_hue` and `expression_hue` values.
"""
traces = RNApy.make_traces(
    annotation=app_annotation,
    expression_matrix=app_counts_matrix,
    x_start="rescaled_start",
    x_end="rescaled_end",
    y="transcript_id",
    annotation_hue="transcript_biotype",
    hover_start="start",
    hover_end="end",
    expression_columns=["counts", "CPM", "relative_abundance"],
    expression_hue="AD status",
    marker_size=3,
    arrow_size=7,
    annotation_color_map=biotype_colors,
    expression_color_map=ad_status_colors,
)

## Combine the traces into a single figure
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=["Transcript Structure", "Counts", "CPM", "Relative Abundance"],
    width=1200,
    height=500,
)

## Render the figure
fig.show()