02: Expression plot

Create an RNA isoform structure plot with an RNA isoform expression plot beside it

[7]:
import RNApysoforms as RNApy
[8]:
## Locations of the two input files: the ENSEMBL GTF annotation and the counts matrix
ensembl_gtf_path = (
    "../dash_apps/RNApysoforms/tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
)
counts_matrix_path = (
    "../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv"
)
[9]:
## Load both inputs: the ENSEMBL GTF annotation and the counts (expression) matrix
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path
)
[10]:
## Keep only the target gene ("SOD1") in both the annotation and the counts matrix.
sod1_annotation, sod1_counts_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="SOD1",
)

## Preview the first rows of the filtered counts matrix
sod1_counts_matrix.head()
[10]:
shape: (5, 4)
| transcript_id | gene_id | sample_id | counts |
| --- | --- | --- | --- |
| str | str | str | f64 |
| "ENST00000476106" | "ENSG00000142168" | "sample_1" | 0.0 |
| "ENST00000476106" | "ENSG00000142168" | "sample_4" | 0.0 |
| "ENST00000476106" | "ENSG00000142168" | "sample_7" | 0.0 |
| "ENST00000476106" | "ENSG00000142168" | "sample_2" | 0.0 |
| "ENST00000476106" | "ENSG00000142168" | "sample_6" | 0.0 |
[11]:
## Rescale introns. There is no need to run the "to_intron" function first:
## shorten_gaps() already does this by default if introns aren't already
## included in the annotation.
## NOTE: the result is assigned back to `sod1_annotation`, so re-running this
## cell on its own passes the already-rescaled annotation to shorten_gaps() again.
sod1_annotation = RNApy.shorten_gaps(sod1_annotation)

## Preview the first rows of the rescaled annotation
sod1_annotation.head()
[11]:
shape: (5, 13)
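| gene_id | gene_name | transcript_id | transcript_name | transcript_biotype | seqnames | strand | type | start | end | exon_number | rescaled_start | rescaled_end |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| str | str | str | str | str | str | str | str | i64 | i64 | i64 | i64 | i64 |
| "ENSG00000142168" | "SOD1" | "ENST00000270142" | "SOD1-201" | "protein_coding" | "21" | "+" | "exon" | 31659693 | 31659841 | 1 | 29 | 177 |
| "ENSG00000142168" | "SOD1" | "ENST00000270142" | "SOD1-201" | "protein_coding" | "21" | "+" | "CDS" | 31659770 | 31659841 | 1 | 106 | 177 |
| "ENSG00000142168" | "SOD1" | "ENST00000270142" | "SOD1-201" | "protein_coding" | "21" | "+" | "intron" | 31659841 | 31663790 | 1 | 177 | 1431 |
| "ENSG00000142168" | "SOD1" | "ENST00000270142" | "SOD1-201" | "protein_coding" | "21" | "+" | "CDS" | 31663790 | 31663886 | 2 | 1431 | 1527 |
| "ENSG00000142168" | "SOD1" | "ENST00000270142" | "SOD1-201" | "protein_coding" | "21" | "+" | "exon" | 31663790 | 31663886 | 2 | 1431 | 1527 |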
[12]:
## Create traces for plotting. The expression plots come out in the order of
## the columns passed to the `expression_columns` parameter. This is important
## if you are passing multiple expression columns, like CPM and relative abundance.
traces = RNApy.make_traces(
    annotation=sod1_annotation,
    expression_matrix=sod1_counts_matrix,
    x_start="rescaled_start",
    x_end="rescaled_end",
    y="transcript_id",
    annotation_hue="transcript_biotype",
    hover_start="start",
    hover_end="end",
    expression_columns=["counts"],
)

## Put traces into the figure. The order of `subplot_titles` is important.
## The first plot will always be "Transcript Structure" if you passed an
## annotation to make the traces. After that, the order of the expression plots
## is determined by the `expression_columns` parameter passed to `make_traces()`.
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=["Transcript Structure", "Counts"],
    width=1200,
    height=500,
)

## Show figure
fig.show()