02: Expression plot

Create an RNA isoform structure plot with an RNA isoform expression plot beside it

[7]:

import RNApysoforms as RNApy

[8]:

## Input files for this example, given as paths relative to the working directory.
## ENSEMBL GTF annotation file:
ensembl_gtf_path = "../dash_apps/RNApysoforms/tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
## Counts matrix file (tab-separated):
counts_matrix_path = "../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv"

[9]:

## Load the ENSEMBL GTF annotation, then the counts matrix, from their file paths
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path,
)

[10]:

## Filter both the annotation and the counts matrix by gene name (here SOD1).
## gene_filtering() hands back the filtered annotation and counts matrix as a pair.
sod1_annotation, sod1_counts_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="SOD1",
)

## Preview the first rows of the filtered counts matrix
sod1_counts_matrix.head()

[10]:

shape: (5, 4)

transcript_id	gene_id	sample_id	counts
str	str	str	f64
"ENST00000476106"	"ENSG00000142168"	"sample_1"	0.0
"ENST00000476106"	"ENSG00000142168"	"sample_4"	0.0
"ENST00000476106"	"ENSG00000142168"	"sample_7"	0.0
"ENST00000476106"	"ENSG00000142168"	"sample_2"	0.0
"ENST00000476106"	"ENSG00000142168"	"sample_6"	0.0

[11]:

## Rescale introns. There is no need to run the "to_intron" function first:
## shorten_gaps() already does this by default if introns aren't already
## included in the annotation.
## NOTE(review): the result overwrites `sod1_annotation`, so re-running this cell
## passes the already-processed annotation back into shorten_gaps().
sod1_annotation = RNApy.shorten_gaps(sod1_annotation)

## Preview the first rows of the annotation after rescaling
sod1_annotation.head()

[11]:

shape: (5, 13)

gene_id	gene_name	transcript_id	transcript_name	transcript_biotype	seqnames	strand	type	start	end	exon_number	rescaled_start	rescaled_end
str	str	str	str	str	str	str	str	i64	i64	i64	i64	i64
"ENSG00000142168"	"SOD1"	"ENST00000270142"	"SOD1-201"	"protein_coding"	"21"	"+"	"exon"	31659693	31659841	1	29	177
"ENSG00000142168"	"SOD1"	"ENST00000270142"	"SOD1-201"	"protein_coding"	"21"	"+"	"CDS"	31659770	31659841	1	106	177
"ENSG00000142168"	"SOD1"	"ENST00000270142"	"SOD1-201"	"protein_coding"	"21"	"+"	"intron"	31659841	31663790	1	177	1431
"ENSG00000142168"	"SOD1"	"ENST00000270142"	"SOD1-201"	"protein_coding"	"21"	"+"	"CDS"	31663790	31663886	2	1431	1527
"ENSG00000142168"	"SOD1"	"ENST00000270142"	"SOD1-201"	"protein_coding"	"21"	"+"	"exon"	31663790	31663886	2	1431	1527

[12]:

## Create traces for plotting. The expression plots come out in the order of
## the columns passed to the `expression_columns` parameter. This is important
## if you are passing multiple expression columns like CPM and relative abundance.
traces = RNApy.make_traces(
    annotation=sod1_annotation,
    expression_matrix=sod1_counts_matrix,
    x_start="rescaled_start",
    x_end="rescaled_end",
    y="transcript_id",
    annotation_hue="transcript_biotype",
    hover_start="start",
    hover_end="end",
    expression_columns=["counts"],
)

## Put traces into the figure. The order of `subplot_titles` is important.
## The first plot will always be "Transcript Structure" if you passed an
## annotation to make the traces. After that, the order of the expression plots
## is determined by the `expression_columns` parameter passed to `make_traces()`.
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=["Transcript Structure", "Counts"],
    width=1200,
    height=500,
)

## Show figure
fig.show()