{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 10: Dealing with non-ENSEMBL GTF files\n", "\n", "Here is a quick overview on how to deal with non-ENSEMBL GTF files. By no means a comprehensive guide, just some suggestions. \n", "\n", "You don't have to do your data wrangling in Polars. You could always do it using Pandas and then convert it to a Polars dataframe at the end by running: `polars_df = pandas_df.from_pandas()`" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import RNApysoforms as RNApy\n", "import polars as pl" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "## Path to your non-ENSEMBL GTF file\n", "alternative_gtf_path = \"../dash_apps/RNApysoforms/tests/test_data/alternative_gtf_format_chr_21_and_Y.gtf\"\n", "\n", "\n", "\n", "# Define the column names for the GTF file\n", "column_names = [\n", " \"seqnames\", # Chromosome or sequence name\n", " \"source\", # Annotation source\n", " \"type\", # Feature type (e.g., exon, CDS)\n", " \"start\", # Start position of the feature\n", " \"end\", # End position of the feature\n", " \"score\", # Score value (usually '.')\n", " \"strand\", # Strand information ('+' or '-')\n", " \"phase\", # Reading frame phase\n", " \"attributes\" # Additional attributes in key-value pairs\n", "]\n", "\n", "# Definte types for GTF columns\n", "dtypes = {\n", " \"seqnames\": pl.Utf8,\n", " \"source\": pl.Utf8,\n", " \"type\": pl.Utf8,\n", " \"start\": pl.Int64,\n", " \"end\": pl.Int64,\n", " \"score\": pl.Utf8,\n", " \"strand\": pl.Utf8,\n", " \"phase\": pl.Utf8,\n", " \"attributes\": pl.Utf8\n", "}\n", "\n", "# Read the GTF file using Polars\n", "alt_gtf_df = pl.read_csv(\n", " alternative_gtf_path,\n", " separator=\"\\t\",\n", " has_header=False,\n", " comment_prefix=\"#\", # Skip comment lines starting with '#'\n", " 
new_columns=column_names, # Assign column names since GTF files have no header\n", " schema_overrides=dtypes # Specify data types for each column\n", ")\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 9)
seqnamessourcetypestartendscorestrandphaseattributes
strstrstri64i64strstrstrstr
"21""Bambu""transcript"50117995017145".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081";"
"21""Bambu""exon"50117995011874".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "1";"
"21""Bambu""exon"50125485012687".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "2";"
"21""Bambu""exon"50143865014471".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "3";"
"21""Bambu""exon"50169355017145".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "4";"
" ], "text/plain": [ "shape: (5, 9)\n", "┌──────────┬────────┬────────────┬─────────┬───┬───────┬────────┬───────┬──────────────────────────────────────────────────────────────────────────────┐\n", "│ seqnames ┆ source ┆ type ┆ start ┆ … ┆ score ┆ strand ┆ phase ┆ attributes │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ str ┆ i64 ┆ ┆ str ┆ str ┆ str ┆ str │\n", "╞══════════╪════════╪════════════╪═════════╪═══╪═══════╪════════╪═══════╪══════════════════════════════════════════════════════════════════════════════╡\n", "│ 21 ┆ Bambu ┆ transcript ┆ 5011799 ┆ … ┆ . ┆ + ┆ . ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5011799 ┆ … ┆ . ┆ + ┆ . ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"1\"; │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5012548 ┆ … ┆ . ┆ + ┆ . ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"2\"; │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5014386 ┆ … ┆ . ┆ + ┆ . ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"3\"; │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5016935 ┆ … ┆ . ┆ + ┆ . ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"4\"; │\n", "└──────────┴────────┴────────────┴─────────┴───┴───────┴────────┴───────┴──────────────────────────────────────────────────────────────────────────────┘" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Make sure polars print the full column contents\n", "pl.Config(fmt_str_lengths=1000, tbl_width_chars=1000)\n", "\n", "\n", "## Visualize GTF file format\n", "alt_gtf_df.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2,)
type
str
"transcript"
"exon"
" ], "text/plain": [ "shape: (2,)\n", "Series: 'type' [str]\n", "[\n", "\t\"transcript\"\n", "\t\"exon\"\n", "]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## See all possible values for \"type column\"\n", "alt_gtf_df[\"type\"].unique()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 12)
seqnamessourcetypestartendscorestrandphaseattributesgene_idtranscript_idexon_number
strstrstri64i64strstrstrstrstrstrstr
"21""Bambu""exon"50117995011874".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "1";""ENSG00000279493""ENST00000624081""1"
"21""Bambu""exon"50125485012687".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "2";""ENSG00000279493""ENST00000624081""2"
"21""Bambu""exon"50143865014471".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "3";""ENSG00000279493""ENST00000624081""3"
"21""Bambu""exon"50169355017145".""+"".""gene_id "ENSG00000279493"; transcript_id "ENST00000624081"; exon_number "4";""ENSG00000279493""ENST00000624081""4"
"21""Bambu""exon"50225315022693".""+"".""gene_id "ENSG00000277117"; transcript_id "ENST00000623960"; exon_number "1";""ENSG00000277117""ENST00000623960""1"
" ], "text/plain": [ "shape: (5, 12)\n", "┌──────────┬────────┬──────┬─────────┬───┬──────────────────────────────────────────────────────────────────────────────┬─────────────────┬─────────────────┬─────────────┐\n", "│ seqnames ┆ source ┆ type ┆ start ┆ … ┆ attributes ┆ gene_id ┆ transcript_id ┆ exon_number │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ str ┆ i64 ┆ ┆ str ┆ str ┆ str ┆ str │\n", "╞══════════╪════════╪══════╪═════════╪═══╪══════════════════════════════════════════════════════════════════════════════╪═════════════════╪═════════════════╪═════════════╡\n", "│ 21 ┆ Bambu ┆ exon ┆ 5011799 ┆ … ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"1\"; ┆ ENSG00000279493 ┆ ENST00000624081 ┆ 1 │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5012548 ┆ … ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"2\"; ┆ ENSG00000279493 ┆ ENST00000624081 ┆ 2 │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5014386 ┆ … ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"3\"; ┆ ENSG00000279493 ┆ ENST00000624081 ┆ 3 │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5016935 ┆ … ┆ gene_id \"ENSG00000279493\"; transcript_id \"ENST00000624081\"; exon_number \"4\"; ┆ ENSG00000279493 ┆ ENST00000624081 ┆ 4 │\n", "│ 21 ┆ Bambu ┆ exon ┆ 5022531 ┆ … ┆ gene_id \"ENSG00000277117\"; transcript_id \"ENST00000623960\"; exon_number \"1\"; ┆ ENSG00000277117 ┆ ENST00000623960 ┆ 1 │\n", "└──────────┴────────┴──────┴─────────┴───┴──────────────────────────────────────────────────────────────────────────────┴─────────────────┴─────────────────┴─────────────┘" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Filter out to only keep exons\n", "alt_gtf_df = alt_gtf_df.filter(pl.col(\"type\") == \"exon\")\n", "\n", "## Extract the \"gene_id\", \"transcript_id\", and \"exon_number\" from the attributes column and assign them to columns\n", "alt_gtf_df = 
alt_gtf_df.with_columns([pl.col(\"attributes\").str.extract(r'gene_id \"([^\"]+)\"', 1).alias(\"gene_id\"),\n", " pl.col(\"attributes\").str.extract(r'transcript_id \"([^\"]+)\"', 1).alias(\"transcript_id\"),\n", " pl.col(\"attributes\").str.extract(r'exon_number \"([^\"]+)\"', 1).alias(\"exon_number\")])\n", "\n", "## Visualize data\n", "alt_gtf_df.head()\n", "\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 8)
gene_idtranscript_idseqnamesstrandtypestartendexon_number
strstrstrstrstri64i64i64
"ENSG00000279493""ENST00000624081""21""+""exon"501179950118741
"ENSG00000279493""ENST00000624081""21""+""exon"501254850126872
"ENSG00000279493""ENST00000624081""21""+""exon"501438650144713
"ENSG00000279493""ENST00000624081""21""+""exon"501693550171454
"ENSG00000277117""ENST00000623960""21""+""exon"502253150226931
" ], "text/plain": [ "shape: (5, 8)\n", "┌─────────────────┬─────────────────┬──────────┬────────┬──────┬─────────┬─────────┬─────────────┐\n", "│ gene_id ┆ transcript_id ┆ seqnames ┆ strand ┆ type ┆ start ┆ end ┆ exon_number │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 │\n", "╞═════════════════╪═════════════════╪══════════╪════════╪══════╪═════════╪═════════╪═════════════╡\n", "│ ENSG00000279493 ┆ ENST00000624081 ┆ 21 ┆ + ┆ exon ┆ 5011799 ┆ 5011874 ┆ 1 │\n", "│ ENSG00000279493 ┆ ENST00000624081 ┆ 21 ┆ + ┆ exon ┆ 5012548 ┆ 5012687 ┆ 2 │\n", "│ ENSG00000279493 ┆ ENST00000624081 ┆ 21 ┆ + ┆ exon ┆ 5014386 ┆ 5014471 ┆ 3 │\n", "│ ENSG00000279493 ┆ ENST00000624081 ┆ 21 ┆ + ┆ exon ┆ 5016935 ┆ 5017145 ┆ 4 │\n", "│ ENSG00000277117 ┆ ENST00000623960 ┆ 21 ┆ + ┆ exon ┆ 5022531 ┆ 5022693 ┆ 1 │\n", "└─────────────────┴─────────────────┴──────────┴────────┴──────┴─────────┴─────────┴─────────────┘" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select the relevant columns and reorder them to make it prettier to look at (this step is optional)\n", "alt_gtf_df = alt_gtf_df.select([\n", " \"gene_id\",\n", " \"transcript_id\",\n", " \"seqnames\",\n", " \"strand\",\n", " \"type\",\n", " \"start\",\n", " \"end\",\n", " \"exon_number\"])\n", "\n", "# Cast 'exon_number' to Int64, handling possible nulls without strict type enforcement (this is mandatory!)\n", "alt_gtf_df = alt_gtf_df.with_columns([pl.col(\"exon_number\").cast(pl.Int64, strict=False)])\n", "\n", "\"\"\"\n", "Alternatively if your GTF annotation did not provide exon number you could calculate it by running:\n", " \n", "`alt_gtf_df = RNApy.calculate_exon_number(alt_gtf_df)`\n", "\n", "See RNApysoforms function documentation for more details about the `calculate_exon_number()` function\n", "\"\"\"\n", "\n", "## Visualize dataframe\n", "alt_gtf_df.head()" ] }, { "cell_type": "code", "execution_count": 14, 
"metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000476106
Feature Type: exon
Feature Number: 1
Chromosome: 21
Start: 31659693
End: 31659841
Size: 148
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "Transcript Structure Hue" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": true, "type": "scatter", "x": [ 29, 177, 177, 29, 29 ], "xaxis": "x", "y": [ -0.15, -0.15, 0.15, 0.15, -0.15 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000476106
Feature Type: exon
Feature Number: 2
Chromosome: 21
Start: 31661549
End: 31661734
Size: 185
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1145, 1330, 1330, 1145, 1145 ], "xaxis": "x", "y": [ -0.15, -0.15, 0.15, 0.15, -0.15 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000476106
Feature Type: exon
Feature Number: 3
Chromosome: 21
Start: 31663790
End: 31663886
Size: 96
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1431, 1527, 1527, 1431, 1431 ], "xaxis": "x", "y": [ -0.15, -0.15, 0.15, 0.15, -0.15 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000476106
Feature Type: exon
Feature Number: 4
Chromosome: 21
Start: 31666449
End: 31666518
Size: 69
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1628, 1697, 1697, 1628, 1628 ], "xaxis": "x", "y": [ -0.15, -0.15, 0.15, 0.15, -0.15 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000476106
Feature Type: exon
Feature Number: 5
Chromosome: 21
Start: 31667258
End: 31667341
Size: 83
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1798, 1881, 1881, 1798, 1798 ], "xaxis": "x", "y": [ -0.15, -0.15, 0.15, 0.15, -0.15 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000470944
Feature Type: exon
Feature Number: 1
Chromosome: 21
Start: 31659709
End: 31660708
Size: 999
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 45, 1044, 1044, 45, 45 ], "xaxis": "x", "y": [ 0.85, 0.85, 1.15, 1.15, 0.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000470944
Feature Type: exon
Feature Number: 2
Chromosome: 21
Start: 31663790
End: 31663886
Size: 96
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1431, 1527, 1527, 1431, 1431 ], "xaxis": "x", "y": [ 0.85, 0.85, 1.15, 1.15, 0.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000470944
Feature Type: exon
Feature Number: 3
Chromosome: 21
Start: 31666449
End: 31666518
Size: 69
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1628, 1697, 1697, 1628, 1628 ], "xaxis": "x", "y": [ 0.85, 0.85, 1.15, 1.15, 0.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000470944
Feature Type: exon
Feature Number: 4
Chromosome: 21
Start: 31667258
End: 31667375
Size: 117
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1798, 1915, 1915, 1798, 1798 ], "xaxis": "x", "y": [ 0.85, 0.85, 1.15, 1.15, 0.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000470944
Feature Type: exon
Feature Number: 5
Chromosome: 21
Start: 31668471
End: 31668931
Size: 460
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 2016, 2476, 2476, 2016, 2016 ], "xaxis": "x", "y": [ 0.85, 0.85, 1.15, 1.15, 0.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000389995
Feature Type: exon
Feature Number: 1
Chromosome: 21
Start: 31659666
End: 31659784
Size: 118
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 2, 120, 120, 2, 2 ], "xaxis": "x", "y": [ 1.85, 1.85, 2.15, 2.15, 1.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000389995
Feature Type: exon
Feature Number: 2
Chromosome: 21
Start: 31663790
End: 31663886
Size: 96
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1431, 1527, 1527, 1431, 1431 ], "xaxis": "x", "y": [ 1.85, 1.85, 2.15, 2.15, 1.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000389995
Feature Type: exon
Feature Number: 3
Chromosome: 21
Start: 31666449
End: 31666518
Size: 69
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1628, 1697, 1697, 1628, 1628 ], "xaxis": "x", "y": [ 1.85, 1.85, 2.15, 2.15, 1.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000389995
Feature Type: exon
Feature Number: 4
Chromosome: 21
Start: 31667258
End: 31667375
Size: 117
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1798, 1915, 1915, 1798, 1798 ], "xaxis": "x", "y": [ 1.85, 1.85, 2.15, 2.15, 1.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000389995
Feature Type: exon
Feature Number: 5
Chromosome: 21
Start: 31668471
End: 31668931
Size: 460
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 2016, 2476, 2476, 2016, 2016 ], "xaxis": "x", "y": [ 1.85, 1.85, 2.15, 2.15, 1.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000270142
Feature Type: exon
Feature Number: 1
Chromosome: 21
Start: 31659693
End: 31659841
Size: 148
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 29, 177, 177, 29, 29 ], "xaxis": "x", "y": [ 2.85, 2.85, 3.15, 3.15, 2.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000270142
Feature Type: exon
Feature Number: 2
Chromosome: 21
Start: 31663790
End: 31663886
Size: 96
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1431, 1527, 1527, 1431, 1431 ], "xaxis": "x", "y": [ 2.85, 2.85, 3.15, 3.15, 2.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000270142
Feature Type: exon
Feature Number: 3
Chromosome: 21
Start: 31666449
End: 31666518
Size: 69
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1628, 1697, 1697, 1628, 1628 ], "xaxis": "x", "y": [ 2.85, 2.85, 3.15, 3.15, 2.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000270142
Feature Type: exon
Feature Number: 4
Chromosome: 21
Start: 31667258
End: 31667375
Size: 117
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1798, 1915, 1915, 1798, 1798 ], "xaxis": "x", "y": [ 2.85, 2.85, 3.15, 3.15, 2.85 ], "yaxis": "y" }, { "fill": "toself", "fillcolor": "red", "hoverlabel": { "namelength": -1 }, "hoveron": "fills+points", "hovertemplate": "transcript_id: ENST00000270142
Feature Type: exon
Feature Number: 5
Chromosome: 21
Start: 31668471
End: 31668931
Size: 460
", "legendgroup": "Exon and/or CDS", "legendgrouptitle": { "text": "" }, "line": { "color": "black", "width": 0.25 }, "marker": { "opacity": 0 }, "mode": "lines+markers", "name": "Exon and/or CDS", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 2016, 2476, 2476, 2016, 2016 ], "xaxis": "x", "y": [ 2.85, 2.85, 3.15, 3.15, 2.85 ], "yaxis": "y" }, { "hoverinfo": "skip", "marker": { "color": "black", "size": 10, "symbol": "arrow-right" }, "mode": "markers", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 799.2857142857142 ], "xaxis": "x", "y": [ 0 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000476106
Feature Type: intron
Feature Number: 1
Chromosome: 21
Start: 31659841
End: 31661549
Size: 1708
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 177, 1145 ], "xaxis": "x", "y": [ 0, 0 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000476106
Feature Type: intron
Feature Number: 2
Chromosome: 21
Start: 31661734
End: 31663790
Size: 2056
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1330, 1431 ], "xaxis": "x", "y": [ 0, 0 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000476106
Feature Type: intron
Feature Number: 3
Chromosome: 21
Start: 31663886
End: 31666449
Size: 2563
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1527, 1628 ], "xaxis": "x", "y": [ 0, 0 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000476106
Feature Type: intron
Feature Number: 4
Chromosome: 21
Start: 31666518
End: 31667258
Size: 740
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1697, 1798 ], "xaxis": "x", "y": [ 0, 0 ], "yaxis": "y" }, { "hoverinfo": "skip", "marker": { "color": "black", "size": 10, "symbol": "arrow-right" }, "mode": "markers", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1292.7857142857142 ], "xaxis": "x", "y": [ 1 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000470944
Feature Type: intron
Feature Number: 1
Chromosome: 21
Start: 31660708
End: 31663790
Size: 3082
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1044, 1431 ], "xaxis": "x", "y": [ 1, 1 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000470944
Feature Type: intron
Feature Number: 2
Chromosome: 21
Start: 31663886
End: 31666449
Size: 2563
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1527, 1628 ], "xaxis": "x", "y": [ 1, 1 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000470944
Feature Type: intron
Feature Number: 3
Chromosome: 21
Start: 31666518
End: 31667258
Size: 740
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1697, 1798 ], "xaxis": "x", "y": [ 1, 1 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000470944
Feature Type: intron
Feature Number: 4
Chromosome: 21
Start: 31667375
End: 31668471
Size: 1096
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1915, 2016 ], "xaxis": "x", "y": [ 1, 1 ], "yaxis": "y" }, { "hoverinfo": "skip", "marker": { "color": "black", "size": 10, "symbol": "arrow-right" }, "mode": "markers", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 962.7857142857142 ], "xaxis": "x", "y": [ 2 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000389995
Feature Type: intron
Feature Number: 1
Chromosome: 21
Start: 31659784
End: 31663790
Size: 4006
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 120, 1431 ], "xaxis": "x", "y": [ 2, 2 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000389995
Feature Type: intron
Feature Number: 2
Chromosome: 21
Start: 31663886
End: 31666449
Size: 2563
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1527, 1628 ], "xaxis": "x", "y": [ 2, 2 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000389995
Feature Type: intron
Feature Number: 3
Chromosome: 21
Start: 31666518
End: 31667258
Size: 740
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1697, 1798 ], "xaxis": "x", "y": [ 2, 2 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000389995
Feature Type: intron
Feature Number: 4
Chromosome: 21
Start: 31667375
End: 31668471
Size: 1096
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1915, 2016 ], "xaxis": "x", "y": [ 2, 2 ], "yaxis": "y" }, { "hoverinfo": "skip", "marker": { "color": "black", "size": 10, "symbol": "arrow-right" }, "mode": "markers", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 983.1428571428571 ], "xaxis": "x", "y": [ 3 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000270142
Feature Type: intron
Feature Number: 1
Chromosome: 21
Start: 31659841
End: 31663790
Size: 3949
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 177, 1431 ], "xaxis": "x", "y": [ 3, 3 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000270142
Feature Type: intron
Feature Number: 2
Chromosome: 21
Start: 31663886
End: 31666449
Size: 2563
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1527, 1628 ], "xaxis": "x", "y": [ 3, 3 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000270142
Feature Type: intron
Feature Number: 3
Chromosome: 21
Start: 31666518
End: 31667258
Size: 740
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1697, 1798 ], "xaxis": "x", "y": [ 3, 3 ], "yaxis": "y" }, { "hovertemplate": "transcript_id: ENST00000270142
Feature Type: intron
Feature Number: 4
Chromosome: 21
Start: 31667375
End: 31668471
Size: 1096
", "line": { "color": "black", "width": 0.5 }, "mode": "lines", "opacity": 1, "showlegend": false, "type": "scatter", "x": [ 1915, 2016 ], "xaxis": "x", "y": [ 3, 3 ], "yaxis": "y" }, { "boxmean": true, "boxpoints": "all", "fillcolor": "#FECB52", "jitter": 0.3, "legendgroup": "AD", "legendgrouptitle": { "text": "Expression Plot Hue" }, "line": { "width": 0.5 }, "marker": { "color": "black", "opacity": 1, "size": 5 }, "name": "AD", "offsetgroup": "0", "opacity": 1, "orientation": "h", "pointpos": 0, "showlegend": true, "text": [ "sample_1", "sample_7", "sample_3", "sample_5", "sample_1", "sample_7", "sample_3", "sample_5", "sample_1", "sample_7", "sample_3", "sample_5", "sample_1", "sample_7", "sample_3", "sample_5" ], "type": "box", "x": [ 0, 0, 0, 0, 13.01121, 66.96726, 27.91332, 29.3307, 103.45822, 204.75457, 178.30945, 37.95737, 55869.53057, 149305.27817, 101614.77723, 12803.71193 ], "xaxis": "x2", "y": [ "0", "0", "0", "0", "1", "1", "1", "1", "2", "2", "2", "2", "3", "3", "3", "3" ], "yaxis": "y2" }, { "boxmean": true, "boxpoints": "all", "fillcolor": "#FF97FF", "jitter": 0.3, "legendgroup": "Control", "legendgrouptitle": { "text": "" }, "line": { "width": 0.5 }, "marker": { "color": "black", "opacity": 1, "size": 5 }, "name": "Control", "offsetgroup": "1", "opacity": 1, "orientation": "h", "pointpos": 0, "showlegend": true, "text": [ "sample_4", "sample_2", "sample_6", "sample_8", "sample_4", "sample_2", "sample_6", "sample_8", "sample_4", "sample_2", "sample_6", "sample_8", "sample_4", "sample_2", "sample_6", "sample_8" ], "type": "box", "x": [ 0, 0, 0, 0, 33.24766, 51.97457, 44.89448, 50.35971, 278.6152, 262.00388, 227.70089, 227.15655, 215866.13713, 191291.02155, 191269.40463, 183275.48374 ], "xaxis": "x2", "y": [ "0", "0", "0", "0", "1", "1", "1", "1", "2", "2", "2", "2", "3", "3", "3", "3" ], "yaxis": "y2" } ], "layout": { "annotations": [ { "font": { "size": 16 }, "showarrow": false, "text": "Transcript Structure", "x": 0.245, "xanchor": "center", 
"xref": "paper", "y": 1, "yanchor": "bottom", "yref": "paper" }, { "font": { "size": 16 }, "showarrow": false, "text": "Counts", "x": 0.755, "xanchor": "center", "xref": "paper", "y": 1, "yanchor": "bottom", "yref": "paper" } ], "boxgap": 0.2, "boxgroupgap": 0.8, "boxmode": "group", "height": 500, "hoverlabel": { "font": { "size": 12 } }, "hovermode": "closest", "legend": { "font": { "size": 12 }, "grouptitlefont": { "size": 14 }, "tracegroupgap": 7 }, "margin": { "b": 50, "l": 100, "r": 50, "t": 100 }, "showlegend": true, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "white", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "white", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "#C8D4E3", "linecolor": "#C8D4E3", "minorgridcolor": "#C8D4E3", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "#C8D4E3", "linecolor": "#C8D4E3", "minorgridcolor": "#C8D4E3", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, 
"#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": 
"overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, 
"#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "white", "showlakes": true, "showland": true, "subunitcolor": "#C8D4E3" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "white", "polar": { "angularaxis": { "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "" }, "bgcolor": "white", "radialaxis": { "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "white", "gridcolor": "#DFE8F3", "gridwidth": 2, "linecolor": "#EBF0F8", "showbackground": true, "ticks": "", "zerolinecolor": "#EBF0F8" }, "yaxis": { "backgroundcolor": "white", "gridcolor": "#DFE8F3", "gridwidth": 2, "linecolor": "#EBF0F8", "showbackground": true, "ticks": "", "zerolinecolor": "#EBF0F8" }, "zaxis": { "backgroundcolor": "white", "gridcolor": "#DFE8F3", "gridwidth": 2, "linecolor": "#EBF0F8", "showbackground": true, "ticks": "", "zerolinecolor": "#EBF0F8" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "#DFE8F3", "linecolor": 
"#A2B1C6", "ticks": "" }, "baxis": { "gridcolor": "#DFE8F3", "linecolor": "#A2B1C6", "ticks": "" }, "bgcolor": "white", "caxis": { "gridcolor": "#DFE8F3", "linecolor": "#A2B1C6", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "#EBF0F8", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "#EBF0F8", "linecolor": "#EBF0F8", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "#EBF0F8", "zerolinewidth": 2 } } }, "title": { "text": "" }, "violingap": 0.2, "violingroupgap": 0.8, "violinmode": "group", "width": 1200, "xaxis": { "anchor": "y", "domain": [ 0, 0.49 ], "showgrid": true, "showticklabels": false, "tickfont": { "size": 12 }, "title": { "text": "" } }, "xaxis2": { "anchor": "y2", "domain": [ 0.51, 1 ], "showgrid": true, "showticklabels": true, "tickfont": { "size": 12 }, "title": { "text": "" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "range": [ -0.8, 3.8 ], "showgrid": true, "showticklabels": true, "tickfont": { "color": "black", "family": "DejaVu Sans", "size": 12 }, "ticktext": [ "ENST00000476106", "ENST00000470944", "ENST00000389995", "ENST00000270142" ], "tickvals": [ 0, 1, 2, 3 ], "title": { "text": "" } }, "yaxis2": { "anchor": "x2", "domain": [ 0, 1 ], "matches": "y", "range": [ -0.8, 3.8 ], "showgrid": true, "showticklabels": false, "ticks": "", "ticktext": [ "ENST00000476106", "ENST00000470944", "ENST00000389995", "ENST00000270142" ], "tickvals": [ 0, 1, 2, 3 ] } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\"\"\"\n", "Proceed to generate figures as usual. Notice that we did not have \"transcript_biotype\" in the attributes columns,\n", "therefore we have to either pick another column to \"hue\" our RNA isoforms structure plot with or just \n", "pick a fillcolor for all the RNA isoforms (default is grey)\n", "\"\"\"\n", "\n", "## Get counts matrix data path and metadata path\n", "counts_matrix_path = \"../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv\"\n", "metadata_path = \"../dash_apps/RNApysoforms/tests/test_data/sample_metadata.tsv\"\n", "\n", "## Read counts matrix with metadata and normalizations\n", "counts_matrix = RNApy.read_expression_matrix(expression_matrix_path=counts_matrix_path,\n", " metadata_path=metadata_path,\n", " cpm_normalization=True, relative_abundance=True)\n", "\n", "\n", "\"\"\"\n", "Filter annotation by gene_id instead of gene_name by using the `gene_id_column` parameter\n", "since we don't have \"gene_name\" in the GTF file used. \n", "You can always get creative with \"joins\" with other GTF annotations\n", "to fill in more information about specific genes and transcripts, just make sure you\n", "do NOT have any null/NA values in your columns. One way to get around that is to \n", "fill NAs in the gene_name column using the gene_id column.\n", "\"\"\"\n", "sod1_annotation, sod1_expression_matrix = RNApy.gene_filtering(annotation=alt_gtf_df,\n", " expression_matrix=counts_matrix, order_by_expression_column=\"counts\",\n", " order_by_expression=True, gene_id_column=\"gene_id\", ## This is \"gene_name\" by default\n", " target_gene=\"ENSG00000142168\" ## SOD1 ensembl gene_id\n", " )\n", "\n", "## Rescale introns\n", "sod1_annotation = RNApy.shorten_gaps(sod1_annotation)\n", "\n", "\"\"\"\n", "Make traces using a constant value for annotation fill color since we don't have\n", "the \"transcript_biotype\" color for hue. 
You could alternatively hue the RNA\n", "isoform structure plot by a different annotation column such as \n", "\"transcript_id\", thus making every transcript a unique color\n", "\"\"\"\n", "traces = RNApy.make_traces(annotation=sod1_annotation, expression_matrix=sod1_expression_matrix,\n", " x_start=\"rescaled_start\", x_end=\"rescaled_end\",\n", " y='transcript_id', \n", " annotation_fill_color=\"red\", ## Set annotation fill color to a constant value\n", " expression_hue=\"AD status\",\n", " hover_start=\"start\", hover_end=\"end\")\n", "\n", "## Put traces into figure\n", "fig = RNApy.make_plot(traces = traces, subplot_titles = [\"Transcript Structure\", \"Counts\"], width=1200, height=500)\n", "\n", "## Show figure\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### You can click on the legend items to make figure elements appear and disappear.\n", "\n", "### The legend title is grayed out when you click on the first legend item. I could not find a workaround for this with the current plotly release (version 5).\n", "\n", "### Hover labels for exons and CDS work best when you hover your mouse over the edges of the CDS/exon boxes." ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 2 }