Module `cnvpytor.viewer`

cnvpytor.viewer

Class Viewer: plotting CNVpytor data

Source code

""" cnvpytor.viewer

Class Viewer: plotting CNVpytor data
"""
from __future__ import absolute_import, print_function, division

from .io import *
from .utils import *
from .genome import *
from .viewparams import ViewParams, HelpDescription
from .annotator import *
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.colors as colors
from scipy.cluster import hierarchy
from scipy.stats import beta

import numpy as np
import logging
import readline
import traceback
import os
import sys
import datetime

_logger = logging.getLogger("cnvpytor.viewer")


class Reader:
    def __init__(self, files):
        """ Open all given cnvpytor files and keep the handles in ``self.io``.

        Parameters
        ----------
        files : list of str
            Names of the cnvpytor files; each one is opened as ``IO(name, ro=True)``.

        """
        handles = []
        for filename in files:
            handles.append(IO(filename, ro=True))
        # Handles are stored in the same order as the filenames were given.
        self.io = handles


class Show(Reader):
    def ls(self):
        """ Prints to stdout content of all cnvpytor files.

        """
        for i in self.io:
            i.ls()

    def meta(self):
        """ Prints to stdout meta tags of all cnvpytor files.

        """
        for i in self.io:
            i.read_meta_attribute()

    def info(self, bin_sizes):
        """ Prints to stdout RD info for all cnvpytor files.
        Columns are following:
            filename
            mean read length, stdev of read length in %
            mean template length, stdev of template length in %
            for each bin_size (including 100 always):
                rd level and corresponding stdev for each chromosome type (autosomes, sex chromosomes and mitochondria)

        Mitochondrial columns are printed only for bin sizes not larger than 500.
        When the RD level is positive, the stdev is printed as percentage of the level.
        Missing signals are printed as "-".

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes to report. Bin size 100 is prepended when it is not in the list.

        """
        # Work on a copy so that tuples (or other sequences) are accepted as well.
        bin_sizes = list(bin_sizes)
        if 100 not in bin_sizes:
            bin_sizes = [100] + bin_sizes

        # Header: the column order has to match the order of values printed in the loop below.
        labels = ["FILE", "RL", "dRL[%]", "FL", "dFL[%]"]
        for bs in bin_sizes:
            bs_label = binsize_format(bs)
            chrom_types = ["AUTO", "XY"]
            if bs <= 500:
                chrom_types.append("MT")
            for chrom_type in chrom_types:
                labels.append("RD_" + chrom_type + "_" + bs_label)
                labels.append("dRD_" + chrom_type + "_" + bs_label + "[%]")
                labels.append("RD_GC_" + chrom_type + "_" + bs_label)
                labels.append("dRD_GC_" + chrom_type + "_" + bs_label + "[%]")
        print(("{:}\t{:}\t{:}\t{:}\t{:}\t" + "{:}\t" * (len(labels) - 5)).format(*tuple(labels)))

        for i in self.io:
            # Marginal sums of the "read frg dist" matrix over axis 1 and axis 0.
            rfd = i.get_signal(None, None, "read frg dist")
            rd = np.sum(rfd, axis=1)
            fd = np.sum(rfd, axis=0)
            # First and second moments of both marginals, indexed by array position.
            mrl = np.sum(rd * np.arange(rd.size)) / np.sum(rd)
            mfl = np.sum(fd * np.arange(fd.size)) / np.sum(fd)
            mrl2 = np.sum(rd * np.arange(rd.size) * np.arange(rd.size)) / np.sum(rd)
            mfl2 = np.sum(fd * np.arange(fd.size) * np.arange(fd.size)) / np.sum(fd)
            # Standard deviations expressed in percents of the mean.
            sdr = 100. * np.sqrt(mrl2 - mrl * mrl) / mrl
            sdf = 100. * np.sqrt(mfl2 - mfl * mfl) / mfl
            print("{:}\t{:.2f}\t{:.2f}\t{:.2f}\t{:.2f}\t".format(i.filename, mrl, sdr, mfl, sdf), end="")
            for bs in bin_sizes:
                for flag in [FLAG_AUTO, FLAG_SEX, FLAG_MT]:
                    if flag == FLAG_MT and bs > 500:
                        # No mitochondrial columns in the header for large bins.
                        continue
                    # First the plain signal, then the GC corrected one.
                    for stat_flag in (flag, flag | FLAG_GC_CORR):
                        if i.signal_exists(None, bs, "RD stat", flags=stat_flag):
                            stat = i.get_signal(None, bs, "RD stat", flags=stat_flag)
                            level, spread = stat[4], stat[5]
                            if level > 0:
                                # Rescale into a local value; the array returned by get_signal is left untouched.
                                spread = spread / (level / 100.)
                            print("{:.2f}\t{:.2f}\t".format(level, spread), end="")
                        else:
                            print("{:}\t{:}\t".format("-", "-"), end="")
            print()


class Figure(ViewParams):
    def __init__(self, params, force_agg=False):
        """ Class implements matplotlib frequently used figure manipulation and plot panels arrangement.

        Parameters
        ----------
        params : dict
            Params to be passed to ViewParam class
        force_agg : bool
            If True, the matplotlib "Agg" backend is selected before the figure state is initialised.

        """
        if force_agg:
            import matplotlib
            # pyplot is already imported at module level, only the backend switch is needed here.
            matplotlib.use("Agg")
        ViewParams.__init__(self, params)
        self.fig = None
        self.fig_grid = None
        self.fig_sub_grid = None
        self.count = 0
        self.current = -1
        self.sg_current = -1
        # Axes of the most recently created sub panel (reused by next_subpanel when sharex is set).
        self.sg_current_ax = None

    def new_figure(self, panel_count, grid="auto", panel_size=None, title=None):
        """ Clear figure and create new plot layout.

        Parameters
        ----------
        panel_count : int
            Number of panels
        grid : str or (int, int)
            number of columns and rows (sx, sy) or "auto"
        panel_size : (float, float)
            size of a single panel (only when plots in file)
        title : str or None
            Figure title. No title is set when None.

        """
        if panel_size is None:
            panel_size = self.panel_size
        if grid == "auto":
            # "auto" defers to the grid parameter, which itself may still be "auto".
            grid = self.grid
        plt.clf()
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, dpi=self.dpi, facecolor='w', edgecolor='k')
        if title is not None:
            self.fig.suptitle(title, fontsize=16)
        sx, sy = self._get_grid(grid, panel_count)
        if self.output_filename != "":
            # Figure size is set explicitly only when plotting into a file.
            self.fig.set_figheight(panel_size[1] * sy)
            self.fig.set_figwidth(panel_size[0] * sx)
        self.fig_grid = gridspec.GridSpec(sy, sx, hspace=self.margins[5], wspace=self.margins[4])
        self.current = -1
        self.sg_current = -1

    def new_subgrid(self, panel_count, grid="auto", hspace=0, wspace=0):
        """ Create grid of sub panels inside the next panel of the figure grid.

        Parameters
        ----------
        panel_count : int
            Number of sub panels
        grid : str or (int, int)
            number of columns and rows (sx, sy) or "auto" (use subgrid parameter)
        hspace : float
            Height space between sub panels
        wspace : float
            Width space between sub panels

        """
        if grid == "auto":
            grid = self.subgrid
        sx, sy = self._get_grid(grid, panel_count)
        self.current += 1
        self.fig_sub_grid = gridspec.GridSpecFromSubplotSpec(sy, sx, subplot_spec=self.fig_grid[self.current],
                                                             wspace=wspace, hspace=hspace)
        self.sg_current = -1
        self.sg_current_ax = None

    def next_panel(self):
        """ Return axes of next panel

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel
        """
        self.current += 1
        return self.fig.add_subplot(self.fig_grid[self.current])

    def next_subpanel(self, sharex=False):
        """ Return axes of next sub panel

        Parameters
        ----------
        sharex : bool
            If True, every sub panel after the first one shares x axis with the previously created sub panel.

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel
        """
        self.sg_current += 1
        if self.sg_current == 0 or not sharex:
            self.sg_current_ax = self.fig.add_subplot(self.fig_sub_grid[self.sg_current])
        else:
            self.sg_current_ax = self.fig.add_subplot(self.fig_sub_grid[self.sg_current], sharex=self.sg_current_ax)
        return self.sg_current_ax

    def next_polar_panel(self):
        """ Return axes of next panel created with polar projection

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel
        """
        self.current += 1
        return self.fig.add_subplot(self.fig_grid[self.current], projection="polar")

    def get_panel(self, i):
        """ Returns axes of a i-th panel

        Parameters
        ----------
        i : int
            Panel number

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel

        """
        return self.fig.get_axes()[i]

    def _get_grid(self, grid, panel_count):
        """ Translate grid specification into number of columns and rows.

        Parameters
        ----------
        grid : str or (int, int)
            "auto", "vertical", "horizontal" or explicit (sx, sy)
        panel_count : int
            Number of panels

        Returns
        -------
        sx, sy : int, int
            Number of columns and number of rows

        """
        if grid == "auto":
            sx, sy = self._panels_shape(panel_count)
        elif grid == "vertical":
            sx, sy = 1, panel_count
        elif grid == "horizontal":
            sx, sy = panel_count, 1
        else:
            sx, sy = tuple(grid)
        return sx, sy

    def fig_show(self, add_sufix=True, suffix=""):
        """ Plot figure. If output_filename is specified it will plot only into a file.

        Parameters
        ----------
        add_sufix : bool
            If true it will add suffix to output_filename in format prefix.suffix.count.extension
            where count is auto-incremented integer starting from 0 and
            prefix.extension is parsed from output_filename parameter.

        suffix : str
            Suffix used in filename.

        """
        bottom, top, left, right, wspace, hspace = self.margins
        plt.subplots_adjust(bottom=bottom, top=top, wspace=wspace, hspace=hspace, left=left, right=right)
        if self.output_filename != "":
            image_filename = self.output_filename
            if add_sufix:
                image_filename = self._image_filename(suffix)
            if image_filename is not None:
                try:
                    plt.savefig(image_filename, dpi=self.dpi)
                except Exception as e:
                    # Saving is best effort, but KeyboardInterrupt/SystemExit must not be swallowed.
                    _logger.warning("Figure is not saved due to an error!")
                    _logger.debug("savefig failed: %s", e)
                plt.close(self.fig)
            else:
                _logger.warning("Figure is not saved!")
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def _image_filename(self, suffix):
        """ Create filename for the next image and increment the image counter.

        Parameters
        ----------
        suffix : str
            Suffix inserted before the zero padded counter. Only the counter is inserted if empty.

        Returns
        -------
        filename : str or None
            Filename in format prefix.suffix.count.extension, or None if the extension
            of output_filename is not one of png, pdf, jpg, eps or svg.

        """
        parts = self.output_filename.split(".")
        if parts[-1] not in ["png", "pdf", "jpg", "eps", "svg"]:
            _logger.warning("File extension should be: .jpg, .png, .svg, .eps or .pdf")
            return None
        if suffix == "":
            suffix = str(self.count).zfill(4)
        else:
            suffix += "." + str(self.count).zfill(4)
        self.count += 1
        parts[-1] = suffix + "." + parts[-1]
        return ".".join(parts)

    @staticmethod
    def _panels_shape(n):
        """ Choose number of columns and rows for n panels.

        Parameters
        ----------
        n : int
            Number of panels

        Returns
        -------
        sx, sy : int, int
            Number of columns and number of rows

        """
        sx, sy = 1, 1
        if n == 2:
            sy = 2
        elif n in [3, 4]:
            sx, sy = 2, 2
        elif n in [5, 6]:
            sx, sy = 2, 3
        elif n in [7, 8, 9]:
            sx, sy = 3, 3
        elif n in [10, 11, 12]:
            sx, sy = 3, 4
        elif n in [13, 14, 15, 16]:
            sx, sy = 4, 4
        elif n in [17, 18, 19, 20]:
            sx, sy = 4, 5
        elif n in [21, 22, 23, 24]:
            sx, sy = 4, 6
        else:
            # Grow the grid row by row, keeping roughly two columns per three rows,
            # until all panels fit. For n < 2 the loop is skipped and the shape stays 1 x 1.
            while sx * sy < n:
                sy += 1
                sx = int(2. * sy / 3 + 1.)
        return sx, sy


class Viewer(Show, Figure, HelpDescription):

    def __init__(self, files, params=None, force_agg=False, history_file_size=1000):
        """ Open cnvpytor files and initialise the viewer state.

        Parameters
        ----------
        files : list of str
            List of cnvpytor filenames
        params : dict or None
            List of parameters different than default to be passed to ViewParams class.
            None is treated as an empty dict.
        force_agg : bool
            Passed to Figure constructor.
        history_file_size : int
            Value used as readline history length when the prompt history is saved.

        """
        # A fresh dict per call avoids sharing one mutable default between instances.
        if params is None:
            params = {}
        _logger.debug("Viewer class init: files [%s], params %s." % (", ".join(files), str(params)))
        Figure.__init__(self, params, force_agg=force_agg)
        Show.__init__(self, files)
        self.history_file_size = history_file_size
        self.cnvpytor_dir = os.path.expanduser('~/.cnvpytor')
        self.save_history = False
        if os.path.exists(self.cnvpytor_dir):
            if os.access(self.cnvpytor_dir, os.W_OK):
                self.save_history = True
            conf_filename = self.cnvpytor_dir + "/viewer.conf"
            if os.path.exists(conf_filename):
                # NOTE(review): eval executes whatever Python expression is stored in viewer.conf.
                # Kept for compatibility with existing configuration files; consider a safer parser
                # (e.g. ast.literal_eval) if the file can be written by anyone but the user.
                with open(conf_filename) as conf_file:
                    conf = eval(conf_file.read())
                for key in conf:
                    setattr(self, key, conf[key])

        # Until a reference genome says otherwise, GC and mask data are read from the first file.
        self.io_gc = self.io[0]
        self.io_mask = self.io[0]
        self.reference_genome = None
        self.plot_files = list(range(len(files)))
        self.default["plot_files"] = list(range(len(files)))
        if self.io[0].signal_exists(None, None, "reference genome"):
            rg_name = np.array(self.io[0].get_signal(None, None, "reference genome")).astype("str")[0]
            reference_genome = Genome.reference_genomes[rg_name]
            self.reference_genome = reference_genome
            if "mask_file" in reference_genome:
                self.io_mask = IO(reference_genome["mask_file"], ro=True, buffer=True)
            if "gc_file" in reference_genome:
                self.io_gc = IO(reference_genome["gc_file"], ro=True, buffer=True)

    def parse(self, command):
        """ Execute a plot command given as a list of tokens.

        Tokens are processed left to right. A token made of digits and divisible by 100 is
        taken as a bin size: it is stored in ``self.bin_size`` and triggers the plot selected
        by the current mode. "rdstat" and "snp" plot immediately. A mode keyword switches
        the mode. While the mode is "regions" (the initial one), any other token is collected
        as a region; in every other mode an unrecognised token becomes the current mode.

        Parameters
        ----------
        command : list of str
            Command tokens.

        """
        modes = ["rd", "baf", "manhattan", "calls", "stat", "regions", "likelihood", "circular"]
        mode = "regions"
        pending_regions = []

        for token in command:
            is_bin_size = token.isdigit() and (int(token) % 100) == 0
            if not is_bin_size:
                if token == "rdstat":
                    self.stat()
                elif token == "snp":
                    self.snp()
                elif token not in modes and mode == "regions":
                    pending_regions.append(token)
                else:
                    mode = token
                continue

            # Bin size token: remember it and draw whatever the current mode asks for.
            self.bin_size = int(token)
            if mode == "rd":
                self.rd()
            elif mode == "baf":
                self.baf()
            elif mode == "likelihood":
                self.likelihood()
            elif mode == "manhattan":
                self.global_plot()
            elif mode == "calls":
                if len(self.callers) > 0:
                    self.manhattan(plot_type=self.callers[0])
            elif mode == "stat":
                self.stat(int(token))
            elif mode == "circular":
                self.circular()
            elif mode == "regions":
                self.multiple_regions(pending_regions)
                pending_regions = []

    def plot_command(self, command):
        """ Execute a plot command in non-interactive mode.

        Parameters
        ----------
        command : list of str
            Command tokens passed to ``parse``.

        """
        # Has to be set before parsing: the plotting code reads self.interactive when showing figures.
        self.interactive = False
        self.parse(command)

    def prompt(self):
        """ Run the command prompt loop.

        Commands are read line by line from stdin until "quit"/"exit", end of input or
        keyboard interrupt. When stdin is not a terminal no prompt string is printed and
        the viewer switches to non-interactive mode. Empty lines and lines starting
        with "#" are skipped. A line starting with "|" is executed as Python code.
        Everything that is not a built-in command is passed to ``parse`` as a plot command;
        text after ">" in such a line is used as a filename for saving the figure.

        """
        self.interactive = True

        # Chromosome names from all files are added to the command tree used for tab completion.
        chromosomes = set()
        for f in self.io:
            chromosomes.update(f.rd_chromosomes())
            chromosomes.update(f.snp_chromosomes())
        for c in chromosomes:
            self.command_tree[c] = None
        self.command_tree["set"]["style"] = dict(zip(plt.style.available, [None] * len(plt.style.available)))
        history_filename = self.cnvpytor_dir + "/history"
        if os.path.exists(history_filename):
            readline.read_history_file(history_filename)

        readline.parse_and_bind("tab: complete")
        completer = PromptCompleter(self.command_tree)
        readline.set_completer(completer.complete)
        finished = False
        try:
            while not finished:
                prompt_str = ""
                if os.isatty(sys.stdin.fileno()):
                    prompt_str = "cnvpytor> "
                else:
                    self.interactive = False
                # raw_input exists only in Python 2; fall back to input in Python 3.
                try:
                    line = raw_input(prompt_str)
                except NameError:
                    line = input(prompt_str)

                # Skip blank lines and comments. Blank lines have to be tested first,
                # indexing an empty string would raise IndexError and end the session.
                if not line.strip() or line[0] == "#":
                    continue

                if self.save_history and self.interactive:
                    readline.set_history_length(self.history_file_size)
                    readline.write_history_file(history_filename)

                pre = line.split(">")
                f = pre[0].strip().split(" ")
                n = len(f)
                if f[0] == "quit" or f[0] == "exit":
                    finished = True
                elif line[0] == "|":
                    # Deliberate escape hatch: the rest of the line is executed as Python code.
                    try:
                        eval(compile(line[1:], '<string>', 'single'))
                    except Exception:
                        print(traceback.format_exc())
                elif f[0] == "save":
                    if n > 1:
                        try:
                            plt.savefig(f[1])
                        except ValueError:
                            _logger.warning("File extension should be: .jpg, .png, .svg, .eps or .pdf")
                        except Exception:
                            _logger.warning("Figure is not saved due to an error!")

                elif f[0] in ["draw", "repaint", "update"]:
                    # Nothing to redraw before the first plot is made.
                    if n == 1 and self.fig is not None:
                        self.fig.canvas.draw()
                elif f[0] == "ls":
                    self.ls()
                elif f[0] == "meta":
                    self.meta()
                elif f[0] == "show":
                    if n == 1:
                        self.show()
                elif f[0] == "set":
                    if n > 1:
                        self.set(f[1], f[2:])
                elif f[0] == "help" and n > 1:
                    self.help(f[1])
                elif f[0] == "help" and n == 1:
                    self.help("help")
                elif f[0] == "unset":
                    if n > 1:
                        self.unset(f[1])
                elif f[0] == "genotype":
                    if n > 1:
                        self.genotype_all([self.bin_size], f[1:], interactive=True)
                elif f[0] == "snv":
                    if n == 2:
                        self.snp(callset=f[1])
                    elif n == 1:
                        self.snp(callset="default")
                elif f[0] == "compare":
                    if n == 3:
                        self.compare(f[1], f[2], plot=self.plot)
                    elif n == 4:
                        self.compare(f[1], f[2], n_bins=int(f[3]), plot=self.plot)
                elif f[0] == "info":
                    if n > 1:
                        self.info(list(map(binsize_type, f[1:])))
                elif f[0] == "print":
                    # "print" without an argument is ignored instead of raising IndexError.
                    if n > 1 and f[1] == "calls":
                        if self.print_filename == "":
                            self.print_calls()
                        else:
                            self.print_calls_file()
                    elif n > 1 and f[1] == "joint_calls":
                        self.print_simple_joint_calls()

                else:
                    try:
                        # Plot commands are completed with the current bin size, which triggers plotting.
                        if f[0] not in ["rdstat", "snp"]:
                            self.parse(f + [str(self.bin_size)])
                        else:
                            self.parse(f)
                        if len(pre) > 1:
                            fns = pre[1].strip().split(" ")
                            if fns[0] != "":
                                plt.savefig(fns[0], dpi=200)
                    except Exception:
                        print(traceback.format_exc())
        except (EOFError, KeyboardInterrupt):
            print()
            return

    def help(self, param):
        """ Print help text for a parameter or command.

        Parameters
        ----------
        param : str
            Key looked up in ``self.param_help``. A notice is printed if the key is missing.

        """
        if param not in self.param_help:
            print("\nUnknown parameter !\n")
            return
        print(self.param_help[param])

    @staticmethod
    def set_style(style):
        """ Apply a matplotlib style.

        Parameters
        ----------
        style : str
            Style name. Names not listed in ``plt.style.available`` are silently ignored.

        """
        if style not in plt.style.available:
            return
        # The default style is applied first, then the requested one on top of it.
        for name in ("default", style):
            plt.style.use(name)

    def file_title(self, ix):
        """ Return title used for the ix-th file.

        Parameters
        ----------
        ix : int
            Index of the file.

        Returns
        -------
        title : str
            Entry of ``self.file_titles`` when one exists for this index, otherwise the last
            "/"-separated component of the filename with ".pytor" removed.

        """
        titles = self.file_titles
        if not ix < len(titles):
            path = self.io[ix].filename
            return path.rsplit("/", 1)[-1].replace(".pytor", "")
        return titles[ix]

    def show(self):
        """ Print all parameters, sorted by name, with their current values.

        The list of opened files with their indices is printed below the "plot_files" parameter.

        """
        params = self.params
        print("\nParameters")
        for name in sorted(params.keys()):
            print("    * %s: %s" % (name, str(params[name])))
            if name != "plot_files":
                continue
            for index, handle in enumerate(self.io):
                print("            %d: %s" % (index, handle.filename))
        print()

    def stat(self, his_bin_size=100, return_image=False):
        """ Plot RD statistics of the current plot file.

        One column of panels is made for each chromosome type (autosomes, X/Y, mitochondria)
        that has "RD stat" signal for the given bin size; mitochondria only for bin sizes
        up to 1000. Top panel shows the "RD GC dist" histogram with the GC correction curve,
        bottom panel shows "RD p dist" and "RD u dist" histograms together with a normal curve
        drawn from the mean and stdev stored in "RD stat". Nothing is plotted if none of
        the signals exist.

        Parameters
        ----------
        his_bin_size : int
            Bin size used to look up the signals.
        return_image : bool
            If True the figure is rendered and returned as an image instead of being shown or saved.

        Returns
        -------
        image : PIL.Image.Image or None
            Rendered figure when return_image is True, otherwise None.

        """
        plt.clf()
        io = self.io[self.plot_file]
        auto = io.signal_exists(None, his_bin_size, "RD stat", FLAG_AUTO)
        sex = io.signal_exists(None, his_bin_size, "RD stat", FLAG_SEX)
        mt = io.signal_exists(None, his_bin_size, "RD stat", FLAG_MT) and (his_bin_size < 1001)
        if not (auto or sex or mt):
            return
        cond = [auto, sex, mt]
        stat_list = []
        n_cols = sum(map(int, cond))
        ix = 1
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, figsize=(4 * n_cols, 8), dpi=90, facecolor='w', edgecolor='k')
        for t, c, flag in zip(["Autosomes", "X/Y", "Mitochondria"], cond, [FLAG_AUTO, FLAG_SEX, FLAG_MT]):
            if c:
                stat = io.get_signal(None, his_bin_size, "RD stat", flag)
                stat_list.append(stat)
                max_rd = int(stat[0])
                bin_size = int(stat[1])
                # Upper limit of plotted RD: twice the mean or mean + 3 stdev, whichever is larger.
                lim_rd = int(max(2 * stat[4], stat[4] + 3 * stat[5]))
                _logger.info("RD stat for %s: %.2f +- %.2f" % (t, stat[4], stat[5]))
                if t == "Mitochondria" and auto:
                    # stat_list[0] holds the autosomal statistics whenever auto is True.
                    _logger.info("RD stat for %s - number of mitochondria per cell: %.2f +- %.2f" % (
                        t, 2 * stat[4] / stat_list[0][4],
                        2 * stat[5] / stat_list[0][4] + stat_list[0][5] * stat[4] / (
                                stat_list[0][4] * stat_list[0][4])))
                his_p = io.get_signal(None, his_bin_size, "RD p dist", flag)
                his_u = io.get_signal(None, his_bin_size, "RD u dist", flag)
                his_rd_gc = io.get_signal(None, his_bin_size, "RD GC dist", flag)
                gc_corr = io.get_signal(None, his_bin_size, "GC corr", flag)
                ax = plt.subplot(2, n_cols, ix)
                ax.set_xlabel("RD")
                ax.set_ylabel("GC [%]")
                ax.xaxis.set_ticklabels([])
                ax.set_title(t)
                his_rd_gc[0][0] = 0
                ax.imshow(his_rd_gc[:lim_rd // bin_size, :].T, aspect="auto", interpolation='nearest', origin='lower')
                ax.plot(gc_corr * stat[4] / bin_size, range(101), "w-")

                ax = plt.subplot(2, n_cols, ix + n_cols)
                ax.set_ylabel("Normalised distribution")
                ax.set_xlabel("RD")
                ax.set_xlim([0, lim_rd])
                # ax.set_ylim([0, 1.1])
                bins = range(0, max_rd, bin_size)
                x = np.arange(0, max_rd // bin_size * bin_size, 0.1 * bin_size)
                plt.plot(x, normal(x, 1, stat[4], stat[5]), "g-")
                x = np.array(bins)
                plt.plot(x[:len(his_u)], his_u / stat[3], "y*")
                plt.plot(x[:len(his_p)], his_p / stat[3], "b*")
                ix += 1
        plt.subplots_adjust(bottom=0.08, top=0.95, wspace=0.25, hspace=0, left=0.05 * 3 / n_cols, right=0.95)
        if return_image:
            self.fig.canvas.draw()
            # "import PIL" alone does not guarantee that the Image submodule is loaded.
            from PIL import Image
            # NOTE(review): tostring_rgb is deprecated in recent matplotlib releases - confirm
            # against the supported matplotlib versions.
            pil_image = Image.frombytes('RGB', self.fig.canvas.get_width_height(),
                                        self.fig.canvas.tostring_rgb())
            return pil_image
        elif self.output_filename != "":
            image_filename = self._image_filename("stat")
            if image_filename is not None:
                plt.savefig(image_filename, dpi=150)
                plt.close(self.fig)
            else:
                # Same handling of unsupported file extension as in fig_show.
                _logger.warning("Figure is not saved!")
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def rd(self):
        """ Plot RD signal of the current plot file, one panel per chromosome.

        Panels are made for autosomes and sex chromosomes of the reference genome that have
        both raw and GC corrected "RD" signal for the current bin size. Depending on
        parameters rd_raw, rd_corrected, rd_partition, rd_call and callers, the raw signal,
        GC corrected signal, partition, calls and mosaic calls are drawn.
        A warning is logged and nothing is plotted when the reference genome is not known.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        io = self.io[self.plot_file]

        def has_data(signal):
            return signal is not None and len(signal) > 0

        def mosaic_levels(template, segments, levels, enabled):
            # NaN everywhere except bins covered by a segment, which get the segment level.
            out = np.zeros_like(template) * np.nan
            if enabled:
                for seg, lev in zip(list(segments), list(levels[0])):
                    for segi in seg:
                        out[segi] = lev
            return out

        chroms = []
        for name, (length, _) in self.reference_genome["chromosomes"].items():
            rd_chr = io.rd_chromosome_name(name)
            if not io.signal_exists(rd_chr, bin_size, "RD", 0):
                continue
            if not io.signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR):
                continue
            if Genome.is_autosome(name) or Genome.is_sex_chrom(name):
                chroms.append((rd_chr, length))

        self.new_figure(panel_count=len(chroms))
        for chrom, length in chroms:
            mask_flag = FLAG_USEMASK if self.rd_use_mask else 0
            corr_flag = mask_flag | FLAG_GC_CORR
            mean, stdev = io.rd_normal_level(bin_size, corr_flag)
            raw = io.get_signal(chrom, bin_size, "RD", mask_flag)
            corrected = io.get_signal(chrom, bin_size, "RD", corr_flag)
            partition = io.get_signal(chrom, bin_size, "RD partition", corr_flag)
            call = io.get_signal(chrom, bin_size, "RD call", corr_flag)
            mosaic_seg = segments_decode(io.get_signal(chrom, bin_size, "RD mosaic segments", corr_flag))
            mosaic_call = io.get_signal(chrom, bin_size, "RD mosaic call", corr_flag)
            mosaic_seg_2d = segments_decode(io.get_signal(chrom, bin_size, "RD mosaic segments 2d", corr_flag))
            mosaic_call_2d = io.get_signal(chrom, bin_size, "RD mosaic call 2d", corr_flag)

            # Mosaic calls are shown only when present, rd_call is set and the caller is selected.
            show_mosaic = has_data(mosaic_call) and self.rd_call and ("rd_mosaic" in self.callers)
            mosaic = mosaic_levels(raw, mosaic_seg, mosaic_call, show_mosaic)
            show_mosaic_2d = has_data(mosaic_call_2d) and self.rd_call and ("combined_mosaic" in self.callers)
            mosaic_2d = mosaic_levels(raw, mosaic_seg_2d, mosaic_call_2d, show_mosaic_2d)

            ax = self.next_panel()
            ax.set_title(chrom, position=(0.01, 0.9),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'}, color='C0')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            # x ticks at every 10 Mbp, expressed in bins.
            ax.xaxis.set_ticks(np.arange(0, (length + 10e6) // bin_size, 10e6 // bin_size), minor=[])
            rd_low, rd_high = self.rd_range[0], self.rd_range[1]
            if (rd_high - rd_low) < 30:
                # y ticks at integer multiples of mean / 2.
                ax.yaxis.set_ticks(np.arange(int(rd_low), int(rd_high + 1), 1) * mean / 2, minor=[])
            ax.set_ylim([self.rd_range[0] * mean / 2, self.rd_range[1] * mean / 2])
            n_bins = length // bin_size
            ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
            ax.grid()

            if self.rd_raw:
                plt.step(raw, self.rd_colors[0])
            if self.rd_corrected:
                plt.step(corrected, self.rd_colors[1])
            if has_data(partition) and self.rd_partition:
                plt.step(partition, self.rd_colors[2])
            if has_data(call) and self.rd_call:
                plt.step(call, self.rd_colors[3])
            if show_mosaic:
                plt.step(mosaic, self.rd_colors[4])
            if show_mosaic_2d:
                plt.step(mosaic_2d, self.rd_colors[5])
        self.fig_show(suffix="rd")

    def rd_diff(self, file1, file2):
        """ Plot absolute difference of normalised RD signals of two files, one panel per chromosome.

        For each chromosome the GC corrected RD signal of each file is divided by the mean
        RD level stored in "RD stat" signal of that file (autosomal or sex chromosome level)
        and the absolute difference of the two is drawn. Chromosomes are selected using
        the current plot file: autosomes and sex chromosomes of the reference genome having
        both raw and GC corrected "RD" signal for the current bin size.

        Parameters
        ----------
        file1 : int
            Index of the first file in ``self.io``.
        file2 : int
            Index of the second file in ``self.io``.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            rd_chr = self.io[self.plot_file].rd_chromosome_name(c)
            if self.io[self.plot_file].signal_exists(rd_chr, bin_size, "RD", 0) and \
                    self.io[self.plot_file].signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and \
                    (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append((rd_chr, l))
        self.new_figure(panel_count=len(chroms))
        for c, l in chroms:
            flag = FLAG_MT if Genome.is_mt_chrom(c) else FLAG_SEX if Genome.is_sex_chrom(c) else FLAG_AUTO
            stat1 = self.io[file1].get_signal(None, bin_size, "RD stat", flag)
            stat2 = self.io[file2].get_signal(None, bin_size, "RD stat", flag)
            if stat1 is None:
                _logger.error(
                    "Data for bin size %d is missing in file '%s'!" % (bin_size, self.io[file1].filename))
                return
            if stat2 is None:
                _logger.error(
                    "Data for bin size %d is missing in file '%s'!" % (bin_size, self.io[file2].filename))
                return
            flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0)
            his_p_corr1 = self.io[file1].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
            his_p_corr2 = self.io[file2].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            # The difference is plotted on a fixed scale. The rd_range based y ticks copied from rd()
            # were removed here: they used an undefined name (mean) and were overwritten
            # by the ticks below anyway.
            ax.yaxis.set_ticks(np.arange(0, 2, 0.25), minor=[])
            # x ticks at every 10 Mbp, expressed in bins.
            ax.xaxis.set_ticks(np.arange(0, (l + 10e6) // bin_size, 10e6 // bin_size), minor=[])
            ax.set_ylim([0, 1])
            n_bins = l // bin_size
            ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
            ax.grid()

            plt.step(np.abs(his_p_corr1 / stat1[4] - his_p_corr2 / stat2[4]), "k")
        self.fig_show(suffix="rd_diff")

    def likelihood(self):
        """ Plots "SNP likelihood" signal as an image, one panel per chromosome.

        Chromosomes are taken from the reference genome. A chromosome is plotted if
        it is an autosome or a sex chromosome and the "SNP likelihood" signal exists
        in the plot file for the current bin size and SNP flags.

        If ``snp_call`` is set, segments stored by the "baf_mosaic" and/or
        "combined_mosaic" callers (whichever are listed in ``callers``) are marked
        over the image using ``lh_colors[0]`` and ``lh_colors[1]`` respectively.

        Without a reference genome only a warning is logged and nothing is plotted.

        """
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        io = self.io[self.plot_file]

        def mark_calls(chrom, call_signal, segments_signal, color_ix):
            """ Scatters markers for the segments of one caller over the current axes.

            For each segment the two indices returned by likelihood_pixels_pval are
            drawn (smaller one and larger one) at every position of the segment.
            """
            call_likelihood = io.get_signal(chrom, bin_size, call_signal, snp_flag)
            segments = segments_decode(io.get_signal(chrom, bin_size, segments_signal, snp_flag))
            call_pos = []
            call_lower = []
            call_upper = []
            call_color = []
            for segment, lh in zip(segments, call_likelihood):
                i1, i2, p = likelihood_pixels_pval(lh)
                if i1 != i2 and len(segment) > self.min_segment_size:
                    # Smaller p-value gives a more opaque marker; opacity is capped at 1.
                    alpha = min(-np.log(p + 1e-40) / self.contrast, 1)
                    color = colors.to_rgb(self.lh_colors[color_ix]) + (alpha,)
                    lower, upper = min(i1, i2), max(i1, i2)
                    for pos in segment:
                        call_pos.append(pos)
                        call_lower.append(lower)
                        call_upper.append(upper)
                        call_color.append(color)
            for call_y in (call_lower, call_upper):
                plt.scatter(call_pos, call_y, s=self.lh_markersize, color=np.array(call_color),
                            edgecolors='face', marker=self.lh_marker)

        chroms = []
        for c in self.reference_genome["chromosomes"]:
            snp_chr = io.snp_chromosome_name(c)
            if io.signal_exists(snp_chr, bin_size, "SNP likelihood", snp_flag) and (
                    Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append(snp_chr)

        self.new_figure(panel_count=len(chroms))
        for c in chroms:
            likelihood = io.get_signal(c, bin_size, "SNP likelihood", snp_flag)
            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            # Transposed so that the first axis of the signal runs along x.
            ax.imshow(np.array(likelihood).transpose(), aspect='auto')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            ax.xaxis.set_ticks(np.arange(0, likelihood.shape[0], 50), minor=[])
            ax.set_xlim([0, likelihood.shape[0]])
            if self.snp_call:
                if "baf_mosaic" in self.callers:
                    mark_calls(c, "SNP likelihood call", "SNP likelihood segments", 0)
                if "combined_mosaic" in self.callers:
                    mark_calls(c, "SNP likelihood call 2d", "SNP likelihood segments 2d", 1)
        self.fig_show(suffix="likelihood")

    def baf(self):
        """ Plots binned SNP signals, one panel per chromosome.

        Chromosomes are taken from the reference genome. A chromosome is plotted if
        all of "SNP baf", "SNP maf", "SNP i1" and "SNP i2" signals exist in the plot
        file (for the current bin size and SNP flags) and it is an autosome or a sex
        chromosome. "SNP baf", "SNP maf" and "SNP i1" are drawn as step lines using
        ``baf_colors[0]``, ``baf_colors[1]`` and ``baf_colors[2]``.

        Without a reference genome only a warning is logged and nothing is plotted.

        """
        genome = self.reference_genome
        if genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return

        snp_flag = 0
        if self.snp_use_mask:
            snp_flag |= FLAG_USEMASK
        if self.snp_use_id:
            snp_flag |= FLAG_USEID
        if self.snp_use_phase:
            snp_flag |= FLAG_USEHAP

        signals = ("SNP baf", "SNP maf", "SNP i1", "SNP i2")

        # Collect (chromosome name in SNP data, length) for every chromosome to plot.
        panels = []
        for name, (length, _) in genome["chromosomes"].items():
            pytor = self.io[self.plot_file]
            snp_name = pytor.snp_chromosome_name(name)
            stored = all(pytor.signal_exists(snp_name, self.bin_size, s, snp_flag) for s in signals)
            if stored and (Genome.is_autosome(name) or Genome.is_sex_chrom(name)):
                panels.append((snp_name, length))

        self.new_figure(panel_count=len(panels))
        title_font = {'verticalalignment': 'top', 'horizontalalignment': 'left'}
        for snp_name, length in panels:
            pytor = self.io[self.plot_file]
            # "SNP i2" is read but not drawn.
            baf_track, maf_track, i1_track, _i2_track = [
                pytor.get_signal(snp_name, self.bin_size, s, snp_flag) for s in signals]

            panel = self.next_panel()
            panel.set_title(snp_name, position=(0.01, 0.9), fontdict=title_font, color='C0')
            panel.xaxis.set_ticklabels([])
            panel.yaxis.set_ticklabels([])
            panel.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
            # One x tick every 10 Mbp, expressed in bins.
            tick_step = 10e6 // self.bin_size
            panel.xaxis.set_ticks(np.arange(0, (length + 10e6) // self.bin_size, tick_step), minor=[])
            panel.set_ylim([0, 1])
            bins = length // self.bin_size
            # Leave a 5% margin on both sides of the chromosome.
            panel.set_xlim([-bins * 0.05, bins * 1.05])
            panel.grid()
            for track, line_color in zip((baf_track, maf_track, i1_track), self.baf_colors):
                panel.step(track, line_color)
        self.fig_show(suffix="baf")

    def snp(self, plot_gt=None, plot_pmask=None, callset=None):
        """ Plots allele frequency of individual SNPs, one panel per chromosome.

        For each SNP with non-zero read count the plotted value is
        nalt / (nref + nalt), or nref / (nref + nalt) when gt % 4 == 2.

        If a reference genome is set, only autosomes and sex chromosomes with all
        four SNP signals (pos, desc, counts, qual) stored are plotted; otherwise all
        chromosomes returned by ``snp_chromosomes()`` of the plot file are used.

        Parameters
        ----------
        plot_gt : list of int or None
            Values of gt % 4 to plot. If None, [0, 1, 2, 3] is used.
        plot_pmask : list of int or None
            Values of flag >> 1 to plot. If None, [0, 1] is used.
        callset : str or None
            If not None, "somatic SNP" signals stored under this name are used
            instead of "SNP" signals.

        """
        if plot_pmask is None:
            plot_pmask = [0, 1]
        if plot_gt is None:
            plot_gt = [0, 1, 2, 3]
        io = self.io[self.plot_file]

        if self.reference_genome is None:
            chroms = io.snp_chromosomes()
        else:
            if callset is None:
                signals = ("SNP pos", "SNP desc", "SNP counts", "SNP qual")
                extra = {}
            else:
                signals = ("somatic SNP pos", "somatic SNP desc", "somatic SNP counts", "somatic SNP qual")
                extra = {"name": callset}
            chroms = []
            for c in self.reference_genome["chromosomes"]:
                snp_chr = io.snp_chromosome_name(c)
                if all(io.signal_exists(snp_chr, None, signal, 0, **extra) for signal in signals) and \
                        (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                    chroms.append(snp_chr)

        self.new_figure(panel_count=len(chroms))
        for c in chroms:
            pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
            hpos = []
            baf = []
            color = []
            for p, n_ref, n_alt, g, f in zip(pos, nref, nalt, gt, flag):
                depth = n_ref + n_alt
                genotype = g % 4
                pmask = f >> 1
                if depth == 0 or genotype not in plot_gt or pmask not in plot_pmask:
                    continue
                hpos.append(p)
                baf.append(1.0 * (n_ref if genotype == 2 else n_alt) / depth)
                if self.snp_alpha_P:
                    color.append(colors.to_rgba(self.snp_colors[genotype * 2 + 1], pmask * 0.4))
                else:
                    color.append(self.snp_colors[genotype * 2 + pmask])

            # A scalar alpha passed to scatter overrides the alpha channel of the
            # point colors, so it is only used when colors carry no per-point alpha.
            alpha = None if self.snp_alpha_P else 0.7
            size = 10 if self.markersize == "auto" else self.markersize

            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
            ax.set_ylim([0., 1.])
            # A chromosome without SNPs has no extent; keep default x axis for its panel.
            if len(pos) > 0:
                l = max(pos)
                ax.xaxis.set_ticks(np.arange(0, (l + 10e6), 10e6), minor=[])
                ax.set_xlim([-0.05 * l, 1.05 * l])
            ax.grid()
            ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=size, alpha=alpha)
        self.fig_show(suffix="snp")

    def get_calls(self):
        """ Collects calls from all plot files that pass the current filters.

        For each caller in ``callers`` ("rd_mean_shift" and "combined_mosaic" are
        recognised, others are ignored), each plot file and each RD chromosome
        (restricted to ``chrom`` if that list is not empty), stored calls are read
        and filtered by size, p-value, pN and Q0 ranges ("rd_mean_shift" calls also
        by dG range).

        Returns
        -------
        ret : list of list
            One row per call. All rows start with file title, caller, type and
            chromosome. "rd_mean_shift" rows continue with start, end, size, cnv,
            p_val, p_val_2, p_val_3, p_val_4, Q0, pN, dG. "combined_mosaic" rows
            continue with start, end, size, cnv, p_val, lh_del, lh_loh, lh_dup, Q0,
            pN, pNS, pP, bins, baf, rd_p_val, baf_p_val, segment, hets, homs,
            followed by the elements of the first two entries of call["models"].
            If ``annotate`` is set, annotation of the call region is appended.

        """
        bin_size = self.bin_size
        ix = self.plot_files
        if self.annotate:
            annotator = Annotator(self.reference_genome)
        combined_keys = ["start", "end", "size", "cnv", "p_val", "lh_del", "lh_loh",
                         "lh_dup", "Q0", "pN", "pNS", "pP", "bins", "baf",
                         "rd_p_val", "baf_p_val", "segment", "hets", "homs"]
        combined_types = {-1: "deletion", 0: "cnnloh", 1: "duplication"}
        ret = []
        for caller in self.callers:
            if caller == "rd_mean_shift":
                signal = "calls"
            elif caller == "combined_mosaic":
                signal = "calls combined"
            else:
                continue
            for file_ix in ix:
                io = self.io[file_ix]
                for c in io.rd_chromosomes():
                    if len(self.chrom) != 0 and c not in self.chrom:
                        continue
                    flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                    if caller == "combined_mosaic":
                        flag |= (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                    if not io.signal_exists(c, bin_size, signal, flag):
                        continue
                    for call in io.read_calls(c, bin_size, signal, flag):
                        if not (in_interval(call["size"], self.size_range)
                                and in_interval(call["p_val"], self.p_range)
                                and in_interval(call["pN"], self.pN_range)
                                and in_interval(call["Q0"], self.Q0_range)):
                            continue
                        if caller == "rd_mean_shift":
                            if not in_interval(call["dG"], self.dG_range):
                                continue
                            call_type = "duplication" if call["type"] == 1 else "deletion"
                            row = [self.file_title(file_ix), caller, call_type, c, call["start"], call["end"],
                                   call["size"], call["cnv"], call["p_val"], call["p_val_2"],
                                   call["p_val_3"], call["p_val_4"], call["Q0"], call["pN"], call["dG"]]
                        else:
                            call_type = combined_types[call["type"]]
                            # Title is looked up by index into self.io, same as for rd_mean_shift rows.
                            row = [self.file_title(file_ix), caller, call_type, c]
                            row.extend(call[k] for k in combined_keys)
                            for m in range(2):
                                row.extend(call["models"][m])
                        if self.annotate:
                            row.append(annotator.get_info("%s:%d-%d" % (c, call["start"], call["end"])))
                        ret.append(row)
        return ret

    def print_calls_file(self):
        format = self.print_filename.split(".")[-1]
        calls = self.get_calls()
        if self.print_filename == "":
            for call in calls:
                print(*call, sep="\t")
        elif format == "tsv":
            with open(self.print_filename, 'w') as f:
                for call in calls:
                    print(*call, sep="\t", file=f)
        elif format == "xlsx":
            import xlsxwriter
            workbook = xlsxwriter.Workbook(self.print_filename)
            files_callers = []
            sheets = {}
            rix = {}
            for call in calls:
                caller = call[1]
                fc = call[0] + " (" + caller + ")"
                sfc = call[0][:25] + " " + ({"rd_mean_shift": "ms", "combined_mosaic": "2d"}[caller])
                if fc not in files_callers:
                    sheets[fc] = workbook.add_worksheet(sfc)
                    rix[fc] = 0
                    files_callers.append(fc)
            for call in calls:
                caller = call[1]
                fc = call[0] + " (" + caller + ")"
                cix = 0
                for f in call[2:]:
                    sheets[fc].write(rix[fc], cix, f)
                    cix += 1
                rix[fc] += 1
            workbook.close()
        elif format == "vcf":
            samples = []
            for call in calls:
                sample = call[0]
                if sample not in samples:
                    samples.append(sample)
            header = """##fileformat=VCFv4.1
##fileDate={date}
##reference={rg}
##source=CNVpytor
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=pytorRD,Number=1,Type=Float,Description="Normalized RD">
##INFO=<ID=pytorP1,Number=1,Type=Float,Description="e-val by t-test">
##INFO=<ID=pytorP2,Number=1,Type=Float,Description="e-val by Gaussian tail">
##INFO=<ID=pytorP3,Number=1,Type=Float,Description="e-val by t-test (middle)">
##INFO=<ID=pytorP4,Number=1,Type=Float,Description="e-val by Gaussian tail (middle)">
##INFO=<ID=pytorQ0,Number=1,Type=Float,Description="Fraction of reads with 0 mapping quality">
##INFO=<ID=pytorPN,Number=1,Type=Integer,Description="Fraction of N bases">
##INFO=<ID=pytorDG,Number=1,Type=Integer,Description="Distance to nearest gap in reference genome">
##INFO=<ID=pytorCL,Number=1,Type=Integer,Description="Caller method">
##INFO=<ID=SAMPLES,Number=.,Type=String,Description="Sample genotyped to have the variant">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=LOH,Description="Copy number neutral loss of heterozygosity">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">;
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{samples}"""
            if self.reference_genome:
                rg = self.reference_genome["name"]
            else:
                rg = "unknown"
            header = header.format(date=datetime.date.today().strftime("%Y-%m-%d"), rg=rg, samples="\t".join(samples))
            ii = 0
            with open(self.print_filename, 'w') as f:
                print(header, file=f)
                for call in calls:
                    ii += 1
                    id = "CNVpytor_" + {"deletion": "del", "duplication": "dup", "cnnloh": "loh"}[call[2]] + str(ii)
                    alt = {"deletion": "<DEL>", "duplication": "<DUP>", "cnnloh": "<LOH>"}[call[2]]
                    info = "END=" + str(int(call[5])) + ";IMPRECISE;SVLEN=" + str(int(call[6])) + ";SVTYPE=" + alt[1:4]
                    info += ";pytorRD=" + str(call[7])
                    info += ";pytorP1=" + str(call[8])
                    info += ";pytorP2=" + str(call[9])
                    info += ";pytorP3=" + str(call[10])
                    info += ";pytorP4=" + str(call[11])
                    info += ";pytorQ0=" + str(call[12])
                    info += ";pytorPN=" + str(call[13])
                    info += ";pytorDG=" + str(call[14])
                    info += ";pytorCL=" + call[1]
                    format = "GT:CN"
                    row = [call[3], int(call[4]), id, ".", alt, ".", "PASS", info, format]
                    for sample in samples:
                        if sample == call[0]:
                            if call[2] == "deletion" and call[7] < 0.25:
                                row.append("1/1:0")
                            elif call[2] == "deletion" and call[7] > 0.25:
                                row.append("0/1:0")
                            elif call[2] == "duplication" and call[7] <= 1.75:
                                row.append("0/1:2")
                            elif call[2] == "duplication" and call[7] > 1.75 and call[7] <= 2.25:
                                row.append("1/1:2")
                            elif call[2] == "duplication" and call[7] > 2.25:
                                row.append("./1:%.2f" % call[7])
                            else:
                                row.append("./.:.")
                        else:
                            row.append("./.:.")
                    print(*row, sep="\t", file=f)
        if self.plot:
            for call in calls:
                plot_start = call[4] - call[6]
                if plot_start < 0:
                    plot_start = 0
                plot_end = call[5] + call[6]
                self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])

    def print_calls(self):
        """ Prints calls from all plot files that pass the current filters to stdout.

        For each caller in ``callers`` ("rd_mean_shift" and "combined_mosaic" are
        recognised, others are ignored), each plot file and each RD chromosome
        (restricted to ``chrom`` if that list is not empty), stored calls are read,
        filtered by size, p-value, pN and Q0 ranges ("rd_mean_shift" calls also by
        dG range) and printed one per line as tab separated values.

        Each line is prefixed by the file title if more than one file is plotted and
        by the caller name if more than one caller is used. If ``annotate`` is set,
        annotation of the call region is appended. If ``plot`` is set, each call is
        plotted right after it is printed, together with flanking regions of the
        call size on both sides.

        """
        bin_size = self.bin_size
        ix = self.plot_files
        n = len(ix)
        if self.annotate:
            annotator = Annotator(self.reference_genome)
        combined_keys = ["start", "end", "size", "cnv", "p_val", "lh_del", "lh_loh",
                         "lh_dup", "Q0", "pN", "pNS", "pP", "bins", "baf",
                         "rd_p_val", "baf_p_val", "segment", "hets", "homs"]
        combined_types = {-1: "deletion", 0: "cnnloh", 1: "duplication"}
        for caller in self.callers:
            if caller == "rd_mean_shift":
                signal = "calls"
            elif caller == "combined_mosaic":
                signal = "calls combined"
            else:
                continue
            for file_ix in ix:
                io = self.io[file_ix]
                for c in io.rd_chromosomes():
                    if len(self.chrom) != 0 and c not in self.chrom:
                        continue
                    flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                    if caller == "combined_mosaic":
                        flag |= (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                    if not io.signal_exists(c, bin_size, signal, flag):
                        continue
                    for call in io.read_calls(c, bin_size, signal, flag):
                        if not (in_interval(call["size"], self.size_range)
                                and in_interval(call["p_val"], self.p_range)
                                and in_interval(call["pN"], self.pN_range)
                                and in_interval(call["Q0"], self.Q0_range)):
                            continue
                        if caller == "rd_mean_shift" and not in_interval(call["dG"], self.dG_range):
                            continue
                        if n > 1:
                            # Title is looked up by index into self.io, not by position in plot_files.
                            print("%s\t" % self.file_title(file_ix), end="")
                        if len(self.callers) > 1:
                            print("%s\t" % caller, end="")
                        if caller == "rd_mean_shift":
                            call_type = "duplication" if call["type"] == 1 else "deletion"
                            print("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e\t%.4f\t%.4f\t%d\t" % (
                                call_type, c, call["start"], call["end"], call["size"], call["cnv"],
                                call["p_val"],
                                call["p_val_2"], call["p_val_3"], call["p_val_4"], call["Q0"], call["pN"],
                                call["dG"]), end="")
                        else:
                            data = [combined_types[call["type"]], c] + [call[k] for k in combined_keys]
                            for m in range(2):
                                data += call["models"][m]
                            print(("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e" + \
                                   "\t%.4f\t%.4f\t%.4f\t%.4f\t" + "%d\t%d\t%.4f\t%e\t%e\t%d\t%d\t%d\t" + \
                                   "CN%d/CN%d\t%e\t%.4f\t%d\tCN%d/CN%d\t%e\t%.4f") % tuple(data), end="")
                        # Finish the line: either with annotation or with a bare newline.
                        if self.annotate:
                            print("\t%s" % annotator.get_info("%s:%d-%d" % (c, call["start"], call["end"])))
                        else:
                            print()
                        if self.plot:
                            plot_start = call["start"] - call["size"]
                            if plot_start < 0:
                                plot_start = 0
                            plot_end = call["end"] + call["size"]
                            self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])

    def print_simple_joint_calls(self):

        bin_size = self.bin_size
        n = len(self.plot_files)
        if n == 0:
            return
        ix = self.plot_files
        format = self.print_filename.split(".")[-1]
        if format == "tsv":
            f = open(self.print_filename, 'w')
        elif format == "xlsx":
            import xlsxwriter
            if os.path.exists(self.print_filename):
                os.remove(self.print_filename)
            workbook = xlsxwriter.Workbook(self.print_filename)
            sheet = workbook.add_worksheet("merged_calls")
            header = ["TYPE", "REGION", "SIZE"]
            for i in range(n):
                header.append(self.file_title(ix[i]))
            if self.annotate:
                header.append("GENES")
            styleh = workbook.add_format({'bold': True, 'font_color': 'white'})
            styleh.set_pattern(1)  # This is optional when using a solid fill.
            styleh.set_bg_color('#555555')
            styleh2 = workbook.add_format({'bold': True, 'font_color': 'white'})
            styleh2.set_pattern(1)  # This is optional when using a solid fill.
            styleh2.set_bg_color('#555555')
            styleh2.set_rotation(75)
            style_r = workbook.add_format()
            style_r.set_pattern(1)  # This is optional when using a solid fill.
            style_r.set_bg_color('red')
            style_g = workbook.add_format()
            style_g.set_pattern(1)  # This is optional when using a solid fill.
            style_g.set_bg_color('green')
            style_size = workbook.add_format({'num_format': '#,##0'})
            style_cn = workbook.add_format({'num_format': '0'})
            style_cn_b = workbook.add_format({'num_format': '0', 'bold': True})
            sheet.set_column(0, 0, 10)
            sheet.set_column(1, 1, 22)
            sheet.set_column(2, 2, 10)
            if self.annotate:
                sheet.set_column(len(header) - 1, len(header) - 1, 100)

            for col, val in enumerate(header):
                if col > 2 and col < len(header) - int(self.annotate):
                    sheet.write(0, col, val, styleh2)
                else:
                    sheet.write(0, col, val, styleh)
            ri = 0
        if self.annotate:
            annotator = Annotator(self.reference_genome)
        chroms = self.io[ix[0]].rd_chromosomes()
        for c in chroms:
            if (c in self.chrom) or len(self.chrom) == 0:
                flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                calls = [list(filter(lambda call: in_interval(call["size"], self.size_range) \
                                                  and in_interval(call["p_val"], self.p_range) \
                                                  and in_interval(call["pN"], self.pN_range) \
                                                  and in_interval(call["Q0"], self.Q0_range) \
                                                  and in_interval(call["dG"], self.dG_range),
                                     self.io[ix[i]].read_calls(c, bin_size, "calls", flag))) for i in range(n)]
                pointers = [0] * n
                while any([pointers[i] < len(calls[i]) for i in range(n)]):
                    starts = [calls[i][pointers[i]]["start"] if pointers[i] < len(calls[i]) else np.inf for i in
                              range(n)]
                    mini = starts.index(min(starts))
                    maxend = 0
                    toupdate = []
                    minend = calls[mini][pointers[mini]]["end"]
                    maxstart = 0
                    files = []
                    types = []
                    cns = []
                    for i in range(n):
                        if (pointers[i] < len(calls[i])) and ((min(calls[i][pointers[i]]["end"],
                                                                   calls[mini][pointers[mini]]["end"]) -
                                                               calls[i][pointers[i]]["start"]) > (
                                                                      0.5 * calls[mini][pointers[mini]]["size"])) \
                                and ((min(calls[i][pointers[i]]["end"],
                                          calls[mini][pointers[mini]]["end"]) -
                                      calls[i][pointers[i]]["start"]) > (
                                             0.5 * (calls[i][pointers[i]]["end"] - calls[i][pointers[i]]["start"]))):
                            toupdate.append(i)
                            call = calls[i][pointers[i]]
                            if call["end"] > maxend:
                                maxend = call["end"]
                            if call["end"] < minend:
                                minend = call["end"]
                            if call["start"] > maxstart:
                                maxstart = call["start"]
                            type = "duplication" if call["type"] == 1 else "deletion"
                            types.append(type)
                            files.append(i)
                            cns.append(int(call["cnv"] * 2))
                    type = max(set(types), key=types.count)
                    data = [type, c, maxstart, minend, minend - maxstart + 1]
                    genotypes = [
                        self.genotype([bin_size], "%s:%d-%d" % (c, maxstart, minend), file_index=ix[i], p_val=True)[0]
                        for i
                        in range(n)]
                    copynumbers = [c[3] for c in genotypes]
                    if np.all([np.abs(c - np.round(c)) < 0.25 for c in copynumbers]) or True:
                        if self.print_filename == "":
                            print(("%s\t%s:%d-%d\t%d" + n * "\t%.2f") % tuple(data + copynumbers), end="")
                            print("\t%s" % str(files), end="")
                            if self.annotate:
                                print("\t%s" % annotator.get_info("%s:%d-%d" % (c, maxstart, minend)))
                            else:
                                print()
                        elif format == "tsv":
                            print(("%s\t%s:%d-%d\t%d" + n * "\t%.2f") % tuple(data + copynumbers), end="", file=f)
                            print("\t%s" % str(files), end="", file=f)
                            if self.annotate:
                                print("\t%s" % annotator.get_info("%s:%d-%d" % (c, maxstart, minend)), file=f)
                            else:
                                print(file=f)
                        elif format == "xlsx":
                            ri += 1
                            if type == "deletion":
                                sheet.write(ri, 0, data[0], style_r)
                            else:
                                sheet.write(ri, 0, data[0], style_g)
                            sheet.write(ri, 1, "%s:%d-%d" % (c, maxstart, minend))
                            sheet.write(ri, 2, data[4], style_size)
                            for col, val in enumerate(copynumbers):
                                if col in files:
                                    sheet.write(ri, 3 + col, val, style_cn_b)
                                else:
                                    sheet.write(ri, 3 + col, val, style_cn)
                            if self.annotate:
                                sheet.write(ri, 3 + len(copynumbers),
                                            annotator.get_info("%s:%d-%d" % (c, maxstart, minend)))

                        if self.plot:
                            plot_start = maxstart - (minend - maxstart)
                            if plot_start < 0:
                                plot_start = 0
                            plot_end = minend + (minend - maxstart)
                            self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])
                    for i in toupdate:
                        pointers[i] += 1
        if format == "tsv":
            f.close()
        elif format == "xlsx":
            sheet.conditional_format(1, 3, ri, len(header) - int(self.annotate), {'type': '3_color_scale',
                                                                                  'min_color': "#FF0000",
                                                                                  'mid_color': "#FFFFFF",
                                                                                  'max_color': "#00FF00",
                                                                                  'min_type': 'num',
                                                                                  'min_value': 0,
                                                                                  'mid_type': 'num',
                                                                                  'mid_value': 2,
                                                                                  'max_type': 'num',
                                                                                  'max_value': 4
                                                                                  })
            workbook.close()

    def manhattan(self, plot_type="rd"):
        """ Plots genome-wide (manhattan style) overview, one panel per file listed in plot_files.

        Autosomes and sex chromosomes from the reference genome are placed side by side along
        the x axis (in units of bins). If self.chrom is not empty, only listed chromosomes are used.
        Nothing is plotted (warning is logged) if reference genome is not set.

        Parameters
        ----------
        plot_type : str
            Type of the plot:
                "rd" - GC corrected RD signal, with calls overlaid if rd_manhattan_call is set,
                "baf_mosaic" - BAF value of segments stored as "SNP likelihood segments",
                "rd_mean_shift" - copy number (2 * cnv, capped at 4) of calls stored as "calls",
                "combined_mosaic" - models[0][4] value of calls stored as "calls combined".
            For any other value panels are left empty.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for manhattan.")
            return
        n = len(self.plot_files)

        def p_val_to_alpha(p_val):
            # Marker opacity from p-value: -log(p) scaled by contrast and clamped to [0, 1].
            # Both bounds are enforced because RGBA components outside of [0, 1] are not valid colors.
            alpha = -np.log(p_val + 1e-40) / self.contrast
            if alpha > 1:
                alpha = 1
            if alpha < 0:
                alpha = 0
            return alpha

        def call_color(call_type, alpha):
            # Green for type 1, red for type -1, blue for anything else.
            if call_type == 1:
                return (0, 1, 0, alpha)
            elif call_type == -1:
                return (1, 0, 0, alpha)
            return (0, 0, 1, alpha)

        def finish_axes(ax, xticks, yticks, ylim, n_bins):
            # Common panel decoration: unlabeled ticks, chromosome borders as x ticks, grid on.
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            ax.yaxis.set_ticks(yticks, minor=[])
            ax.xaxis.set_ticks(xticks, minor=[])
            ax.set_ylim(ylim)
            ax.set_xlim([0, n_bins])
            ax.grid()

        self.new_figure(panel_count=n, grid=(1, n), panel_size=(24, 2))
        for file_ix in self.plot_files:
            ax = self.next_panel()
            io = self.io[file_ix]
            ax.set_title(self.file_title(file_ix), position=(0.01, 1.01),
                         fontdict={'verticalalignment': 'bottom', 'horizontalalignment': 'left'})

            if plot_type == "rd":
                # Chromosomes having both raw and GC corrected RD signal.
                chroms = []
                for c, (l, _) in self.reference_genome["chromosomes"].items():
                    rd_chr = io.rd_chromosome_name(c)
                    if len(self.chrom) == 0 or (rd_chr in self.chrom) or (c in self.chrom):
                        if io.signal_exists(rd_chr, bin_size, "RD", 0) and \
                                io.signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and \
                                (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                            chroms.append((rd_chr, l))

                # apos is x offset (in bins) of the chromosome that is currently plotted
                apos = 0
                xticks = [0]

                flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0)
                max_m, _ = io.rd_normal_level(bin_size, FLAG_GC_CORR)
                for c, l in chroms:
                    his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                    his_p_corr = io.get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
                    if self.rd_manhattan_call:
                        his_p_call = io.get_signal(c, bin_size, "RD call", flag_rd | FLAG_GC_CORR)
                        his_p_mosaic_seg = io.get_signal(c, bin_size, "RD mosaic segments",
                                                         flag_rd | FLAG_GC_CORR)
                        his_p_mosaic_seg = segments_decode(his_p_mosaic_seg)
                        his_p_mosaic_call = io.get_signal(c, bin_size, "RD mosaic call",
                                                          flag_rd | FLAG_GC_CORR)
                        his_p_mosaic_seg_2d = io.get_signal(c, bin_size, "RD mosaic segments 2d",
                                                            flag_rd | FLAG_GC_CORR)
                        his_p_mosaic_seg_2d = segments_decode(his_p_mosaic_seg_2d)
                        his_p_mosaic_call_2d = io.get_signal(c, bin_size, "RD mosaic call 2d",
                                                             flag_rd | FLAG_GC_CORR)
                        # Expand per-segment levels into per-bin arrays; bins outside segments stay NaN
                        his_p_mosaic = np.zeros_like(his_p) * np.nan
                        if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and (
                                "rd_mosaic" in self.callers):
                            for seg, lev in zip(list(his_p_mosaic_seg), list(his_p_mosaic_call[0])):
                                for segi in seg:
                                    his_p_mosaic[segi] = lev
                        his_p_mosaic_2d = np.zeros_like(his_p) * np.nan
                        if his_p_mosaic_call_2d is not None and len(
                                his_p_mosaic_call_2d) > 0 and ("combined_mosaic" in self.callers):
                            for seg, lev in zip(list(his_p_mosaic_seg_2d), list(his_p_mosaic_call_2d[0])):
                                for segi in seg:
                                    his_p_mosaic_2d[segi] = lev
                    pos = range(apos, apos + len(his_p))
                    ax.text(apos + len(his_p) // 2, max_m // 10, Genome.canonical_chrom_name(c),
                            fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                    if self.markersize == "auto":
                        plt.plot(pos, his_p_corr, ls='', marker='.')
                    else:
                        plt.plot(pos, his_p_corr, ls='', marker='.', markersize=self.markersize)
                    if self.rd_manhattan_call:
                        if his_p_call is not None and len(his_p_call) > 0 and ("rd_mean_shift" in self.callers):
                            plt.step(pos, his_p_call, "r")
                        if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and (
                                "rd_mosaic" in self.callers):
                            plt.plot(pos, his_p_mosaic, "k")
                        if his_p_mosaic_call_2d is not None and len(
                                his_p_mosaic_call_2d) > 0 and ("combined_mosaic" in self.callers):
                            plt.plot(pos, his_p_mosaic_2d, "k")
                    apos += len(his_p)
                    xticks.append(apos)
                finish_axes(ax, xticks, np.arange(0, 15, 0.5) * max_m,
                            [self.rd_manhattan_range[0] * max_m, self.rd_manhattan_range[1] * max_m], apos)

            elif plot_type == "baf_mosaic":
                chroms = []
                snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                    FLAG_USEHAP if self.snp_use_phase else 0)
                for c, (l, _) in self.reference_genome["chromosomes"].items():
                    snp_chr = io.snp_chromosome_name(c)
                    if len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom):
                        if io.signal_exists(snp_chr, bin_size, "SNP likelihood call", snp_flag) and \
                                io.signal_exists(snp_chr, bin_size, "SNP likelihood segments", snp_flag) and \
                                (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                            chroms.append((snp_chr, l))

                apos = 0
                xticks = [0]

                # Chromosomes are colored by cycling through the default matplotlib color cycle
                cmap = list(map(colors.to_rgba, plt.rcParams['axes.prop_cycle'].by_key()['color']))
                for cix, (c, l) in enumerate(chroms):
                    likelihood = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                    segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))
                    call_pos = []
                    call_baf = []
                    call_c = []
                    for s, lh in zip(segments, likelihood):
                        b, p = likelihood_baf_pval(lh)
                        if b > 0 and len(s) > self.min_segment_size:
                            base = cmap[cix % len(cmap)]
                            color = (base[0], base[1], base[2], p_val_to_alpha(p))
                            for pos in s:
                                call_pos.append(apos + pos)
                                call_baf.append(b)
                                call_c.append(color)

                    ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                            fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                    plt.scatter(call_pos, call_baf, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                    apos += l // bin_size
                    xticks.append(apos)

                finish_axes(ax, xticks, np.arange(0, 0.5, 0.1), [0, 0.5], apos)

            elif plot_type == "rd_mean_shift":
                chroms = []
                flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

                for c, (l, _) in self.reference_genome["chromosomes"].items():
                    rd_chr = io.rd_chromosome_name(c)
                    # Parentheses are required: "and" binds tighter than "or", so without them
                    # the None check would be skipped whenever chromosome filter is set.
                    if rd_chr is not None and (
                            len(self.chrom) == 0 or (rd_chr in self.chrom) or (c in self.chrom)):
                        if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                            chroms.append((rd_chr, l))

                apos = 0
                xticks = [0]

                for c, l in chroms:
                    call_pos = []
                    call_conc = []
                    call_c = []
                    # Chromosomes without stored calls are skipped completely (no label, no x range)
                    if io.signal_exists(c, bin_size, "calls", flag):
                        calls = io.read_calls(c, bin_size, "calls", flag)

                        for call in calls:
                            if in_interval(call["size"], self.size_range) and in_interval(call["p_val"], self.p_range) \
                                    and in_interval(call["pN"], self.pN_range) \
                                    and in_interval(call["Q0"], self.Q0_range):
                                color = call_color(call["type"], p_val_to_alpha(call["p_val"]))
                                # Copy number level, capped at the top of y axis
                                level = call["cnv"] * 2
                                if level > 4:
                                    level = 4
                                for pos in range(int(call["start"]) // bin_size, int(call["end"]) // bin_size + 1):
                                    call_pos.append(apos + pos)
                                    call_conc.append(level)
                                    call_c.append(color)
                        ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                                fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                        plt.scatter(call_pos, call_conc, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                        apos += l // bin_size
                        xticks.append(apos)

                finish_axes(ax, xticks, np.arange(0, 4.0, 1.0), [0, 4.0], apos)

            elif plot_type == "combined_mosaic":
                chroms = []
                flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                    FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

                for c, (l, _) in self.reference_genome["chromosomes"].items():
                    snp_chr = io.snp_chromosome_name(c)
                    # Parentheses are required here for the same operator precedence reason as for RD calls.
                    if snp_chr is not None and (
                            len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom)):
                        if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                            chroms.append((snp_chr, l))

                apos = 0
                xticks = [0]

                for c, l in chroms:
                    call_pos = []
                    call_conc = []
                    call_c = []
                    if io.signal_exists(c, bin_size, "calls combined", flag):
                        calls = io.read_calls(c, bin_size, "calls combined", flag)

                        for call in calls:
                            if call["bins"] > self.min_segment_size:
                                color = call_color(call["type"], p_val_to_alpha(call["p_val"]))
                                for pos in range(int(call["start"]) // bin_size, int(call["end"]) // bin_size + 1):
                                    call_pos.append(apos + pos)
                                    call_conc.append(call["models"][0][4])
                                    call_c.append(color)

                        ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                                fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                        plt.scatter(call_pos, call_conc, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                        apos += l // bin_size
                        xticks.append(apos)

                finish_axes(ax, xticks, np.arange(0, 1.0, 0.1), [0, 1.0], apos)

        self.fig_show(suffix="manhattan" if plot_type == "rd" else "snp_calls")

    def callmap(self, color="frequency", background="white", pixel_size=1700000, max_p_val=1e-20, min_freq=0.01,
                plot="cmap"):
        """ Builds RGB map of calls stored as "calls combined" and plots it or correlations derived from it.

        Map has one row per file listed in plot_files. Chromosomes from the reference genome
        (longer than 10 bins, optionally restricted to self.chrom) are placed side by side
        along columns, each pixel covering pixel_size base pairs.

        Parameters
        ----------
        color : str
            Pixel coloring scheme:
                "frequency" - maximal models[0][4] value over calls touching the pixel,
                "coverage" - bin_size / pixel_size is added for each called bin within the pixel,
                anything else - color is chosen by model copy number (models[0][0]).
        background : str
            If "white", map is recolored so that pixels without calls become white.
        pixel_size : int
            Number of base pairs per pixel.
        max_p_val : float
            Only calls with p-value smaller than this value are used.
        min_freq : float
            Only calls with models[0][4] value larger than this value are used.
        plot : str
            "cmap" - plots the map, "regions" - plots correlation between pixels (all three color
            channels concatenated), anything else - plots correlation between files and dendrogram.

        Returns
        -------
        cmap : numpy.ndarray
            Integer array of shape (number of files, number of pixels, 3) with map values multiplied by 255.
            None is returned if reference genome is not set.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for callmap.")
            return
        n = len(self.plot_files)

        # NOTE(review): every plot branch below opens its own figure, so this call looks redundant,
        # and the final branch plots even when plot is falsy - kept as is, confirm the intent.
        if plot:
            self.new_figure(panel_count=n, grid=(1, 1), panel_size=(24, 0.24 * n))

        # First and last pixel (column) index for each plotted chromosome
        chroms = []
        starts = []
        ends = []
        pixels = 0
        for c, (l, _) in self.reference_genome["chromosomes"].items():
            if l > 10 * bin_size:
                if len(self.chrom) == 0 or (c in self.chrom) or (self.io[0].snp_chromosome_name(c) in self.chrom):
                    chroms.append(c)
                    starts.append(pixels)
                    pixels += l // pixel_size + 1
                    ends.append(pixels - 1)

        cmap = np.zeros((n, pixels, 3))
        # Last pixel of each chromosome is used as a border marker
        cmap[:, ends, :] = 1

        flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
        flag_rd = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        for i, file_ix in enumerate(self.plot_files):
            io = self.io[file_ix]
            print(io.filename)
            for c, start in zip(chroms, starts):
                snp_chr = io.snp_chromosome_name(c)
                if io.signal_exists(snp_chr, bin_size, "calls combined", flag):

                    calls = io.read_calls(snp_chr, bin_size, "calls combined", flag)
                    segments = io.get_signal(snp_chr, bin_size, "RD mosaic segments 2d", flag_rd)
                    segments = segments_decode(segments)

                    for call in calls:
                        if call["bins"] > self.min_segment_size and call["p_val"] < max_p_val and \
                                "segment" in call and call["models"][0][4] > min_freq:
                            # Color channel by call type: -1 -> red, 0 -> green, 1 -> blue
                            cix = int(call["type"]) + 1
                            for b in segments[int(call["segment"])]:
                                px = start + b * bin_size // pixel_size
                                if color == "frequency":
                                    cmap[i, px, cix] = max(cmap[i, px, cix], call["models"][0][4])
                                elif color == "coverage":
                                    cmap[i, px, cix] += bin_size / pixel_size
                                else:  # model copy number
                                    cn = call["models"][0][0]
                                    if cn == 0:
                                        cmap[i, px, 0] = 1
                                    elif cn == 1:
                                        cmap[i, px, 0] = 1
                                        cmap[i, px, 1] = 1
                                    elif cn == 2:
                                        cmap[i, px, 2] = 1
                                    else:
                                        if cn > 6:
                                            cn = 6
                                        cmap[i, px, 1] = (2 + cn) / 8

        def b2w(pixel):
            # Recolors single pixel in place from black to white background: border markers (all ones)
            # become black, otherwise the dominant channel is set to 1 and the other two to 1 - dominant.
            if np.all(pixel == 1):
                pixel[:] = 0
            elif pixel[0] > pixel[1] and pixel[0] > pixel[2]:
                pixel[1] = pixel[2] = 1 - pixel[0]
                pixel[0] = 1
            elif pixel[1] > pixel[2]:
                pixel[0] = pixel[2] = 1 - pixel[1]
                pixel[1] = 1
            else:
                pixel[0] = pixel[1] = 1 - pixel[2]
                pixel[2] = 1
            return pixel

        if background == "white":
            # b2w modifies pixels in place, returned array is not needed
            cmap = cmap.reshape(n * pixels, 3)
            np.apply_along_axis(b2w, 1, cmap)
            cmap = cmap.reshape(n, pixels, 3)

        cmap = (255 * cmap).astype("int")
        if plot == "cmap":
            self.new_figure(panel_count=1, grid=(1, 1), panel_size=(24, 0.24 * n))
            ax = self.next_panel()
            plt.imshow(cmap, aspect='auto')
            for i in ends[:-1]:
                plt.axvline(x=i - 0.5, color='red', linewidth=0.5)
            ax.set_yticks([])
            ax.set_yticklabels([])
            ax.set_xticks((np.array(starts) + np.array(ends)) / 2)
            chroms = list(map(Genome.canonical_chrom_name, chroms))
            ax.set_xticklabels(chroms)
            self.fig_show(suffix="callmap")
        elif plot == "regions":
            self.new_figure(panel_count=1, grid=(1, 1), panel_size=(24, 24))
            ax = self.next_panel()
            # Rows of the stacked array are pixels: red channel first, then green, then blue
            corr = np.corrcoef(
                np.concatenate((cmap[:, :, 0].transpose(), cmap[:, :, 1].transpose(), cmap[:, :, 2].transpose()),
                               axis=0))
            plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
            plt.colorbar()
            # Each channel occupies exactly `pixels` rows, so that is the offset between channels
            # (ends[-1] is pixels - 1 and would shift ticks of the second and third channel).
            starts3 = np.concatenate((np.array(starts), np.array(starts) + pixels, np.array(starts) + 2 * pixels))
            ends3 = np.concatenate((np.array(ends), np.array(ends) + pixels, np.array(ends) + 2 * pixels))
            for i in ends3[:-1]:
                plt.axvline(x=i - 0.5, color='red', linewidth=0.5)
                plt.axhline(y=i - 0.5, color='red', linewidth=0.5)

            ax.set_xticks((starts3 + ends3) / 2)
            ax.set_yticks((starts3 + ends3) / 2)
            chroms = list(map(Genome.canonical_chrom_name, chroms))
            ax.set_xticklabels(chroms + chroms + chroms)
            ax.set_yticklabels(chroms + chroms + chroms)
            self.fig_show(suffix="callmap")
        else:
            self.new_figure(panel_count=2, panel_size=(12, 12))
            ax = self.next_panel()
            # One row per file: all three color channels side by side
            x = np.concatenate((cmap[:, :, 0], cmap[:, :, 1], cmap[:, :, 2]),
                               axis=1)
            corr = np.corrcoef(x)
            plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
            plt.colorbar()
            ax = plt.gca()

            ax.set_xticks(range(n))
            ax.set_yticks(range(n))
            ax = self.next_panel()
            Z = hierarchy.linkage(x, 'average', 'correlation')
            hierarchy.dendrogram(Z)

            self.fig_show(suffix="callmap")
        return cmap

    def multiple_regions(self, regions):
        """ Plots given regions for each file listed in plot_files, one panel per (file, region) pair.

        Panels are created file by file: all regions for the first file, then all for the second, etc.

        Parameters
        ----------
        regions : list of str
            List of region strings; each is passed unchanged to self.regions.

        """
        n = len(self.plot_files) * len(regions)
        self.new_figure(panel_count=n)
        for file_ix in self.plot_files:
            for region in regions:
                self.regions(file_ix, region)
        self.fig_show(suffix="regions")

    def regions(self, ix, region):
        panels = self.panels
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        self.new_subgrid(len(panels), hspace=0.05, wspace=0.1)
        r = decode_region(region, max_size=1000000000)
        io = self.io[ix]
        for i in range(len(panels)):
            ax = self.next_subpanel(sharex=True)
            if i == 0 and self.title:
                ax.set_title(self.file_title(ix) + ": " + region, position=(0.01, 0.9),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                             color='C0')

            if panels[i] == "rd":
                g_p = [0]
                g_p_corr = [0]
                g_p_seg = [0]
                g_p_call = [0]
                g_p_call_mosaic = [0]
                g_p_call_mosaic_2d = [0]
                mean, stdev = 0, 0
                borders = []
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    flag_rd = 0
                    if self.rd_use_mask:
                        flag_rd = FLAG_USEMASK
                    mean, stdev = io.rd_normal_level(bin_size, flag_rd | FLAG_GC_CORR)
                    his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                    his_p_corr = io.get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
                    his_p_seg = io.get_signal(c, bin_size, "RD partition", flag_rd | FLAG_GC_CORR)
                    his_p_call = io.get_signal(c, bin_size, "RD call", flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg = io.get_signal(c, bin_size, "RD mosaic segments",
                                                     flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg = segments_decode(his_p_mosaic_seg)
                    his_p_mosaic_call = io.get_signal(c, bin_size, "RD mosaic call",
                                                      flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg_2d = io.get_signal(c, bin_size, "RD mosaic segments 2d",
                                                        flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg_2d = segments_decode(his_p_mosaic_seg_2d)
                    his_p_mosaic_call_2d = io.get_signal(c, bin_size, "RD mosaic call 2d",
                                                         flag_rd | FLAG_GC_CORR)
                    his_p_mosaic = np.zeros_like(his_p) * np.nan
                    if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and ("rd_mosaic" in self.callers):
                        for seg, lev in zip(list(his_p_mosaic_seg), list(his_p_mosaic_call[0])):
                            for segi in seg:
                                his_p_mosaic[segi] = lev
                    his_p_mosaic_2d = np.zeros_like(his_p) * np.nan
                    if his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and (
                            "combined_mosaic" in self.callers):
                        for seg, lev in zip(list(his_p_mosaic_seg_2d), list(his_p_mosaic_call_2d[0])):
                            for segi in seg:
                                his_p_mosaic_2d[segi] = lev
                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    bins = len(list(his_p[start_bin:end_bin]))
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:bins])

                    g_p.extend(list(his_p[start_bin:end_bin]))
                    g_p_corr.extend(list(his_p_corr[start_bin:end_bin]))
                    if his_p_seg is not None and len(his_p_seg) > 0 and self.rd_partition:
                        g_p_seg.extend(list(his_p_seg[start_bin:end_bin]))
                    if his_p_call is not None and len(his_p_call) > 0 and self.rd_call and (
                            "rd_mean_shift" in self.callers):
                        g_p_call.extend(list(his_p_call[start_bin:end_bin]))
                    if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and self.rd_call and (
                            "rd_mosaic" in self.callers):
                        g_p_call_mosaic.extend(list(his_p_mosaic[start_bin:end_bin]))
                    if his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and self.rd_call and (
                            "combined_mosaic" in self.callers):
                        g_p_call_mosaic_2d.extend(list(his_p_mosaic_2d[start_bin:end_bin]))
                    borders.append(len(g_p) - 1)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(g_p)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)

                if (self.rd_range[1] - self.rd_range[0]) < 30:
                    ax.yaxis.set_ticks(np.arange(int(self.rd_range[0]), int(self.rd_range[1] + 1), 1) * mean / 2,
                                       minor=[])
                    ax.yaxis.set_ticklabels([str(i) for i in range(int(self.rd_range[0]), int(self.rd_range[1] + 1))])
                ax.set_ylim([self.rd_range[0] * mean / 2, self.rd_range[1] * mean / 2])
                ax.set_ylabel("Read depth")
                ax.yaxis.grid()

                if self.rd_raw:
                    ax.step(g_p, self.rd_colors[0], label="raw")
                if self.rd_corrected:
                    ax.step(g_p_corr, self.rd_colors[1], label="GC corrected")
                if len(g_p_seg) > 1:
                    plt.step(g_p_seg, self.rd_colors[2], label="partitioning")
                if len(g_p_call) > 1:
                    plt.step(g_p_call, self.rd_colors[3], label="cnv calls")
                if len(g_p_call_mosaic) > 1:
                    plt.step(g_p_call_mosaic, self.rd_colors[4], label="mosaic cnv calls")
                if len(g_p_call_mosaic_2d) > 1:
                    plt.step(g_p_call_mosaic_2d, self.rd_colors[5], label="combined cnv calls")
                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                if self.legend:
                    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=2)
                self.fig.add_subplot(ax)

            elif panels[i] == "snp":
                borders = []
                hpos = []
                baf = []
                color = []
                alpha = 0.7
                start_pos = 0
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c)
                    ix = 0
                    mdp = 0
                    while ix < len(pos) and pos[ix] <= pos2:
                        if pos[ix] >= pos1 and (nref[ix] + nalt[ix]) != 0 and ((not self.snp_use_id) or (flag[ix] & 1)):
                            hpos.append((start_pos + pos[ix] - pos1) / bin_size)
                            if pos[ix] - pos1 > mdp:
                                mdp = pos[ix] - pos1
                            if gt[ix] % 4 != 2:
                                baf.append(1.0 * nalt[ix] / (nref[ix] + nalt[ix]))
                            else:
                                baf.append(1.0 * nref[ix] / (nref[ix] + nalt[ix]))
                            if self.snp_alpha_P:
                                alpha = None
                                color.append(
                                    colors.to_rgba(self.snp_colors[(gt[ix] % 4) * 2 + 1], (flag[ix] >> 1) * 0.4))
                            else:
                                color.append(self.snp_colors[(gt[ix] % 4) * 2 + (flag[ix] >> 1)])
                        ix += 1
                    start_pos += pos2 - pos1
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size))
                    borders.append(start_pos / bin_size)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(pos_x)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)

                # ax.xaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
                ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                ax.set_ylabel("Allele frequency")
                l = max(hpos)
                ax.set_ylim([-0.05, 1.05])
                # ax.set_xlim([0, borders[-1]])
                ax.yaxis.grid()
                if self.markersize == "auto":
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=10, alpha=alpha)
                else:
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=self.markersize, alpha=alpha)

                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "snv" or panels[i][:4] == "snv:":
                callset = "default"
                if panels[i][:4] == "snv:":
                    callset = panels[i].split(":")[1]
                borders = []
                hpos = []
                baf = []
                color = []
                alpha = 0.7
                start_pos = 0
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
                    ix = 0
                    mdp = 0
                    while ix < len(pos) and pos[ix] <= pos2:
                        if pos[ix] >= pos1 and (nref[ix] + nalt[ix]) != 0:
                            hpos.append((start_pos + pos[ix] - pos1) / bin_size)
                            if pos[ix] - pos1 > mdp:
                                mdp = pos[ix] - pos1
                            if gt[ix] % 4 != 2:
                                baf.append(1.0 * nalt[ix] / (nref[ix] + nalt[ix]))
                            else:
                                baf.append(1.0 * nref[ix] / (nref[ix] + nalt[ix]))
                            if self.snp_alpha_P:
                                alpha = None
                                color.append(
                                    colors.to_rgba(self.snp_colors[(gt[ix] % 4) * 2 + 1], (flag[ix] >> 1) * 0.4))
                            else:
                                color.append(self.snp_colors[(gt[ix] % 4) * 2 + (flag[ix] >> 1)])
                        ix += 1
                    start_pos += pos2 - pos1
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size))
                    borders.append(start_pos / bin_size)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(pos_x)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)
                ax.xaxis.grid()
                ax.yaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
                ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                ax.set_ylabel("Allele frequency")
                ax.set_ylim([0., 1.])
                ax.yaxis.grid()
                if self.markersize == "auto":
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=10, alpha=alpha)
                else:
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=self.markersize, alpha=alpha)

                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "baf":
                g_baf, g_maf, g_i1, g_i2 = [0], [0], [0], [0]
                borders = []
                pos_x = []

                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000

                    flag_snp = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                        FLAG_USEHAP if self.snp_use_phase else 0)
                    baf = io.get_signal(c, bin_size, "SNP baf", flag_snp)
                    maf = io.get_signal(c, bin_size, "SNP maf", flag_snp)
                    i1 = io.get_signal(c, bin_size, "SNP i1", flag_snp)
                    i2 = io.get_signal(c, bin_size, "SNP i2", flag_snp)

                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    bins = len(list(baf[start_bin:end_bin]))
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:bins])

                    g_baf.extend(list(baf[start_bin:end_bin]))
                    g_maf.extend(list(maf[start_bin:end_bin]))
                    g_i1.extend(list(i1[start_bin:end_bin]))
                    g_i2.extend(list(i2[start_bin:end_bin]))
                    borders.append(len(g_baf) - 1)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(g_baf)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()

                ax.yaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
                ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                ax.set_ylabel("Allele frequency")

                ax.set_ylim([0, 1])
                # ax.set_xlim([-l * 0.0, l * 1.0])

                ax.yaxis.grid()
                # ax.xaxis.grid()
                ax.step(g_baf, self.baf_colors[0], label="BAF")
                ax.step(g_maf, self.baf_colors[1], label="MAF")
                ax.step(g_i1, self.baf_colors[2], label="I1")
                if self.legend:
                    ax.legend()
                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "likelihood":
                borders = []
                gl = []
                call_pos = []
                call_i1 = []
                call_i2 = []
                call_c = []
                call_pos_2d = []
                call_i1_2d = []
                call_i2_2d = []
                call_c_2d = []
                tlen = 0
                tlen_2d = 0
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    likelihood = io.get_signal(c, bin_size, "SNP likelihood", snp_flag)
                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    bins = len(list(likelihood[start_bin:end_bin]))
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:bins])
                    gl.extend(list(likelihood[start_bin:end_bin]))
                    borders.append(len(gl) - 1)
                    if self.snp_call and ("baf_mosaic" in self.callers):
                        likelihood_call = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                        segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))

                        for s, lh in zip(segments, likelihood_call):
                            i1, i2, p = likelihood_pixels_pval(lh)
                            if i1 != i2 and len(s) > self.min_segment_size:
                                alpha = -np.log(p + 1e-40) / self.contrast
                                if alpha > 1:
                                    alpha = 1
                                for pos in s:
                                    if pos >= start_bin and pos < end_bin:
                                        call_pos.append(pos - start_bin + tlen)
                                        call_i1.append(min(i1, i2))
                                        call_i2.append(max(i1, i2))
                                        color = colors.to_rgb(self.lh_colors[0]) + (alpha,)
                                        call_c.append(color)
                        tlen += end_bin - start_bin
                    if self.snp_call and ("combined_mosaic" in self.callers):
                        likelihood_call = io.get_signal(c, bin_size, "SNP likelihood call 2d", snp_flag)
                        segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments 2d", snp_flag))

                        for s, lh in zip(segments, likelihood_call):
                            i1, i2, p = likelihood_pixels_pval(lh)
                            if i1 != i2 and len(s) > self.min_segment_size:
                                alpha = -np.log(p + 1e-40) / self.contrast
                                if alpha > 1:
                                    alpha = 1
                                for pos in s:
                                    if pos >= start_bin and pos < end_bin:
                                        call_pos_2d.append(pos - start_bin + tlen_2d)
                                        call_i1_2d.append(min(i1, i2))
                                        call_i2_2d.append(max(i1, i2))
                                        color = colors.to_rgb(self.lh_colors[1]) + (alpha,)
                                        call_c_2d.append(color)
                        tlen_2d += end_bin - start_bin

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                img = np.array(gl).transpose()
                l = img.shape[1]
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    # ax.xaxis.grid()
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)

                ax.imshow(img, aspect='auto')
                # ax.xaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, img.shape[0] / 4, img.shape[0] / 2, 3 * img.shape[0] / 4, img.shape[0] - 1],
                                   minor=[])
                ax.yaxis.set_ticklabels(["1", "3/4", "1/2", "1/4", "0"])
                ax.set_ylabel("Allele frequency")
                # ax.xaxis.set_ticks(np.arange(0, len(gl), 50), minor=[])
                # ax.set_xlim([-0.5, img.shape[1] - 0.5])
                if self.snp_call and ("baf_mosaic" in self.callers):
                    plt.scatter(call_pos, call_i1, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                                marker=self.lh_marker)
                    plt.scatter(call_pos, call_i2, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                                marker=self.lh_marker)
                if self.snp_call and ("combined_mosaic" in self.callers):
                    plt.scatter(call_pos_2d, call_i1_2d, s=self.lh_markersize, color=np.array(call_c_2d),
                                edgecolors='face', marker=self.lh_marker)
                    plt.scatter(call_pos_2d, call_i2_2d, s=self.lh_markersize, color=np.array(call_c_2d),
                                edgecolors='face', marker=self.lh_marker)

                for i in borders[:-1]:
                    ax.axvline(i + 0.5, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "CN":
                borders = []
                gh1 = []
                gh2 = []
                tlen = 0
                tlen_2d = 0
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000

                    his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    if end_bin > len(his_p):
                        end_bin = len(his_p)
                    h1 = np.array([0] * (end_bin - start_bin))
                    h2 = np.array([0] * (end_bin - start_bin))
                    h1[his_p != 0] = 1
                    h2[his_p != 0] = 1

                    flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                        FLAG_USEHAP if self.snp_use_phase else 0) | (
                               FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                    flag_rd = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
                    if io.signal_exists(c, bin_size, "calls combined", flag):
                        calls = io.read_calls(c, bin_size, "calls combined", flag)
                        segments = io.get_signal(c, bin_size, "RD mosaic segments 2d", flag_rd)
                        segments = segments_decode(segments)

                        for call in calls:
                            for b in segments[int(call["segment"])]:
                                if b < end_bin and b >= start_bin:
                                    h1[b - start_bin] = call["models"][0][1]
                                    h2[b - start_bin] = call["models"][0][2]
                    gh1.extend(list(h1))
                    gh2.extend(list(h2))
                    borders.append(len(gh1) - 1)
                x = range(len(gh1))
                plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)
                plt.stackplot(x, gh1, gh2, baseline='sym')

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(gh1)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()

                for i in borders[:-1]:
                    ax.axvline(i + 0.5, color="g", lw=1)
                self.fig.add_subplot(ax)

    def global_plot(self):
        """ Plots genome-wide panels (chromosomes placed side by side) for all selected files.

        For each file index in self.plot_files a sub-grid with one row per entry of self.panels is created.
        Recognized panel names are "rd", "snp", "snv", "snv:<callset>" and "likelihood"; for any other name
        the axes is created but nothing is drawn in it. Along the x axis each chromosome occupies
        ``length // bin_size + 1`` bins, major ticks mark chromosome borders and chromosome names are used
        as minor tick labels on the last panel.

        Chromosomes are taken from self.reference_genome: autosomes and sex chromosomes for which the first
        plotted file returns an RD chromosome name, restricted to self.chrom if that list is not empty.

        The finished figure is passed to self.fig_show with suffix "global".

        """
        chroms = []
        for c, (length, _t) in self.reference_genome["chromosomes"].items():
            rd_chr = self.io[self.plot_files[0]].rd_chromosome_name(c)
            if (len(self.chrom) == 0 or (rd_chr in self.chrom) or (c in self.chrom)) and rd_chr is not None:
                if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                    chroms.append((rd_chr, length))
        panels = self.panels
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        rd_flag = (FLAG_USEMASK if self.rd_use_mask else 0) | (FLAG_GC_CORR if self.rd_use_gc_corr else 0)
        self.new_figure(panel_count=len(self.plot_files))
        for file_ix in self.plot_files:
            self.new_subgrid(len(panels), hspace=0.05, wspace=0.05)
            io = self.io[file_ix]
            for i, panel in enumerate(panels):
                last_panel = i == (len(panels) - 1)
                ax = self.next_subpanel(sharex=True)
                if i == 0:
                    ax.set_title(self.file_title(file_ix), position=(0.01, 0.9),
                                 fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                                 color='C0')

                if panel == "rd":
                    # The normal RD level does not depend on the chromosome, so it is read once, before the
                    # loop. This also keeps `mean` defined when no chromosome is selected.
                    mean, _ = io.rd_normal_level(bin_size, rd_flag | FLAG_GC_CORR)
                    markersize = 1 if self.markersize == "auto" else self.markersize
                    start = 0
                    xticks = [0]
                    xticks_minor = []
                    xticks_labels = []
                    for c, length in chroms:
                        his_p = io.get_signal(c, bin_size, "RD", rd_flag)
                        pos = range(start, start + len(his_p))
                        plt.plot(pos, his_p, ls='', marker='.', markersize=markersize)
                        xticks_minor.append(start + len(his_p) // 2)
                        xticks_labels.append(Genome.canonical_chrom_name(c))
                        start += length // bin_size + 1
                        xticks.append(start)

                    ax.set_xlim([0, start])
                    ax.xaxis.set_ticks(xticks)
                    ax.xaxis.set_ticklabels([""] * len(xticks))
                    if last_panel:
                        ax.xaxis.set_ticks(xticks_minor, minor=True)
                        ax.xaxis.set_ticklabels(xticks_labels, minor=True)
                    else:
                        plt.setp(ax.get_xticklabels(which="both"), visible=False)
                    # Ticks are placed at multiples of half of the normal RD level and labeled with the
                    # corresponding copy number (twice the multiple).
                    yticks = np.arange(self.rd_manhattan_range[0], self.rd_manhattan_range[1], 0.5)
                    ax.yaxis.set_ticklabels([str(int(2 * y)) for y in yticks])
                    ax.yaxis.set_ticks(yticks * mean)
                    ax.set_ylabel("RD [CN]")
                    ax.set_ylim([self.rd_manhattan_range[0] * mean, self.rd_manhattan_range[1] * mean])
                    ax.grid()
                    self.fig.add_subplot(ax)

                elif panel == "snp" or panel == "snv" or panel[:4] == "snv:":
                    # "snp" reads SNPs without naming a callset; "snv" uses callset "default" and
                    # "snv:<name>" the callset given after the colon. Everything else is shared.
                    if panel == "snp":
                        read_kwargs = {}
                    elif panel[:4] == "snv:":
                        read_kwargs = {"callset": panel.split(":")[1]}
                    else:
                        read_kwargs = {"callset": "default"}
                    marker_area = 0.1 if self.markersize == "auto" else self.markersize
                    start = 0
                    xticks = []
                    xticks_minor = []
                    xticks_labels = []
                    for c, length in chroms:
                        pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, **read_kwargs)
                        hpos = []
                        color = []
                        alpha = 0.7
                        baf = []
                        for p, n_ref, n_alt, g, f in zip(pos, nref, nalt, gt, flag):
                            depth = n_ref + n_alt
                            # Skip variants without reads and, if snp_use_id is set, those without flag bit 0.
                            if depth == 0 or (self.snp_use_id and not (f & 1)):
                                continue
                            hpos.append(start + (p / bin_size))
                            if g % 4 != 2:
                                baf.append(1.0 * n_alt / depth)
                            else:
                                baf.append(1.0 * n_ref / depth)
                            if self.snp_alpha_P:
                                # Transparency is encoded per point in the RGBA color instead of globally.
                                alpha = None
                                color.append(
                                    colors.to_rgba(self.snp_colors[(g % 4) * 2 + 1], (f >> 1) * 0.4))
                            else:
                                color.append(self.snp_colors[(g % 4) * 2 + (f >> 1)])
                        ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=marker_area, alpha=alpha)
                        xticks_minor.append(start + length // bin_size // 2)
                        xticks_labels.append(Genome.canonical_chrom_name(c))
                        start += length // bin_size + 1
                        xticks.append(start)
                    ax.set_xlim([0, start])
                    ax.xaxis.set_ticks(xticks)
                    ax.xaxis.set_ticklabels([""] * len(xticks))
                    if last_panel:
                        ax.xaxis.set_ticks(xticks_minor, minor=True)
                        ax.xaxis.set_ticklabels(xticks_labels, minor=True)
                    else:
                        plt.setp(ax.get_xticklabels(minor=True), visible=False)
                    # NOTE(review): grid() without arguments toggles grid lines, so the yaxis.grid() call
                    # below appears to toggle the y grid once more after ax.grid() - confirm this is intended.
                    ax.grid()
                    ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0])
                    ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                    ax.set_ylabel("BAF")
                    ax.set_ylim([-0.05, 1.05])
                    ax.yaxis.grid()
                    self.fig.add_subplot(ax)

                elif panel == "likelihood":
                    start = 0
                    xticks = [0]
                    xticks_minor = []
                    xticks_labels = []
                    gl = []
                    for c, length in chroms:
                        lh = list(io.get_signal(c, bin_size, "SNP likelihood", snp_flag))
                        size = length // bin_size + 1
                        missing = size - len(lh)
                        if missing > 0:
                            # Pad to the full chromosome width by repeating the last available row: the one
                            # from this chromosome or, if it has none, the last row collected so far. If
                            # neither exists nothing is padded.
                            if len(lh) > 0:
                                lh.extend([lh[-1]] * missing)
                            elif len(gl) > 0:
                                lh.extend([gl[-1]] * missing)

                        gl.extend(lh)
                        xticks_minor.append(start + length // bin_size // 2)
                        xticks_labels.append(Genome.canonical_chrom_name(c))
                        start += length // bin_size + 1
                        xticks.append(start)

                    # Rows of the image are likelihood pixels, columns are bins; first and last pixel rows
                    # are blanked before drawing.
                    img = np.array(gl).transpose()
                    img[0, :] = 0
                    img[-1, :] = 0
                    ax.imshow(img, aspect='auto')
                    ax.yaxis.set_ticks([0, img.shape[0] / 4, img.shape[0] / 2, 3 * img.shape[0] / 4, img.shape[0] - 1],
                                       minor=[])
                    ax.yaxis.set_ticklabels(["1", "3/4", "1/2", "1/4", "0"])
                    ax.set_ylabel("BAF")
                    ax.set_xlim([0, start])
                    ax.xaxis.set_ticks(xticks)
                    ax.xaxis.set_ticklabels([""] * len(xticks))
                    if last_panel:
                        ax.xaxis.set_ticks(xticks_minor, minor=True)
                        ax.xaxis.set_ticklabels(xticks_labels, minor=True)
                    else:
                        plt.setp(ax.get_xticklabels(minor=True), visible=False)
                    ax.xaxis.grid()
                    self.fig.add_subplot(ax)

        self.fig_show(suffix="global")

    def circular(self):
        """ Plots circular (polar) overview of RD and SNP MAF signals for all selected files.

        One polar panel is created per file index in self.plot_files. Angle zero is at the top and
        chromosomes follow each other in the direction set by ``set_theta_direction(-1)``, each occupying
        ``length // bin_size + 1`` bins. Used are autosomes and sex chromosomes from self.reference_genome
        (restricted to self.chrom if not empty) for which both "RD" and "SNP maf" signals exist in the file.

        For each chromosome, drawn in its own color from the axes color cycle:
            * filled band between ``1 - maf / 2`` and 1 (MAF is set to 0 in bins with less than
              ``bin_size / 10000`` heterozygous SNPs),
            * scatter of RD at radius ``0.1 + 0.7 * rd / (rd_range[1] * rd_mean / 2)``.
        Thin black circles are added at radii ``0.1 + 0.7 * cn / rd_range[1]`` for integer cn below
        rd_range[1].

        If a file has no chromosome with both signals, a warning is logged and its panel is left empty.
        The finished figure is passed to self.fig_show with suffix "circular".

        """
        # Imported locally: ``np.warnings`` was only an accidental alias of this stdlib module and is not
        # available in recent NumPy releases.
        import warnings

        chroms = self.chrom
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        rd_flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        markersize = 5 if self.markersize == "auto" else self.markersize
        self.new_figure(panel_count=len(self.plot_files))
        for file_ix in self.plot_files:
            ax = self.next_polar_panel()
            ax.set_theta_zero_location("N")
            ax.set_theta_direction(-1)
            # Axes color cycle (private matplotlib attribute) - gives one color per chromosome.
            rainbow = ax._get_lines
            io = self.io[file_ix]
            plot_len = 0
            plot_chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                rd_chr = io.rd_chromosome_name(c)
                if rd_chr is not None and (len(chroms) == 0 or (rd_chr in chroms) or (c in chroms)) and (
                        Genome.is_autosome(c) or Genome.is_sex_chrom(c)
                ) and io.signal_exists(rd_chr, bin_size, "SNP maf", snp_flag) and io.signal_exists(
                    rd_chr, bin_size, "RD", rd_flag):
                    plot_chroms.append((rd_chr, l))
                    plot_len += l // bin_size + 1
            if plot_len == 0:
                # Without this guard the angular step below would be a division by zero.
                _logger.warning(
                    "Circular plot: no chromosome with both 'RD' and 'SNP maf' signals for bin size %s "
                    "in plot file %s. Skipping panel." % (bin_size, file_ix))
                continue
            rd_mean, _ = io.rd_normal_level(bin_size, rd_flag)
            tl = 0  # index of the first bin of the current chromosome on the circle
            dt = 2.0 * np.pi / plot_len
            theta = np.arange(0, 2.0 * np.pi, dt)
            angles = []
            labels = []
            # Same process-wide "ignore all warnings" filter the original np.warnings call installed.
            warnings.filterwarnings('ignore')
            for c, l in plot_chroms:
                next_color = rainbow.get_next_color()
                rd = io.get_signal(c, bin_size, "RD", rd_flag)
                maf = io.get_signal(c, bin_size, "SNP maf", snp_flag)
                c01 = io.get_signal(c, bin_size, "SNP bin count 0|1", snp_flag)
                c10 = io.get_signal(c, bin_size, "SNP bin count 1|0", snp_flag)
                hets = c01 + c10
                # Do not show MAF in bins with too few heterozygous SNPs.
                maf[hets < (bin_size / 10000)] = 0
                plt.polar(theta[tl:tl + maf.size], 1 - maf / 2, linewidth=0.3, color=next_color)
                plt.fill_between(theta[tl:tl + maf.size], 1 - maf / 2, np.ones_like(maf), alpha=1, color=next_color)
                ax.scatter(theta[tl:tl + rd.size], np.ones_like(rd) / 10. + 0.7 * rd / (self.rd_range[1] * rd_mean / 2),
                           s=markersize, alpha=0.7, color=next_color)

                labels.append(Genome.canonical_chrom_name(c))
                # Label is placed at the angle (in degrees) of the middle RD bin of the chromosome.
                angles.append(180 * theta[tl + rd.size // 2] / np.pi)
                tl += l // bin_size + 1
            # Reference circles for integer copy numbers below rd_range[1].
            for cn in range(int(self.rd_range[1])):
                plt.polar(theta, np.ones_like(theta) * (0.1 + 0.7 * (cn / self.rd_range[1])), color="k", linewidth=0.1)
            ax.set_rmax(1.0)
            ax.set_rticks([])
            ax.set_thetagrids(angles, labels=labels, fontsize=10, weight="bold", color="black")
            ax.set_title(self.file_title(file_ix), loc="left", fontsize=10, weight="bold", color="black")
            ax.grid(False)
        self.fig_show(suffix="circular")

    def rd_baf(self, hist=True):
        """ Plots RD level versus BAF shift for bins covered by both RD mosaic and BAF likelihood segments.

        One panel is drawn per plotted file. Chromosomes are taken from the reference genome (restricted by
        self.chrom if it is not empty) and are used only if they are autosomes and "SNP likelihood call",
        "SNP likelihood segments", "RD mosaic call" and "RD mosaic segments" signals exist for current bin size.
        Each bin present in both a BAF segment and an RD segment contributes one point:
        x = 0.5 - b (b is the first value returned by likelihood_baf_pval for the segment likelihood) and
        y = value of the segment taken from the first row of the "RD mosaic call" signal.

        Parameters
        ----------
        hist : bool
            If True points are shown as 2D histogram with logarithmic color scale, otherwise as scatter plot.

        """
        plt.clf()
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, figsize=(12, 8), facecolor='w', edgecolor='k')
        n = len(self.plot_files)
        ix = self.plot_files
        if self.grid == "auto":
            sx, sy = self._panels_shape(n)
        else:
            sx, sy = tuple(self.grid)
        grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)
        bin_size = self.bin_size
        # Flags depend only on viewer settings, so they are evaluated once for all files.
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        rd_flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        for i in range(n):
            ax = self.fig.add_subplot(grid[i])
            io = self.io[ix[i]]

            chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                snp_chr = io.snp_chromosome_name(c)
                if len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom):
                    if io.signal_exists(snp_chr, bin_size, "SNP likelihood call", snp_flag) and \
                            io.signal_exists(snp_chr, bin_size, "SNP likelihood segments", snp_flag) and \
                            io.signal_exists(snp_chr, bin_size, "RD mosaic call", rd_flag) and \
                            io.signal_exists(snp_chr, bin_size, "RD mosaic segments", rd_flag) and \
                            Genome.is_autosome(c):
                        chroms.append(snp_chr)
            x = []
            y = []
            for c in chroms:
                likelihood = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                segments_baf = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))
                rd = io.get_signal(c, bin_size, "RD mosaic call", rd_flag)
                segments_rd = segments_decode(io.get_signal(c, bin_size, "RD mosaic segments", rd_flag))

                # Map bin -> value for both segmentations, then keep bins present in both.
                mbaf = {}
                mrd = {}
                for s, lh in zip(segments_baf, likelihood):
                    b, _ = likelihood_baf_pval(lh)
                    for pos in s:
                        mbaf[pos] = 0.5 - b
                for s, r in zip(segments_rd, rd[0]):
                    for pos in s:
                        mrd[pos] = r
                for pos, shift in mbaf.items():
                    if pos in mrd:
                        x.append(shift)
                        y.append(mrd[pos])

            if len(x) == 0:
                # max() of an empty list would raise; leave the panel empty instead.
                _logger.warning("No bins with both BAF and RD mosaic segments in file '%s'!" % io.filename)
                continue

            y_max = max(y)
            if hist and y_max > 0:
                from matplotlib.colors import LogNorm
                ax.hist2d(x, y, bins=[np.arange(0, 0.51, 0.01), np.arange(0, y_max, y_max / 100.)], norm=LogNorm())
            else:
                if hist:
                    # Histogram bin edges can not be constructed for non-positive maximum.
                    _logger.warning("Non-positive RD maximum in file '%s', using scatter plot!" % io.filename)
                ax.scatter(x, y, marker=".", alpha=0.5)

        if self.output_filename != "":
            plt.savefig(self._image_filename("rd_baf"), dpi=150)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def dispersion(self, legend=True):
        """ Plots stdev of RD against mean RD for all bin sizes, for all opened files.

        Two log-log panels are drawn: the left one uses "RD stat" signal stored with FLAG_AUTO,
        the right one "RD stat" stored with FLAG_AUTO | FLAG_GC_CORR. For each file one line is
        plotted, with one point (stat[4], stat[5]) per bin size for which the stat signal exists.

        Parameters
        ----------
        legend : bool
            If True legend with file names is added to both panels.

        """
        plt.clf()
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, facecolor='w', edgecolor='k')
        if self.output_filename != "":
            self.fig.set_figheight(8)
            self.fig.set_figwidth(12)
        grid = gridspec.GridSpec(1, 2, wspace=0.2, hspace=0.2)

        panels = [
            (FLAG_AUTO, "mean RD", "stdev RD"),
            (FLAG_AUTO | FLAG_GC_CORR, "mean RD (GC corr)", "stdev RD (GC corr)"),
        ]
        for panel, (flags, xlabel, ylabel) in enumerate(panels):
            ax = self.fig.add_subplot(grid[panel])
            ax.set_yscale("log")
            ax.set_xscale("log")
            ax.grid(True)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            for io in self.io:
                bin_sizes = sorted(set(int(x[1]) for x in io.chromosomes_bin_sizes_with_signal("RD")))
                rd = []
                drd = []
                for bs in bin_sizes:
                    if io.signal_exists(None, bs, "RD stat", flags=flags):
                        stat = io.get_signal(None, bs, "RD stat", flags=flags)
                        rd.append(stat[4])
                        drd.append(stat[5])
                ax.plot(rd, drd, "*-", label=io.filename)
            # Legend has to be created after all lines are plotted, otherwise lines added
            # later (in particular the last file) are missing from it.
            if legend and self.io:
                ax.legend(loc="upper left")

        if self.output_filename != "":
            plt.savefig(self._image_filename("dispersion"), dpi=200)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def region_rd_stat(self, region, n_bins=21, plot=False, legend=True):
        """ Prints RD statistics of a region for each plotted file and optionally plots the distribution.

        Finite RD values of all bins overlapping the region are collected, values outside of the
        (1st, 99th) percentile range are removed and a histogram spanning mean +- 5 stdev is fitted
        using fit_normal. Printed tab separated columns are: file name, region, two fit parameters
        (fitm, fits), minimum, maximum, 1st percentile, 99th percentile (all four before trimming),
        mean and stdev (after trimming).

        Parameters
        ----------
        region : str
            Region string decoded by decode_region.
        n_bins : int
            Number of histogram bin edges.
        plot : bool
            If True histogram points and fitted curve are plotted, one panel per file.
        legend : bool
            If True legend is added to panels (used only if plot is True).

        """
        file_indices = self.plot_files
        n_files = len(file_indices)
        if plot:
            plt.clf()
            plt.rcParams["font.size"] = 8
            sx, sy = self._panels_shape(n_files) if self.grid == "auto" else tuple(self.grid)
            self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
            if self.output_filename != "":
                self.fig.set_figheight(3 * sy)
                self.fig.set_figwidth(4 * sx)
            grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

        for panel, file_index in enumerate(file_indices):
            io = self.io[file_index]
            if plot:
                ax = self.fig.add_subplot(grid[panel])
                ax.set_title(self.file_title(file_index), position=(0.01, 1.07),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})

            # Gather finite RD values from every interval of the region.
            values = []
            for c, (pos1, pos2) in decode_region(region):
                flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                signal = io.get_signal(c, self.bin_size, "RD", flag_rd)
                first_bin = (pos1 - 1) // self.bin_size
                last_bin = (pos2 - 1) // self.bin_size
                window = signal[first_bin:last_bin + 1]
                values.extend(window[np.isfinite(window)])

            data = np.array(values)
            dmin = np.min(data)
            dmax = np.max(data)
            p1 = np.percentile(data, 1)
            p99 = np.percentile(data, 99)
            # Trim tails (strict inequalities) before calculating mean and stdev.
            data = data[(data > p1) & (data < p99)]
            mean = np.mean(data)
            std = np.std(data)

            bins = np.linspace(mean - 5 * std, mean + 5 * std, n_bins)
            hist = np.histogram(data, bins=bins)[0]

            # Fit uses left bin edges as x coordinates.
            fitn, fitm, fits = fit_normal(bins[:-1], hist)[0]

            print("%s\t%s\t%.4f\t%.4f\t%e\t%e\t%.4f\t%.4f\t%.4f\t%.4f" % (
                io.filename, region, fitm, fits, dmin, dmax, p1, p99, mean, std))

            if plot:
                curve_x = np.linspace(bins[0], bins[-1], 1001)
                plt.plot(curve_x, normal(curve_x, fitn, fitm, fits), "g-", label=region)
                plt.plot(bins[:-1], hist, "b*")
                if legend:
                    plt.legend()

        if not plot:
            return
        if self.output_filename != "":
            plt.savefig(self._image_filename("comp"), dpi=200)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def compare(self, region1, region2, n_bins=21, plot=False, stdout=True, legend=True):
        """ Compares RD distributions of two regions for each plotted file.

        For both regions finite RD values are collected, values outside of the (1st, 99th) percentile
        range are removed, histograms over common bins are fitted using fit_normal and fitted
        parameters are compared using t_test_2_samples.

        Parameters
        ----------
        region1 : str
            First region string decoded by decode_region.
        region2 : str
            Second region string decoded by decode_region.
        n_bins : int
            Number of histogram bin edges.
        plot : bool
            If True histograms and fitted curves are plotted, one panel per file.
        stdout : bool
            If True result line is printed for each file.
        legend : bool
            If True legend is added to panels (used only if plot is True).

        Returns
        -------
        ret : list of list
            For each file: [file name, region1, region2, fitm1, fits1, fitm2, fits2, p-value,
            fitm1 / fitm2, error estimate of that ratio].

        """
        file_indices = self.plot_files
        n_files = len(file_indices)
        results = []

        def finite_rd(io, regs):
            # Finite RD values of all bins overlapping intervals in regs.
            values = []
            for c, (pos1, pos2) in regs:
                flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                signal = io.get_signal(c, self.bin_size, "RD", flag_rd)
                first_bin = (pos1 - 1) // self.bin_size
                last_bin = (pos2 - 1) // self.bin_size
                window = signal[first_bin:last_bin + 1]
                values.extend(window[np.isfinite(window)])
            return values

        def trimmed(values):
            # Drops values outside of the open (1st, 99th) percentile interval; returns data, mean, stdev.
            arr = np.array(values)
            low = np.percentile(arr, 1)
            high = np.percentile(arr, 99)
            arr = arr[(arr > low) & (arr < high)]
            return arr, np.mean(arr), np.std(arr)

        if plot:
            plt.clf()
            plt.rcParams["font.size"] = 8
            sx, sy = self._panels_shape(n_files) if self.grid == "auto" else tuple(self.grid)
            self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
            if self.output_filename != "":
                self.fig.set_figheight(3 * sy)
                self.fig.set_figwidth(4 * sx)
            grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

        for panel, file_index in enumerate(file_indices):
            io = self.io[file_index]
            if plot:
                ax = self.fig.add_subplot(grid[panel])
                ax.set_title(self.file_title(file_index), position=(0.01, 1.07),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            regs1 = decode_region(region1)
            regs2 = decode_region(region2)
            raw1 = finite_rd(io, regs1)
            raw2 = finite_rd(io, regs2)

            data1, mean1, std1 = trimmed(raw1)
            data2, mean2, std2 = trimmed(raw2)

            # Common binning covering mean +- 5 stdev of both samples.
            lower = min(mean1 - 5 * std1, mean2 - 5 * std2)
            upper = max(mean1 + 5 * std1, mean2 + 5 * std2)
            bins = np.linspace(lower, upper, n_bins)

            hist1 = np.histogram(data1, bins=bins)[0]
            hist2 = np.histogram(data2, bins=bins)[0]

            fitn1, fitm1, fits1 = fit_normal(bins[:-1], hist1)[0]
            fitn2, fitm2, fits2 = fit_normal(bins[:-1], hist2)[0]

            pval = t_test_2_samples(fitm1, fits1, sum(hist1), fitm2, fits2, sum(hist2))

            ratio = fitm1 / fitm2
            ratio_err = ratio * (fits1 / fitm1 / np.sqrt(sum(hist1)) + fits2 / fitm2 / np.sqrt(sum(hist2)))

            if stdout:
                print("%s\t%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t%e\t%.4f\t%.4f" % (
                    io.filename, region1, region2, fitm1, fits1, fitm2, fits2, pval, ratio, ratio_err))
            results.append([io.filename, region1, region2, fitm1, fits1, fitm2, fits2, pval, ratio, ratio_err])

            if plot:
                curve_x = np.linspace(bins[0], bins[-1], 1001)
                plt.plot(curve_x, normal(curve_x, fitn1, fitm1, fits1), "g-", label=region1)
                plt.plot(curve_x, normal(curve_x, fitn2, fitm2, fits2), "b-", label=region2)
                plt.plot(bins[:-1], hist1, "g*")
                plt.plot(bins[:-1], hist2, "b*")
                if legend:
                    plt.legend()

        if plot:
            if self.output_filename != "":
                plt.savefig(self._image_filename("comp"), dpi=200)
                plt.close(self.fig)
            elif self.interactive:
                plt.show(block=False)
                plt.draw()
            else:
                plt.show()

        return results

    def compare_baf(self, region1, region2, plot=False, stdout=True, legend=True):
        """ Compares BAF likelihoods of two regions for each plotted file.

        For each region "SNP likelihood" rows of all overlapping bins are multiplied together
        (rows that sum to zero are skipped, the product is renormalised to unit sum after every bin)
        and the combined likelihood is passed to likelihood_baf_pval.

        Parameters
        ----------
        region1 : str
            First region string decoded by decode_region.
        region2 : str
            Second region string decoded by decode_region.
        plot : bool
            If True combined likelihoods are plotted, one panel per file.
        stdout : bool
            If True result line is printed for each file.
        legend : bool
            Not used by this method.

        Returns
        -------
        ret : list of list
            For each file: [file name, region1, region2, b1, p1, b2, p2] where (b, p) are values
            returned by likelihood_baf_pval for corresponding region.

        """
        file_indices = self.plot_files
        n_files = len(file_indices)
        results = []

        def likelihood_rows(io, regs):
            # "SNP likelihood" rows of all bins overlapping intervals in regs.
            rows = []
            for c, (pos1, pos2) in regs:
                flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                signal = io.get_signal(c, self.bin_size, "SNP likelihood", flag)
                first_bin = (pos1 - 1) // self.bin_size
                last_bin = (pos2 - 1) // self.bin_size
                rows += list(signal[first_bin:last_bin + 1])
            return rows

        def accumulate(rows, product):
            # In-place running product of rows, normalised to unit sum after each row.
            for row in rows:
                if sum(row) != 0:
                    product *= row
                product /= sum(product)
            return product

        if plot:
            plt.clf()
            plt.rcParams["font.size"] = 8
            sx, sy = self._panels_shape(n_files) if self.grid == "auto" else tuple(self.grid)
            self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
            if self.output_filename != "":
                self.fig.set_figheight(3 * sy)
                self.fig.set_figwidth(4 * sx)
            grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

        for panel, file_index in enumerate(file_indices):
            io = self.io[file_index]
            if plot:
                ax = self.fig.add_subplot(grid[panel])
                ax.set_title(self.file_title(file_index), position=(0.01, 1.07),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            regs1 = decode_region(region1)
            regs2 = decode_region(region2)
            rows1 = likelihood_rows(io, regs1)
            rows2 = likelihood_rows(io, regs2)

            d1 = np.array(rows1)
            d2 = np.array(rows2)
            # Start from flat likelihoods shaped like a single bin row.
            h1 = np.ones_like(d1[0])
            h2 = np.ones_like(d2[0])
            h1 = accumulate(d1, h1)
            h2 = accumulate(d2, h2)

            b1, p1 = likelihood_baf_pval(h1)
            b2, p2 = likelihood_baf_pval(h2)

            if stdout:
                print("%s\t%s\t%s\t%.4f\t%e\t%.4f\t%e" % (
                    io.filename, region1, region2, b1, p1, b2, p2))
            results.append([io.filename, region1, region2, b1, p1, b2, p2])

            if plot:
                plt.plot(h1, "g")
                plt.plot(h2, "b")

        if plot:
            if self.output_filename != "":
                plt.savefig(self._image_filename("comp_baf"), dpi=200)
                plt.close(self.fig)
            elif self.interactive:
                plt.show(block=False)
                plt.draw()
            else:
                plt.show()

        return results

    def single_cell_allelic_dropout(self, callset=None, res=1000, n_bins=100, threshold=0.1, snp_threshold=0.01,
                                    neigh=False, plot=False, stdout=True, title=None):
        """
        Function used to identify regions without allelic dropout in the case of single cell amplification.
        It requires baf data for bin size. It will filter out all bins with at least one SNP below snp_threshold and
        all bins with collective maximum baf likelihood below threshold parameter.

        Parameters
        ----------
        callset : str or None
            Name of callset if not default.
        res : int
            Resolution in bins used to calculate percentage of dropouts in region.
        n_bins : int
            Number of bins in histograms.
        threshold : float
            Collective threshold of AF for allelic dropout
        snp_threshold : float
            Single SNP threshold of AF for allelic dropout
        neigh : bool
            Remove neighbouring bins also.
        plot : bool
            Make plots.
        stdout : bool
            Print out good regions
        title : str or None
            Figure title passed to new_figure (used only if plot is True).

        """
        # Accumulators are filled while scanning chromosomes even when nothing is plotted,
        # so they have to exist independently of the plot flag.
        bafG = []
        baf = []
        cpos = 0
        sizeG = []
        sizeB = []
        ax = None
        if plot:
            self.new_figure(panel_count=2, panel_size=(16, 6), title=title)
            ax = self.next_panel()
        io = self.io[self.plot_file]
        for c in io.snp_chromosomes():
            if not (len(self.chrom) == 0 or (c in self.chrom)):
                continue
            snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                FLAG_USEHAP if self.snp_use_phase else 0)
            i1 = io.get_signal(c, self.bin_size, "SNP i1", snp_flag)
            pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
            c01 = io.get_signal(c, self.bin_size, "SNP bin count 0|1", snp_flag)
            c10 = io.get_signal(c, self.bin_size, "SNP bin count 1|0", snp_flag)
            hets = c01 + c10
            # Bin states: 0 - both alleles present, 1 - allelic dropout, 2 - no heterozygous SNPs (resolved below).
            mask = np.zeros_like(i1)
            density = np.zeros(len(mask) // res)
            mask[hets == 0] = 2
            mask[i1 > (0.5 - threshold)] = 1
            # Any single heterozygous SNP with extreme allele fraction marks its bin as dropout.
            for ix in range(len(pos)):
                if (nref[ix] + nalt[ix]) != 0 and ((gt[ix] % 4) in [1, 2]):
                    b = 1.0 * nalt[ix] / (nref[ix] + nalt[ix])
                    if (b < snp_threshold) or (b > (1 - snp_threshold)):
                        mask[(pos[ix] - 1) // self.bin_size] = 1

            if neigh:
                # Extend dropout state to immediate neighbours (without wrapping around chromosome ends).
                ada = mask == 1
                ada1 = np.roll(ada, 1)
                ada2 = np.roll(ada, -1)
                ada1[0] = False
                ada2[-1] = False
                mask[ada1] = 1
                mask[ada2] = 1

            # Runs of bins without heterozygous SNPs become dropout if they touch a dropout bin
            # on either side, otherwise they are considered good.
            ix = 0
            while ix < len(mask):
                if mask[ix] == 2:
                    adan = 0
                    if ix > 0 and mask[ix - 1] == 1:
                        adan = 1
                    jx = ix
                    while jx < len(mask) and mask[jx] == 2:
                        jx += 1
                    if jx < len(mask) and mask[jx] == 1:
                        adan = 1
                    mask[ix:jx] = adan
                    ix = jx
                else:
                    ix += 1

            # Report runs of good bins; gaps between consecutive good runs are dropout regions.
            ix = 0
            ojx = 0
            while ix < len(mask):
                if mask[ix] == 0:
                    jx = ix
                    while jx < len(mask) and mask[jx] == 0:
                        jx += 1
                    if stdout:
                        print("%s\t%d\t%d" % (c, ix * self.bin_size + 1, jx * self.bin_size))
                    sizeG.append((jx - ix) * self.bin_size)
                    if ix > ojx:
                        sizeB.append((ix - ojx) * self.bin_size)
                    ojx = jx
                    ix = jx
                else:
                    ix += 1

            if plot:
                for ix in range(len(density)):
                    density[ix] = np.mean(mask[res * ix:res * (ix + 1)])
                ax.plot(np.arange(cpos, cpos + len(density)) * res, density)
                cpos += len(density)
                for ix in range(len(pos)):
                    if (nref[ix] + nalt[ix]) != 0 and ((gt[ix] % 4) in [1, 2]):
                        b = 1.0 * nalt[ix] / (nref[ix] + nalt[ix])
                        baf.append(b)
                        if mask[(pos[ix] - 1) // self.bin_size] == 0:
                            bafG.append(b)

        if plot:
            ax.set_xlabel("Position in genome [bins]")
            ax.set_ylabel("Percentage of allelic dropout")
            ax.grid(True)

            self.new_subgrid(2, grid="horizontal", hspace=0.05, wspace=0.2)
            ax = self.next_subpanel()
            # Mean of an empty list is NaN and can not be used as histogram range.
            region_means = [np.mean(sizes) for sizes in (sizeG, sizeB) if len(sizes) > 0]
            ms = 5 * max(region_means) if region_means else 2 * self.bin_size
            ax.hist(sizeB, bins=np.arange(1, ms, self.bin_size), histtype="step", log=True,
                    label="Allelic dropout regions", linewidth=3)
            ax.hist(sizeG, bins=np.arange(1, ms, self.bin_size), histtype="step", log=True,
                    label="Region with both alleles", linewidth=3)
            ax.legend()
            ax.grid(True)
            ax.set_xlabel("Size [bp]")
            ax.set_ylabel("Number of regions")
            self.fig.add_subplot(ax)

            ax = self.next_subpanel()
            ax.hist(baf, bins=np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1)),
                    label="All heterozygous variants")
            ax.hist(bafG, bins=np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1)),
                    label="Region with both alleles")
            ax.legend()
            ax.grid(True)
            ax.set_xlabel("VAF")
            ax.set_ylabel("Distribution")
            self.fig.add_subplot(ax)

            self.fig_show(suffix="allelic_dropout")

    def compare_rd_dist(self, regions):
        """ Plots distribution of per-bin copy number difference between first two plotted files.

        For each chromosome listed in regions GC corrected RD signals of both files are scaled to
        copy number (2 * RD / normal RD level of the file) and their bin-wise difference is added
        to a histogram spanning -0.5 to 0.5.

        Parameters
        ----------
        regions : str
            Region string decoded by decode_region.

        """
        # NOTE(review): start/end positions of decoded regions are not used - all bins of each
        # listed chromosome contribute; confirm whether this is intended.
        self.new_figure(panel_count=1)
        ax = self.next_panel()
        ax.set_ylabel("Normalised distribution")
        ax.set_xlabel("Difference in copy number")
        regs = decode_region(regions)
        io1 = self.io[self.plot_files[0]]
        io2 = self.io[self.plot_files[1]]
        bin_size = self.bin_size
        differences = []
        for c, (_start, _end) in regs:
            flags = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
            level1, _ = io1.rd_normal_level(bin_size, flags)
            level2, _ = io2.rd_normal_level(bin_size, flags)
            signal1 = io1.get_signal(c, bin_size, "RD", flags)
            signal2 = io2.get_signal(c, bin_size, "RD", flags)
            # Bins are indexed by the first file signal.
            for k in range(len(signal1)):
                differences.append(signal1[k] * 2 / level1 - signal2[k] * 2 / level2)

        ax.hist(differences, bins=np.linspace(-0.5, 0.5, 100))
        ax.set_yticklabels([])
        ax.grid()
        self.fig_show(suffix="compare_rd")

    def snp_dist(self, regions, callset=None, n_bins=100, gt_plot=[0, 1, 2, 3], titles=None, beta_distribution=False,
                 log_scale=False):
        """ Plots distribution of variant allele fraction for SNPs in regions, one panel per file and region.

        For each SNP with non-zero read count and genotype (gt % 4) listed in gt_plot the fraction
        nalt / (nref + nalt) is used, except for gt % 4 == 2 where nref / (nref + nalt) is used
        (presumably to orient both phased heterozygous genotypes the same way - confirm).
        A second histogram shows only SNPs with flag bit 2 set.

        Parameters
        ----------
        regions : str
            Space separated list of regions, each decoded by decode_region.
        callset : str or None
            Name of callset if not default.
        n_bins : int
            Controls histogram bin width: 1 / (n_bins + 1).
        gt_plot : list of int
            Genotypes (gt % 4) to include. The default list is only read, never modified.
        titles : list of str or None
            Panel titles indexed by region. If None, file title and region are used.
        beta_distribution : bool
            If True, beta distribution with both shape parameters equal to half of the mean read
            count of flagged SNPs is drawn (skipped if there are no flagged SNPs).
        log_scale : bool
            If True y axis is logarithmic.

        """
        nf = len(self.plot_files)
        regions = regions.split(" ")
        nr = len(regions)
        self.new_figure(panel_count=nf * nr)
        x_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
        for ii in range(nf):
            io = self.io[self.plot_files[ii]]
            for i in range(nr):
                ax = self.next_panel()
                if titles is None:
                    panel_title = self.file_title(self.plot_files[ii]) + ": " + regions[i]
                else:
                    panel_title = titles[i]
                ax.set_title(panel_title, position=(0.01, 1.10),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
                regs = decode_region(regions[i])
                baf = []
                bafP = []
                total_depth_p = 0
                for c, (pos1, pos2) in regs:
                    pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
                    ix = 0
                    while ix < len(pos) and pos[ix] <= pos2:
                        depth = nref[ix] + nalt[ix]
                        if pos[ix] >= pos1 and depth != 0 and ((gt[ix] % 4) in gt_plot):
                            reads = nref[ix] if gt[ix] % 4 == 2 else nalt[ix]
                            fraction = 1.0 * reads / depth
                            baf.append(fraction)
                            if flag[ix] & 2:
                                bafP.append(fraction)
                                total_depth_p += depth
                        ix += 1
                # Without flagged SNPs mean read count is undefined; avoid division by zero.
                mean_rd = total_depth_p / len(bafP) if bafP else 0
                ax.hist(baf, bins=x_bins, label="All heterozygous variants")
                ax.hist(bafP, bins=x_bins, label="P bases only")
                if log_scale:
                    # Scale is set on the panel itself rather than on implicit current axes.
                    # Keyword was renamed from nonposy to nonpositive in matplotlib; try new name first.
                    try:
                        ax.set_yscale('log', nonpositive='clip')
                    except (TypeError, ValueError):
                        ax.set_yscale('log', nonposy='clip')

                if beta_distribution and bafP:
                    xx = np.linspace(0.2, 0.8, 200)
                    ax.plot(xx, beta.pdf(xx, mean_rd / 2, mean_rd / 2) * len(bafP) / n_bins, c="black",
                            label="Beta distribution")
                ax.legend(bbox_to_anchor=(0, 1.02, 1, 0.2), loc="lower left", mode="expand", borderaxespad=0, ncol=3)
                ax.set_xlabel("VAF")
                ax.set_ylabel("Distribution")

        self.fig_show(suffix="snp_dist")

    def phased_baf(self, regions, callset=None, print=False):
        """ Calculates BAF from summed read counts of phased SNPs for each region.

        Only SNPs with genotype code 5 or 6 and non-zero read count are used. For genotype 5 counts
        are summed as (nalt, nref), for genotype 6 they are swapped before summing. Second value
        (bafP) is calculated in the same way using only SNPs with flag bit 2 set.

        Parameters
        ----------
        regions : str
            Space separated list of regions, each decoded by decode_region.
        callset : str or None
            Name of callset if not default.
        print : bool
            If True, region, baf and bafP are written to stdout (tab separated).

        Returns
        -------
        ret : list of [float, float]
            [baf, bafP] for each region. Value is nan if no reads contribute to it.

        """
        regions = regions.split(" ")
        n = len(regions)
        ret = []
        for i in range(n):
            regs = decode_region(regions[i])
            talt = 0
            tref = 0
            taltP = 0
            trefP = 0
            for c, (pos1, pos2) in regs:
                pos, ref, alt, nref, nalt, gt, flag, qual = self.io[self.plot_file].read_snp(c, callset=callset)
                ix = 0
                while ix < len(pos) and pos[ix] <= pos2:
                    if pos[ix] >= pos1 and (nref[ix] + nalt[ix]) != 0:
                        if gt[ix] == 5:
                            talt += nalt[ix]
                            tref += nref[ix]
                            if flag[ix] & 2:
                                taltP += nalt[ix]
                                trefP += nref[ix]
                        elif gt[ix] == 6:
                            tref += nalt[ix]
                            talt += nref[ix]
                            if flag[ix] & 2:
                                trefP += nalt[ix]
                                taltP += nref[ix]
                    ix += 1
            # Regions without contributing SNPs give nan instead of raising ZeroDivisionError.
            total = tref + talt
            totalP = trefP + taltP
            baf = talt / total if total != 0 else float("nan")
            bafP = taltP / totalP if totalP != 0 else float("nan")
            ret.append([baf, bafP])
            if print:
                # Parameter name shadows print function inside this method, so output
                # is written directly to stdout.
                sys.stdout.write("%s\t%f\t%f\n" % (regions[i], baf, bafP))
        return ret

    def snp_compare(self, regions, ix1, ix2, callset=None, n_bins=100, titles=None, test_loh=False):
        """ Compares alt allele fraction of SNPs between two files, one panel per region.

        By default a scatter plot of alt allele fraction versus position is drawn: green (file ix1)
        and blue (file ix2) for positions present in both files, red for positions only in ix1 and
        orange for positions only in ix2. If test_loh is True, two histograms of ix1 fractions within
        (0.2, 0.8) are drawn instead: one for positions where ix2 fraction is above 0.8 and one for
        all other positions (including those missing in ix2); their mean, stdev and size are printed.
        Data from all chromosomes of a region is combined in the same panel.

        Parameters
        ----------
        regions : str
            Space separated list of regions, each decoded by decode_region.
        ix1 : int
            Index of the first file in self.io.
        ix2 : int
            Index of the second file in self.io.
        callset : str or None
            Name of callset if not default.
        n_bins : int
            Controls histogram bin width: 1 / (n_bins + 1).
        titles : list of str or None
            Panel titles indexed by region. If None, region strings are used.
        test_loh : bool
            If True histograms are plotted instead of scatter plot.

        """

        def alt_fraction(pos, nref, nalt, pos_start, pos_end):
            # Position -> nalt / (nref + nalt) for SNPs with reads inside [pos_start, pos_end].
            fractions = {}
            ix = 0
            while ix < len(pos) and pos[ix] <= pos_end:
                depth = nref[ix] + nalt[ix]
                if pos[ix] >= pos_start and depth != 0:
                    fractions[pos[ix]] = 1.0 * nalt[ix] / depth
                ix += 1
            return fractions

        regions = regions.split(" ")
        n = len(regions)
        self.new_figure(panel_count=n)
        x_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
        for i in range(n):
            ax = self.next_panel()
            panel_title = regions[i] if titles is None else titles[i]
            ax.set_title(panel_title, position=(0.01, 1.07),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            regs = decode_region(regions[i])
            # Accumulators are created once per region so that every chromosome of the region
            # contributes and they exist even if the region decodes to nothing.
            xx = []
            yy = []
            cc = []
            hist1 = []
            hist2 = []
            for c, (pos_start, pos_end) in regs:
                pos1, ref1, alt1, nref1, nalt1, gt1, flag1, qual1 = self.io[ix1].read_snp(c, callset=callset)
                pos2, ref2, alt2, nref2, nalt2, gt2, flag2, qual2 = self.io[ix2].read_snp(c, callset=callset)

                baf1 = alt_fraction(pos1, nref1, nalt1, pos_start, pos_end)
                baf2 = alt_fraction(pos2, nref2, nalt2, pos_start, pos_end)

                for p, b1 in baf1.items():
                    in_range = 0.2 < b1 < 0.8
                    xx.append(p)
                    yy.append(b1)
                    if p in baf2:
                        cc.append("green")
                        xx.append(p)
                        yy.append(baf2[p])
                        cc.append("blue")
                        if baf2[p] > 0.8:
                            if in_range:
                                hist1.append(b1)
                        elif in_range:
                            hist2.append(b1)
                    else:
                        cc.append("red")
                        if in_range:
                            hist2.append(b1)
                for p, b2 in baf2.items():
                    if p not in baf1:
                        xx.append(p)
                        yy.append(b2)
                        cc.append("orange")

            if test_loh:
                ax.hist(hist1, bins=x_bins, histtype='step')
                ax.hist(hist2, bins=x_bins, histtype='step')
                print("H1:", np.mean(hist1), np.std(hist1), len(hist1))
                print("H2:", np.mean(hist2), np.std(hist2), len(hist2))
                ax.set_xlabel("baf")
                ax.set_ylabel("distribution")
            else:
                ax.scatter(xx, yy, marker=".", s=0.1, c=cc)
                ax.set_xlabel("position")
                ax.set_ylabel("baf")

        self.fig_show(suffix="snp_dist")

    def denovo_calls(self, sample, reference, call_type="mosaic"):
        """ Print calls of one sample that are not supported by the reference samples.

        Every stored call of the sample file that passes the viewer's size, p-value, pN and Q0
        range filters is genotyped (``self.genotype``) in the sample and in each reference file.
        The call is printed when all reference copy numbers are within 0.5 of 2 and the sample
        copy number is either above 2.5 and more than 0.5 above every reference, or below 1.5
        and more than 0.5 below every reference.

        Parameters
        ----------
        sample : int
            Index in ``self.io`` of the file whose calls are examined.
        reference : iterable of int
            Indices in ``self.io`` of the files used as references.
        call_type : str
            Only "mosaic" is handled; for any other value nothing is printed.

        """
        bs = self.bin_size
        sample_io = self.io[sample]
        if call_type != "mosaic":
            return
        for chrom in sample_io.rd_chromosomes():
            # An empty chromosome selection means "all chromosomes".
            if len(self.chrom) != 0 and chrom not in self.chrom:
                continue
            flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
            if not sample_io.signal_exists(chrom, bs, "calls", flag):
                continue
            for call in sample_io.read_calls(chrom, bs, "calls", flag):
                passes_filters = (in_interval(call["size"], self.size_range)
                                  and in_interval(call["p_val"], self.p_range)
                                  and in_interval(call["pN"], self.pN_range)
                                  and in_interval(call["Q0"], self.Q0_range))
                if not passes_filters:
                    continue
                cnv_kind = "duplication" if call["type"] == 1 else "deletion"
                region = "%s:%d-%d" % (chrom, call["start"], call["end"])

                # Element 3 of the first genotype row is the copy number for the single bin size.
                cn0 = self.genotype([bs], region, file_index=sample)[0][3]
                cref = [self.genotype([bs], region, file_index=ref)[0][3] for ref in reference]

                gain = cn0 > 2.5 and all((cn0 - x) > 0.5 for x in cref)
                loss = cn0 < 1.5 and all((x - cn0) > 0.5 for x in cref)
                refs_normal = all(np.abs(x - 2.) < 0.5 for x in cref)
                if (gain or loss) and refs_normal:
                    print(cnv_kind, region, call["cnv"], cn0, cref)

    def genotype(self, bin_sizes, region, p_val=False, interactive=False, file_index=None):
        """ Estimate the copy number of the intervals in a region from the RD signal.

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes for which the RD signal is read.
        region : str
            Region description passed to ``decode_region``.
        p_val : bool
            If True, the value computed with ``gaussianEValue`` is appended after each copy number.
        interactive : bool
            If True, results are also printed to stdout, one line per interval.
        file_index : int or None
            Index in ``self.io``; ``self.plot_file`` is used when None.

        Returns
        -------
        ret : list of list
            One row per decoded interval: [chrom, start, end] followed, for each bin size, by the
            copy number (RD normalized so that the mean level ``stat[4]`` corresponds to 2) and,
            when ``p_val`` is set, the e-value.

        """
        if file_index is None:
            file_index = self.plot_file
        rows = []
        for chrom, (start, end) in decode_region(region, max_size=1000000000):
            src = self.io[file_index]
            chrom_len = src.get_chromosome_length(chrom)
            # An end equal to max_size is replaced by the chromosome length when it is known.
            if chrom_len is not None and end == 1000000000:
                end = chrom_len
            if interactive:
                print(chrom + ":" + str(start) + "-" + str(end), end="")
            row = [chrom, start, end]
            rows.append(row)
            for bs in bin_sizes:
                flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                stat = src.get_signal(chrom, bs, "RD stat", flag_rd | FLAG_AUTO)
                if stat is None or len(stat) == 0:
                    stat = src.get_signal(chrom, bs, "RD stat", flag_rd | FLAG_SEX)
                his_p = src.get_signal(chrom, bs, "RD", flag_rd)
                first_bin = (start - 1) // bs
                last_bin = (end - 1) // bs

                # Sum RD over the interval; edge bins contribute proportionally to their overlap.
                # Bins beyond the end of the signal are silently skipped.
                rc = 0
                if first_bin == last_bin:
                    try:
                        rc = (end - start + 1) * his_p[first_bin] / bs
                    except IndexError:
                        pass
                else:
                    try:
                        rc += (first_bin * bs - start + 1 + bs) * his_p[first_bin] / bs
                        rc += (end - last_bin * bs) * his_p[last_bin] / bs
                    except IndexError:
                        pass
                    for ix in range(first_bin + 1, last_bin):
                        try:
                            rc += his_p[ix]
                        except IndexError:
                            pass

                e2 = 0
                if p_val:
                    e1 = getEValue(stat[4], stat[5], his_p, first_bin, last_bin + 1) * 2.9e9 / bs
                    e2 = gaussianEValue(stat[4], stat[5], his_p, first_bin, last_bin + 1) * 2.9e9

                copy_number = 2. * rc / (stat[4] * (end - start + 1) / bs)
                if interactive:
                    print("\t%f" % copy_number, end="")
                    if p_val:
                        print("\t%e\t%e" % (e1, e2), end="")

                row.append(copy_number)
                if p_val:
                    row.append(e2)
            if interactive:
                print()

        return rows

    def genotype_all(self, bin_sizes, regions, interactive=False, file_index=None):
        """ Genotype regions using RD, GC/AT and SNP signals.

        For each bin size and each region one row is produced:
        [chrom, start, end, cn, e1, e2, q0, pN, dG, frac, homs, hets, baf, baf_p], where

            cn     : RD normalized so that the mean level ``stat[4]`` corresponds to 2, using only
                     bins with non-NaN RD (NaN when there is no such bin)
            e1     : ``getEValue(...) * 2.9e9 / bin_size``
            e2     : ``gaussianEValue(...) * 2.9e9``
            q0     : ``(sp - su) / sp`` for the "RD" (sp) and "RD unique" (su) signals, 0 if sp is 0
            pN     : 1 - (gc + at summed over the region) / region length; -1 without GC/AT signal
            dG     : minimum over the region's 100 bp bins of the distance (in bp) to the nearest
                     bin with gc + at == 0; -1 without GC/AT signal
            frac   : fraction of the region covered by bins with non-NaN RD
            homs, hets, baf, baf_p : SNP based values, or 0, 0, 0, 1 if the chromosome has no SNP data

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes for which signals are read.
        regions : list of str
            Region descriptions; only the first interval returned by ``decode_region`` is used
            for each of them.
        interactive : bool
            If True, one line per region (all bin sizes side by side) is printed to stdout.
        file_index : int or None
            Index in ``self.io``; ``self.plot_file`` is used when None.

        Returns
        -------
        ret : dict
            Maps bin size to the list of rows described above.

        """
        if file_index is None:
            file_index = self.plot_file
        # Map RD chromosome names of the selected file to names used in the GC file.
        rd_gc_chromosomes = {}
        for c in self.io_gc.gc_chromosomes():
            rd_name = self.io[file_index].rd_chromosome_name(c)
            if rd_name is not None:
                rd_gc_chromosomes[rd_name] = c
        ret = {}
        for bs in bin_sizes:
            oc = ""
            ret[bs] = []
            for region in regions:
                regs = decode_region(region, max_size=1000000000)
                c, (pos1, pos2) = regs[0]
                if oc != c:
                    # Chromosome changed: (re)load all per-chromosome signals.
                    chr_len = self.io[file_index].get_chromosome_length(c)
                    if chr_len is not None and pos2 == 1000000000:
                        pos2 = chr_len
                    flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                    stat = self.io[file_index].get_signal(c, bs, "RD stat", flag_rd | FLAG_AUTO)
                    if stat is None or len(stat) == 0:
                        stat = self.io[file_index].get_signal(c, bs, "RD stat", flag_rd | FLAG_SEX)
                    his_p = self.io[file_index].get_signal(c, bs, "RD", flag_rd)
                    qrd_p = self.io[file_index].get_signal(c, bs, "RD")
                    qrd_u = self.io[file_index].get_signal(c, bs, "RD unique")
                    gc, at, distN = False, False, False
                    if c in rd_gc_chromosomes and self.io_gc.signal_exists(rd_gc_chromosomes[c], None, "GC/AT"):
                        gcat = self.io_gc.get_signal(rd_gc_chromosomes[c], None, "GC/AT")
                        gc, at = gc_at_decompress(gcat)
                        NN = 100 - np.array(gc) - np.array(at)
                        # distN: 0 for bins where gc + at == 0, otherwise distance in bp (steps of
                        # 100) to the nearest such bin; -1 marks "not yet computed".
                        distN = np.zeros_like(NN, dtype="long") - 1
                        distN[NN == 100] = 0
                        # Forward pass: distance to the closest such bin on the left.
                        prev = 0
                        for Ni in range(0, distN.size):
                            if distN[Ni] == -1:
                                prev += 100
                                distN[Ni] = prev
                            else:
                                prev = 0
                        # Backward pass: keep the smaller of left and right distances.
                        prev = 0
                        for Ni in range(distN.size - 1, -1, -1):
                            if distN[Ni] > 0:
                                prev += 100
                                if prev < distN[Ni]:
                                    distN[Ni] = prev
                            else:
                                prev = 0
                    snp = c in self.io[file_index].snp_chromosomes()
                    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                        FLAG_USEHAP if self.snp_use_phase else 0)
                    if snp:
                        snp_likelihood = list(
                            self.io[file_index].get_signal(c, bs, "SNP likelihood", snp_flag).astype("float64"))
                        snp_hets = self.io[file_index].get_signal(c, bs, "SNP bin count 0|1", snp_flag)
                        snp_hets += self.io[file_index].get_signal(c, bs, "SNP bin count 1|0", snp_flag)
                        snp_homs = self.io[file_index].get_signal(c, bs, "SNP bin count 1|1", snp_flag)
                else:
                    # Same chromosome as the previous region: signals and chr_len are reused.
                    if chr_len is not None and pos2 == 1000000000:
                        pos2 = chr_len
                oc = c
                ret[bs].append([c, pos1, pos2])

                bin1 = (pos1 - 1) // bs
                bin2 = (pos2 - 1) // bs
                rc = 0
                sp = 0
                su = 0
                nansize = 0  # number of bases covered by bins with non-NaN RD
                if bin1 == bin2:
                    try:
                        if not np.isnan(his_p[bin1]):
                            rc = (pos2 - pos1 + 1) * his_p[bin1] / bs
                            sp = (pos2 - pos1 + 1) * qrd_p[bin1] / bs
                            su = (pos2 - pos1 + 1) * qrd_u[bin1] / bs
                            nansize = (pos2 - pos1 + 1)
                    except IndexError:
                        pass
                else:
                    # Edge bins contribute proportionally to their overlap with the region.
                    try:
                        if not np.isnan(his_p[bin1]):
                            rc += (bin1 * bs - pos1 + 1 + bs) * his_p[bin1] / bs
                            sp += (bin1 * bs - pos1 + 1 + bs) * qrd_p[bin1] / bs
                            su += (bin1 * bs - pos1 + 1 + bs) * qrd_u[bin1] / bs
                            nansize += (bin1 * bs - pos1 + 1 + bs)
                        if not np.isnan(his_p[bin2]):
                            rc += (pos2 - bin2 * bs) * his_p[bin2] / bs
                            sp += (pos2 - bin2 * bs) * qrd_p[bin2] / bs
                            su += (pos2 - bin2 * bs) * qrd_u[bin2] / bs
                            nansize += (pos2 - bin2 * bs)

                    except IndexError:
                        pass
                    for ix in range(bin1 + 1, bin2):
                        try:
                            if not np.isnan(his_p[ix]):
                                rc += his_p[ix]
                                sp += qrd_p[ix]
                                su += qrd_u[ix]
                                nansize += bs
                        except IndexError:
                            pass
                if gc:
                    # GC/AT content is stored per 100 bp bin, independently of the RD bin size.
                    sbin1 = (pos1 - 1) // 100
                    sbin2 = (pos2 - 1) // 100
                    pN = 0
                    # The single-bin shortcut must be decided on the 100 bp bins, not on the RD
                    # bins: a region inside one RD bin can still span several 100 bp bins.
                    if sbin1 == sbin2:
                        try:
                            pN = (pos2 - pos1 + 1) * (gc[sbin1] + at[sbin1]) / 100
                        except IndexError:
                            pass
                    else:
                        try:
                            pN += (sbin1 * 100 - pos1 + 101) * (gc[sbin1] + at[sbin1]) / 100
                            pN += (pos2 - sbin2 * 100) * (gc[sbin2] + at[sbin2]) / 100

                        except IndexError:
                            pass
                        for ix in range(sbin1 + 1, sbin2):
                            try:
                                pN += gc[ix] + at[ix]
                            except IndexError:
                                pass

                e1 = getEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9 / bs
                e2 = gaussianEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9
                dG = -1
                if gc:
                    pN = 1 - pN / (pos2 - pos1 + 1)
                    # Include the last 100 bp bin; an empty slice (region beyond the signal)
                    # would make np.min raise, so fall back to -1 in that case.
                    gap_dist = distN[sbin1:sbin2 + 1]
                    dG = np.min(gap_dist) if gap_dist.size > 0 else -1
                else:
                    pN = -1
                    dG = -1
                if nansize == 0:
                    rc = np.nan
                else:
                    rc = 2 * rc / (stat[4] * nansize / bs)
                ret[bs][-1].append(rc)
                ret[bs][-1].append(e1)
                ret[bs][-1].append(e2)
                q0 = 0
                if sp != 0:
                    q0 = (sp - su) / sp
                ret[bs][-1].append(q0)
                ret[bs][-1].append(pN)
                ret[bs][-1].append(dG)
                ret[bs][-1].append(nansize / (pos2 - pos1 + 1))
                if snp:
                    homs = np.sum(snp_homs[bin1:bin2 + 1])
                    hets = np.sum(snp_hets[bin1:bin2 + 1])
                    # Multiply per-bin likelihoods, renormalizing after every step.
                    lh = np.ones_like(snp_likelihood[0])
                    for ix in range(bin1, min(bin2 + 1, len(snp_likelihood))):
                        lh *= snp_likelihood[ix]
                        lh /= np.sum(lh)
                    baf, baf_p = likelihood_baf_pval(lh)
                    ret[bs][-1] += [homs, hets, baf, baf_p]
                else:
                    ret[bs][-1] += [0, 0, 0, 1]
        if interactive:
            plist = []
            for bs in bin_sizes:
                if len(plist) == 0:
                    # Copy the rows so that widening them for printing does not modify ret.
                    plist = [list(row) for row in ret[bs]]
                else:
                    for ix in range(len(ret[bs])):
                        plist[ix] += ret[bs][ix][3:]
            for r in plist:
                print(
                    ("%s:%d-%d" + (len(bin_sizes) * "\t%.4f\t%e\t%e\t%.4f\t%.4f\t%d\t%.4f\t%d\t%d\t%.4f\t%e")) % tuple(
                        r))
        return ret

    def genotype_prompt(self, bin_sizes=None, all=False):
        """ Read regions from stdin and genotype them until an empty line or EOF.

        Parameters
        ----------
        bin_sizes : list of int or None
            Bin sizes passed to the genotyping method; None is treated as an empty list.
        all : bool
            If True ``genotype_all`` is used for each line, otherwise ``genotype``.

        """
        # None instead of a mutable [] default, which would be shared between calls.
        if bin_sizes is None:
            bin_sizes = []
        while True:
            try:
                try:
                    line = raw_input("")  # Python 2
                except NameError:
                    line = input("")
            except EOFError:
                return
            if line is None or line == "":
                return
            if all:
                self.genotype_all(bin_sizes, [line], interactive=True)
            else:
                self.genotype(bin_sizes, line, interactive=True)

    def rd_baf_call_models(self, maxcn=10):
        """ Plot "calls combined" calls in the |ΔBAF| / RD plane together with model curves.

        One panel is drawn per file in ``self.plot_files``. For every total copy number
        ``cn`` from ``maxcn`` down to 0 and every ``h1`` in ``0..cn // 2`` a curve is drawn,
        parametrized by ``x`` in [0, 1]:

            RD  = 2 - 2 * x + x * cn
            BAF = 0.5 - (1 - x + x * h1) / (2 - 2 * x + cn * x)

        Calls with more than ``self.min_segment_size`` bins are added as points at
        (call["baf"], 2 * call["cnv"]).

        Parameters
        ----------
        maxcn : int
            Largest copy number for which model curves are drawn; also the upper y limit.

        """
        bin_size = self.bin_size
        files = self.plot_files
        self.new_figure(panel_count=len(files))

        for file_ix in files:
            ax = self.next_panel()
            io = self.io[file_ix]
            ax.set_title(self.file_title(file_ix), position=(0.1, 0.1),
                         fontdict={'verticalalignment': 'bottom', 'horizontalalignment': 'left'})

            flag = FLAG_GC_CORR
            if self.snp_use_mask or self.rd_use_mask:
                flag |= FLAG_USEMASK
            if self.snp_use_id:
                flag |= FLAG_USEID
            if self.snp_use_phase:
                flag |= FLAG_USEHAP

            # Autosomes and sex chromosomes of the reference genome, restricted to the
            # current chromosome selection (an empty selection means all of them).
            chroms = []
            for name, (length, _) in self.reference_genome["chromosomes"].items():
                snp_name = io.snp_chromosome_name(name)
                selected = len(self.chrom) == 0 or (snp_name in self.chrom) or (name in self.chrom)
                if selected and (Genome.is_autosome(name) or Genome.is_sex_chrom(name)):
                    chroms.append((snp_name, length))

            # Model curves.
            x = np.linspace(0, 1, 1000)
            for cn in range(maxcn, -1, -1):
                mrd = 2 - 2 * x + x * cn
                # Note: this changes numpy's global error handling and is not restored.
                np.seterr(divide='ignore')
                for h1 in range(cn // 2 + 1):
                    h2 = cn - h1
                    mbaf = 0.5 - (1 - x + x * h1) / (2 - 2 * x + (h1 + h2) * x)
                    plt.plot(mbaf, mrd, "-", label="%d: %d/%d" % (cn, h1, h2), zorder=6 - cn)

            # Colour cycle as RGBA tuples (currently not used by the scatter call below).
            cmap = list(map(colors.to_rgba, plt.rcParams['axes.prop_cycle'].by_key()['color']))
            for c, l in chroms:
                call_rd, call_baf, call_label = [], [], []
                if io.signal_exists(c, bin_size, "calls combined", flag):
                    for call in io.read_calls(c, bin_size, "calls combined", flag):
                        if call["bins"] > self.min_segment_size:
                            call_rd.append(call["cnv"] * 2)
                            call_baf.append(call["baf"])
                            call_label.append(c + ":" + str(call["start"]) + "-" + str(call["end"]))

                # One scatter call per chromosome, even when it has no calls.
                plt.scatter(call_baf, call_rd, s=20, edgecolors='face', marker='.')

            ax.set_xlabel("|ΔBAF|")
            ax.set_ylabel("Relative RD level")

            ax.legend()

            ax.set_ylim([0, maxcn])
            ax.set_xlim([-0.02, 0.5])
            ax.grid()

        self.fig_show(suffix="models")


def anim_plot_likelihood(likelihood, segments, n, res, iter, prefix, maxp, minp):
    """ Save one animation frame showing BAF likelihoods of the current segments.

    The upper panel is an image with one column per bin (bins take the likelihood of the segment
    they belong to, other bins stay zero); the lower panel shows the likelihood curve of every
    segment. The frame is saved with ``plt.savefig`` under ``prefix + "_" + iter`` (iter
    zero-padded to 4 characters).

    Parameters
    ----------
    likelihood : sequence
        Likelihood values (``res`` points each), indexed like ``segments``.
    segments : sequence of sequences of int
        Bin indices belonging to each segment.
    n : int
        Number of bins.
    res : int
        Number of points per likelihood function.
    iter : int
        Iteration number, shown in the title and used in the filename.
    prefix : str
        Filename prefix.
    maxp, minp : float
        Bounds of the overlap interval shown in the title.

    """
    rows = [[0] * res for _ in range(n)]
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rows[b] = list(likelihood[seg_ix])

    title = "".join(["Iter: ", str(iter), "   /   Segments: ", str(len(segments)),
                     "   /   Overlap interval: (", '%.4f' % minp, ",", '%.4f' % maxp, ")"])
    fig = plt.figure(1, figsize=(16, 9), dpi=120, facecolor='w', edgecolor='k')
    fig.suptitle(title, fontsize='large')

    # Upper panel: likelihood image, bins along x.
    plt.subplot(211)
    plt.ylabel("BAF")
    plt.imshow(np.transpose(np.array(rows)), aspect='auto')
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.yticks([0, 50.5, 101, 151.5, 201], ("1.00", "0.75", "0.50", "0.25", "0.00"))

    # Lower panel: one likelihood curve per segment.
    plt.subplot(212)
    plt.xlabel("BAF")
    plt.ylabel("Likelihood")
    plt.xticks([0, 0.25, 0.50, 0.75, 1.0])
    plt.grid(True, color="b")
    baf_axis = np.linspace(1. / (res + 1), 1. - 1. / (res + 1), res)
    for lh in likelihood:
        plt.plot(baf_axis, lh)

    plt.savefig(prefix + "_" + str(iter).zfill(4), dpi=150)
    plt.close(fig)


def anim_plot_rd(level, error, segments, n, iter, prefix, maxp, minp, mean):
    """ Save one animation frame showing RD levels of the current segments.

    The upper panel is a step plot of the segment level for every bin (NaN for bins that belong
    to no segment); the lower panel shows ``normal(xx, 1, level, error)`` for every segment over
    the range [0, 3 * mean]. The frame is saved with ``plt.savefig`` under
    ``prefix + "_" + iter`` (iter zero-padded to 4 characters).

    Parameters
    ----------
    level, error : sequence of float
        RD level and its error, indexed like ``segments``.
    segments : sequence of sequences of int
        Bin indices belonging to each segment.
    n : int
        Number of bins.
    iter : int
        Iteration number, shown in the title and used in the filename.
    prefix : str
        Filename prefix.
    maxp, minp : float
        Bounds of the overlap interval shown in the title.
    mean : float
        Mean RD level; axes span 0 to 3 * mean.

    """
    rd = [np.nan] * n
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rd[b] = level[seg_ix]

    title = "".join(["Iter: ", str(iter), "   /   Segments: ", str(len(segments)),
                     "   /   Overlap interval: (", '%.4f' % minp, ",", '%.4f' % maxp, ")"])
    fig = plt.figure(1, figsize=(16, 9), dpi=120, facecolor='w', edgecolor='k')
    fig.suptitle(title, fontsize='large')

    rd_ticks = np.arange(0, 3, 0.5) * mean
    rd_limits = [0, 3 * mean]

    # Upper panel: RD level along bins.
    plt.subplot(211)
    plt.ylabel("RD")
    plt.step(range(n), rd, "k")
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.yticks(rd_ticks, [])
    plt.ylim(rd_limits)
    plt.grid(True, color="grey")

    # Lower panel: one curve per segment.
    plt.subplot(212)
    plt.xlabel("RD")
    plt.ylabel("Likelihood")
    plt.xticks(np.arange(0, 3, 0.5) * mean, [])
    plt.xlim([0, 3 * mean])
    plt.grid(True, color="grey")
    for seg_ix, seg_level in enumerate(level):
        xx = np.linspace(0, 3 * mean, 300)
        plt.plot(xx, normal(xx, 1, seg_level, error[seg_ix]))

    plt.savefig(prefix + "_" + str(iter).zfill(4), dpi=150)
    plt.close(fig)


def anim_plot_rd_likelihood(level, error, likelihood, segments, n, res, iter, prefix, maxp, mean):
    """ Save one animation frame combining RD levels and BAF likelihoods of the segments.

    A 2x2 figure is created: RD step plot (top left), per-segment ``normal`` curves drawn
    sideways (top right), BAF likelihood image (bottom left) and per-segment likelihood curves
    drawn sideways (bottom right). The frame is saved with ``plt.savefig`` under
    ``prefix + "_" + iter`` (iter zero-padded to 4 characters).

    Parameters
    ----------
    level, error : sequence of float
        RD level and its error, indexed like ``segments``.
    likelihood : sequence
        Likelihood values (``res`` points each), indexed like ``segments``.
    segments : sequence of sequences of int
        Bin indices belonging to each segment.
    n : int
        Number of bins.
    res : int
        Number of points per likelihood function.
    iter : int
        Iteration number, shown in the title and used in the filename.
    prefix : str
        Filename prefix.
    maxp : float
        Maximal overlap shown in the title.
    mean : float
        Mean RD level; RD axes span 0 to 3 * mean.

    """
    rd = [np.nan] * n
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rd[b] = level[seg_ix]

    rows = [[0] * res for _ in range(n)]
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rows[b] = list(likelihood[seg_ix])

    fig, ax = plt.subplots(2, 2, figsize=(16, 9), dpi=120, facecolor='w', edgecolor='k',
                           gridspec_kw={
                               'width_ratios': [2, 1],
                               'height_ratios': [1, 1]})
    (rd_ax, rd_lh_ax), (baf_ax, baf_lh_ax) = ax
    title = "".join(["Iter: ", str(iter), "   /   Segments: ", str(len(segments)),
                     "   /   Maximal overlap: ", '%.4f' % maxp])
    fig.suptitle(title, fontsize='large')

    # Top left: RD level along bins.
    rd_ax.set_ylabel("RD")
    rd_ax.step(range(n), rd, "k")
    rd_ax.set_xlim([0, n])
    rd_ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    rd_ax.set_yticks(np.arange(0, 3, 0.5) * mean)
    rd_ax.set_yticklabels([])
    rd_ax.set_ylim([0, 3 * mean])
    rd_ax.grid(True, color="grey")

    # Top right: one curve per segment, RD on the y axis.
    rd_lh_ax.set_ylabel("")
    rd_lh_ax.set_xlabel("")
    rd_lh_ax.set_yticks(np.arange(0, 3, 0.5) * mean)
    rd_lh_ax.set_yticklabels([])
    rd_lh_ax.set_xticklabels([])
    rd_lh_ax.set_ylim([0, 3 * mean])
    rd_lh_ax.grid(True, color="grey")
    for seg_ix, seg_level in enumerate(level):
        xx = np.linspace(0, 3 * mean, 300)
        rd_lh_ax.plot(normal(xx, 1, seg_level, error[seg_ix]), xx)

    # Bottom left: likelihood image, bins along x.
    baf_ax.set_ylabel("BAF")
    baf_ax.imshow(np.transpose(np.array(rows)), aspect='auto')
    baf_ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    baf_ax.set_yticks([0, 50.5, 101, 151.5, 201])
    baf_ax.set_yticklabels(["1.00", "0.75", "0.50", "0.25", "0.00"])

    # Bottom right: one likelihood curve per segment, BAF on the y axis.
    baf_lh_ax.set_ylabel("")
    baf_lh_ax.set_xlabel("Likelihood")
    baf_lh_ax.set_yticks([0, 0.25, 0.50, 0.75, 1.0])
    baf_lh_ax.set_yticklabels([])
    baf_lh_ax.set_xticklabels([])
    baf_lh_ax.grid(True, color="b")
    baf_axis = np.linspace(1. / (res + 1), 1. - 1. / (res + 1), res)
    for lh in likelihood:
        baf_lh_ax.plot(lh, baf_axis)

    plt.subplots_adjust(bottom=0.1, left=0.1, wspace=0., hspace=0.)

    plt.savefig(prefix + "_" + str(iter).zfill(4), dpi=150)
    plt.close(fig)

Functions

def anim_plot_likelihood(likelihood, segments, n, res, iter, prefix, maxp, minp)

Source code

def anim_plot_likelihood(likelihood, segments, n, res, iter, prefix, maxp, minp):
    """ Save one animation frame showing BAF likelihoods of the current segments.

    The upper panel is an image with one column per bin (bins take the likelihood of the segment
    they belong to, other bins stay zero); the lower panel shows the likelihood curve of every
    segment. The frame is saved with ``plt.savefig`` under ``prefix + "_" + iter`` (iter
    zero-padded to 4 characters).

    Parameters
    ----------
    likelihood : sequence
        Likelihood values (``res`` points each), indexed like ``segments``.
    segments : sequence of sequences of int
        Bin indices belonging to each segment.
    n : int
        Number of bins.
    res : int
        Number of points per likelihood function.
    iter : int
        Iteration number, shown in the title and used in the filename.
    prefix : str
        Filename prefix.
    maxp, minp : float
        Bounds of the overlap interval shown in the title.

    """
    rows = [[0] * res for _ in range(n)]
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rows[b] = list(likelihood[seg_ix])

    title = "".join(["Iter: ", str(iter), "   /   Segments: ", str(len(segments)),
                     "   /   Overlap interval: (", '%.4f' % minp, ",", '%.4f' % maxp, ")"])
    fig = plt.figure(1, figsize=(16, 9), dpi=120, facecolor='w', edgecolor='k')
    fig.suptitle(title, fontsize='large')

    # Upper panel: likelihood image, bins along x.
    plt.subplot(211)
    plt.ylabel("BAF")
    plt.imshow(np.transpose(np.array(rows)), aspect='auto')
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.yticks([0, 50.5, 101, 151.5, 201], ("1.00", "0.75", "0.50", "0.25", "0.00"))

    # Lower panel: one likelihood curve per segment.
    plt.subplot(212)
    plt.xlabel("BAF")
    plt.ylabel("Likelihood")
    plt.xticks([0, 0.25, 0.50, 0.75, 1.0])
    plt.grid(True, color="b")
    baf_axis = np.linspace(1. / (res + 1), 1. - 1. / (res + 1), res)
    for lh in likelihood:
        plt.plot(baf_axis, lh)

    plt.savefig(prefix + "_" + str(iter).zfill(4), dpi=150)
    plt.close(fig)

def anim_plot_rd(level, error, segments, n, iter, prefix, maxp, minp, mean)

Source code

def anim_plot_rd(level, error, segments, n, iter, prefix, maxp, minp, mean):
    """ Save one animation frame showing RD levels of the current segments.

    The upper panel is a step plot of the segment level for every bin (NaN for bins that belong
    to no segment); the lower panel shows ``normal(xx, 1, level, error)`` for every segment over
    the range [0, 3 * mean]. The frame is saved with ``plt.savefig`` under
    ``prefix + "_" + iter`` (iter zero-padded to 4 characters).

    Parameters
    ----------
    level, error : sequence of float
        RD level and its error, indexed like ``segments``.
    segments : sequence of sequences of int
        Bin indices belonging to each segment.
    n : int
        Number of bins.
    iter : int
        Iteration number, shown in the title and used in the filename.
    prefix : str
        Filename prefix.
    maxp, minp : float
        Bounds of the overlap interval shown in the title.
    mean : float
        Mean RD level; axes span 0 to 3 * mean.

    """
    rd = [np.nan] * n
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rd[b] = level[seg_ix]

    title = "".join(["Iter: ", str(iter), "   /   Segments: ", str(len(segments)),
                     "   /   Overlap interval: (", '%.4f' % minp, ",", '%.4f' % maxp, ")"])
    fig = plt.figure(1, figsize=(16, 9), dpi=120, facecolor='w', edgecolor='k')
    fig.suptitle(title, fontsize='large')

    rd_ticks = np.arange(0, 3, 0.5) * mean
    rd_limits = [0, 3 * mean]

    # Upper panel: RD level along bins.
    plt.subplot(211)
    plt.ylabel("RD")
    plt.step(range(n), rd, "k")
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.yticks(rd_ticks, [])
    plt.ylim(rd_limits)
    plt.grid(True, color="grey")

    # Lower panel: one curve per segment.
    plt.subplot(212)
    plt.xlabel("RD")
    plt.ylabel("Likelihood")
    plt.xticks(np.arange(0, 3, 0.5) * mean, [])
    plt.xlim([0, 3 * mean])
    plt.grid(True, color="grey")
    for seg_ix, seg_level in enumerate(level):
        xx = np.linspace(0, 3 * mean, 300)
        plt.plot(xx, normal(xx, 1, seg_level, error[seg_ix]))

    plt.savefig(prefix + "_" + str(iter).zfill(4), dpi=150)
    plt.close(fig)

def anim_plot_rd_likelihood(level, error, likelihood, segments, n, res, iter, prefix, maxp, mean)

Source code

def anim_plot_rd_likelihood(level, error, likelihood, segments, n, res, iter, prefix, maxp, mean):
    """ Save one animation frame combining RD levels and BAF likelihoods of the segments.

    A 2x2 figure is created: RD step plot (top left), per-segment ``normal`` curves drawn
    sideways (top right), BAF likelihood image (bottom left) and per-segment likelihood curves
    drawn sideways (bottom right). The frame is saved with ``plt.savefig`` under
    ``prefix + "_" + iter`` (iter zero-padded to 4 characters).

    Parameters
    ----------
    level, error : sequence of float
        RD level and its error, indexed like ``segments``.
    likelihood : sequence
        Likelihood values (``res`` points each), indexed like ``segments``.
    segments : sequence of sequences of int
        Bin indices belonging to each segment.
    n : int
        Number of bins.
    res : int
        Number of points per likelihood function.
    iter : int
        Iteration number, shown in the title and used in the filename.
    prefix : str
        Filename prefix.
    maxp : float
        Maximal overlap shown in the title.
    mean : float
        Mean RD level; RD axes span 0 to 3 * mean.

    """
    rd = [np.nan] * n
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rd[b] = level[seg_ix]

    rows = [[0] * res for _ in range(n)]
    for seg_ix, seg in enumerate(segments):
        for b in seg:
            rows[b] = list(likelihood[seg_ix])

    fig, ax = plt.subplots(2, 2, figsize=(16, 9), dpi=120, facecolor='w', edgecolor='k',
                           gridspec_kw={
                               'width_ratios': [2, 1],
                               'height_ratios': [1, 1]})
    (rd_ax, rd_lh_ax), (baf_ax, baf_lh_ax) = ax
    title = "".join(["Iter: ", str(iter), "   /   Segments: ", str(len(segments)),
                     "   /   Maximal overlap: ", '%.4f' % maxp])
    fig.suptitle(title, fontsize='large')

    # Top left: RD level along bins.
    rd_ax.set_ylabel("RD")
    rd_ax.step(range(n), rd, "k")
    rd_ax.set_xlim([0, n])
    rd_ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    rd_ax.set_yticks(np.arange(0, 3, 0.5) * mean)
    rd_ax.set_yticklabels([])
    rd_ax.set_ylim([0, 3 * mean])
    rd_ax.grid(True, color="grey")

    # Top right: one curve per segment, RD on the y axis.
    rd_lh_ax.set_ylabel("")
    rd_lh_ax.set_xlabel("")
    rd_lh_ax.set_yticks(np.arange(0, 3, 0.5) * mean)
    rd_lh_ax.set_yticklabels([])
    rd_lh_ax.set_xticklabels([])
    rd_lh_ax.set_ylim([0, 3 * mean])
    rd_lh_ax.grid(True, color="grey")
    for seg_ix, seg_level in enumerate(level):
        xx = np.linspace(0, 3 * mean, 300)
        rd_lh_ax.plot(normal(xx, 1, seg_level, error[seg_ix]), xx)

    # Bottom left: likelihood image, bins along x.
    baf_ax.set_ylabel("BAF")
    baf_ax.imshow(np.transpose(np.array(rows)), aspect='auto')
    baf_ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    baf_ax.set_yticks([0, 50.5, 101, 151.5, 201])
    baf_ax.set_yticklabels(["1.00", "0.75", "0.50", "0.25", "0.00"])

    # Bottom right: one likelihood curve per segment, BAF on the y axis.
    baf_lh_ax.set_ylabel("")
    baf_lh_ax.set_xlabel("Likelihood")
    baf_lh_ax.set_yticks([0, 0.25, 0.50, 0.75, 1.0])
    baf_lh_ax.set_yticklabels([])
    baf_lh_ax.set_xticklabels([])
    baf_lh_ax.grid(True, color="b")
    baf_axis = np.linspace(1. / (res + 1), 1. - 1. / (res + 1), res)
    for lh in likelihood:
        baf_lh_ax.plot(lh, baf_axis)

    plt.subplots_adjust(bottom=0.1, left=0.1, wspace=0., hspace=0.)

    plt.savefig(prefix + "_" + str(iter).zfill(4), dpi=150)
    plt.close(fig)

Classes

class Figure (params, force_agg=False)

Class implements matplotlib frequently used figure manipulation and plot panels arrangement.

Parameters

params : dict: Parameters to be passed to the ViewParams class

Source code

class Figure(ViewParams):
    def __init__(self, params, force_agg=False):
        """ Class implements matplotlib frequently used figure manipulation and plot panels arrangement.

        Parameters
        ----------
        params : dict
            Params to be passed to ViewParams class
        force_agg : bool
            If True, matplotlib is switched to the "Agg" backend.

        """
        if force_agg:
            import matplotlib
            matplotlib.use("Agg")
        ViewParams.__init__(self, params)
        self.fig = None
        self.fig_grid = None
        self.fig_sub_grid = None
        self.count = 0  # number of image filenames generated so far
        self.current = -1  # index of the last used panel in fig_grid
        self.sg_current = -1  # index of the last used sub panel in fig_sub_grid
        self.sg_current_ax = None  # axes of the last created sub panel

    def new_figure(self, panel_count, grid="auto", panel_size=None, title=None):
        """ Clear figure and create new plot layout.

        Parameters
        ----------
        panel_count : int
            Number of panels
        grid : str or (int, int)
            number of columns and rows (sx, sy) or "auto"
        panel_size : (float, float)
            size of a single panel (only when plots in file)
        title : str or None
            Figure title; no title is set when None.

        """
        if panel_size is None:
            panel_size = self.panel_size
        if grid == "auto":
            grid = self.grid
        plt.clf()
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, dpi=self.dpi, facecolor='w', edgecolor='k')
        if title is not None:
            self.fig.suptitle(title, fontsize=16)
        sx, sy = self._get_grid(grid, panel_count)
        # The figure is resized only when plotting into a file.
        if self.output_filename != "":
            self.fig.set_figheight(panel_size[1] * sy)
            self.fig.set_figwidth(panel_size[0] * sx)
        self.fig_grid = gridspec.GridSpec(sy, sx, hspace=self.margins[5], wspace=self.margins[4])
        self.current = -1
        self.sg_current = -1

    def new_subgrid(self, panel_count, grid="auto", hspace=0, wspace=0):
        """ Split the next panel of the figure grid into a grid of sub panels.

        Parameters
        ----------
        panel_count : int
            Number of sub panels
        grid : str or (int, int)
            number of columns and rows (sx, sy) or "auto" (``self.subgrid`` is used)
        hspace : float
            Height spacing passed to GridSpecFromSubplotSpec
        wspace : float
            Width spacing passed to GridSpecFromSubplotSpec

        """
        if grid == "auto":
            grid = self.subgrid
        sx, sy = self._get_grid(grid, panel_count)
        self.current += 1
        self.fig_sub_grid = gridspec.GridSpecFromSubplotSpec(sy, sx, subplot_spec=self.fig_grid[self.current],
                                                             wspace=wspace, hspace=hspace)
        self.sg_current = -1
        self.sg_current_ax = None

    def next_panel(self):
        """ Return axes of next panel

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel
        """
        self.current += 1
        return self.fig.add_subplot(self.fig_grid[self.current])

    def next_subpanel(self, sharex=False):
        """ Return axes of next sub panel

        Parameters
        ----------
        sharex : bool
            If True, every sub panel except the first one shares its x axis with the
            previously created sub panel.

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel
        """
        self.sg_current += 1
        if self.sg_current == 0 or not sharex:
            self.sg_current_ax = self.fig.add_subplot(self.fig_sub_grid[self.sg_current])
        else:
            self.sg_current_ax = self.fig.add_subplot(self.fig_sub_grid[self.sg_current], sharex=self.sg_current_ax)
        return self.sg_current_ax

    def next_polar_panel(self):
        """ Return axes of next panel using polar projection

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel
        """
        self.current += 1
        return self.fig.add_subplot(self.fig_grid[self.current], projection="polar")

    def get_panel(self, i):
        """ Returns axes of the i-th panel

        Parameters
        ----------
        i : int
            Panel number

        Returns
        -------
        ax : matplotlib.axes.Axes
            Axes for a given panel

        """
        return self.fig.get_axes()[i]

    def _get_grid(self, grid, panel_count):
        """ Translate grid description into (columns, rows).

        Parameters
        ----------
        grid : str or (int, int)
            "auto", "vertical", "horizontal" or explicit (sx, sy)
        panel_count : int
            Number of panels

        Returns
        -------
        sx, sy : int, int
            Number of columns and rows

        """
        if grid == "auto":
            sx, sy = self._panels_shape(panel_count)
        elif grid == "vertical":
            sx, sy = 1, panel_count
        elif grid == "horizontal":
            sx, sy = panel_count, 1
        else:
            sx, sy = tuple(grid)
        return sx, sy

    def fig_show(self, add_sufix=True, suffix=""):
        """ Plot figure. If output_filename is specified it will plot only into a file.

        Parameters
        ----------
        add_sufix : bool
            If true it will add suffix to output_filename in format prefix.suffix.count.extension
            where count is auto-incremented integer starting from 0 and
            prefix.extension is parsed from output_filename parameter.

        suffix : str
            Suffix used in filename.

        """
        bottom, top, left, right, wspace, hspace = self.margins
        plt.subplots_adjust(bottom=bottom, top=top, wspace=wspace, hspace=hspace, left=left, right=right)
        if self.output_filename != "":
            image_filename = self.output_filename
            if add_sufix:
                image_filename = self._image_filename(suffix)
            if image_filename is not None:
                try:
                    plt.savefig(image_filename, dpi=self.dpi)
                except Exception:
                    # Best effort: a failed save must not stop the program, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    _logger.warning("Figure is not saved due to an error!")
                    _logger.debug("Error while saving figure:", exc_info=True)
                plt.close(self.fig)
            else:
                _logger.warning("Figure is not saved!")
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def _image_filename(self, suffix):
        """ Build image filename prefix.suffix.count.extension from output_filename.

        The counter ``self.count`` is incremented for every generated filename.

        Parameters
        ----------
        suffix : str
            Suffix inserted before the counter; only the counter is used when empty.

        Returns
        -------
        filename : str or None
            None if the extension of output_filename is not png, pdf, jpg, eps or svg.

        """
        parts = self.output_filename.split(".")
        if parts[-1] not in ["png", "pdf", "jpg", "eps", "svg"]:
            _logger.warning("File extension should be: .jpg, .png, .svg, .eps or .pdf")
            return None
        if suffix == "":
            suffix = str(self.count).zfill(4)
        else:
            suffix += "." + str(self.count).zfill(4)
        self.count += 1
        parts[-1] = suffix + "." + parts[-1]
        return ".".join(parts)

    @staticmethod
    def _panels_shape(n):
        """ Return number of columns and rows (sx, sy) used for n panels in "auto" grid mode.

        Fixed layouts are used up to 24 panels; for more panels rows are added until
        sx * sy >= n, with sx = int(2 * sy / 3 + 1).

        """
        sx, sy = 1, 1
        if n == 2:
            sy = 2
        elif n in [3, 4]:
            sx, sy = 2, 2
        elif n in [5, 6]:
            sx, sy = 2, 3
        elif n in [7, 8, 9]:
            sx, sy = 3, 3
        elif n in [10, 11, 12]:
            sx, sy = 3, 4
        elif n in [13, 14, 15, 16]:
            sx, sy = 4, 4
        elif n in [17, 18, 19, 20]:
            sx, sy = 4, 5
        elif n in [21, 22, 23, 24]:
            sx, sy = 4, 6
        else:
            while sx * sy < n:
                sy += 1
                sx = int(2. * sy / 3 + 1.)
        return sx, sy

Ancestors

ViewParams

Subclasses

Viewer

Methods

def fig_show(self, add_sufix=True, suffix='')

Plot figure. If output_filename is specified it will plot only into a file.

Parameters

add_sufix : bool: If true, a suffix is added to output_filename in the format prefix.suffix.count.extension, where count is an auto-incremented integer starting from 0 and prefix.extension is parsed from the output_filename parameter.
suffix : str: Suffix used in the filename.

Source code

def fig_show(self, add_sufix=True, suffix=""):
    """ Plot figure. If output_filename is specified it will plot only into a file.

    Margins from self.margins are applied first. When self.output_filename is not
    empty the figure is saved (and closed); otherwise it is shown, without blocking
    when self.interactive is set.

    Parameters
    ----------
    add_sufix : bool
        If true, the file name is produced by self._image_filename(suffix), i.e. in
        the format prefix.suffix.count.extension where count is an auto-incremented
        integer and prefix.extension is parsed from output_filename.
        If false, output_filename is used as it is.

    suffix : str
        Suffix used in filename.

    """
    bottom, top, left, right, wspace, hspace = self.margins
    plt.subplots_adjust(bottom=bottom, top=top, wspace=wspace, hspace=hspace, left=left, right=right)
    if self.output_filename != "":
        image_filename = self.output_filename
        if add_sufix:
            image_filename = self._image_filename(suffix)
        if image_filename is None:
            # _image_filename rejected the file extension (it has already logged why).
            _logger.warning("Figure is not saved!")
            return
        try:
            plt.savefig(image_filename, dpi=self.dpi)
        except Exception as e:
            # Saving stays best effort, but KeyboardInterrupt/SystemExit are no longer
            # swallowed and the reason of the failure is available in the debug log.
            _logger.warning("Figure is not saved due to an error!")
            _logger.debug("Saving figure to '%s' failed: %s" % (image_filename, e))
        plt.close(self.fig)
    elif self.interactive:
        plt.show(block=False)
        plt.draw()
    else:
        plt.show()

def get_panel(self, i)

Returns the axes of the i-th panel.

Parameters

i : int: Panel number

Returns

ax : matplotlib.axes.Axes: Axes for a given panel

Source code

def get_panel(self, i):
    """ Look up one panel of the current figure by its index.

    Parameters
    ----------
    i : int
        Index into the list returned by self.fig.get_axes()

    Returns
    -------
    ax : matplotlib.axes.Axes
        Axes stored at position i

    """
    axes = self.fig.get_axes()
    return axes[i]

def new_figure(self, panel_count, grid='auto', panel_size=None, title=None)

Clear figure and create new plot layout.

Parameters

panel_count : int: Number of panels
grid : str or (int, int): number of columns and rows (sx, sy) or "auto"
panel_size : (float, float): size of a single panel (only when plots in file)

Source code

def new_figure(self, panel_count, grid="auto", panel_size=None, title=None):
    """ Reset the current figure and prepare an empty grid of panels.

    Parameters
    ----------
    panel_count : int
        Number of panels
    grid : str or (int, int)
        number of columns and rows (sx, sy) or "auto" to use self.grid
    panel_size : (float, float)
        size of a single panel (applied only when output_filename is set);
        self.panel_size is used when None
    title : str or None
        Optional figure title (drawn with font size 16)

    """
    size = self.panel_size if panel_size is None else panel_size
    layout = self.grid if grid == "auto" else grid
    plt.clf()
    plt.rcParams["font.size"] = 8
    self.fig = plt.figure(1, dpi=self.dpi, facecolor='w', edgecolor='k')
    if title is not None:
        self.fig.suptitle(title, fontsize=16)
    n_x, n_y = self._get_grid(layout, panel_count)
    if self.output_filename != "":
        # Figure size scales with the grid only when plotting into a file.
        self.fig.set_figheight(size[1] * n_y)
        self.fig.set_figwidth(size[0] * n_x)
    h_space, w_space = self.margins[5], self.margins[4]
    self.fig_grid = gridspec.GridSpec(n_y, n_x, hspace=h_space, wspace=w_space)
    # Panel and sub panel counters start before the first cell.
    self.current = -1
    self.sg_current = -1

def new_subgrid(self, panel_count, grid='auto', hspace=0, wspace=0)

Source code

def new_subgrid(self, panel_count, grid="auto", hspace=0, wspace=0):
    """ Advance to the next panel of the figure grid and split it into a sub grid.

    Parameters
    ----------
    panel_count : int
        Number of sub panels
    grid : str or (int, int)
        Layout passed to self._get_grid, or "auto" to use self.subgrid
    hspace : float
        Vertical spacing between sub panels
    wspace : float
        Horizontal spacing between sub panels

    """
    layout = self.subgrid if grid == "auto" else grid
    n_x, n_y = self._get_grid(layout, panel_count)
    self.current += 1
    parent_cell = self.fig_grid[self.current]
    self.fig_sub_grid = gridspec.GridSpecFromSubplotSpec(n_y, n_x, subplot_spec=parent_cell,
                                                         wspace=wspace, hspace=hspace)
    # Sub panel counter starts before the first cell; no axes to share yet.
    self.sg_current = -1
    self.sg_current_ax = None

def next_panel(self)

Return axes of next panel

Returns

ax : matplotlib.axes.Axes: Axes for a given panel

Source code

def next_panel(self):
    """ Advance the panel counter and create axes in that cell of the figure grid

    Returns
    -------
    ax : matplotlib.axes.Axes
        Newly added axes
    """
    self.current += 1
    cell = self.fig_grid[self.current]
    return self.fig.add_subplot(cell)

def next_polar_panel(self)

Return axes of the next panel, created with polar projection.

Returns

ax : matplotlib.axes.Axes: Axes for a given panel

Source code

def next_polar_panel(self):
    """ Advance the panel counter and create polar axes in that cell of the figure grid

    Returns
    -------
    ax : matplotlib.axes.Axes
        Newly added axes (created with projection="polar")
    """
    self.current += 1
    cell = self.fig_grid[self.current]
    return self.fig.add_subplot(cell, projection="polar")

def next_subpanel(self, sharex=False)

Return axes of next sub panel

Returns

ax : matplotlib.axes.Axes: Axes for a given panel

Source code

def next_subpanel(self, sharex=False):
    """ Advance the sub panel counter and create axes in that cell of the sub grid

    Parameters
    ----------
    sharex : bool
        If true, every sub panel after the first one shares its x axis with the
        previously created sub panel.

    Returns
    -------
    ax : matplotlib.axes.Axes
        Newly added axes (also stored in self.sg_current_ax)
    """
    self.sg_current += 1
    cell = self.fig_sub_grid[self.sg_current]
    options = {}
    if sharex and self.sg_current != 0:
        # The first sub panel has nothing to share with.
        options["sharex"] = self.sg_current_ax
    self.sg_current_ax = self.fig.add_subplot(cell, **options)
    return self.sg_current_ax

Inherited members

ViewParams:
- bin_size_f

class Reader (files)

Class constructor opens cnvpytor files.

Parameters

files : list of str: List of cnvpytor filenames.

Source code

class Reader:
    def __init__(self, files):
        """ Open every given cnvpytor file read-only and keep the handles in self.io.

        Parameters
        ----------
        files : list of str
            List of cnvpytor filenames.

        """
        handles = []
        for filename in files:
            handles.append(IO(filename, ro=True))
        self.io = handles

Subclasses

Show

class Show (files)

Class constructor opens cnvpytor files.

Parameters

files : list of str: List of cnvpytor filenames.

Source code

class Show(Reader):
    def ls(self):
        """ Prints to stdout content of all cnvpytor files.

        Calls ls() on every file handle in self.io.

        """
        for i in self.io:
            i.ls()

    def meta(self):
        """ Prints to stdout meta tags of all cnvpytor files.

        Calls read_meta_attribute() on every file handle in self.io.

        """
        for i in self.io:
            i.read_meta_attribute()

    def info(self, bin_sizes):
        """ Prints to stdout RD info for all cnvpytor files.
        Columns are following:
            filename
            mean read length, stdev of read length in %
            mean template length, stdev of template length in %
            for each bin_size (including 100 always):
                rd level and corresponding stdev (in % of the level) for each
                chromosome type (autosomes, sex chromosomes and mitochondria),
                first without and then with GC correction

        Mitochondrial columns are printed only for bin sizes up to 500.
        Missing signals are printed as "-".

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes to report; 100 is prepended when it is not in the list.

        """
        if 100 not in bin_sizes:
            bin_sizes = [100] + bin_sizes
        labels = ["FILE", "RL", "dRL[%]", "FL", "dFL[%]"]
        for bs in bin_sizes:
            bs_label = binsize_format(bs)
            chrom_types = ["AUTO", "XY", "MT"] if bs <= 500 else ["AUTO", "XY"]
            for chrom_type in chrom_types:
                # Same column order as the data rows: raw first, GC corrected second.
                # (The last MT header used to be misspelled as "dRD_CG_MT_".)
                for corr in ["", "GC_"]:
                    labels.append("RD_" + corr + chrom_type + "_" + bs_label)
                    labels.append("dRD_" + corr + chrom_type + "_" + bs_label + "[%]")
        print("\t".join(labels) + "\t")
        for i in self.io:
            rfd = i.get_signal(None, None, "read frg dist")
            rd = np.sum(rfd, axis=1)
            fd = np.sum(rfd, axis=0)
            # Mean and second moment of each marginal distribution (index = length).
            mrl = np.sum(rd * np.arange(rd.size)) / np.sum(rd)
            mfl = np.sum(fd * np.arange(fd.size)) / np.sum(fd)
            mrl2 = np.sum(rd * np.arange(rd.size) * np.arange(rd.size)) / np.sum(rd)
            mfl2 = np.sum(fd * np.arange(fd.size) * np.arange(fd.size)) / np.sum(fd)
            # Standard deviations expressed in percent of the mean.
            sdr = 100. * np.sqrt(mrl2 - mrl * mrl) / mrl
            sdf = 100. * np.sqrt(mfl2 - mfl * mfl) / mfl
            print("{:}\t{:.2f}\t{:.2f}\t{:.2f}\t{:.2f}\t".format(i.filename, mrl, sdr, mfl, sdf), end="")
            for bs in bin_sizes:
                for flag in [FLAG_AUTO, FLAG_SEX, FLAG_MT]:
                    if bs > 500 and flag == FLAG_MT:
                        continue
                    for flags in (flag, flag | FLAG_GC_CORR):
                        if i.signal_exists(None, bs, "RD stat", flags=flags):
                            stat = i.get_signal(None, bs, "RD stat", flags=flags)
                            level, spread = stat[4], stat[5]
                            if level > 0:
                                # Relative stdev in %, computed without modifying the
                                # array returned by get_signal.
                                spread = spread / (level / 100.)
                            print("{:.2f}\t{:.2f}\t".format(level, spread), end="")
                        else:
                            print("{:}\t{:}\t".format("-", "-"), end="")
            print()

Ancestors

Reader

Subclasses

Viewer

Methods

def info(self, bin_sizes)

Prints to stdout RD info for all cnvpytor files. Columns are the following: filename; mean read length and stdev of read length in %; mean template length and stdev of template length in %; and, for each bin_size (100 is always included), the RD level and corresponding stdev for each chromosome type (autosomes, sex chromosomes and mitochondria).

Source code

def info(self, bin_sizes):
    """ Prints to stdout RD info for all cnvpytor files.
    Columns are following:
        filename
        mean read length, stdev of read length in %
        mean template length, stdev of template length in %
        for each bin_size (including 100 always):
            rd level and corresponding stdev (in % of the level) for each
            chromosome type (autosomes, sex chromosomes and mitochondria),
            first without and then with GC correction

    Mitochondrial columns are printed only for bin sizes up to 500.
    Missing signals are printed as "-".

    Parameters
    ----------
    bin_sizes : list of int
        Bin sizes to report; 100 is prepended when it is not in the list.

    """
    if 100 not in bin_sizes:
        bin_sizes = [100] + bin_sizes
    labels = ["FILE", "RL", "dRL[%]", "FL", "dFL[%]"]
    for bs in bin_sizes:
        bs_label = binsize_format(bs)
        chrom_types = ["AUTO", "XY", "MT"] if bs <= 500 else ["AUTO", "XY"]
        for chrom_type in chrom_types:
            # Same column order as the data rows: raw first, GC corrected second.
            # (The last MT header used to be misspelled as "dRD_CG_MT_".)
            for corr in ["", "GC_"]:
                labels.append("RD_" + corr + chrom_type + "_" + bs_label)
                labels.append("dRD_" + corr + chrom_type + "_" + bs_label + "[%]")
    print("\t".join(labels) + "\t")
    for i in self.io:
        rfd = i.get_signal(None, None, "read frg dist")
        rd = np.sum(rfd, axis=1)
        fd = np.sum(rfd, axis=0)
        # Mean and second moment of each marginal distribution (index = length).
        mrl = np.sum(rd * np.arange(rd.size)) / np.sum(rd)
        mfl = np.sum(fd * np.arange(fd.size)) / np.sum(fd)
        mrl2 = np.sum(rd * np.arange(rd.size) * np.arange(rd.size)) / np.sum(rd)
        mfl2 = np.sum(fd * np.arange(fd.size) * np.arange(fd.size)) / np.sum(fd)
        # Standard deviations expressed in percent of the mean.
        sdr = 100. * np.sqrt(mrl2 - mrl * mrl) / mrl
        sdf = 100. * np.sqrt(mfl2 - mfl * mfl) / mfl
        print("{:}\t{:.2f}\t{:.2f}\t{:.2f}\t{:.2f}\t".format(i.filename, mrl, sdr, mfl, sdf), end="")
        for bs in bin_sizes:
            for flag in [FLAG_AUTO, FLAG_SEX, FLAG_MT]:
                if bs > 500 and flag == FLAG_MT:
                    continue
                for flags in (flag, flag | FLAG_GC_CORR):
                    if i.signal_exists(None, bs, "RD stat", flags=flags):
                        stat = i.get_signal(None, bs, "RD stat", flags=flags)
                        level, spread = stat[4], stat[5]
                        if level > 0:
                            # Relative stdev in %, computed without modifying the
                            # array returned by get_signal.
                            spread = spread / (level / 100.)
                        print("{:.2f}\t{:.2f}\t".format(level, spread), end="")
                    else:
                        print("{:}\t{:}\t".format("-", "-"), end="")
        print()

def ls(self)

Prints to stdout content of all cnvpytor files.

Source code

def ls(self):
    """ List the content of every opened cnvpytor file on stdout.

    Calls ls() on each file handle in self.io, in order.

    """
    for handle in self.io:
        handle.ls()

def meta(self)

Prints to stdout meta tags of all cnvpytor files.

Source code

def meta(self):
    """ Show the meta tags of every opened cnvpytor file on stdout.

    Calls read_meta_attribute() on each file handle in self.io, in order.

    """
    for handle in self.io:
        handle.read_meta_attribute()

class Viewer (files, params={}, force_agg=False, history_file_size=1000)

Parameters

files : list of str: List of cnvpytor filenames
params : dict: Dictionary of parameters that differ from the defaults, to be passed to the ViewParams class.

Source code

class Viewer(Show, Figure, HelpDescription):

    def __init__(self, files, params={}, force_agg=False, history_file_size=1000):
        """ Open cnvpytor files and set up the viewer state.

        Parameters
        ----------
        files : list of str
            List of cnvpytor filenames
        params : dict
            List of parameters different than default to be passed to ViewParams class
            (forwarded to Figure.__init__).
        force_agg : bool
            Forwarded to Figure.__init__.
        history_file_size : int
            Stored in self.history_file_size (used as the readline history length
            by the interactive prompt).

        """
        _logger.debug("Viewer class init: files [%s], params %s." % (", ".join(files), str(params)))
        Figure.__init__(self, params, force_agg=force_agg)
        Show.__init__(self, files)
        self.history_file_size = history_file_size
        self.cnvpytor_dir = os.path.expanduser('~/.cnvpytor')
        self.save_history = False
        if os.path.exists(self.cnvpytor_dir):
            # History is saved only when the configuration directory is writable.
            if os.access(self.cnvpytor_dir, os.W_OK):
                self.save_history = True
            conf_file = self.cnvpytor_dir + "/viewer.conf"
            if os.path.exists(conf_file):
                # NOTE(review): eval executes whatever Python expression the file contains.
                # If viewer.conf is only ever a literal dict, ast.literal_eval would be the
                # safer choice - confirm before changing.
                with open(conf_file) as conf_handle:
                    conf = eval(conf_handle.read())
                # Every key of the configuration becomes an attribute of the viewer.
                for key in conf:
                    setattr(self, key, conf[key])

        # GC and mask data default to the first file; a known reference genome may
        # replace them with its own resource files below.
        self.io_gc = self.io[0]
        self.io_mask = self.io[0]
        self.reference_genome = None
        self.plot_files = list(range(len(files)))
        self.default["plot_files"] = list(range(len(files)))
        if self.io[0].signal_exists(None, None, "reference genome"):
            rg_name = np.array(self.io[0].get_signal(None, None, "reference genome")).astype("str")[0]
            self.reference_genome = Genome.reference_genomes[rg_name]
            if "mask_file" in self.reference_genome:
                self.io_mask = IO(self.reference_genome["mask_file"], ro=True, buffer=True)
            if "gc_file" in self.reference_genome:
                self.io_gc = IO(self.reference_genome["gc_file"], ro=True, buffer=True)

    def parse(self, command):
        """ Interpret a list of command words and produce the requested plots.

        Words are processed in order. A plot type word selects what the next bin size
        will plot; a word made of digits that is divisible by 100 sets self.bin_size and
        triggers the plot of the currently selected type. "rdstat" and "snp" plot
        immediately. While the selected type is "regions" (the initial state) any other
        word is collected as a region and passed to multiple_regions at the next bin size.

        Parameters
        ----------
        command : list of str
            Command words.

        """
        plot_types = ("rd", "baf", "manhattan", "calls", "stat", "regions", "likelihood", "circular")
        mode = "regions"
        pending_regions = []

        for word in command:
            if word.isdigit() and (int(word) % 100) == 0:
                self.bin_size = int(word)
                if mode == "rd":
                    self.rd()
                elif mode == "baf":
                    self.baf()
                elif mode == "likelihood":
                    self.likelihood()
                elif mode == "manhattan":
                    self.global_plot()
                elif mode == "calls":
                    if len(self.callers) > 0:
                        self.manhattan(plot_type=self.callers[0])
                elif mode == "stat":
                    self.stat(int(word))
                elif mode == "circular":
                    self.circular()
                elif mode == "regions":
                    self.multiple_regions(pending_regions)
                    pending_regions = []
                continue
            if word == "rdstat":
                self.stat()
            elif word == "snp":
                self.snp()
            elif word in plot_types or mode != "regions":
                # Unknown words also replace the mode unless regions are being collected.
                mode = word
            else:
                pending_regions.append(word)

    def plot_command(self, command):
        """ Execute a plot command in non-interactive mode.

        Parameters
        ----------
        command : list of str
            Command words, handed over to parse.

        """
        # Plots triggered from here must not use the interactive display path.
        self.interactive = False
        self.parse(command)

    def prompt(self):
        """ Run the interactive command prompt until "quit"/"exit", EOF or Ctrl-C.

        Chromosome names of all opened files and the available matplotlib styles are
        added to self.command_tree for tab completion. Each input line is split on ">";
        the part before it is the command and, for plot commands, the first word after
        it is a file name the figure is saved to. Lines that are empty, contain only
        whitespace or start with "#" are ignored. When standard input is not a terminal
        no prompt string is printed and self.interactive is switched off.

        """
        self.interactive = True

        chromosomes = set()
        for io_file in self.io:
            chromosomes.update(io_file.rd_chromosomes())
            chromosomes.update(io_file.snp_chromosomes())
        for c in chromosomes:
            self.command_tree[c] = None
        self.command_tree["set"]["style"] = dict.fromkeys(plt.style.available)
        history_file = self.cnvpytor_dir + "/history"
        if os.path.exists(history_file):
            readline.read_history_file(history_file)

        readline.parse_and_bind("tab: complete")
        completer = PromptCompleter(self.command_tree)
        readline.set_completer(completer.complete)
        done = False
        try:
            while not done:
                prompt_str = ""
                if os.isatty(sys.stdin.fileno()):
                    prompt_str = "cnvpytor> "
                else:
                    self.interactive = False
                try:
                    line = raw_input(prompt_str)
                except NameError:
                    # Python 3 has no raw_input.
                    line = input(prompt_str)

                # Indexing an empty line used to raise IndexError and end the prompt.
                if not line.strip() or line[0] == "#":
                    continue

                if self.save_history and self.interactive:
                    readline.set_history_length(self.history_file_size)
                    readline.write_history_file(history_file)

                pre = line.split(">")
                f = pre[0].strip().split(" ")
                n = len(f)
                if f[0] == "quit" or f[0] == "exit":
                    done = True
                elif line[0] == "|":
                    # NOTE(review): deliberately executes arbitrary Python typed by the user.
                    try:
                        eval(compile(line[1:], '<string>', 'single'))
                    except Exception:
                        print(traceback.format_exc())
                elif f[0] == "save":
                    if n > 1:
                        try:
                            plt.savefig(f[1])
                        except ValueError:
                            _logger.warning("File extension should be: .jpg, .png, .svg, .eps or .pdf")
                        except Exception:
                            _logger.warning("Figure is not saved due to an error!")

                elif f[0] in ["draw", "repaint", "update"]:
                    if n == 1:
                        self.fig.canvas.draw()
                elif f[0] == "ls":
                    self.ls()
                elif f[0] == "meta":
                    self.meta()
                elif f[0] == "show":
                    if n == 1:
                        self.show()
                elif f[0] == "set":
                    if n > 1:
                        self.set(f[1], f[2:])
                elif f[0] == "help" and n > 1:
                    self.help(f[1])
                elif f[0] == "help" and n == 1:
                    self.help("help")
                elif f[0] == "unset":
                    if n > 1:
                        self.unset(f[1])
                elif f[0] == "genotype":
                    if n > 1:
                        self.genotype_all([self.bin_size], f[1:], interactive=True)
                elif f[0] == "snv":
                    if n == 2:
                        self.snp(callset=f[1])
                    elif n == 1:
                        self.snp(callset="default")
                elif f[0] == "compare":
                    if n == 3:
                        self.compare(f[1], f[2], plot=self.plot)
                    elif n == 4:
                        self.compare(f[1], f[2], n_bins=int(f[3]), plot=self.plot)
                elif f[0] == "info":
                    if n > 1:
                        self.info(list(map(binsize_type, f[1:])))
                elif f[0] == "print":
                    # A bare "print" used to raise IndexError on f[1].
                    if n > 1:
                        if f[1] == "calls":
                            if self.print_filename == "":
                                self.print_calls()
                            else:
                                self.print_calls_file()
                        elif f[1] == "joint_calls":
                            self.print_simple_joint_calls()

                else:
                    try:
                        # Plot commands are completed with the current bin size.
                        if f[0] not in ["rdstat", "snp"]:
                            self.parse(f + [str(self.bin_size)])
                        else:
                            self.parse(f)
                        if len(pre) > 1:
                            fns = pre[1].strip().split(" ")
                            if fns[0] != "":
                                plt.savefig(fns[0], dpi=200)
                    except Exception:
                        print(traceback.format_exc())
        except (EOFError, KeyboardInterrupt):
            print()
            return

    def help(self, param):
        """ Print the help text stored for a command or parameter.

        Parameters
        ----------
        param : str
            Key looked up in self.param_help; a fixed message is printed when unknown.

        """
        if param not in self.param_help:
            print("\nUnknown parameter !\n")
            return
        print(self.param_help[param])

    @staticmethod
    def set_style(style):
        """ Switch matplotlib to the given style; unknown style names are ignored.

        Parameters
        ----------
        style : str
            Name that has to be listed in plt.style.available.

        """
        if style not in plt.style.available:
            return
        # Reset to the default style first, then apply the requested one.
        for name in ("default", style):
            plt.style.use(name)

    def file_title(self, ix):
        """ Return the title used for a file in plots.

        Parameters
        ----------
        ix : int
            File index.

        Returns
        -------
        title : str
            self.file_titles[ix] when ix is smaller than the number of titles, otherwise
            the file name of self.io[ix] without directories and with ".pytor" removed.

        """
        if ix >= len(self.file_titles):
            basename = self.io[ix].filename.rsplit("/", 1)[-1]
            return basename.replace(".pytor", "")
        return self.file_titles[ix]

    def show(self):
        """ Print all entries of self.params sorted by name.

        The "plot_files" entry is followed by an indexed list of the opened files.

        """
        print("\nParameters")
        for name in sorted(self.params.keys()):
            print("    * %s: %s" % (name, str(self.params[name])))
            if name != "plot_files":
                continue
            for index, handle in enumerate(self.io):
                print("            %d: %s" % (index, handle.filename))
        print()

    def stat(self, his_bin_size=100, return_image=False):
        """ Plot RD statistics of the file self.io[self.plot_file].

        One column of two panels is drawn for each chromosome type (autosomes, X/Y,
        mitochondria) that has an "RD stat" signal: the RD/GC histogram with the GC
        correction curve on top, and the RD distributions with a fitted normal curve
        below. Mitochondria are included only for bin sizes below 1001. Nothing is
        plotted when none of the signals exists.

        Parameters
        ----------
        his_bin_size : int
            Bin size of the signals to read.
        return_image : bool
            If true, the rendered figure is returned instead of being saved or shown.

        Returns
        -------
        image : PIL.Image.Image or None
            Image of the figure when return_image is true, otherwise None.

        """
        plt.clf()
        auto = self.io[self.plot_file].signal_exists(None, his_bin_size, "RD stat", FLAG_AUTO)
        sex = self.io[self.plot_file].signal_exists(None, his_bin_size, "RD stat", FLAG_SEX)
        mt = self.io[self.plot_file].signal_exists(None, his_bin_size, "RD stat", FLAG_MT) and (his_bin_size < 1001)
        if not (auto or sex or mt):
            return
        cond = [auto, sex, mt]
        stat_list = []
        n_cols = sum(map(int, cond))
        ix = 1
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, figsize=(4 * n_cols, 8), dpi=90, facecolor='w', edgecolor='k')
        for t, c, flag in zip(["Autosomes", "X/Y", "Mitochondria"], cond, [FLAG_AUTO, FLAG_SEX, FLAG_MT]):
            if c:
                stat = self.io[self.plot_file].get_signal(None, his_bin_size, "RD stat", flag)
                stat_list.append(stat)
                max_rd = int(stat[0])
                bin_size = int(stat[1])
                # stat[4] and stat[5] are used as mean and standard deviation of RD.
                lim_rd = int(max(2 * stat[4], stat[4] + 3 * stat[5]))
                _logger.info("RD stat for %s: %.2f +- %.2f" % (t, stat[4], stat[5]))
                if t == "Mitochondria" and auto:
                    # stat_list[0] holds the autosomal statistics whenever auto is set.
                    _logger.info("RD stat for %s - number of mitochondria per cell: %.2f +- %.2f" % (
                        t, 2 * stat[4] / stat_list[0][4],
                        2 * stat[5] / stat_list[0][4] + stat_list[0][5] * stat[4] / (
                                stat_list[0][4] * stat_list[0][4])))
                his_p = self.io[self.plot_file].get_signal(None, his_bin_size, "RD p dist", flag)
                his_u = self.io[self.plot_file].get_signal(None, his_bin_size, "RD u dist", flag)
                his_rd_gc = self.io[self.plot_file].get_signal(None, his_bin_size, "RD GC dist", flag)
                gc_corr = self.io[self.plot_file].get_signal(None, his_bin_size, "GC corr", flag)
                ax = plt.subplot(2, n_cols, ix)
                ax.set_xlabel("RD")
                ax.set_ylabel("GC [%]")
                ax.xaxis.set_ticklabels([])
                ax.set_title(t)
                his_rd_gc[0][0] = 0
                ax.imshow(his_rd_gc[:lim_rd // bin_size, :].T, aspect="auto", interpolation='nearest', origin='lower')
                ax.plot(gc_corr * stat[4] / bin_size, range(101), "w-")

                ax = plt.subplot(2, n_cols, ix + n_cols)
                ax.set_ylabel("Normalised distribution")
                ax.set_xlabel("RD")
                ax.set_xlim([0, lim_rd])
                # ax.set_ylim([0, 1.1])
                bins = range(0, max_rd, bin_size)
                x = np.arange(0, max_rd // bin_size * bin_size, 0.1 * bin_size)
                plt.plot(x, normal(x, 1, stat[4], stat[5]), "g-")
                x = np.array(bins)
                plt.plot(x[:len(his_u)], his_u / stat[3], "y*")
                plt.plot(x[:len(his_p)], his_p / stat[3], "b*")
                ix += 1
        plt.subplots_adjust(bottom=0.08, top=0.95, wspace=0.25, hspace=0, left=0.05 * 3 / n_cols, right=0.95)
        if return_image:
            self.fig.canvas.draw()
            # "import PIL" alone does not guarantee that the PIL.Image submodule is loaded.
            from PIL import Image
            # NOTE(review): tostring_rgb appears to be deprecated in recent matplotlib
            # releases - confirm against the supported matplotlib versions.
            pil_image = Image.frombytes('RGB', self.fig.canvas.get_width_height(),
                                        self.fig.canvas.tostring_rgb())
            return pil_image
        elif self.output_filename != "":
            image_filename = self._image_filename("stat")
            if image_filename is not None:
                plt.savefig(image_filename, dpi=150)
                plt.close(self.fig)
            else:
                # _image_filename rejected the file extension (it has already logged why).
                _logger.warning("Figure is not saved!")
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def rd(self):
        """ Plot genome-wide read depth for the file self.io[self.plot_file].

        One panel is drawn for every autosome and sex chromosome of the reference genome
        that has both raw and GC corrected "RD" signals at self.bin_size. Which curves
        are drawn is controlled by self.rd_raw, self.rd_corrected, self.rd_partition,
        self.rd_call and self.callers. A warning is logged and nothing is plotted when
        the reference genome is not known.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        # Collect (chromosome name in the file, length) for chromosomes that have data.
        chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            rd_chr = self.io[self.plot_file].rd_chromosome_name(c)
            if self.io[self.plot_file].signal_exists(rd_chr, bin_size, "RD", 0) and \
                    self.io[self.plot_file].signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and \
                    (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append((rd_chr, l))
        self.new_figure(panel_count=len(chroms))
        for c, l in chroms:
            flag_rd = FLAG_USEMASK if self.rd_use_mask else 0
            mean, stdev = self.io[self.plot_file].rd_normal_level(bin_size, flag_rd | FLAG_GC_CORR)
            his_p = self.io[self.plot_file].get_signal(c, bin_size, "RD", flag_rd)
            his_p_corr = self.io[self.plot_file].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
            his_p_seg = self.io[self.plot_file].get_signal(c, bin_size, "RD partition", flag_rd | FLAG_GC_CORR)
            his_p_call = self.io[self.plot_file].get_signal(c, bin_size, "RD call", flag_rd | FLAG_GC_CORR)
            his_p_mosaic_seg = self.io[self.plot_file].get_signal(c, bin_size, "RD mosaic segments",
                                                                  flag_rd | FLAG_GC_CORR)
            his_p_mosaic_seg = segments_decode(his_p_mosaic_seg)
            his_p_mosaic_call = self.io[self.plot_file].get_signal(c, bin_size, "RD mosaic call",
                                                                   flag_rd | FLAG_GC_CORR)
            his_p_mosaic_seg_2d = self.io[self.plot_file].get_signal(c, bin_size, "RD mosaic segments 2d",
                                                                     flag_rd | FLAG_GC_CORR)
            his_p_mosaic_seg_2d = segments_decode(his_p_mosaic_seg_2d)
            his_p_mosaic_call_2d = self.io[self.plot_file].get_signal(c, bin_size, "RD mosaic call 2d",
                                                                      flag_rd | FLAG_GC_CORR)
            # Per-bin level of the mosaic calls; bins outside of any segment stay NaN.
            his_p_mosaic = np.zeros_like(his_p) * np.nan
            if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and self.rd_call and (
                    "rd_mosaic" in self.callers):
                for seg, lev in zip(list(his_p_mosaic_seg), list(his_p_mosaic_call[0])):
                    for segi in seg:
                        his_p_mosaic[segi] = lev
            # Same expansion for the "combined_mosaic" caller.
            his_p_mosaic_2d = np.zeros_like(his_p) * np.nan
            if his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and self.rd_call and (
                    "combined_mosaic" in self.callers):
                for seg, lev in zip(list(his_p_mosaic_seg_2d), list(his_p_mosaic_call_2d[0])):
                    for segi in seg:
                        his_p_mosaic_2d[segi] = lev
            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            # x axis is in bins: one tick every 10e6 // bin_size bins.
            ax.xaxis.set_ticks(np.arange(0, (l + 10e6) // bin_size, 10e6 // bin_size), minor=[])
            # rd_range is expressed in units of mean / 2; ticks are drawn only for narrow ranges.
            if (self.rd_range[1] - self.rd_range[0]) < 30:
                ax.yaxis.set_ticks(np.arange(int(self.rd_range[0]), int(self.rd_range[1] + 1), 1) * mean / 2,
                                   minor=[])
            ax.set_ylim([self.rd_range[0] * mean / 2, self.rd_range[1] * mean / 2])
            n_bins = l // bin_size
            # 5% padding on both sides of the chromosome.
            ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
            ax.grid()
            # plt.step draws into the current axes, i.e. the panel created above.
            if self.rd_raw:
                plt.step(his_p, self.rd_colors[0])
            if self.rd_corrected:
                plt.step(his_p_corr, self.rd_colors[1])
            if his_p_seg is not None and len(his_p_seg) > 0 and self.rd_partition:
                plt.step(his_p_seg, self.rd_colors[2])
            if his_p_call is not None and len(his_p_call) > 0 and self.rd_call:
                plt.step(his_p_call, self.rd_colors[3])
            if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and self.rd_call and (
                    "rd_mosaic" in self.callers):
                plt.step(his_p_mosaic, self.rd_colors[4])
            if his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and self.rd_call and (
                    "combined_mosaic" in self.callers):
                plt.step(his_p_mosaic_2d, self.rd_colors[5])
        self.fig_show(suffix="rd")

    def rd_diff(self, file1, file2):
        """ Plot the genome-wide absolute difference of normalised RD between two files.

        One panel is drawn for every autosome and sex chromosome of the reference genome
        that has raw and GC corrected "RD" signals in the file self.io[self.plot_file].
        In each panel the GC corrected RD of both files is divided by the RD level
        (stat[4] of "RD stat") of the matching chromosome type and the absolute
        difference is plotted on a fixed 0-1 y range.

        Parameters
        ----------
        file1 : int
            Index of the first file in self.io.
        file2 : int
            Index of the second file in self.io.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            rd_chr = self.io[self.plot_file].rd_chromosome_name(c)
            if self.io[self.plot_file].signal_exists(rd_chr, bin_size, "RD", 0) and \
                    self.io[self.plot_file].signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and \
                    (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append((rd_chr, l))
        self.new_figure(panel_count=len(chroms))
        flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0)
        for c, l in chroms:
            flag = FLAG_MT if Genome.is_mt_chrom(c) else FLAG_SEX if Genome.is_sex_chrom(c) else FLAG_AUTO
            stat1 = self.io[file1].get_signal(None, bin_size, "RD stat", flag)
            stat2 = self.io[file2].get_signal(None, bin_size, "RD stat", flag)
            if stat1 is None:
                _logger.error(
                    "Data for bin size %d is missing in file '%s'!" % (bin_size, self.io[file1].filename))
                return
            if stat2 is None:
                _logger.error(
                    "Data for bin size %d is missing in file '%s'!" % (bin_size, self.io[file2].filename))
                return
            his_p_corr1 = self.io[file1].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
            his_p_corr2 = self.io[file2].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            # A tick setup copied from rd() used the undefined name "mean" here (NameError);
            # it was removed because the ticks below replaced it in any case.
            ax.yaxis.set_ticks(np.arange(0, 2, 0.25), minor=[])
            ax.xaxis.set_ticks(np.arange(0, (l + 10e6) // bin_size, 10e6 // bin_size), minor=[])
            ax.set_ylim([0, 1])
            n_bins = l // bin_size
            ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
            ax.grid()

            # Chromosomes were selected using self.plot_file only, so either compared
            # file may lack the signal; leave the panel empty in that case.
            if his_p_corr1 is None or his_p_corr2 is None:
                _logger.warning("RD signal for chromosome '%s' is missing in one of the compared files." % c)
                continue
            plt.step(np.abs(his_p_corr1 / stat1[4] - his_p_corr2 / stat2[4]), "k")
        self.fig_show(suffix="rd_diff")

    def likelihood(self):
        """ Plots SNP likelihood signal as an image, one panel per chromosome.

        Panels are created for autosomes and sex chromosomes of the reference genome for which
        the current plot file contains "SNP likelihood" signal for the current bin size and
        SNP flags. If self.snp_call is set, calls of the "baf_mosaic" and / or "combined_mosaic"
        callers (when listed in self.callers) are drawn on top of the image: for each segment
        longer than self.min_segment_size both pixel rows returned by likelihood_pixels_pval
        are marked, with opacity derived from the returned p-value and self.contrast.

        Requires a reference genome; otherwise a warning is logged and nothing is plotted.

        """
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        io = self.io[self.plot_file]
        chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            snp_chr = io.snp_chromosome_name(c)
            if io.signal_exists(snp_chr, bin_size, "SNP likelihood", snp_flag) and (
                    Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append(snp_chr)
        self.new_figure(panel_count=len(chroms))
        # (caller name, calls signal, segments signal, index into self.lh_colors)
        call_layers = (
            ("baf_mosaic", "SNP likelihood call", "SNP likelihood segments", 0),
            ("combined_mosaic", "SNP likelihood call 2d", "SNP likelihood segments 2d", 1),
        )
        for c in chroms:
            likelihood = io.get_signal(c, bin_size, "SNP likelihood", snp_flag)
            img = np.array(likelihood).transpose()
            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            ax.imshow(img, aspect='auto')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            ax.xaxis.set_ticks(np.arange(0, likelihood.shape[0], 50), minor=[])
            ax.set_xlim([0, likelihood.shape[0]])
            if not self.snp_call:
                continue
            for caller, calls_signal, segments_signal, color_ix in call_layers:
                if caller not in self.callers:
                    continue
                call_lh = io.get_signal(c, bin_size, calls_signal, snp_flag)
                segments = segments_decode(io.get_signal(c, bin_size, segments_signal, snp_flag))
                call_pos = []
                call_i1 = []
                call_i2 = []
                call_c = []
                for s, lh in zip(segments, call_lh):
                    i1, i2, p = likelihood_pixels_pval(lh)
                    if i1 != i2 and len(s) > self.min_segment_size:
                        # Smaller p-value -> more opaque marker; 1e-40 avoids log(0).
                        alpha = -np.log(p + 1e-40) / self.contrast
                        if alpha > 1:
                            alpha = 1
                        color = colors.to_rgb(self.lh_colors[color_ix]) + (alpha,)
                        low, high = min(i1, i2), max(i1, i2)
                        for pos in s:
                            call_pos.append(pos)
                            call_i1.append(low)
                            call_i2.append(high)
                            call_c.append(color)
                plt.scatter(call_pos, call_i1, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                            marker=self.lh_marker)
                plt.scatter(call_pos, call_i2, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                            marker=self.lh_marker)
        self.fig_show(suffix="likelihood")

    def baf(self):
        """ Plots binned SNP signals ("SNP baf", "SNP maf", "SNP i1"), one panel per chromosome.

        A chromosome of the reference genome gets a panel when the current plot file contains
        "SNP baf", "SNP maf", "SNP i1" and "SNP i2" signals for the current bin size and SNP
        flags, and the chromosome is an autosome or a sex chromosome.

        Requires a reference genome; otherwise a warning is logged and nothing is plotted.

        """
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for gview.")
            return
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        source = self.io[self.plot_file]
        required_signals = ("SNP baf", "SNP maf", "SNP i1", "SNP i2")
        panels = []
        for name, (length, _) in self.reference_genome["chromosomes"].items():
            snp_chr = source.snp_chromosome_name(name)
            if not all(source.signal_exists(snp_chr, self.bin_size, sig, snp_flag) for sig in required_signals):
                continue
            if Genome.is_autosome(name) or Genome.is_sex_chrom(name):
                panels.append((snp_chr, length))

        self.new_figure(panel_count=len(panels))
        for snp_chr, length in panels:
            baf_signal = source.get_signal(snp_chr, self.bin_size, "SNP baf", snp_flag)
            maf_signal = source.get_signal(snp_chr, self.bin_size, "SNP maf", snp_flag)
            i1_signal = source.get_signal(snp_chr, self.bin_size, "SNP i1", snp_flag)
            # "SNP i2" is read but never drawn below.
            i2_signal = source.get_signal(snp_chr, self.bin_size, "SNP i2", snp_flag)

            panel = self.next_panel()
            panel.set_title(snp_chr, position=(0.01, 0.9),
                            fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'}, color='C0')
            panel.xaxis.set_ticklabels([])
            panel.yaxis.set_ticklabels([])
            panel.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
            # One x tick every 10 Mbp, expressed in bins.
            panel.xaxis.set_ticks(np.arange(0, (length + 10e6) // self.bin_size, 10e6 // self.bin_size), minor=[])
            panel.set_ylim([0, 1])
            bin_count = length // self.bin_size
            # 5% margin on both sides of the chromosome.
            panel.set_xlim([-bin_count * 0.05, bin_count * 1.05])
            panel.grid()
            for values, line_color in ((baf_signal, self.baf_colors[0]),
                                       (maf_signal, self.baf_colors[1]),
                                       (i1_signal, self.baf_colors[2])):
                panel.step(values, line_color)
        self.fig_show(suffix="baf")

    def snp(self, plot_gt=None, plot_pmask=None, callset=None):
        """ Plots per-SNP allele frequency for each chromosome, one panel per chromosome.

        Parameters
        ----------
        plot_gt : list of int or None
            Genotype codes (value of gt % 4) to plot. Default: [0, 1, 2, 3].
        plot_pmask : list of int or None
            Values of flag >> 1 to plot. Default: [0, 1].
        callset : str or None
            If None, "SNP pos/desc/counts/qual" signals are required and plotted. Otherwise
            the "somatic SNP ..." signals with name=callset are used.

        Notes
        -----
        Without a reference genome all chromosomes returned by snp_chromosomes() are plotted.
        SNPs with zero total read count (nref + nalt) are skipped. A chromosome without any
        SNP record gets an empty panel.

        """
        if plot_pmask is None:
            plot_pmask = [0, 1]
        if plot_gt is None:
            plot_gt = [0, 1, 2, 3]
        io = self.io[self.plot_file]
        if self.reference_genome is None:
            chroms = io.snp_chromosomes()
        else:
            if callset is None:
                required = ("SNP pos", "SNP desc", "SNP counts", "SNP qual")
                signal_kw = {}
            else:
                required = ("somatic SNP pos", "somatic SNP desc", "somatic SNP counts", "somatic SNP qual")
                signal_kw = {"name": callset}
            chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                snp_chr = io.snp_chromosome_name(c)
                if all(io.signal_exists(snp_chr, None, sig, 0, **signal_kw) for sig in required) and \
                        (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                    chroms.append(snp_chr)
        self.new_figure(panel_count=len(chroms))
        marker_size = 10 if self.markersize == "auto" else self.markersize
        for c in chroms:
            pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
            hpos = []
            baf = []
            color = []
            for p, n_ref, n_alt, g, fl in zip(pos, nref, nalt, gt, flag):
                depth = n_ref + n_alt
                if depth == 0:
                    continue
                gt_code = g % 4
                pmask = fl >> 1
                if gt_code not in plot_gt or pmask not in plot_pmask:
                    continue
                hpos.append(p)
                # For genotype code 2 the reference allele count is used instead of the alternative one.
                baf.append(1.0 * (n_alt if gt_code != 2 else n_ref) / depth)
                if self.snp_alpha_P:
                    color.append(colors.to_rgba(self.snp_colors[gt_code * 2 + 1], pmask * 0.4))
                else:
                    color.append(self.snp_colors[gt_code * 2 + pmask])

            ax = self.next_panel()
            ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')
            ax.xaxis.set_ticklabels([])
            ax.yaxis.set_ticklabels([])
            ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
            has_snps = len(pos) > 0
            if has_snps:
                # X axis is scaled by the largest SNP position; max() of an empty sequence would raise.
                x_max = max(pos)
                ax.xaxis.set_ticks(np.arange(0, (x_max + 10e6), 10e6), minor=[])
                ax.set_xlim([-0.05 * x_max, 1.05 * x_max])
            ax.set_ylim([0., 1.])
            ax.grid()
            if not has_snps:
                continue
            ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=marker_size, alpha=0.7)
        self.fig_show(suffix="snp")

    def get_calls(self):
        """ Collects filtered CNV calls of all plot files for every caller in self.callers.

        Calls are read for the current bin size and filtered by self.size_range, self.p_range,
        self.pN_range, self.Q0_range (and self.dG_range for "rd_mean_shift"). Chromosomes are
        restricted to self.chrom unless it is empty. Nothing is printed.

        Returns
        -------
        ret : list of list
            One row per call.
            "rd_mean_shift" rows: file title, caller, type, chromosome, start, end, size, cnv,
            p_val, p_val_2, p_val_3, p_val_4, Q0, pN, dG.
            "combined_mosaic" rows: file title, caller, type, chromosome, start, end, size, cnv,
            p_val, lh_del, lh_loh, lh_dup, Q0, pN, pNS, pP, bins, baf, rd_p_val, baf_p_val,
            segment, hets, homs, followed by the values of call["models"][0] and
            call["models"][1].
            If self.annotate is set, annotation of the call region is appended to each row.

        """
        bin_size = self.bin_size
        annotator = Annotator(self.reference_genome) if self.annotate else None
        combined_keys = ["start", "end", "size", "cnv", "p_val", "lh_del", "lh_loh",
                         "lh_dup", "Q0", "pN", "pNS", "pP", "bins", "baf",
                         "rd_p_val", "baf_p_val", "segment", "hets", "homs"]
        combined_types = {-1: "deletion", 0: "cnnloh", 1: "duplication"}
        ret = []
        for caller in self.callers:
            if caller == "rd_mean_shift":
                for file_ix in self.plot_files:
                    io = self.io[file_ix]
                    for c in io.rd_chromosomes():
                        if not ((c in self.chrom) or len(self.chrom) == 0):
                            continue
                        flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                        if not io.signal_exists(c, bin_size, "calls", flag):
                            continue
                        for call in io.read_calls(c, bin_size, "calls", flag):
                            if not (in_interval(call["size"], self.size_range)
                                    and in_interval(call["p_val"], self.p_range)
                                    and in_interval(call["pN"], self.pN_range)
                                    and in_interval(call["Q0"], self.Q0_range)
                                    and in_interval(call["dG"], self.dG_range)):
                                continue
                            cnv_type = "duplication" if call["type"] == 1 else "deletion"
                            row = [self.file_title(file_ix), caller, cnv_type, c, call["start"], call["end"],
                                   call["size"], call["cnv"], call["p_val"], call["p_val_2"],
                                   call["p_val_3"], call["p_val_4"], call["Q0"], call["pN"], call["dG"]]
                            if self.annotate:
                                row.append(annotator.get_info("%s:%d-%d" % (c, call["start"], call["end"])))
                            ret.append(row)
            elif caller == "combined_mosaic":
                for file_ix in self.plot_files:
                    io = self.io[file_ix]
                    for c in io.rd_chromosomes():
                        if not ((c in self.chrom) or len(self.chrom) == 0):
                            continue
                        flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR | \
                               (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                        if not io.signal_exists(c, bin_size, "calls combined", flag):
                            continue
                        for call in io.read_calls(c, bin_size, "calls combined", flag):
                            if not (in_interval(call["size"], self.size_range)
                                    and in_interval(call["p_val"], self.p_range)
                                    and in_interval(call["pN"], self.pN_range)
                                    and in_interval(call["Q0"], self.Q0_range)):
                                continue
                            cnv_type = combined_types[call["type"]]
                            # Title is looked up by file index, same as in the rd_mean_shift branch.
                            row = [self.file_title(file_ix), caller, cnv_type, c] + [call[k] for k in combined_keys]
                            for m in range(2):
                                row.extend(call["models"][m])
                            if self.annotate:
                                row.append(annotator.get_info("%s:%d-%d" % (c, call["start"], call["end"])))
                            ret.append(row)
        return ret

    def print_calls_file(self):
        format = self.print_filename.split(".")[-1]
        calls = self.get_calls()
        if self.print_filename == "":
            for call in calls:
                print(*call, sep="\t")
        elif format == "tsv":
            with open(self.print_filename, 'w') as f:
                for call in calls:
                    print(*call, sep="\t", file=f)
        elif format == "xlsx":
            import xlsxwriter
            workbook = xlsxwriter.Workbook(self.print_filename)
            files_callers = []
            sheets = {}
            rix = {}
            for call in calls:
                caller = call[1]
                fc = call[0] + " (" + caller + ")"
                sfc = call[0][:25] + " " + ({"rd_mean_shift": "ms", "combined_mosaic": "2d"}[caller])
                if fc not in files_callers:
                    sheets[fc] = workbook.add_worksheet(sfc)
                    rix[fc] = 0
                    files_callers.append(fc)
            for call in calls:
                caller = call[1]
                fc = call[0] + " (" + caller + ")"
                cix = 0
                for f in call[2:]:
                    sheets[fc].write(rix[fc], cix, f)
                    cix += 1
                rix[fc] += 1
            workbook.close()
        elif format == "vcf":
            samples = []
            for call in calls:
                sample = call[0]
                if sample not in samples:
                    samples.append(sample)
            header = """##fileformat=VCFv4.1
##fileDate={date}
##reference={rg}
##source=CNVpytor
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=pytorRD,Number=1,Type=Float,Description="Normalized RD">
##INFO=<ID=pytorP1,Number=1,Type=Float,Description="e-val by t-test">
##INFO=<ID=pytorP2,Number=1,Type=Float,Description="e-val by Gaussian tail">
##INFO=<ID=pytorP3,Number=1,Type=Float,Description="e-val by t-test (middle)">
##INFO=<ID=pytorP4,Number=1,Type=Float,Description="e-val by Gaussian tail (middle)">
##INFO=<ID=pytorQ0,Number=1,Type=Float,Description="Fraction of reads with 0 mapping quality">
##INFO=<ID=pytorPN,Number=1,Type=Integer,Description="Fraction of N bases">
##INFO=<ID=pytorDG,Number=1,Type=Integer,Description="Distance to nearest gap in reference genome">
##INFO=<ID=pytorCL,Number=1,Type=Integer,Description="Caller method">
##INFO=<ID=SAMPLES,Number=.,Type=String,Description="Sample genotyped to have the variant">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=LOH,Description="Copy number neutral loss of heterozygosity">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">;
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{samples}"""
            if self.reference_genome:
                rg = self.reference_genome["name"]
            else:
                rg = "unknown"
            header = header.format(date=datetime.date.today().strftime("%Y-%m-%d"), rg=rg, samples="\t".join(samples))
            ii = 0
            with open(self.print_filename, 'w') as f:
                print(header, file=f)
                for call in calls:
                    ii += 1
                    id = "CNVpytor_" + {"deletion": "del", "duplication": "dup", "cnnloh": "loh"}[call[2]] + str(ii)
                    alt = {"deletion": "<DEL>", "duplication": "<DUP>", "cnnloh": "<LOH>"}[call[2]]
                    info = "END=" + str(int(call[5])) + ";IMPRECISE;SVLEN=" + str(int(call[6])) + ";SVTYPE=" + alt[1:4]
                    info += ";pytorRD=" + str(call[7])
                    info += ";pytorP1=" + str(call[8])
                    info += ";pytorP2=" + str(call[9])
                    info += ";pytorP3=" + str(call[10])
                    info += ";pytorP4=" + str(call[11])
                    info += ";pytorQ0=" + str(call[12])
                    info += ";pytorPN=" + str(call[13])
                    info += ";pytorDG=" + str(call[14])
                    info += ";pytorCL=" + call[1]
                    format = "GT:CN"
                    row = [call[3], int(call[4]), id, ".", alt, ".", "PASS", info, format]
                    for sample in samples:
                        if sample == call[0]:
                            if call[2] == "deletion" and call[7] < 0.25:
                                row.append("1/1:0")
                            elif call[2] == "deletion" and call[7] > 0.25:
                                row.append("0/1:0")
                            elif call[2] == "duplication" and call[7] <= 1.75:
                                row.append("0/1:2")
                            elif call[2] == "duplication" and call[7] > 1.75 and call[7] <= 2.25:
                                row.append("1/1:2")
                            elif call[2] == "duplication" and call[7] > 2.25:
                                row.append("./1:%.2f" % call[7])
                            else:
                                row.append("./.:.")
                        else:
                            row.append("./.:.")
                    print(*row, sep="\t", file=f)
        if self.plot:
            for call in calls:
                plot_start = call[4] - call[6]
                if plot_start < 0:
                    plot_start = 0
                plot_end = call[5] + call[6]
                self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])

    def print_calls(self):
        """ Prints filtered CNV calls of all plot files to stdout, one tab separated line per call.

        Calls are read for the current bin size for every caller in self.callers
        ("rd_mean_shift" and "combined_mosaic" are supported) and filtered by self.size_range,
        self.p_range, self.pN_range, self.Q0_range (and self.dG_range for "rd_mean_shift").
        Chromosomes are restricted to self.chrom unless it is empty.

        Each line is prefixed by the file title when more than one file is plotted and by the
        caller name when more than one caller is used. If self.annotate is set, annotation of
        the call region is appended. If self.plot is set, each call is plotted with flanking
        regions of the call size on both sides (using self.multiple_regions).

        """
        bin_size = self.bin_size
        ix = self.plot_files
        n = len(ix)
        annotator = Annotator(self.reference_genome) if self.annotate else None
        for caller in self.callers:
            if caller == "rd_mean_shift":
                for file_ix in ix:
                    io = self.io[file_ix]
                    for c in io.rd_chromosomes():
                        if not ((c in self.chrom) or len(self.chrom) == 0):
                            continue
                        flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                        if not io.signal_exists(c, bin_size, "calls", flag):
                            continue
                        for call in io.read_calls(c, bin_size, "calls", flag):
                            if not (in_interval(call["size"], self.size_range)
                                    and in_interval(call["p_val"], self.p_range)
                                    and in_interval(call["pN"], self.pN_range)
                                    and in_interval(call["Q0"], self.Q0_range)
                                    and in_interval(call["dG"], self.dG_range)):
                                continue
                            cnv_type = "duplication" if call["type"] == 1 else "deletion"
                            if n > 1:
                                # Title is looked up by file index, not by position in plot_files.
                                print("%s\t" % self.file_title(file_ix), end="")
                            if len(self.callers) > 1:
                                print("%s\t" % caller, end="")
                            print("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e\t%.4f\t%.4f\t%d\t" % (
                                cnv_type, c, call["start"], call["end"], call["size"], call["cnv"],
                                call["p_val"],
                                call["p_val_2"], call["p_val_3"], call["p_val_4"], call["Q0"], call["pN"],
                                call["dG"]), end="")
                            if self.annotate:
                                print("\t%s" % annotator.get_info(
                                    "%s:%d-%d" % (c, call["start"], call["end"])))
                            else:
                                print()
                            if self.plot:
                                plot_start = call["start"] - call["size"]
                                if plot_start < 0:
                                    plot_start = 0
                                plot_end = call["end"] + call["size"]
                                self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])
            elif caller == "combined_mosaic":
                keys = ["start", "end", "size", "cnv", "p_val", "lh_del", "lh_loh",
                        "lh_dup", "Q0", "pN", "pNS", "pP", "bins", "baf",
                        "rd_p_val", "baf_p_val", "segment", "hets", "homs"]
                for file_ix in ix:
                    io = self.io[file_ix]
                    for c in io.rd_chromosomes():
                        if not ((c in self.chrom) or len(self.chrom) == 0):
                            continue
                        flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR | \
                               (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                        if not io.signal_exists(c, bin_size, "calls combined", flag):
                            continue
                        for call in io.read_calls(c, bin_size, "calls combined", flag):
                            if not (in_interval(call["size"], self.size_range)
                                    and in_interval(call["p_val"], self.p_range)
                                    and in_interval(call["pN"], self.pN_range)
                                    and in_interval(call["Q0"], self.Q0_range)):
                                continue
                            if n > 1:
                                print("%s\t" % self.file_title(file_ix), end="")
                            if len(self.callers) > 1:
                                print("%s\t" % caller, end="")
                            cnv_type = {-1: "deletion", 0: "cnnloh", 1: "duplication"}[call["type"]]
                            data = [cnv_type, c] + [call[k] for k in keys]
                            for m in range(2):
                                data.extend(call["models"][m])

                            print(("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e" + \
                                   "\t%.4f\t%.4f\t%.4f\t%.4f\t" + "%d\t%d\t%.4f\t%e\t%e\t%d\t%d\t%d\t" + \
                                   "CN%d/CN%d\t%e\t%.4f\t%d\tCN%d/CN%d\t%e\t%.4f") % tuple(data), end="")
                            if self.annotate:
                                print("\t%s" % annotator.get_info("%s:%d-%d" % (data[1], data[2], data[3])))
                            else:
                                print()
                            if self.plot:
                                plot_start = call["start"] - call["size"]
                                if plot_start < 0:
                                    plot_start = 0
                                plot_end = call["end"] + call["size"]
                                self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])

    def print_simple_joint_calls(self):

        bin_size = self.bin_size
        n = len(self.plot_files)
        if n == 0:
            return
        ix = self.plot_files
        format = self.print_filename.split(".")[-1]
        if format == "tsv":
            f = open(self.print_filename, 'w')
        elif format == "xlsx":
            import xlsxwriter
            if os.path.exists(self.print_filename):
                os.remove(self.print_filename)
            workbook = xlsxwriter.Workbook(self.print_filename)
            sheet = workbook.add_worksheet("merged_calls")
            header = ["TYPE", "REGION", "SIZE"]
            for i in range(n):
                header.append(self.file_title(ix[i]))
            if self.annotate:
                header.append("GENES")
            styleh = workbook.add_format({'bold': True, 'font_color': 'white'})
            styleh.set_pattern(1)  # This is optional when using a solid fill.
            styleh.set_bg_color('#555555')
            styleh2 = workbook.add_format({'bold': True, 'font_color': 'white'})
            styleh2.set_pattern(1)  # This is optional when using a solid fill.
            styleh2.set_bg_color('#555555')
            styleh2.set_rotation(75)
            style_r = workbook.add_format()
            style_r.set_pattern(1)  # This is optional when using a solid fill.
            style_r.set_bg_color('red')
            style_g = workbook.add_format()
            style_g.set_pattern(1)  # This is optional when using a solid fill.
            style_g.set_bg_color('green')
            style_size = workbook.add_format({'num_format': '#,##0'})
            style_cn = workbook.add_format({'num_format': '0'})
            style_cn_b = workbook.add_format({'num_format': '0', 'bold': True})
            sheet.set_column(0, 0, 10)
            sheet.set_column(1, 1, 22)
            sheet.set_column(2, 2, 10)
            if self.annotate:
                sheet.set_column(len(header) - 1, len(header) - 1, 100)

            for col, val in enumerate(header):
                if col > 2 and col < len(header) - int(self.annotate):
                    sheet.write(0, col, val, styleh2)
                else:
                    sheet.write(0, col, val, styleh)
            ri = 0
        if self.annotate:
            annotator = Annotator(self.reference_genome)
        chroms = self.io[ix[0]].rd_chromosomes()
        for c in chroms:
            if (c in self.chrom) or len(self.chrom) == 0:
                flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                calls = [list(filter(lambda call: in_interval(call["size"], self.size_range) \
                                                  and in_interval(call["p_val"], self.p_range) \
                                                  and in_interval(call["pN"], self.pN_range) \
                                                  and in_interval(call["Q0"], self.Q0_range) \
                                                  and in_interval(call["dG"], self.dG_range),
                                     self.io[ix[i]].read_calls(c, bin_size, "calls", flag))) for i in range(n)]
                pointers = [0] * n
                while any([pointers[i] < len(calls[i]) for i in range(n)]):
                    starts = [calls[i][pointers[i]]["start"] if pointers[i] < len(calls[i]) else np.inf for i in
                              range(n)]
                    mini = starts.index(min(starts))
                    maxend = 0
                    toupdate = []
                    minend = calls[mini][pointers[mini]]["end"]
                    maxstart = 0
                    files = []
                    types = []
                    cns = []
                    for i in range(n):
                        if (pointers[i] < len(calls[i])) and ((min(calls[i][pointers[i]]["end"],
                                                                   calls[mini][pointers[mini]]["end"]) -
                                                               calls[i][pointers[i]]["start"]) > (
                                                                      0.5 * calls[mini][pointers[mini]]["size"])) \
                                and ((min(calls[i][pointers[i]]["end"],
                                          calls[mini][pointers[mini]]["end"]) -
                                      calls[i][pointers[i]]["start"]) > (
                                             0.5 * (calls[i][pointers[i]]["end"] - calls[i][pointers[i]]["start"]))):
                            toupdate.append(i)
                            call = calls[i][pointers[i]]
                            if call["end"] > maxend:
                                maxend = call["end"]
                            if call["end"] < minend:
                                minend = call["end"]
                            if call["start"] > maxstart:
                                maxstart = call["start"]
                            type = "duplication" if call["type"] == 1 else "deletion"
                            types.append(type)
                            files.append(i)
                            cns.append(int(call["cnv"] * 2))
                    type = max(set(types), key=types.count)
                    data = [type, c, maxstart, minend, minend - maxstart + 1]
                    genotypes = [
                        self.genotype([bin_size], "%s:%d-%d" % (c, maxstart, minend), file_index=ix[i], p_val=True)[0]
                        for i
                        in range(n)]
                    copynumbers = [c[3] for c in genotypes]
                    if np.all([np.abs(c - np.round(c)) < 0.25 for c in copynumbers]) or True:
                        if self.print_filename == "":
                            print(("%s\t%s:%d-%d\t%d" + n * "\t%.2f") % tuple(data + copynumbers), end="")
                            print("\t%s" % str(files), end="")
                            if self.annotate:
                                print("\t%s" % annotator.get_info("%s:%d-%d" % (c, maxstart, minend)))
                            else:
                                print()
                        elif format == "tsv":
                            print(("%s\t%s:%d-%d\t%d" + n * "\t%.2f") % tuple(data + copynumbers), end="", file=f)
                            print("\t%s" % str(files), end="", file=f)
                            if self.annotate:
                                print("\t%s" % annotator.get_info("%s:%d-%d" % (c, maxstart, minend)), file=f)
                            else:
                                print(file=f)
                        elif format == "xlsx":
                            ri += 1
                            if type == "deletion":
                                sheet.write(ri, 0, data[0], style_r)
                            else:
                                sheet.write(ri, 0, data[0], style_g)
                            sheet.write(ri, 1, "%s:%d-%d" % (c, maxstart, minend))
                            sheet.write(ri, 2, data[4], style_size)
                            for col, val in enumerate(copynumbers):
                                if col in files:
                                    sheet.write(ri, 3 + col, val, style_cn_b)
                                else:
                                    sheet.write(ri, 3 + col, val, style_cn)
                            if self.annotate:
                                sheet.write(ri, 3 + len(copynumbers),
                                            annotator.get_info("%s:%d-%d" % (c, maxstart, minend)))

                        if self.plot:
                            plot_start = maxstart - (minend - maxstart)
                            if plot_start < 0:
                                plot_start = 0
                            plot_end = minend + (minend - maxstart)
                            self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])
                    for i in toupdate:
                        pointers[i] += 1
        if format == "tsv":
            f.close()
        elif format == "xlsx":
            sheet.conditional_format(1, 3, ri, len(header) - int(self.annotate), {'type': '3_color_scale',
                                                                                  'min_color': "#FF0000",
                                                                                  'mid_color': "#FFFFFF",
                                                                                  'max_color': "#00FF00",
                                                                                  'min_type': 'num',
                                                                                  'min_value': 0,
                                                                                  'mid_type': 'num',
                                                                                  'mid_value': 2,
                                                                                  'max_type': 'num',
                                                                                  'max_value': 4
                                                                                  })
            workbook.close()

    def manhattan(self, plot_type="rd"):
        """ Plots a genome-wide (manhattan style) overview, one panel per file in self.plot_files.

        Chromosomes are taken from self.reference_genome["chromosomes"], limited to autosomes and
        sex chromosomes and, when self.chrom is not empty, to chromosomes listed there. They are
        placed one after another along the x axis (in bin units) and labelled with their canonical
        chromosome name. Nothing is plotted (a warning is logged) if the reference genome is missing.

        Parameters
        ----------
        plot_type : str
            "rd" : GC corrected RD signal; if self.rd_manhattan_call is set, stored call levels of
                the callers listed in self.callers are drawn on top.
            "baf_mosaic" : BAF level of the stored "SNP likelihood segments" that are longer than
                self.min_segment_size.
            "rd_mean_shift" : copy number (capped at 4) of stored "calls" that pass the size,
                p_val, pN and Q0 ranges.
            "combined_mosaic" : value models[0][4] of stored "calls combined" with more than
                self.min_segment_size bins (presumably the cell fraction - verify against the caller).
            Any other value leaves the panels empty.
            For all call based types marker opacity is -log(p) / self.contrast clamped to [0, 1].

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for manhattan.")
            return
        n = len(self.plot_files)
        ix = self.plot_files

        def selected(*names):
            # No chromosome filter set, or at least one of the names is in the filter.
            return len(self.chrom) == 0 or any(name in self.chrom for name in names)

        def plottable(name):
            return Genome.is_autosome(name) or Genome.is_sex_chrom(name)

        def clamp01(value):
            # RGBA components outside [0, 1] are rejected by matplotlib.
            if value > 1:
                return 1
            if value < 0:
                return 0
            return value

        def finish_axis(axis, yticks, ylim, xticks):
            # Last xtick is the running bin offset after the final chromosome.
            axis.xaxis.set_ticklabels([])
            axis.yaxis.set_ticklabels([])
            axis.yaxis.set_ticks(yticks, minor=False)
            axis.xaxis.set_ticks(xticks, minor=False)
            axis.set_ylim(ylim)
            axis.set_xlim([0, xticks[-1]])
            axis.grid()

        self.new_figure(panel_count=n, grid=(1, n), panel_size=(24, 2))
        for i in range(n):
            ax = self.next_panel()
            io = self.io[ix[i]]
            ax.set_title(self.file_title(ix[i]), position=(0.01, 1.01),
                         fontdict={'verticalalignment': 'bottom', 'horizontalalignment': 'left'})

            if plot_type == "rd":
                chroms = []
                for c, (l, t) in self.reference_genome["chromosomes"].items():
                    rd_chr = io.rd_chromosome_name(c)
                    if selected(rd_chr, c):
                        if io.signal_exists(rd_chr, bin_size, "RD", 0) and \
                                io.signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and plottable(c):
                            chroms.append((rd_chr, l))

                apos = 0
                xticks = [0]

                max_m, stdev = io.rd_normal_level(bin_size, FLAG_GC_CORR)
                flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0)
                flag_corr = flag_rd | FLAG_GC_CORR
                for c, l in chroms:
                    his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                    his_p_corr = io.get_signal(c, bin_size, "RD", flag_corr)
                    if self.rd_manhattan_call:
                        his_p_call = io.get_signal(c, bin_size, "RD call", flag_corr)
                        mosaic_seg = segments_decode(io.get_signal(c, bin_size, "RD mosaic segments", flag_corr))
                        mosaic_call = io.get_signal(c, bin_size, "RD mosaic call", flag_corr)
                        mosaic_seg_2d = segments_decode(
                            io.get_signal(c, bin_size, "RD mosaic segments 2d", flag_corr))
                        mosaic_call_2d = io.get_signal(c, bin_size, "RD mosaic call 2d", flag_corr)

                        draw_call = his_p_call is not None and len(his_p_call) > 0 and (
                                "rd_mean_shift" in self.callers)
                        draw_mosaic = mosaic_call is not None and len(mosaic_call) > 0 and (
                                "rd_mosaic" in self.callers)
                        draw_mosaic_2d = mosaic_call_2d is not None and len(mosaic_call_2d) > 0 and (
                                "combined_mosaic" in self.callers)

                        # Per-bin level tracks: NaN everywhere except inside called segments.
                        his_p_mosaic = np.zeros_like(his_p) * np.nan
                        if draw_mosaic:
                            for seg, lev in zip(list(mosaic_seg), list(mosaic_call[0])):
                                for segi in seg:
                                    his_p_mosaic[segi] = lev
                        his_p_mosaic_2d = np.zeros_like(his_p) * np.nan
                        if draw_mosaic_2d:
                            for seg, lev in zip(list(mosaic_seg_2d), list(mosaic_call_2d[0])):
                                for segi in seg:
                                    his_p_mosaic_2d[segi] = lev

                    pos = range(apos, apos + len(his_p))
                    ax.text(apos + len(his_p) // 2, max_m // 10, Genome.canonical_chrom_name(c),
                            fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                    if self.markersize == "auto":
                        plt.plot(pos, his_p_corr, ls='', marker='.')
                    else:
                        plt.plot(pos, his_p_corr, ls='', marker='.', markersize=self.markersize)
                    if self.rd_manhattan_call:
                        if draw_call:
                            plt.step(pos, his_p_call, "r")
                        if draw_mosaic:
                            plt.plot(pos, his_p_mosaic, "k")
                        if draw_mosaic_2d:
                            plt.plot(pos, his_p_mosaic_2d, "k")
                    apos += len(his_p)
                    xticks.append(apos)

                finish_axis(ax, np.arange(0, 15, 0.5) * max_m,
                            [self.rd_manhattan_range[0] * max_m, self.rd_manhattan_range[1] * max_m], xticks)

            elif plot_type == "baf_mosaic":
                chroms = []
                snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                    FLAG_USEHAP if self.snp_use_phase else 0)
                for c, (l, t) in self.reference_genome["chromosomes"].items():
                    snp_chr = io.snp_chromosome_name(c)
                    if selected(snp_chr, c):
                        if io.signal_exists(snp_chr, bin_size, "SNP likelihood call", snp_flag) and \
                                io.signal_exists(snp_chr, bin_size, "SNP likelihood segments", snp_flag) and \
                                plottable(c):
                            chroms.append((snp_chr, l))

                apos = 0
                xticks = [0]

                # One colour of the matplotlib property cycle per chromosome.
                cmap = list(map(colors.to_rgba, plt.rcParams['axes.prop_cycle'].by_key()['color']))
                for cix, (c, l) in enumerate(chroms):
                    likelihood = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                    segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))
                    call_pos = []
                    call_baf = []
                    call_c = []
                    base = cmap[cix % len(cmap)]
                    for s, lh in zip(segments, likelihood):
                        b, p = likelihood_baf_pval(lh)
                        if b > 0 and len(s) > self.min_segment_size:
                            alpha = clamp01(-np.log(p + 1e-40) / self.contrast)
                            color = (base[0], base[1], base[2], alpha)
                            for pos in s:
                                call_pos.append(apos + pos)
                                call_baf.append(b)
                                call_c.append(color)

                    ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                            fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                    plt.scatter(call_pos, call_baf, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                    apos += l // bin_size
                    xticks.append(apos)

                finish_axis(ax, np.arange(0, 0.5, 0.1), [0, 0.5], xticks)

            elif plot_type == "rd_mean_shift":
                chroms = []
                flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

                for c, (l, t) in self.reference_genome["chromosomes"].items():
                    rd_chr = io.rd_chromosome_name(c)
                    # The None guard has to apply to every alternative of the chromosome filter.
                    if rd_chr is not None and selected(rd_chr, c) and plottable(c):
                        chroms.append((rd_chr, l))

                apos = 0
                xticks = [0]

                for c, l in chroms:
                    call_pos = []
                    call_conc = []
                    call_c = []
                    # Chromosomes without stored calls take no space on the x axis.
                    if io.signal_exists(c, bin_size, "calls", flag):
                        calls = io.read_calls(c, bin_size, "calls", flag)

                        for call in calls:
                            if in_interval(call["size"], self.size_range) and in_interval(call["p_val"], self.p_range) \
                                    and in_interval(call["pN"], self.pN_range) \
                                    and in_interval(call["Q0"], self.Q0_range):
                                alpha = clamp01(- np.log(call["p_val"] + 1e-40) / self.contrast)
                                level = call["cnv"] * 2
                                if level > 4:
                                    level = 4
                                if call["type"] == 1:
                                    color = (0, 1, 0, alpha)
                                elif call["type"] == -1:
                                    color = (1, 0, 0, alpha)
                                else:
                                    color = (0, 0, 1, alpha)
                                for pos in range(int(call["start"]) // bin_size, int(call["end"]) // bin_size + 1):
                                    call_pos.append(apos + pos)
                                    call_conc.append(level)
                                    call_c.append(color)
                        ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                                fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                        plt.scatter(call_pos, call_conc, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                        apos += l // bin_size
                        xticks.append(apos)

                finish_axis(ax, np.arange(0, 4.0, 1.0), [0, 4.0], xticks)

            elif plot_type == "combined_mosaic":
                chroms = []
                flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                    FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

                for c, (l, t) in self.reference_genome["chromosomes"].items():
                    snp_chr = io.snp_chromosome_name(c)
                    # The None guard has to apply to every alternative of the chromosome filter.
                    if snp_chr is not None and selected(snp_chr, c) and plottable(c):
                        chroms.append((snp_chr, l))

                apos = 0
                xticks = [0]

                for c, l in chroms:
                    call_pos = []
                    call_conc = []
                    call_c = []
                    # Chromosomes without stored calls take no space on the x axis.
                    if io.signal_exists(c, bin_size, "calls combined", flag):
                        calls = io.read_calls(c, bin_size, "calls combined", flag)

                        for call in calls:
                            if call["bins"] > self.min_segment_size:
                                alpha = clamp01(-np.log(call["p_val"] + 1e-40) / self.contrast)
                                if call["type"] == 1:
                                    color = (0, 1, 0, alpha)
                                elif call["type"] == -1:
                                    color = (1, 0, 0, alpha)
                                else:
                                    color = (0, 0, 1, alpha)
                                for pos in range(int(call["start"]) // bin_size, int(call["end"]) // bin_size + 1):
                                    call_pos.append(apos + pos)
                                    call_conc.append(call["models"][0][4])
                                    call_c.append(color)

                        ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                                fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                        plt.scatter(call_pos, call_conc, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                        apos += l // bin_size
                        xticks.append(apos)

                finish_axis(ax, np.arange(0, 1.0, 0.1), [0, 1.0], xticks)

        self.fig_show(suffix="manhattan" if plot_type == "rd" else "snp_calls")

    def callmap(self, color="frequency", background="white", pixel_size=1700000, max_p_val=1e-20, min_freq=0.01,
                plot="cmap"):
        """ Builds an RGB map of stored "calls combined" (files x genome pixels) and plots it.

        Chromosomes longer than 10 bins (and passing the self.chrom filter when it is not empty)
        are laid side by side; each covers length // pixel_size + 1 pixels and its last pixel is
        used as a border. Only calls with more than self.min_segment_size bins, p_val below
        max_p_val, a "segment" entry and models[0][4] above min_freq are painted.

        Parameters
        ----------
        color : str
            "frequency" : channel int(type) + 1 keeps the largest models[0][4] seen in the pixel.
            "coverage" : channel int(type) + 1 accumulates bin_size / pixel_size per painted bin.
            anything else : pixel colour is chosen by models[0][0] (model copy number).
        background : str
            If "white", empty pixels become white and chromosome borders black.
        pixel_size : int
            Number of base pairs per pixel.
        max_p_val : float
            Upper limit for call p-value.
        min_freq : float
            Lower limit for models[0][4] of a call.
        plot : str
            "cmap" : shows the map itself.
            "regions" : shows correlation between pixels (three colour channels stacked).
            anything else : shows correlation between files and a dendrogram of files.

        Returns
        -------
        cmap : numpy.ndarray
            Integer array of shape (number of files, pixels, 3) scaled to 0-255.
            None is returned if the reference genome is missing.

        """
        bin_size = self.bin_size
        if self.reference_genome is None:
            _logger.warning("Missing reference genome required for callmap.")
            return
        n = len(self.plot_files)
        ix = self.plot_files

        if plot:
            self.new_figure(panel_count=n, grid=(1, 1), panel_size=(24, 0.24 * n))

        # Pixel layout: [starts[k], ends[k]] is the inclusive pixel range of chroms[k].
        chroms = []
        starts = []
        ends = []
        pixels = 0
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            if l > 10 * bin_size:
                if len(self.chrom) == 0 or (c in self.chrom) or (self.io[0].snp_chromosome_name(c) in self.chrom):
                    chroms.append(c)
                    starts.append(pixels)
                    pixels += l // pixel_size + 1
                    ends.append(pixels - 1)

        cmap = np.zeros((n, pixels, 3))
        cmap[:, ends, :] = 1

        flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
        flag_rd = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        for i in range(n):
            io = self.io[ix[i]]
            print(io.filename)
            for c, start in zip(chroms, starts):
                snp_chr = io.snp_chromosome_name(c)
                if not io.signal_exists(snp_chr, bin_size, "calls combined", flag):
                    continue

                calls = io.read_calls(snp_chr, bin_size, "calls combined", flag)
                segments = segments_decode(io.get_signal(snp_chr, bin_size, "RD mosaic segments 2d", flag_rd))

                for call in calls:
                    if not (call["bins"] > self.min_segment_size and call["p_val"] < max_p_val
                            and "segment" in call and call["models"][0][4] > min_freq):
                        continue
                    cix = int(call["type"]) + 1
                    freq = call["models"][0][4]
                    model_cn = call["models"][0][0]
                    for b in segments[int(call["segment"])]:
                        px = start + b * bin_size // pixel_size
                        if color == "frequency":
                            cmap[i, px, cix] = max(cmap[i, px, cix], freq)
                        elif color == "coverage":
                            cmap[i, px, cix] += bin_size / pixel_size
                        else:  # model copy number
                            if model_cn == 0:
                                cmap[i, px, 0] = 1
                            elif model_cn == 1:
                                cmap[i, px, 0] = 1
                                cmap[i, px, 1] = 1
                            elif model_cn == 2:
                                cmap[i, px, 2] = 1
                            else:
                                cn = model_cn
                                if cn > 6:
                                    cn = 6
                                cmap[i, px, 1] = (2 + cn) / 8

        def b2w(pixel):
            # Black background -> white background: an all-ones border pixel turns black, otherwise
            # the dominant channel is saturated and the other two are set to 1 - its value.
            if np.all(pixel == 1):
                pixel[:] = 0
            elif pixel[0] > pixel[1] and pixel[0] > pixel[2]:
                pixel[1] = pixel[2] = 1 - pixel[0]
                pixel[0] = 1
            elif pixel[1] > pixel[2]:
                pixel[0] = pixel[2] = 1 - pixel[1]
                pixel[1] = 1
            else:
                pixel[0] = pixel[1] = 1 - pixel[2]
                pixel[2] = 1
            return pixel

        if background == "white":
            # Use the returned array instead of relying on in-place mutation of the views.
            cmap = np.apply_along_axis(b2w, 1, cmap.reshape(n * pixels, 3)).reshape(n, pixels, 3)

        cmap = (255 * cmap).astype("int")
        if plot == "cmap":
            self.new_figure(panel_count=1, grid=(1, 1), panel_size=(24, 0.24 * n))
            ax = self.next_panel()
            plt.imshow(cmap, aspect='auto')
            for border in ends[:-1]:
                plt.axvline(x=border - 0.5, color='red', linewidth=0.5)
            ax.set_yticks([])
            ax.set_yticklabels([])
            ax.set_xticks((np.array(starts) + np.array(ends)) / 2)
            chroms = [Genome.canonical_chrom_name(c) for c in chroms]
            ax.set_xticklabels(chroms)
            self.fig_show(suffix="callmap")
        elif plot == "regions":
            self.new_figure(panel_count=1, grid=(1, 1), panel_size=(24, 24))
            ax = self.next_panel()
            corr = np.corrcoef(
                np.concatenate((cmap[:, :, 0].transpose(), cmap[:, :, 1].transpose(), cmap[:, :, 2].transpose()),
                               axis=0))
            plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
            plt.colorbar()
            # Each colour channel contributes `pixels` rows, so block k starts at k * pixels.
            starts_1 = np.array(starts)
            ends_1 = np.array(ends)
            starts3 = np.concatenate((starts_1, starts_1 + pixels, starts_1 + 2 * pixels))
            ends3 = np.concatenate((ends_1, ends_1 + pixels, ends_1 + 2 * pixels))
            for border in ends3[:-1]:
                plt.axvline(x=border - 0.5, color='red', linewidth=0.5)
                plt.axhline(y=border - 0.5, color='red', linewidth=0.5)

            ax.set_xticks((starts3 + ends3) / 2)
            ax.set_yticks((starts3 + ends3) / 2)
            chroms = [Genome.canonical_chrom_name(c) for c in chroms]
            ax.set_xticklabels(chroms + chroms + chroms)
            ax.set_yticklabels(chroms + chroms + chroms)
            self.fig_show(suffix="callmap")
        else:
            self.new_figure(panel_count=2, panel_size=(12, 12))
            ax = self.next_panel()
            # One row per file: the three colour channels concatenated along the pixel axis.
            x = np.concatenate((cmap[:, :, 0], cmap[:, :, 1], cmap[:, :, 2]),
                               axis=1)
            corr = np.corrcoef(x)
            plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
            plt.colorbar()
            ax = plt.gca()

            ax.set_xticks(range(n))
            ax.set_yticks(range(n))
            ax = self.next_panel()
            Z = hierarchy.linkage(x, 'average', 'correlation')
            hierarchy.dendrogram(Z)

            self.fig_show(suffix="callmap")
        return cmap

    def multiple_regions(self, regions):
        """ Plots all regions for all files listed in self.plot_files in a single figure.

        A figure with one panel per (file, region) pair is created, self.regions is called for
        each pair (files in the outer loop, regions in the inner loop) and the figure is shown
        with suffix "regions".

        Parameters
        ----------
        regions : list of str
            List of region strings passed one by one to self.regions.

        """
        self.new_figure(panel_count=len(self.plot_files) * len(regions))
        for file_index in self.plot_files:
            for region in regions:
                self.regions(file_index, region)
        self.fig_show(suffix="regions")

    def regions(self, ix, region):
        panels = self.panels
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        self.new_subgrid(len(panels), hspace=0.05, wspace=0.1)
        r = decode_region(region, max_size=1000000000)
        io = self.io[ix]
        for i in range(len(panels)):
            ax = self.next_subpanel(sharex=True)
            if i == 0 and self.title:
                ax.set_title(self.file_title(ix) + ": " + region, position=(0.01, 0.9),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                             color='C0')

            if panels[i] == "rd":
                g_p = [0]
                g_p_corr = [0]
                g_p_seg = [0]
                g_p_call = [0]
                g_p_call_mosaic = [0]
                g_p_call_mosaic_2d = [0]
                mean, stdev = 0, 0
                borders = []
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    flag_rd = 0
                    if self.rd_use_mask:
                        flag_rd = FLAG_USEMASK
                    mean, stdev = io.rd_normal_level(bin_size, flag_rd | FLAG_GC_CORR)
                    his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                    his_p_corr = io.get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
                    his_p_seg = io.get_signal(c, bin_size, "RD partition", flag_rd | FLAG_GC_CORR)
                    his_p_call = io.get_signal(c, bin_size, "RD call", flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg = io.get_signal(c, bin_size, "RD mosaic segments",
                                                     flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg = segments_decode(his_p_mosaic_seg)
                    his_p_mosaic_call = io.get_signal(c, bin_size, "RD mosaic call",
                                                      flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg_2d = io.get_signal(c, bin_size, "RD mosaic segments 2d",
                                                        flag_rd | FLAG_GC_CORR)
                    his_p_mosaic_seg_2d = segments_decode(his_p_mosaic_seg_2d)
                    his_p_mosaic_call_2d = io.get_signal(c, bin_size, "RD mosaic call 2d",
                                                         flag_rd | FLAG_GC_CORR)
                    his_p_mosaic = np.zeros_like(his_p) * np.nan
                    if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and ("rd_mosaic" in self.callers):
                        for seg, lev in zip(list(his_p_mosaic_seg), list(his_p_mosaic_call[0])):
                            for segi in seg:
                                his_p_mosaic[segi] = lev
                    his_p_mosaic_2d = np.zeros_like(his_p) * np.nan
                    if his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and (
                            "combined_mosaic" in self.callers):
                        for seg, lev in zip(list(his_p_mosaic_seg_2d), list(his_p_mosaic_call_2d[0])):
                            for segi in seg:
                                his_p_mosaic_2d[segi] = lev
                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    bins = len(list(his_p[start_bin:end_bin]))
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:bins])

                    g_p.extend(list(his_p[start_bin:end_bin]))
                    g_p_corr.extend(list(his_p_corr[start_bin:end_bin]))
                    if his_p_seg is not None and len(his_p_seg) > 0 and self.rd_partition:
                        g_p_seg.extend(list(his_p_seg[start_bin:end_bin]))
                    if his_p_call is not None and len(his_p_call) > 0 and self.rd_call and (
                            "rd_mean_shift" in self.callers):
                        g_p_call.extend(list(his_p_call[start_bin:end_bin]))
                    if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and self.rd_call and (
                            "rd_mosaic" in self.callers):
                        g_p_call_mosaic.extend(list(his_p_mosaic[start_bin:end_bin]))
                    if his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and self.rd_call and (
                            "combined_mosaic" in self.callers):
                        g_p_call_mosaic_2d.extend(list(his_p_mosaic_2d[start_bin:end_bin]))
                    borders.append(len(g_p) - 1)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(g_p)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)

                if (self.rd_range[1] - self.rd_range[0]) < 30:
                    ax.yaxis.set_ticks(np.arange(int(self.rd_range[0]), int(self.rd_range[1] + 1), 1) * mean / 2,
                                       minor=[])
                    ax.yaxis.set_ticklabels([str(i) for i in range(int(self.rd_range[0]), int(self.rd_range[1] + 1))])
                ax.set_ylim([self.rd_range[0] * mean / 2, self.rd_range[1] * mean / 2])
                ax.set_ylabel("Read depth")
                ax.yaxis.grid()

                if self.rd_raw:
                    ax.step(g_p, self.rd_colors[0], label="raw")
                if self.rd_corrected:
                    ax.step(g_p_corr, self.rd_colors[1], label="GC corrected")
                if len(g_p_seg) > 1:
                    plt.step(g_p_seg, self.rd_colors[2], label="partitioning")
                if len(g_p_call) > 1:
                    plt.step(g_p_call, self.rd_colors[3], label="cnv calls")
                if len(g_p_call_mosaic) > 1:
                    plt.step(g_p_call_mosaic, self.rd_colors[4], label="mosaic cnv calls")
                if len(g_p_call_mosaic_2d) > 1:
                    plt.step(g_p_call_mosaic_2d, self.rd_colors[5], label="combined cnv calls")
                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                if self.legend:
                    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=2)
                self.fig.add_subplot(ax)

            elif panels[i] == "snp":
                borders = []
                hpos = []
                baf = []
                color = []
                alpha = 0.7
                start_pos = 0
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c)
                    ix = 0
                    mdp = 0
                    while ix < len(pos) and pos[ix] <= pos2:
                        if pos[ix] >= pos1 and (nref[ix] + nalt[ix]) != 0 and ((not self.snp_use_id) or (flag[ix] & 1)):
                            hpos.append((start_pos + pos[ix] - pos1) / bin_size)
                            if pos[ix] - pos1 > mdp:
                                mdp = pos[ix] - pos1
                            if gt[ix] % 4 != 2:
                                baf.append(1.0 * nalt[ix] / (nref[ix] + nalt[ix]))
                            else:
                                baf.append(1.0 * nref[ix] / (nref[ix] + nalt[ix]))
                            if self.snp_alpha_P:
                                alpha = None
                                color.append(
                                    colors.to_rgba(self.snp_colors[(gt[ix] % 4) * 2 + 1], (flag[ix] >> 1) * 0.4))
                            else:
                                color.append(self.snp_colors[(gt[ix] % 4) * 2 + (flag[ix] >> 1)])
                        ix += 1
                    start_pos += pos2 - pos1
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size))
                    borders.append(start_pos / bin_size)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(pos_x)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)

                # ax.xaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
                ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                ax.set_ylabel("Allele frequency")
                l = max(hpos)
                ax.set_ylim([-0.05, 1.05])
                # ax.set_xlim([0, borders[-1]])
                ax.yaxis.grid()
                if self.markersize == "auto":
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=10, alpha=alpha)
                else:
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=self.markersize, alpha=alpha)

                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "snv" or panels[i][:4] == "snv:":
                callset = "default"
                if panels[i][:4] == "snv:":
                    callset = panels[i].split(":")[1]
                borders = []
                hpos = []
                baf = []
                color = []
                alpha = 0.7
                start_pos = 0
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
                    ix = 0
                    mdp = 0
                    while ix < len(pos) and pos[ix] <= pos2:
                        if pos[ix] >= pos1 and (nref[ix] + nalt[ix]) != 0:
                            hpos.append((start_pos + pos[ix] - pos1) / bin_size)
                            if pos[ix] - pos1 > mdp:
                                mdp = pos[ix] - pos1
                            if gt[ix] % 4 != 2:
                                baf.append(1.0 * nalt[ix] / (nref[ix] + nalt[ix]))
                            else:
                                baf.append(1.0 * nref[ix] / (nref[ix] + nalt[ix]))
                            if self.snp_alpha_P:
                                alpha = None
                                color.append(
                                    colors.to_rgba(self.snp_colors[(gt[ix] % 4) * 2 + 1], (flag[ix] >> 1) * 0.4))
                            else:
                                color.append(self.snp_colors[(gt[ix] % 4) * 2 + (flag[ix] >> 1)])
                        ix += 1
                    start_pos += pos2 - pos1
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size))
                    borders.append(start_pos / bin_size)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(pos_x)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)
                ax.xaxis.grid()
                ax.yaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
                ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                ax.set_ylabel("Allele frequency")
                ax.set_ylim([0., 1.])
                ax.yaxis.grid()
                if self.markersize == "auto":
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=10, alpha=alpha)
                else:
                    ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=self.markersize, alpha=alpha)

                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "baf":
                g_baf, g_maf, g_i1, g_i2 = [0], [0], [0], [0]
                borders = []
                pos_x = []

                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000

                    flag_snp = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                        FLAG_USEHAP if self.snp_use_phase else 0)
                    baf = io.get_signal(c, bin_size, "SNP baf", flag_snp)
                    maf = io.get_signal(c, bin_size, "SNP maf", flag_snp)
                    i1 = io.get_signal(c, bin_size, "SNP i1", flag_snp)
                    i2 = io.get_signal(c, bin_size, "SNP i2", flag_snp)

                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    bins = len(list(baf[start_bin:end_bin]))
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:bins])

                    g_baf.extend(list(baf[start_bin:end_bin]))
                    g_maf.extend(list(maf[start_bin:end_bin]))
                    g_i1.extend(list(i1[start_bin:end_bin]))
                    g_i2.extend(list(i2[start_bin:end_bin]))
                    borders.append(len(g_baf) - 1)

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(g_baf)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()

                ax.yaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
                ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
                ax.set_ylabel("Allele frequency")

                ax.set_ylim([0, 1])
                # ax.set_xlim([-l * 0.0, l * 1.0])

                ax.yaxis.grid()
                # ax.xaxis.grid()
                ax.step(g_baf, self.baf_colors[0], label="BAF")
                ax.step(g_maf, self.baf_colors[1], label="MAF")
                ax.step(g_i1, self.baf_colors[2], label="I1")
                if self.legend:
                    ax.legend()
                for i in borders[:-1]:
                    ax.axvline(i, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "likelihood":
                borders = []
                gl = []
                call_pos = []
                call_i1 = []
                call_i2 = []
                call_c = []
                call_pos_2d = []
                call_i1_2d = []
                call_i2_2d = []
                call_c_2d = []
                tlen = 0
                tlen_2d = 0
                pos_x = []
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000
                    likelihood = io.get_signal(c, bin_size, "SNP likelihood", snp_flag)
                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    bins = len(list(likelihood[start_bin:end_bin]))
                    pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:bins])
                    gl.extend(list(likelihood[start_bin:end_bin]))
                    borders.append(len(gl) - 1)
                    if self.snp_call and ("baf_mosaic" in self.callers):
                        likelihood_call = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                        segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))

                        for s, lh in zip(segments, likelihood_call):
                            i1, i2, p = likelihood_pixels_pval(lh)
                            if i1 != i2 and len(s) > self.min_segment_size:
                                alpha = -np.log(p + 1e-40) / self.contrast
                                if alpha > 1:
                                    alpha = 1
                                for pos in s:
                                    if pos >= start_bin and pos < end_bin:
                                        call_pos.append(pos - start_bin + tlen)
                                        call_i1.append(min(i1, i2))
                                        call_i2.append(max(i1, i2))
                                        color = colors.to_rgb(self.lh_colors[0]) + (alpha,)
                                        call_c.append(color)
                        tlen += end_bin - start_bin
                    if self.snp_call and ("combined_mosaic" in self.callers):
                        likelihood_call = io.get_signal(c, bin_size, "SNP likelihood call 2d", snp_flag)
                        segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments 2d", snp_flag))

                        for s, lh in zip(segments, likelihood_call):
                            i1, i2, p = likelihood_pixels_pval(lh)
                            if i1 != i2 and len(s) > self.min_segment_size:
                                alpha = -np.log(p + 1e-40) / self.contrast
                                if alpha > 1:
                                    alpha = 1
                                for pos in s:
                                    if pos >= start_bin and pos < end_bin:
                                        call_pos_2d.append(pos - start_bin + tlen_2d)
                                        call_i1_2d.append(min(i1, i2))
                                        call_i2_2d.append(max(i1, i2))
                                        color = colors.to_rgb(self.lh_colors[1]) + (alpha,)
                                        call_c_2d.append(color)
                        tlen_2d += end_bin - start_bin

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                img = np.array(gl).transpose()
                l = img.shape[1]
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    # ax.xaxis.grid()
                else:
                    plt.setp(ax.get_xticklabels(), visible=False)

                ax.imshow(img, aspect='auto')
                # ax.xaxis.set_ticklabels([])
                ax.yaxis.set_ticks([0, img.shape[0] / 4, img.shape[0] / 2, 3 * img.shape[0] / 4, img.shape[0] - 1],
                                   minor=[])
                ax.yaxis.set_ticklabels(["1", "3/4", "1/2", "1/4", "0"])
                ax.set_ylabel("Allele frequency")
                # ax.xaxis.set_ticks(np.arange(0, len(gl), 50), minor=[])
                # ax.set_xlim([-0.5, img.shape[1] - 0.5])
                if self.snp_call and ("baf_mosaic" in self.callers):
                    plt.scatter(call_pos, call_i1, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                                marker=self.lh_marker)
                    plt.scatter(call_pos, call_i2, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                                marker=self.lh_marker)
                if self.snp_call and ("combined_mosaic" in self.callers):
                    plt.scatter(call_pos_2d, call_i1_2d, s=self.lh_markersize, color=np.array(call_c_2d),
                                edgecolors='face', marker=self.lh_marker)
                    plt.scatter(call_pos_2d, call_i2_2d, s=self.lh_markersize, color=np.array(call_c_2d),
                                edgecolors='face', marker=self.lh_marker)

                for i in borders[:-1]:
                    ax.axvline(i + 0.5, color="g", lw=1)
                self.fig.add_subplot(ax)

            elif panels[i] == "CN":
                borders = []
                gh1 = []
                gh2 = []
                tlen = 0
                tlen_2d = 0
                for c, (pos1, pos2) in r:
                    if pos2 == 1000000000:
                        pos2 = io.get_chromosome_length(c)
                        if pos2 is None:
                            pos2 = 1000000000

                    his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                    start_bin = (pos1 - 1) // bin_size
                    end_bin = pos2 // bin_size
                    if end_bin > len(his_p):
                        end_bin = len(his_p)
                    h1 = np.array([0] * (end_bin - start_bin))
                    h2 = np.array([0] * (end_bin - start_bin))
                    h1[his_p != 0] = 1
                    h2[his_p != 0] = 1

                    flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                        FLAG_USEHAP if self.snp_use_phase else 0) | (
                               FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                    flag_rd = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
                    if io.signal_exists(c, bin_size, "calls combined", flag):
                        calls = io.read_calls(c, bin_size, "calls combined", flag)
                        segments = io.get_signal(c, bin_size, "RD mosaic segments 2d", flag_rd)
                        segments = segments_decode(segments)

                        for call in calls:
                            for b in segments[int(call["segment"])]:
                                if b < end_bin and b >= start_bin:
                                    h1[b - start_bin] = call["models"][0][1]
                                    h2[b - start_bin] = call["models"][0][2]
                    gh1.extend(list(h1))
                    gh2.extend(list(h2))
                    borders.append(len(gh1) - 1)
                x = range(len(gh1))
                plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)
                plt.stackplot(x, gh1, gh2, baseline='sym')

                def format_func(value, tick_number):
                    ix = int(value)
                    if ix + 1 < len(pos_x):
                        p = pos_x[ix] + (pos_x[ix + 1] - pos_x[ix]) * (value - ix)
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    elif ix < len(pos_x):
                        p = pos_x[ix]
                        return "{0} Mbp".format(int(p / 100) / 10000)
                    else:
                        return ""

                l = len(gh1)
                if i == len(panels) - 1:
                    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
                    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
                    ax.set_xlim([-l * 0.0, (l - 1) * 1.0])
                    ax.xaxis.grid()

                for i in borders[:-1]:
                    ax.axvline(i + 0.5, color="g", lw=1)
                self.fig.add_subplot(ax)

    def global_plot(self):
        """ Plots genome-wide overview (chromosomes laid side by side) for each file in self.plot_files.

        For every plotted file one figure panel is created and one sub-panel is stacked
        (with shared x axis) for each entry of self.panels. Handled panel types are
        "rd", "snp", "snv" / "snv:<callset>" and "likelihood"; for any other panel name
        the sub-panel is created but nothing is drawn in it.

        Chromosomes are taken from self.reference_genome, restricted to autosomes and sex
        chromosomes known to the first plotted file and, if self.chrom is not empty,
        to chromosomes listed there. The x axis is in units of bins; each chromosome
        occupies ``length // bin_size + 1`` bins.

        """
        chroms = []
        for c, (length, _) in self.reference_genome["chromosomes"].items():
            rd_chr = self.io[self.plot_files[0]].rd_chromosome_name(c)
            if rd_chr is None:
                continue
            selected = len(self.chrom) == 0 or (rd_chr in self.chrom) or (c in self.chrom)
            if selected and (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append((rd_chr, length))
        panels = self.panels
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        rd_flag = (FLAG_USEMASK if self.rd_use_mask else 0) | (FLAG_GC_CORR if self.rd_use_gc_corr else 0)
        self.new_figure(panel_count=len(self.plot_files))
        for file_ix in self.plot_files:
            self.new_subgrid(len(panels), hspace=0.05, wspace=0.05)
            io = self.io[file_ix]
            for i, panel in enumerate(panels):
                ax = self.next_subpanel(sharex=True)
                # Only the bottom sub-panel shows chromosome names on the x axis.
                last_panel = i == len(panels) - 1
                if i == 0:
                    ax.set_title(self.file_title(file_ix), position=(0.01, 0.9),
                                 fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                                 color='C0')

                if panel == "rd":
                    self._global_plot_rd(ax, io, chroms, bin_size, rd_flag, last_panel)
                elif panel == "snp":
                    # No callset argument: read_snp is called with its own default.
                    self._global_plot_snp(ax, io, chroms, bin_size, last_panel)
                elif panel == "snv" or panel[:4] == "snv:":
                    callset = "default"
                    if panel[:4] == "snv:":
                        callset = panel.split(":")[1]
                    self._global_plot_snp(ax, io, chroms, bin_size, last_panel, callset=callset)
                elif panel == "likelihood":
                    self._global_plot_likelihood(ax, io, chroms, bin_size, snp_flag, last_panel)

        self.fig_show(suffix="global")

    @staticmethod
    def _global_plot_xaxis(ax, end, xticks, xticks_minor, xticks_labels, last_panel, hide_which="minor"):
        """ Sets x range and chromosome ticks of a global plot sub-panel.

        Parameters
        ----------
        ax : matplotlib axes
            Sub-panel to configure.
        end : int
            Right limit of x axis (total number of bins).
        xticks : list of int
            Major tick positions (chromosome borders); drawn with empty labels.
        xticks_minor : list of int
            Minor tick positions (chromosome centers).
        xticks_labels : list of str
            Chromosome names used as minor tick labels.
        last_panel : bool
            If True chromosome names are shown, otherwise tick labels are hidden.
        hide_which : str
            Which tick labels ("minor" or "both") are hidden when last_panel is False.

        """
        ax.set_xlim([0, end])
        ax.xaxis.set_ticks(xticks)
        ax.xaxis.set_ticklabels([""] * len(xticks))
        if last_panel:
            ax.xaxis.set_ticks(xticks_minor, minor=True)
            ax.xaxis.set_ticklabels(xticks_labels, minor=True)
        else:
            plt.setp(ax.get_xticklabels(which=hide_which), visible=False)

    def _global_plot_rd(self, ax, io, chroms, bin_size, rd_flag, last_panel):
        """ Draws "rd" sub-panel of global plot: RD signal of all chromosomes as dots.

        Parameters
        ----------
        ax : matplotlib axes
            Sub-panel to draw into.
        io : IO
            cnvpytor file to read from.
        chroms : list of (str, int)
            Chromosome names and lengths.
        bin_size : int
            Bin size.
        rd_flag : int
            Flag used to read the RD signal.
        last_panel : bool
            True if this is the bottom sub-panel.

        """
        # The normal RD level does not depend on chromosome: read it once, before the loop,
        # so it is also defined when there are no chromosomes to plot.
        mean, _ = io.rd_normal_level(bin_size, rd_flag | FLAG_GC_CORR)
        markersize = 1 if self.markersize == "auto" else self.markersize
        start = 0
        xticks = [0]
        xticks_minor = []
        xticks_labels = []
        for c, length in chroms:
            his_p = io.get_signal(c, bin_size, "RD", rd_flag)
            ax.plot(range(start, start + len(his_p)), his_p, ls='', marker='.', markersize=markersize)
            xticks_minor.append(start + len(his_p) // 2)
            xticks_labels.append(Genome.canonical_chrom_name(c))
            start += length // bin_size + 1
            xticks.append(start)

        self._global_plot_xaxis(ax, start, xticks, xticks_minor, xticks_labels, last_panel, hide_which="both")
        # Ticks are placed every half of the mean RD level and labelled 2 * t,
        # i.e. the mean level is labelled as copy number 2.
        yticks = np.arange(self.rd_manhattan_range[0], self.rd_manhattan_range[1], 0.5)
        # Tick positions are fixed first so that labels are attached to a fixed locator.
        ax.yaxis.set_ticks(yticks * mean)
        ax.yaxis.set_ticklabels([str(int(2 * t)) for t in yticks])
        ax.set_ylabel("RD [CN]")
        ax.set_ylim([self.rd_manhattan_range[0] * mean, self.rd_manhattan_range[1] * mean])
        ax.grid()
        self.fig.add_subplot(ax)

    def _global_plot_snp(self, ax, io, chroms, bin_size, last_panel, **read_kwargs):
        """ Draws "snp" / "snv" sub-panel of global plot: allele frequency of each variant.

        Parameters
        ----------
        ax : matplotlib axes
            Sub-panel to draw into.
        io : IO
            cnvpytor file to read from.
        chroms : list of (str, int)
            Chromosome names and lengths.
        bin_size : int
            Bin size (used to convert positions to x coordinates).
        last_panel : bool
            True if this is the bottom sub-panel.
        **read_kwargs
            Additional keyword arguments passed to io.read_snp (e.g. callset).

        """
        marker_area = 0.1 if self.markersize == "auto" else self.markersize
        start = 0
        xticks = []
        xticks_minor = []
        xticks_labels = []
        for c, length in chroms:
            pos, _, _, nref, nalt, gt, flag, _ = io.read_snp(c, **read_kwargs)
            hpos = []
            baf = []
            color = []
            alpha = 0.7
            for p, nr, na, g, f in zip(pos, nref, nalt, gt, flag):
                # Skip variants without read support and, if requested, those without bit 0 set in flag.
                if (nr + na) == 0 or (self.snp_use_id and not (f & 1)):
                    continue
                hpos.append(start + (p / bin_size))
                if g % 4 != 2:
                    baf.append(1.0 * na / (nr + na))
                else:
                    baf.append(1.0 * nr / (nr + na))
                if self.snp_alpha_P:
                    # Transparency is encoded per point in the color, so no global alpha is used.
                    alpha = None
                    color.append(colors.to_rgba(self.snp_colors[(g % 4) * 2 + 1], (f >> 1) * 0.4))
                else:
                    color.append(self.snp_colors[(g % 4) * 2 + (f >> 1)])
            ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=marker_area, alpha=alpha)
            xticks_minor.append(start + length // bin_size // 2)
            xticks_labels.append(Genome.canonical_chrom_name(c))
            start += length // bin_size + 1
            xticks.append(start)

        self._global_plot_xaxis(ax, start, xticks, xticks_minor, xticks_labels, last_panel, hide_which="minor")
        # NOTE(review): grid() without arguments toggles grid visibility, so the yaxis.grid()
        # call below flips the state set for y axis by ax.grid(). Calls are kept as they were
        # to preserve the current look of the panel - confirm which grid lines are intended.
        ax.grid()
        ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0])
        ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
        ax.set_ylabel("BAF")
        ax.set_ylim([-0.05, 1.05])
        ax.yaxis.grid()
        self.fig.add_subplot(ax)

    def _global_plot_likelihood(self, ax, io, chroms, bin_size, snp_flag, last_panel):
        """ Draws "likelihood" sub-panel of global plot: SNP likelihood shown as an image.

        Parameters
        ----------
        ax : matplotlib axes
            Sub-panel to draw into.
        io : IO
            cnvpytor file to read from.
        chroms : list of (str, int)
            Chromosome names and lengths.
        bin_size : int
            Bin size.
        snp_flag : int
            Flag used to read the "SNP likelihood" signal.
        last_panel : bool
            True if this is the bottom sub-panel.

        """
        start = 0
        xticks = [0]
        xticks_minor = []
        xticks_labels = []
        gl = []
        for c, length in chroms:
            lh = list(io.get_signal(c, bin_size, "SNP likelihood", snp_flag))
            size = length // bin_size + 1
            missing = size - len(lh)
            if missing > 0:
                # Pad to the full chromosome width by repeating the last available row
                # (of this chromosome or, if it has none, of the previous one).
                # NOTE(review): nothing is padded when no row was collected so far, which
                # shifts the image relative to the ticks - confirm whether this can occur.
                if len(lh) > 0:
                    lh.extend([lh[-1]] * missing)
                elif len(gl) > 0:
                    lh.extend([gl[-1]] * missing)

            gl.extend(lh)
            xticks_minor.append(start + length // bin_size // 2)
            xticks_labels.append(Genome.canonical_chrom_name(c))
            start += length // bin_size + 1
            xticks.append(start)

        if len(gl) > 0:
            img = np.array(gl).transpose()
            # First and last image rows are blanked before drawing.
            img[0, :] = 0
            img[-1, :] = 0
            ax.imshow(img, aspect='auto')
            ax.yaxis.set_ticks([0, img.shape[0] / 4, img.shape[0] / 2, 3 * img.shape[0] / 4, img.shape[0] - 1],
                               minor=False)
            ax.yaxis.set_ticklabels(["1", "3/4", "1/2", "1/4", "0"])
        else:
            # Without any likelihood row there is no image to index or show.
            _logger.warning("No SNP likelihood data to plot. Likelihood panel is left empty!")
        ax.set_ylabel("BAF")
        self._global_plot_xaxis(ax, start, xticks, xticks_minor, xticks_labels, last_panel, hide_which="minor")
        ax.xaxis.grid()
        self.fig.add_subplot(ax)

    def circular(self):
        """ Plots RD and SNP MAF signals on a circle, one polar panel per plot file.

        For each file in self.plot_files, autosomes and sex chromosomes of the reference genome that
        have both "RD" and "SNP maf" signals for the current bin size (and pass the self.chrom filter,
        if one is set) are laid out clockwise, starting from the top of the circle.
        MAF is drawn as a band filled between 1 - maf / 2 and the rim (radius 1), RD as a scatter at
        radius 0.1 + 0.7 * rd / (self.rd_range[1] * rd_mean / 2). Thin black reference circles are
        drawn at the radii that correspond to rd = cn * rd_mean / 2 for cn = 0 .. int(self.rd_range[1]) - 1.

        A file without any plottable chromosome gets an empty, titled panel and a logged warning.

        """
        # np.warnings was just an alias of the standard warnings module and is gone in NumPy >= 1.24.
        import warnings

        chroms = self.chrom
        bin_size = self.bin_size
        n = len(self.plot_files)
        ix = self.plot_files
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        rd_flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        markersize = 5 if self.markersize == "auto" else self.markersize
        self.new_figure(panel_count=n)
        for i in range(n):
            ax = self.next_polar_panel()
            ax.set_theta_zero_location("N")
            ax.set_theta_direction(-1)
            rainbow = ax._get_lines
            io = self.io[ix[i]]

            # Collect chromosomes to plot and the total number of angular slots they occupy.
            plot_len = 0
            plot_chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                rd_chr = io.rd_chromosome_name(c)
                if rd_chr is not None and (len(chroms) == 0 or (rd_chr in chroms) or (c in chroms)) and (
                        Genome.is_autosome(c) or Genome.is_sex_chrom(c)
                ) and io.signal_exists(rd_chr, bin_size, "SNP maf", snp_flag) and io.signal_exists(
                    rd_chr, bin_size, "RD", rd_flag):
                    plot_chroms.append((rd_chr, l))
                    plot_len += l // bin_size + 1

            if plot_len == 0:
                # Nothing to draw; the angular step below would otherwise divide by zero.
                _logger.warning("No chromosomes with RD and SNP data to plot for file '%s'!", io.filename)
                ax.set_title(self.file_title(ix[i]), loc="left", fontsize=10, weight="bold", color="black")
                continue

            rd_mean, _ = io.rd_normal_level(bin_size, rd_flag)
            tl = 0
            dt = 2.0 * np.pi / plot_len
            # Exactly plot_len samples; np.arange with a float step may return one extra element.
            theta = dt * np.arange(plot_len)
            angles = []
            labels = []
            for c, l in plot_chroms:
                next_color = rainbow.get_next_color()
                rd = io.get_signal(c, bin_size, "RD", rd_flag)
                maf = io.get_signal(c, bin_size, "SNP maf", snp_flag)
                c01 = io.get_signal(c, bin_size, "SNP bin count 0|1", snp_flag)
                c10 = io.get_signal(c, bin_size, "SNP bin count 1|0", snp_flag)
                hets = c01 + c10
                # Same process-wide effect as the original np.warnings call: silence all warnings.
                warnings.filterwarnings('ignore')
                # Bins with fewer than bin_size / 10000 heterozygous SNPs are drawn with maf = 0.
                maf[hets < (bin_size / 10000)] = 0
                plt.polar(theta[tl:tl + maf.size], 1 - maf / 2, linewidth=0.3, color=next_color)
                plt.fill_between(theta[tl:tl + maf.size], 1 - maf / 2, np.ones_like(maf), alpha=1, color=next_color)
                ax.scatter(theta[tl:tl + rd.size], np.ones_like(rd) / 10. + 0.7 * rd / (self.rd_range[1] * rd_mean / 2),
                           s=markersize, alpha=0.7, color=next_color)

                # Chromosome label is placed at the middle of its arc (in degrees).
                labels.append(Genome.canonical_chrom_name(c))
                angles.append(180 * theta[tl + rd.size // 2] / np.pi)
                tl += l // bin_size + 1
            for cn in range(int(self.rd_range[1])):
                plt.polar(theta, np.ones_like(theta) * (0.1 + 0.7 * (cn / self.rd_range[1])), color="k", linewidth=0.1)
            ax.set_rmax(1.0)
            ax.set_rticks([])
            ax.set_thetagrids(angles, labels=labels, fontsize=10, weight="bold", color="black")
            ax.set_title(self.file_title(ix[i]), loc="left", fontsize=10, weight="bold", color="black")
            ax.grid(False)
        self.fig_show(suffix="circular")

    def rd_baf(self, hist=True):
        """ Plots BAF value against RD level of mosaic segments, one panel per plot file.

        For every autosome that has "SNP likelihood call", "SNP likelihood segments", "RD mosaic call"
        and "RD mosaic segments" signals, each bin that belongs to both a BAF segment and an RD segment
        contributes one point: x = 0.5 - b (b is the first value returned by likelihood_baf_pval for the
        BAF segment) and y = the RD segment value taken from the first row of "RD mosaic call".

        Parameters
        ----------
        hist : bool
            If True points are shown as a 2D histogram with logarithmic color scale,
            otherwise as a scatter plot. A panel without usable points is left empty in histogram mode.

        """
        from matplotlib.colors import LogNorm

        plt.clf()
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, figsize=(12, 8), facecolor='w', edgecolor='k')
        n = len(self.plot_files)
        ix = self.plot_files
        if self.grid == "auto":
            sx, sy = self._panels_shape(n)
        else:
            sx, sy = tuple(self.grid)
        grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)
        bin_size = self.bin_size
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        rd_flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        for i in range(n):
            ax = self.fig.add_subplot(grid[i])
            io = self.io[ix[i]]

            # Autosomes (optionally filtered by self.chrom) for which all four signals exist.
            chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                snp_chr = io.snp_chromosome_name(c)
                if len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom):
                    if io.signal_exists(snp_chr, bin_size, "SNP likelihood call", snp_flag) and \
                            io.signal_exists(snp_chr, bin_size, "SNP likelihood segments", snp_flag) and \
                            io.signal_exists(snp_chr, bin_size, "RD mosaic call", rd_flag) and \
                            io.signal_exists(snp_chr, bin_size, "RD mosaic segments", rd_flag) and \
                            Genome.is_autosome(c):
                        chroms.append(snp_chr)
            x = []
            y = []
            for c in chroms:
                likelihood = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                segments_baf = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))
                rd = io.get_signal(c, bin_size, "RD mosaic call", rd_flag)
                segments_rd = segments_decode(io.get_signal(c, bin_size, "RD mosaic segments", rd_flag))

                # Per-bin lookup tables: bin index -> segment BAF value / segment RD value.
                mbaf = {}
                mrd = {}
                for s, lh in zip(segments_baf, likelihood):
                    b, _ = likelihood_baf_pval(lh)
                    for pos in s:
                        mbaf[pos] = 0.5 - b
                for s, r in zip(segments_rd, rd[0]):
                    for pos in s:
                        mrd[pos] = r
                for p in mbaf:
                    if p in mrd:
                        x.append(mbaf[p])
                        y.append(mrd[p])

            if not hist:
                ax.scatter(x, y, marker=".", alpha=0.5)
            elif y and max(y) > 0:
                # Guarded: max() of an empty list raises, and a zero maximum gives a zero bin step.
                y_max = max(y)
                ax.hist2d(x, y, bins=[np.arange(0, 0.51, 0.01), np.arange(0, y_max, y_max / 100.)], norm=LogNorm())

        if self.output_filename != "":
            plt.savefig(self._image_filename("rd_baf"), dpi=150)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def dispersion(self, legend=True):
        """ Plots stdev of RD against mean RD for all bin sizes, one curve per cnvpytor file.

        Two log-log panels are drawn: the left one uses "RD stat" of autosomes, the right one
        "RD stat" of autosomes after GC correction. For each file in self.io one point is added per
        bin size for which the statistics exist (elements 4 and 5 of the "RD stat" signal).

        Parameters
        ----------
        legend : bool
            If True a legend with file names is added to both panels.

        """
        plt.clf()
        plt.rcParams["font.size"] = 8
        self.fig = plt.figure(1, facecolor='w', edgecolor='k')
        if self.output_filename != "":
            self.fig.set_figheight(8)
            self.fig.set_figwidth(12)
        grid = gridspec.GridSpec(1, 2, wspace=0.2, hspace=0.2)

        panels = [
            (FLAG_AUTO, "mean RD", "stdev RD"),
            (FLAG_AUTO | FLAG_GC_CORR, "mean RD (GC corr)", "stdev RD (GC corr)"),
        ]
        for panel_ix, (flags, xlabel, ylabel) in enumerate(panels):
            ax = self.fig.add_subplot(grid[panel_ix])
            ax.set_yscale("log")
            ax.set_xscale("log")
            ax.grid(True)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            for i in self.io:
                bin_sizes = sorted(set(int(x[1]) for x in i.chromosomes_bin_sizes_with_signal("RD")))
                rd = []
                drd = []
                for bs in bin_sizes:
                    if i.signal_exists(None, bs, "RD stat", flags=flags):
                        stat = i.get_signal(None, bs, "RD stat", flags=flags)
                        rd.append(stat[4])
                        drd.append(stat[5])
                ax.plot(rd, drd, "*-", label=i.filename)
            # The legend is built from the curves present at call time, so it has to come after plotting;
            # calling it before ax.plot() left out the last file.
            if legend and self.io:
                ax.legend(loc="upper left")

        if self.output_filename != "":
            plt.savefig(self._image_filename("dispersion"), dpi=200)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    def region_rd_stat(self, region, n_bins=21, plot=False, legend=True):
        """ Prints RD statistics of a region for each plot file and optionally plots the RD histogram.

        Finite RD values of all bins overlapping the region are collected, values outside the open
        interval (1st percentile, 99th percentile) are removed, and a histogram of the rest over
        mean +/- 5 * std is fitted with fit_normal. One tab separated line is printed per file:
        filename, region, second and third fit parameter (used as mean and width of the fitted curve),
        min, max, 1st percentile, 99th percentile, trimmed mean, trimmed std.

        A file with no usable data in the region is skipped with a logged warning.

        Parameters
        ----------
        region : str
            Region string understood by decode_region.
        n_bins : int
            Number of histogram bin edges.
        plot : bool
            If True histogram points and fitted curve are plotted, one panel per file.
        legend : bool
            If True legend is added to each panel.

        """
        n = len(self.plot_files)
        ix = self.plot_files
        if plot:
            plt.clf()
            plt.rcParams["font.size"] = 8
            if self.grid == "auto":
                sx, sy = self._panels_shape(n)
            else:
                sx, sy = tuple(self.grid)
            self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
            if self.output_filename != "":
                self.fig.set_figheight(3 * sy)
                self.fig.set_figwidth(4 * sx)
            grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

        # Same for every file, so computed once.
        regs = decode_region(region)
        flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)

        for i in range(n):
            io = self.io[ix[i]]
            if plot:
                ax = self.fig.add_subplot(grid[i])
                ax.set_title(self.file_title(ix[i]), position=(0.01, 1.07),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            data = []
            for c, (pos1, pos2) in regs:
                his_p = io.get_signal(c, self.bin_size, "RD", flag_rd)
                bin1 = (pos1 - 1) // self.bin_size
                bin2 = (pos2 - 1) // self.bin_size
                window = his_p[bin1:bin2 + 1]
                data += list(window[np.isfinite(window)])

            data = np.array(data)
            if data.size == 0:
                # np.min / np.percentile raise on empty input.
                _logger.warning("No finite RD data in region '%s' for file '%s'!", region, io.filename)
                continue
            dmin = np.min(data)
            dmax = np.max(data)
            p1 = np.percentile(data, 1)
            p99 = np.percentile(data, 99)
            # Trim the tails (strict inequalities, as percentile values themselves are dropped too).
            data = data[(data > p1) & (data < p99)]
            if data.size == 0:
                _logger.warning("No RD data left in region '%s' for file '%s' after trimming!", region, io.filename)
                continue
            mean = np.mean(data)
            std = np.std(data)

            rd_min = mean - 5 * std
            rd_max = mean + 5 * std
            bins = np.linspace(rd_min, rd_max, n_bins)

            hist, _ = np.histogram(data, bins=bins)

            fitn, fitm, fits = fit_normal(bins[:-1], hist)[0]

            print("%s\t%s\t%.4f\t%.4f\t%e\t%e\t%.4f\t%.4f\t%.4f\t%.4f" % (
                io.filename, region, fitm, fits, dmin, dmax, p1, p99, mean, std))

            if plot:
                x = np.linspace(bins[0], bins[-1], 1001)
                ax.plot(x, normal(x, fitn, fitm, fits), "g-", label=region)
                ax.plot(bins[:-1], hist, "b*")
                if legend:
                    ax.legend()

        if plot:
            if self.output_filename != "":
                # NOTE(review): image suffix is "comp", the same one compare() uses - confirm this is intended.
                plt.savefig(self._image_filename("comp"), dpi=200)
                plt.close(self.fig)
            elif self.interactive:
                plt.show(block=False)
                plt.draw()
            else:
                plt.show()

    def compare(self, region1, region2, n_bins=21, plot=False, stdout=True, legend=True):
        """ Compares RD distributions of two regions for each plot file.

        For both regions finite RD values of overlapping bins are collected and values outside the
        open interval (1st percentile, 99th percentile) are removed. Histograms over a common range
        are fitted with fit_normal and the fitted parameters are passed to t_test_2_samples.

        Parameters
        ----------
        region1 : str
            First region string understood by decode_region.
        region2 : str
            Second region string understood by decode_region.
        n_bins : int
            Number of histogram bin edges.
        plot : bool
            If True histograms and fitted curves are plotted, one panel per file.
        stdout : bool
            If True one tab separated result line is printed per file.
        legend : bool
            If True legend is added to each panel.

        Returns
        -------
        ret : list of list
            One row per file: filename, region1, region2, fitted mean and width for region1,
            fitted mean and width for region2, p-value, ratio of fitted means and its error estimate.

        """
        file_ixs = self.plot_files
        n_files = len(file_ixs)
        results = []
        rd_flags = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)

        def gather(source, regs):
            # Finite RD values of all bins overlapping the given regions.
            values = []
            for chrom, (first, last) in regs:
                signal = source.get_signal(chrom, self.bin_size, "RD", rd_flags)
                lo_bin = (first - 1) // self.bin_size
                hi_bin = (last - 1) // self.bin_size
                window = signal[lo_bin:hi_bin + 1]
                values += list(window[np.isfinite(window)])
            return values

        def trim(values):
            # Drops values outside (1st percentile, 99th percentile); returns sample, mean, std.
            sample = np.array(values)
            low = np.percentile(sample, 1)
            high = np.percentile(sample, 99)
            sample = sample[sample > low]
            sample = sample[sample < high]
            return sample, np.mean(sample), np.std(sample)

        if plot:
            plt.clf()
            plt.rcParams["font.size"] = 8
            sx, sy = self._panels_shape(n_files) if self.grid == "auto" else tuple(self.grid)
            self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
            if self.output_filename != "":
                self.fig.set_figheight(3 * sy)
                self.fig.set_figwidth(4 * sx)
            grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

        for panel, file_ix in enumerate(file_ixs):
            io = self.io[file_ix]
            if plot:
                ax = self.fig.add_subplot(grid[panel])
                ax.set_title(self.file_title(file_ix), position=(0.01, 1.07),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            regs1 = decode_region(region1)
            regs2 = decode_region(region2)
            raw1 = gather(io, regs1)
            raw2 = gather(io, regs2)

            data1, mean1, std1 = trim(raw1)
            data2, mean2, std2 = trim(raw2)

            # Common histogram range covering mean +/- 5 * std of both samples.
            edges = np.linspace(min(mean1 - 5 * std1, mean2 - 5 * std2),
                                max(mean1 + 5 * std1, mean2 + 5 * std2), n_bins)

            hist1 = np.histogram(data1, bins=edges)[0]
            hist2 = np.histogram(data2, bins=edges)[0]

            fitn1, fitm1, fits1 = fit_normal(edges[:-1], hist1)[0]
            fitn2, fitm2, fits2 = fit_normal(edges[:-1], hist2)[0]

            pval = t_test_2_samples(fitm1, fits1, sum(hist1), fitm2, fits2, sum(hist2))

            ratio = fitm1 / fitm2
            ratio_err = fitm1 / fitm2 * (fits1 / fitm1 / np.sqrt(sum(hist1)) + fits2 / fitm2 / np.sqrt(sum(hist2)))
            row = [io.filename, region1, region2, fitm1, fits1, fitm2, fits2, pval, ratio, ratio_err]
            if stdout:
                print("%s\t%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t%e\t%.4f\t%.4f" % tuple(row))
            results.append(row)

            if plot:
                xs = np.linspace(edges[0], edges[-1], 1001)
                plt.plot(xs, normal(xs, fitn1, fitm1, fits1), "g-", label=region1)
                plt.plot(xs, normal(xs, fitn2, fitm2, fits2), "b-", label=region2)
                plt.plot(edges[:-1], hist1, "g*")
                plt.plot(edges[:-1], hist2, "b*")
                if legend:
                    plt.legend()

        if plot:
            if self.output_filename != "":
                plt.savefig(self._image_filename("comp"), dpi=200)
                plt.close(self.fig)
            elif self.interactive:
                plt.show(block=False)
                plt.draw()
            else:
                plt.show()

        return results

    def compare_baf(self, region1, region2, plot=False, stdout=True, legend=True):
        """ Compares BAF likelihoods of two regions for each plot file.

        For both regions "SNP likelihood" rows of all overlapping bins are multiplied together
        (rows that sum to zero are skipped, the product is renormalised after every row) and
        the combined likelihood is passed to likelihood_baf_pval.

        Parameters
        ----------
        region1 : str
            First region string understood by decode_region.
        region2 : str
            Second region string understood by decode_region.
        plot : bool
            If True combined likelihoods are plotted, one panel per file.
        stdout : bool
            If True one tab separated result line is printed per file.
        legend : bool
            If True legend with region names is added to each panel.

        Returns
        -------
        ret : list of list
            One row per file: filename, region1, region2 and the two values returned by
            likelihood_baf_pval for region1 followed by those for region2.

        """
        n = len(self.plot_files)
        ix = self.plot_files
        ret = []
        flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)

        def gather(source, regs):
            # Likelihood rows of all bins overlapping the given regions.
            rows = []
            for c, (pos1, pos2) in regs:
                his_p = source.get_signal(c, self.bin_size, "SNP likelihood", flag)
                bin1 = (pos1 - 1) // self.bin_size
                bin2 = (pos2 - 1) // self.bin_size
                rows += list(his_p[bin1:bin2 + 1])
            return np.array(rows)

        def combine(rows):
            # Normalised product of likelihood rows, skipping rows that are all zero.
            total = np.ones_like(rows[0])
            for row in rows:
                if sum(row) != 0:
                    total *= row
                total /= sum(total)
            return total

        if plot:
            plt.clf()
            plt.rcParams["font.size"] = 8
            if self.grid == "auto":
                sx, sy = self._panels_shape(n)
            else:
                sx, sy = tuple(self.grid)
            self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
            if self.output_filename != "":
                self.fig.set_figheight(3 * sy)
                self.fig.set_figwidth(4 * sx)
            grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)
        for i in range(n):
            io = self.io[ix[i]]
            if plot:
                ax = self.fig.add_subplot(grid[i])
                ax.set_title(self.file_title(ix[i]), position=(0.01, 1.07),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            d1 = gather(io, decode_region(region1))
            d2 = gather(io, decode_region(region2))
            h1 = combine(d1)
            h2 = combine(d2)

            b1, p1 = likelihood_baf_pval(h1)
            b2, p2 = likelihood_baf_pval(h2)

            if stdout:
                print("%s\t%s\t%s\t%.4f\t%e\t%.4f\t%e" % (
                    io.filename, region1, region2, b1, p1, b2, p2))
            ret.append([io.filename, region1, region2, b1, p1, b2, p2])

            if plot:
                plt.plot(h1, "g", label=region1)
                plt.plot(h2, "b", label=region2)
                # The legend argument used to be ignored here; honour it the same way compare() does.
                if legend:
                    plt.legend()

        if plot:
            if self.output_filename != "":
                plt.savefig(self._image_filename("comp_baf"), dpi=200)
                plt.close(self.fig)
            elif self.interactive:
                plt.show(block=False)
                plt.draw()
            else:
                plt.show()

        return ret

    def single_cell_allelic_dropout(self, callset=None, res=1000, n_bins=100, threshold=0.1, snp_threshold=0.01,
                                    neigh=False, plot=False, stdout=True, title=None):
        """
        Function used to identify regions without allelic dropout in the case of single cell amplification.
        It requires baf data for bin size. It will filter out all bins with at least one SNP below snp_threshold and
        all bins with collective maximum baf likelihood below threshold parameter.

        Bins without heterozygous SNPs are assigned to dropout if the run of such bins touches a dropout bin
        on either side, otherwise they are treated as good.

        Parameters
        ----------
        callset : str or None
            Name of callset if not default.
        res : int
            Resolution in bins used to calculate percentage of dropouts in region.
        n_bins : int
            Number of bins in histograms.
        threshold : float
            Collective threshold of AF for allelic dropout
        snp_threshold : float
            Single SNP threshold of AF for allelic dropout
        neigh : bool
            Remove neighbouring bins also.
        plot : bool
            Make plots.
        stdout : bool
            Print out good regions
        title : str or None
            Title passed to new_figure when plotting.

        """
        io = self.io[self.plot_file]
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        # Region sizes are filled in regardless of plotting, so they must exist even when plot is False.
        sizeG = []
        sizeB = []
        if plot:
            self.new_figure(panel_count=2, panel_size=(16, 6), title=title)
            ax = self.next_panel()
            bafG = []
            baf = []
            cpos = 0
        for c in io.snp_chromosomes():
            if len(self.chrom) != 0 and c not in self.chrom:
                continue
            i1 = io.get_signal(c, self.bin_size, "SNP i1", snp_flag)
            pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
            c01 = io.get_signal(c, self.bin_size, "SNP bin count 0|1", snp_flag)
            c10 = io.get_signal(c, self.bin_size, "SNP bin count 1|0", snp_flag)
            hets = c01 + c10

            # Bin states: 0 - good, 1 - allelic dropout, 2 - no heterozygous SNPs (resolved below).
            mask = np.zeros_like(i1)
            mask[hets == 0] = 2
            mask[i1 > (0.5 - threshold)] = 1
            for k in range(len(pos)):
                depth = nref[k] + nalt[k]
                if depth != 0 and ((gt[k] % 4) in [1, 2]):
                    b = 1.0 * nalt[k] / depth
                    if (b < snp_threshold) or (b > (1 - snp_threshold)):
                        mask[(pos[k] - 1) // self.bin_size] = 1

            if neigh:
                # Extend dropout to both neighbours; np.roll wraps around, so the wrapped ends are cleared.
                ada = mask == 1
                ada1 = np.roll(ada, 1)
                ada2 = np.roll(ada, -1)
                ada1[0] = False
                ada2[-1] = False
                mask[ada1] = 1
                mask[ada2] = 1

            # Resolve runs of state 2: dropout if adjacent to a dropout bin on either side, else good.
            ix = 0
            while ix < len(mask):
                if mask[ix] == 2:
                    adan = 0
                    if ix > 0 and mask[ix - 1] == 1:
                        adan = 1
                    jx = ix
                    while jx < len(mask) and mask[jx] == 2:
                        jx += 1
                    if jx < len(mask) and mask[jx] == 1:
                        adan = 1
                    mask[ix:jx] = adan
                    ix = jx
                else:
                    ix += 1

            # Report runs of good bins; the gap in front of each run is recorded as a dropout region.
            ix = 0
            ojx = 0
            while ix < len(mask):
                if mask[ix] == 0:
                    jx = ix
                    while jx < len(mask) and mask[jx] == 0:
                        jx += 1
                    if stdout:
                        print("%s\t%d\t%d" % (c, ix * self.bin_size + 1, jx * self.bin_size))
                    sizeG.append((jx - ix) * self.bin_size)
                    if ix > ojx:
                        sizeB.append((ix - ojx) * self.bin_size)
                    ojx = jx
                    ix = jx
                else:
                    ix += 1

            if plot:
                density = np.zeros(len(mask) // res)
                for k in range(len(density)):
                    density[k] = np.mean(mask[res * k:res * (k + 1)])
                ax.plot(np.arange(cpos, cpos + len(density)) * res, density)
                cpos += len(density)
                for k in range(len(pos)):
                    depth = nref[k] + nalt[k]
                    if depth != 0 and ((gt[k] % 4) in [1, 2]):
                        vaf = 1.0 * nalt[k] / depth
                        baf.append(vaf)
                        if mask[(pos[k] - 1) // self.bin_size] == 0:
                            bafG.append(vaf)

        if plot:
            # Axis decoration belongs to the plotting branch: ax does not exist otherwise.
            ax.set_xlabel("Position in genome [bins]")
            ax.set_ylabel("Percentage of allelic dropout")
            ax.grid(True)

            self.new_subgrid(2, grid="horizontal", hspace=0.05, wspace=0.2)
            ax = self.next_subpanel()
            ms = 5 * max(np.mean(sizeG), np.mean(sizeB))
            ax.hist(sizeB, bins=np.arange(1, ms, self.bin_size), histtype="step", log=True,
                    label="Allelic dropout regions", linewidth=3)
            ax.hist(sizeG, bins=np.arange(1, ms, self.bin_size), histtype="step", log=True,
                    label="Region with both alleles", linewidth=3)
            ax.legend()
            ax.grid(True)
            ax.set_xlabel("Size [bp]")
            ax.set_ylabel("Number of regions")
            self.fig.add_subplot(ax)

            ax = self.next_subpanel()
            ax.hist(baf, bins=np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1)),
                    label="All heterozygous variants")
            ax.hist(bafG, bins=np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1)),
                    label="Region with both alleles")
            ax.legend()
            ax.grid(True)
            ax.set_xlabel("VAF")
            ax.set_ylabel("Distribution")
            self.fig.add_subplot(ax)

            self.fig_show(suffix="allelic_dropout")

    def compare_rd_dist(self, regions):
        """ Plots distribution of per-bin copy number difference between the first two plot files.

        For every bin overlapping the regions, GC corrected RD of both files is scaled as
        2 * rd / normal_level (normal level taken from rd_normal_level of the respective file) and
        the difference (first file minus second file) is histogrammed between -0.5 and 0.5.

        Parameters
        ----------
        regions : str
            Region string understood by decode_region. Only bins overlapping the given
            intervals are used.

        """
        self.new_figure(panel_count=1)
        ax = self.next_panel()
        ax.set_ylabel("Normalised distribution")
        ax.set_xlabel("Difference in copy number")
        regs = decode_region(regions)
        io1 = self.io[self.plot_files[0]]
        io2 = self.io[self.plot_files[1]]
        bin_size = self.bin_size
        flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
        # Normal levels do not depend on the region, so they are read once.
        mean1, _ = io1.rd_normal_level(bin_size, flag_rd)
        mean2, _ = io2.rd_normal_level(bin_size, flag_rd)
        drd = []
        for c, (pos1, pos2) in regs:
            rd1 = np.asarray(io1.get_signal(c, bin_size, "RD", flag_rd))
            rd2 = np.asarray(io2.get_signal(c, bin_size, "RD", flag_rd))
            # Restrict to bins overlapping the interval (same bin convention as compare());
            # the interval used to be ignored and the whole chromosome was histogrammed.
            bin1 = (pos1 - 1) // bin_size
            bin2 = (pos2 - 1) // bin_size
            seg1 = rd1[bin1:bin2 + 1]
            seg2 = rd2[bin1:bin2 + 1]
            common = min(len(seg1), len(seg2))
            drd.extend(seg1[:common] * 2 / mean1 - seg2[:common] * 2 / mean2)

        ax.hist(drd, bins=np.linspace(-0.5, 0.5, 100))
        ax.set_yticklabels([])
        ax.grid()
        self.fig_show(suffix="compare_rd")

    def snp_dist(self, regions, callset=None, n_bins=100, gt_plot=[0, 1, 2, 3], titles=None, beta_distribution=False,
                 log_scale=False):
        """ Plots distribution of variant allele frequency, one panel per plot file and region.

        For each variant inside a region with non-zero read support and gt % 4 listed in gt_plot,
        the plotted value is nalt / (nref + nalt), or nref / (nref + nalt) when gt % 4 == 2.
        A second histogram shows only variants whose flag has bit 2 set ("P bases only").

        Parameters
        ----------
        regions : str
            Space separated list of region strings understood by decode_region.
        callset : str or None
            Name of callset if not default.
        n_bins : int
            Histogram bin width is 1 / (n_bins + 1).
        gt_plot : list of int
            Values of gt % 4 to include. The default list is only read, never modified.
        titles : list of str or None
            Panel titles indexed by region; if None file title and region are used.
        beta_distribution : bool
            If True a symmetric beta distribution with both parameters equal to half of the mean
            read support of "P" variants is overlaid. Skipped if there are no such variants.
        log_scale : bool
            If True y axis is logarithmic.

        """
        regions = regions.split(" ")
        self.new_figure(panel_count=len(self.plot_files) * len(regions))
        x_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
        title_style = {'verticalalignment': 'top', 'horizontalalignment': 'left'}
        for file_ix in self.plot_files:
            for i, region in enumerate(regions):
                ax = self.next_panel()
                panel_title = self.file_title(file_ix) + ": " + region if titles is None else titles[i]
                ax.set_title(panel_title, position=(0.01, 1.10), fontdict=title_style)
                baf = []
                bafP = []
                depth_p = 0
                for c, (pos1, pos2) in decode_region(region):
                    pos, ref, alt, nref, nalt, gt, flag, qual = self.io[file_ix].read_snp(c, callset=callset)
                    ix = 0
                    while ix < len(pos) and pos[ix] <= pos2:
                        depth = nref[ix] + nalt[ix]
                        if pos[ix] >= pos1 and depth != 0 and ((gt[ix] % 4) in gt_plot):
                            # For gt % 4 == 2 the reference count is used instead of the alternative one.
                            support = nref[ix] if gt[ix] % 4 == 2 else nalt[ix]
                            vaf = 1.0 * support / depth
                            baf.append(vaf)
                            if flag[ix] & 2:
                                bafP.append(vaf)
                                # int() keeps the running sum out of narrow fixed-width integer types.
                                depth_p += int(depth)
                        ix += 1
                # No "P" variants: avoid division by zero; the value is only needed for the beta overlay.
                mean_rd = depth_p / len(bafP) if bafP else 0
                ax.hist(baf, bins=x_bins, label="All heterozygous variants")
                ax.hist(bafP, bins=x_bins, label="P bases only")
                if log_scale:
                    # 'nonposy' was renamed to 'nonpositive' in matplotlib 3.3 and later removed;
                    # try the current name first and fall back for old installations.
                    try:
                        ax.set_yscale('log', nonpositive='clip')
                    except (TypeError, ValueError):
                        ax.set_yscale('log', nonposy='clip')

                if beta_distribution and bafP:
                    xx = np.linspace(0.2, 0.8, 200)
                    ax.plot(xx, beta.pdf(xx, mean_rd / 2, mean_rd / 2) * len(bafP) / n_bins, c="black",
                            label="Beta distribution")
                ax.legend(bbox_to_anchor=(0, 1.02, 1, 0.2), loc="lower left", mode="expand", borderaxespad=0, ncol=3)
                ax.set_xlabel("VAF")
                ax.set_ylabel("Distribution")

        self.fig_show(suffix="snp_dist")

    def phased_baf(self, regions, callset=None, print=False):
        """ Calculates BAF from pooled read counts of variants with gt equal to 5 or 6 in each region.

        Read counts of variants with gt == 5 are added as (alt, ref); for gt == 6 the two counts are
        swapped before adding (presumably the two phased heterozygous genotypes - confirm against
        the genotype encoding). BAF is total alt / (total ref + total alt). A second value is
        calculated using only variants whose flag has bit 2 set.

        Parameters
        ----------
        regions : str
            Space separated list of region strings understood by decode_region.
        callset : str or None
            Name of callset if not default.
        print : bool
            If True one tab separated line (region, baf, bafP) is written to stdout per region.

        Returns
        -------
        ret : list of list of float
            [baf, bafP] for each region; a value is nan if no variant contributed to it.

        """
        ret = []
        for region in regions.split(" "):
            talt = 0
            tref = 0
            taltP = 0
            trefP = 0
            for c, (pos1, pos2) in decode_region(region):
                pos, ref, alt, nref, nalt, gt, flag, qual = self.io[self.plot_file].read_snp(c, callset=callset)
                ix = 0
                while ix < len(pos) and pos[ix] <= pos2:
                    if pos[ix] >= pos1 and (nref[ix] + nalt[ix]) != 0 and gt[ix] in (5, 6):
                        # int() keeps the running totals as Python ints (no fixed-width overflow).
                        if gt[ix] == 5:
                            a, r = int(nalt[ix]), int(nref[ix])
                        else:
                            a, r = int(nref[ix]), int(nalt[ix])
                        talt += a
                        tref += r
                        if flag[ix] & 2:
                            taltP += a
                            trefP += r
                    ix += 1
            # Regions without contributing variants give nan instead of raising ZeroDivisionError.
            baf = talt / (tref + talt) if (tref + talt) != 0 else float("nan")
            bafP = taltP / (trefP + taltP) if (trefP + taltP) != 0 else float("nan")
            ret.append([baf, bafP])
            if print:
                # The parameter named 'print' shadows the builtin inside this method,
                # so output is written through sys.stdout directly.
                sys.stdout.write("%s\t%f\t%f\n" % (region, baf, bafP))
        return ret

    def snp_compare(self, regions, ix1, ix2, callset=None, n_bins=100, titles=None, test_loh=False):
        """ Compares variant allele fractions of two files, one panel per region.

        For every region the alt-read fraction nalt / (nref + nalt) is collected per position from
        both files (variants without reads are skipped). Without ``test_loh`` the fractions are drawn
        against position: green (file ix1) and blue (file ix2) for positions present in both files,
        red for positions present only in ix1 and orange for positions present only in ix2.
        With ``test_loh`` two step histograms of ix1 fractions between 0.2 and 0.8 are drawn instead:
        H1 for positions where the ix2 fraction is above 0.8, H2 for all other ix1 positions; mean,
        standard deviation and size of both sets are printed to stdout.

        Parameters
        ----------
        regions : str
            Space separated list of regions; each one is parsed by decode_region.
        ix1 : int
            Index of the first file in self.io.
        ix2 : int
            Index of the second file in self.io.
        callset : str or None
            Forwarded to read_snp of both files.
        n_bins : int
            Histogram bin width is 1 / (n_bins + 1).
        titles : list of str or None
            Panel titles; the region strings are used when None.
        test_loh : bool
            Draw histograms instead of the scatter plot.

        """
        regions = regions.split(" ")
        n = len(regions)
        self.new_figure(panel_count=n)
        hist_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
        for i in range(n):
            ax = self.next_panel()
            ax.set_title(regions[i] if titles is None else titles[i], position=(0.01, 1.07),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
            # Accumulators belong to the panel, not to a single interval: a region that decodes into
            # several intervals is plotted completely and an empty interval list gives an empty panel.
            xx, yy, cc = [], [], []
            hist1, hist2 = [], []
            for c, (pos_start, pos_end) in decode_region(regions[i]):
                pos1, ref1, alt1, nref1, nalt1, gt1, flag1, qual1 = self.io[ix1].read_snp(c, callset=callset)
                pos2, ref2, alt2, nref2, nalt2, gt2, flag2, qual2 = self.io[ix2].read_snp(c, callset=callset)

                # position -> alt-read fraction; counts are converted to int so that the arithmetic
                # does not depend on the scalar type of the stored arrays
                vaf1 = {}
                ix = 0
                while ix < len(pos1) and pos1[ix] <= pos_end:
                    depth = int(nref1[ix]) + int(nalt1[ix])
                    if pos1[ix] >= pos_start and depth != 0:
                        vaf1[pos1[ix]] = int(nalt1[ix]) / depth
                    ix += 1
                vaf2 = {}
                ix = 0
                while ix < len(pos2) and pos2[ix] <= pos_end:
                    depth = int(nref2[ix]) + int(nalt2[ix])
                    if pos2[ix] >= pos_start and depth != 0:
                        vaf2[pos2[ix]] = int(nalt2[ix]) / depth
                    ix += 1

                for p in vaf1:
                    t = vaf1[p]
                    xx.append(p)
                    yy.append(t)
                    if p in vaf2:
                        cc.append("green")
                        xx.append(p)
                        yy.append(vaf2[p])
                        cc.append("blue")
                        target = hist1 if vaf2[p] > 0.8 else hist2
                    else:
                        cc.append("red")
                        target = hist2
                    if t > 0.2 and t < 0.8:
                        target.append(t)
                for p in vaf2:
                    if p not in vaf1:
                        xx.append(p)
                        yy.append(vaf2[p])
                        cc.append("orange")

            if test_loh:
                ax.hist(hist1, bins=hist_bins, histtype='step')
                ax.hist(hist2, bins=hist_bins, histtype='step')
                print("H1:", np.mean(hist1), np.std(hist1), len(hist1))
                print("H2:", np.mean(hist2), np.std(hist2), len(hist2))
                ax.set_xlabel("baf")
                ax.set_ylabel("distribution")
            else:
                ax.scatter(xx, yy, marker=".", s=0.1, c=cc)
                ax.set_xlabel("position")
                ax.set_ylabel("baf")

        self.fig_show(suffix="snp_dist")

    def denovo_calls(self, sample, reference, call_type="mosaic"):
        """ Prints calls of one file that are not supported by a set of reference files.

        Only ``call_type == "mosaic"`` is handled; any other value makes the method a no-op.
        Calls stored under "calls" are filtered by the size, p_val, pN and Q0 ranges of the viewer and
        re-genotyped in the sample and in every reference file. A call is printed when all reference
        levels are within 0.5 of 2 and either the sample level is above 2.5 and more than 0.5 above
        every reference, or it is below 1.5 and more than 0.5 below every reference.

        Parameters
        ----------
        sample : int
            Index of the tested file in self.io.
        reference : iterable of int
            Indices of the reference files in self.io.
        call_type : str
            Type of calls to test.

        """
        bin_size = self.bin_size
        io = self.io[sample]
        if call_type != "mosaic":
            return
        for c in io.rd_chromosomes():
            if len(self.chrom) != 0 and c not in self.chrom:
                continue
            flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
            if not io.signal_exists(c, bin_size, "calls", flag):
                continue
            for call in io.read_calls(c, bin_size, "calls", flag):
                passes_filters = in_interval(call["size"], self.size_range) \
                    and in_interval(call["p_val"], self.p_range) \
                    and in_interval(call["pN"], self.pN_range) \
                    and in_interval(call["Q0"], self.Q0_range)
                if not passes_filters:
                    continue
                cnv_type = "duplication" if call["type"] == 1 else "deletion"
                region = "%s:%d-%d" % (c, call["start"], call["end"])

                # Element 3 of a genotype row is the level for the first (only) bin size
                cn0 = self.genotype([bin_size], region, file_index=sample)[0][3]
                cref = [self.genotype([bin_size], region, file_index=x)[0][3] for x in reference]
                if ((all((cn0 - x) > 0.5 for x in cref) and cn0 > 2.5)
                        or (all((x - cn0) > 0.5 for x in cref) and cn0 < 1.5)) \
                        and all(np.abs(x - 2.) < 0.5 for x in cref):
                    print(cnv_type, region, call["cnv"], cn0, cref)

    def genotype(self, bin_sizes, region, p_val=False, interactive=False, file_index=None):
        """ Calculates read depth level of a region normalized to 2 for the expected RD level.

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes used for the calculation.
        region : str
            Region string parsed by decode_region; it may decode into several intervals.
        p_val : bool
            If True, a gaussian e-value is added after the level of each bin size.
        interactive : bool
            If True, results are also printed to stdout, one line per interval.
        file_index : int or None
            Index of the file in self.io; self.plot_file is used when None.

        Returns
        -------
        ret : list of list
            One row per interval: [chromosome, start, end] followed, for each bin size, by the
            normalized level (and the e-value when ``p_val`` is set).

        """
        if file_index is None:
            file_index = self.plot_file
        io = self.io[file_index]
        ret = []
        for c, (pos1, pos2) in decode_region(region, max_size=1000000000):
            chr_len = io.get_chromosome_length(c)
            # An end equal to the max_size sentinel is replaced by the chromosome length
            if chr_len is not None and pos2 == 1000000000:
                pos2 = chr_len
            if interactive:
                print(c + ":" + str(pos1) + "-" + str(pos2), end="")
            row = [c, pos1, pos2]
            ret.append(row)
            for bs in bin_sizes:
                flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                stat = io.get_signal(c, bs, "RD stat", flag_rd | FLAG_AUTO)
                if stat is None or len(stat) == 0:
                    stat = io.get_signal(c, bs, "RD stat", flag_rd | FLAG_SEX)
                his_p = io.get_signal(c, bs, "RD", flag_rd)
                bin1 = (pos1 - 1) // bs
                bin2 = (pos2 - 1) // bs
                # Sum of RD over the interval; edge bins are weighted by the covered fraction and
                # bins outside of the signal are ignored.
                rc = 0
                if bin1 == bin2:
                    try:
                        rc = (pos2 - pos1 + 1) * his_p[bin1] / bs
                    except IndexError:
                        pass
                else:
                    try:
                        rc += (bin1 * bs - pos1 + 1 + bs) * his_p[bin1] / bs
                        rc += (pos2 - bin2 * bs) * his_p[bin2] / bs
                    except IndexError:
                        pass
                    for ix in range(bin1 + 1, bin2):
                        try:
                            rc += his_p[ix]
                        except IndexError:
                            pass
                e2 = 0
                if p_val:
                    e1 = getEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9 / bs
                    e2 = gaussianEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9
                level = 2. * rc / (stat[4] * (pos2 - pos1 + 1) / bs)
                if interactive:
                    print("\t%f" % level, end="")
                    if p_val:
                        print("\t%e\t%e" % (e1, e2), end="")

                row.append(level)
                if p_val:
                    row.append(e2)
            if interactive:
                print()

        return ret

    def genotype_all(self, bin_sizes, regions, interactive=False, file_index=None):
        """ Genotypes regions using RD, GC/AT content and SNP signals.

        Signals are loaded once per chromosome and reused while consecutive regions stay on the same
        chromosome. Only the first interval of every decoded region is used.

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes used for the calculation.
        regions : list of str
            Region strings parsed by decode_region.
        interactive : bool
            If True, one line per region with the values for all bin sizes is printed to stdout.
        file_index : int or None
            Index of the file in self.io; self.plot_file is used when None.

        Returns
        -------
        ret : dict
            Maps bin size to a list of rows, one per region:
            [chromosome, start, end, RD level normalized to 2 (NaN if no bin has RD), e-value,
            gaussian e-value, q0 = (RD - unique RD) / RD, fraction of N bases (-1 without GC data),
            smallest distance from the region to an all-N 100 bp bin (-1 without GC data),
            fraction of the region covered by non-NaN bins, number of homozygous SNPs,
            number of heterozygous SNPs, BAF, BAF p-value].
            Without SNP data the last four values are 0, 0, 0, 1.

        """
        if file_index is None:
            file_index = self.plot_file
        rd_gc_chromosomes = {}
        for c in self.io_gc.gc_chromosomes():
            rd_name = self.io[file_index].rd_chromosome_name(c)
            if rd_name is not None:
                rd_gc_chromosomes[rd_name] = c
        ret = {}
        for bs in bin_sizes:
            oc = ""
            ret[bs] = []
            for region in regions:
                regs = decode_region(region, max_size=1000000000)
                c, (pos1, pos2) = regs[0]
                if oc != c:
                    # New chromosome: (re)load all signals
                    chr_len = self.io[file_index].get_chromosome_length(c)
                    if chr_len is not None and pos2 == 1000000000:
                        pos2 = chr_len
                    flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                    stat = self.io[file_index].get_signal(c, bs, "RD stat", flag_rd | FLAG_AUTO)
                    if stat is None or len(stat) == 0:
                        stat = self.io[file_index].get_signal(c, bs, "RD stat", flag_rd | FLAG_SEX)
                    his_p = self.io[file_index].get_signal(c, bs, "RD", flag_rd)
                    qrd_p = self.io[file_index].get_signal(c, bs, "RD")
                    qrd_u = self.io[file_index].get_signal(c, bs, "RD unique")
                    gc, at, distN = False, False, False
                    has_gc = False
                    if c in rd_gc_chromosomes and self.io_gc.signal_exists(rd_gc_chromosomes[c], None, "GC/AT"):
                        gcat = self.io_gc.get_signal(rd_gc_chromosomes[c], None, "GC/AT")
                        gc, at = gc_at_decompress(gcat)
                        # Explicit flag instead of the truth value of ``gc`` itself
                        has_gc = len(gc) > 0
                        NN = 100 - np.array(gc) - np.array(at)
                        # distN: distance (in bp, steps of 100) from each 100 bp bin to the closest
                        # bin with NN == 100; forward pass first, then backward pass keeps the minimum
                        distN = np.zeros_like(NN, dtype="long") - 1
                        distN[NN == 100] = 0
                        prev = 0
                        for Ni in range(0, distN.size):
                            if distN[Ni] == -1:
                                prev += 100
                                distN[Ni] = prev
                            else:
                                prev = 0
                        prev = 0
                        for Ni in range(distN.size - 1, -1, -1):
                            if distN[Ni] > 0:
                                prev += 100
                                if prev < distN[Ni]:
                                    distN[Ni] = prev
                            else:
                                prev = 0
                    snp = c in self.io[file_index].snp_chromosomes()
                    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                        FLAG_USEHAP if self.snp_use_phase else 0)
                    if snp:
                        snp_likelihood = list(
                            self.io[file_index].get_signal(c, bs, "SNP likelihood", snp_flag).astype("float64"))
                        snp_hets = self.io[file_index].get_signal(c, bs, "SNP bin count 0|1", snp_flag)
                        snp_hets += self.io[file_index].get_signal(c, bs, "SNP bin count 1|0", snp_flag)
                        snp_homs = self.io[file_index].get_signal(c, bs, "SNP bin count 1|1", snp_flag)
                else:
                    if chr_len is not None and pos2 == 1000000000:
                        pos2 = chr_len
                oc = c
                ret[bs].append([c, pos1, pos2])

                bin1 = (pos1 - 1) // bs
                bin2 = (pos2 - 1) // bs
                # rc: corrected RD, sp / su: raw and unique RD, nansize: bases in bins with valid RD.
                # Edge bins are weighted by the covered fraction; bins outside of the signal are ignored.
                rc = 0
                sp = 0
                su = 0
                nansize = 0
                if bin1 == bin2:
                    try:
                        if not np.isnan(his_p[bin1]):
                            rc = (pos2 - pos1 + 1) * his_p[bin1] / bs
                            sp = (pos2 - pos1 + 1) * qrd_p[bin1] / bs
                            su = (pos2 - pos1 + 1) * qrd_u[bin1] / bs
                            nansize = (pos2 - pos1 + 1)
                    except IndexError:
                        pass
                else:
                    try:
                        if not np.isnan(his_p[bin1]):
                            rc += (bin1 * bs - pos1 + 1 + bs) * his_p[bin1] / bs
                            sp += (bin1 * bs - pos1 + 1 + bs) * qrd_p[bin1] / bs
                            su += (bin1 * bs - pos1 + 1 + bs) * qrd_u[bin1] / bs
                            nansize += (bin1 * bs - pos1 + 1 + bs)
                        if not np.isnan(his_p[bin2]):
                            rc += (pos2 - bin2 * bs) * his_p[bin2] / bs
                            sp += (pos2 - bin2 * bs) * qrd_p[bin2] / bs
                            su += (pos2 - bin2 * bs) * qrd_u[bin2] / bs
                            nansize += (pos2 - bin2 * bs)

                    except IndexError:
                        pass
                    for ix in range(bin1 + 1, bin2):
                        try:
                            if not np.isnan(his_p[ix]):
                                rc += his_p[ix]
                                sp += qrd_p[ix]
                                su += qrd_u[ix]
                                nansize += bs
                        except IndexError:
                            pass
                if has_gc:
                    # GC/AT content is stored in 100 bp bins
                    sbin1 = (pos1 - 1) // 100
                    sbin2 = (pos2 - 1) // 100
                    pN = 0
                    # The single-bin case has to be decided on the 100 bp bins: a region inside one
                    # RD bin can still span several 100 bp bins.
                    if sbin1 == sbin2:
                        try:
                            pN = (pos2 - pos1 + 1) * (gc[sbin1] + at[sbin1]) / 100
                        except IndexError:
                            pass
                    else:
                        try:
                            pN += (sbin1 * 100 - pos1 + 101) * (gc[sbin1] + at[sbin1]) / 100
                            pN += (pos2 - sbin2 * 100) * (gc[sbin2] + at[sbin2]) / 100

                        except IndexError:
                            pass
                        for ix in range(sbin1 + 1, sbin2):
                            try:
                                pN += gc[ix] + at[ix]
                            except IndexError:
                                pass

                e1 = getEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9 / bs
                e2 = gaussianEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9
                if has_gc:
                    pN = 1 - pN / (pos2 - pos1 + 1)
                    # Include the last bin of the region; an empty slice (region outside of the
                    # GC signal) has no minimum, so -1 is reported as for missing GC data.
                    region_dist = distN[sbin1:sbin2 + 1]
                    dG = np.min(region_dist) if region_dist.size > 0 else -1
                else:
                    pN = -1
                    dG = -1
                if nansize == 0:
                    rc = np.nan
                else:
                    rc = 2 * rc / (stat[4] * nansize / bs)
                ret[bs][-1].append(rc)
                ret[bs][-1].append(e1)
                ret[bs][-1].append(e2)
                q0 = 0
                if sp != 0:
                    q0 = (sp - su) / sp
                ret[bs][-1].append(q0)
                ret[bs][-1].append(pN)
                ret[bs][-1].append(dG)
                ret[bs][-1].append(nansize / (pos2 - pos1 + 1))
                if snp:
                    homs = np.sum(snp_homs[bin1:bin2 + 1])
                    hets = np.sum(snp_hets[bin1:bin2 + 1])
                    # Product of the per-bin likelihoods, renormalized after every step
                    lh = np.ones_like(snp_likelihood[0])
                    for ix in range(bin1, min(bin2 + 1, len(snp_likelihood))):
                        lh *= snp_likelihood[ix]
                        lh /= np.sum(lh)
                    baf, baf_p = likelihood_baf_pval(lh)
                    ret[bs][-1] += [homs, hets, baf, baf_p]
                else:
                    ret[bs][-1] += [0, 0, 0, 1]
        if interactive:
            plist = []
            for bs in bin_sizes:
                if len(plist) == 0:
                    # Copy the rows: extending them in place would alter the returned result
                    plist = [list(r) for r in ret[bs]]
                else:
                    for ix in range(len(ret[bs])):
                        plist[ix] += ret[bs][ix][3:]
            for r in plist:
                print(
                    ("%s:%d-%d" + (len(bin_sizes) * "\t%.4f\t%e\t%e\t%.4f\t%.4f\t%d\t%.4f\t%d\t%d\t%.4f\t%e")) % tuple(
                        r))
        return ret

    def genotype_prompt(self, bin_sizes=[], all=False):
        """ Reads regions from stdin, one per line, and prints their genotypes.

        Reading stops at end of input or at the first empty line.

        Parameters
        ----------
        bin_sizes : list of int
            Bin sizes passed to the genotyping method.
        all : bool
            If True genotype_all is used for each line, otherwise genotype.

        """
        while True:
            try:
                # raw_input exists only in Python 2; fall back to input otherwise
                try:
                    line = raw_input("")
                except NameError:
                    line = input("")
            except EOFError:
                return
            if line is None or line == "":
                break
            if all:
                self.genotype_all(bin_sizes, [line], interactive=True)
            else:
                self.genotype(bin_sizes, line, interactive=True)

    def rd_baf_call_models(self, maxcn=10):
        """ Plots combined calls over model curves in the (|ΔBAF|, relative RD level) plane.

        One panel is drawn per plot file. For every copy number ``cn`` from ``maxcn`` down to 0 and
        every split h1/h2 (h1 <= h2, h1 + h2 = cn) a curve is drawn for cell fractions x in [0, 1]:
        RD level 2 - 2x + cn * x against 0.5 - (1 - x + h1 * x) / (2 - 2x + cn * x).
        Calls stored under "calls combined" with more than self.min_segment_size bins are added as
        points (call["baf"], 2 * call["cnv"]), one scatter series per chromosome.

        Parameters
        ----------
        maxcn : int
            Largest copy number for which model curves are drawn; also the upper y-axis limit.

        """
        bin_size = self.bin_size
        ix = self.plot_files
        n = len(ix)
        self.new_figure(panel_count=n)

        for i in range(n):
            ax = self.next_panel()
            file_ix = ix[i]
            io = self.io[file_ix]
            ax.set_title(self.file_title(file_ix), position=(0.1, 0.1),
                         fontdict={'verticalalignment': 'bottom', 'horizontalalignment': 'left'})

            flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

            # Autosomes and sex chromosomes, restricted to self.chrom when it is not empty
            chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                snp_chr = io.snp_chromosome_name(c)
                wanted = len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom)
                if wanted and (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                    chroms.append((snp_chr, l))

            # Model curves
            x = np.linspace(0, 1, 1000)
            for cn in range(maxcn, -1, -1):
                for h1 in range(cn // 2 + 1):
                    h2 = cn - h1
                    mrd = 2 - 2 * x + x * cn
                    # cn == 0 gives a zero denominator at x == 1
                    np.seterr(divide='ignore')
                    mbaf = 0.5 - (1 - x + x * h1) / mrd
                    plt.plot(mbaf, mrd, "-", label="%d: %d/%d" % (cn, h1, h2), zorder=6 - cn)

            # Calls
            for c, l in chroms:
                call_rd = []
                call_baf = []
                if io.signal_exists(c, bin_size, "calls combined", flag):
                    for call in io.read_calls(c, bin_size, "calls combined", flag):
                        if call["bins"] > self.min_segment_size:
                            call_rd.append(call["cnv"] * 2)
                            call_baf.append(call["baf"])

                plt.scatter(call_baf, call_rd, s=20, edgecolors='face', marker='.')

            ax.set_xlabel("|ΔBAF|")
            ax.set_ylabel("Relative RD level")

            ax.legend()

            ax.set_ylim([0, maxcn])
            ax.set_xlim([-0.02, 0.5])
            ax.grid()

        self.fig_show(suffix="models")

Ancestors

Static methods

def set_style(style)

Source code

@staticmethod
def set_style(style):
    """ Switches matplotlib to the given style, applied on top of the "default" style.

    Names that are not listed in plt.style.available are ignored.

    Parameters
    ----------
    style : str
        Name of the matplotlib style.

    """
    if style not in plt.style.available:
        return
    for name in ("default", style):
        plt.style.use(name)

Methods

def baf(self)

Source code

def baf(self):
    """ Plots binned "SNP baf", "SNP maf" and "SNP i1" signals of the current plot file.

    One panel is drawn for each autosome or sex chromosome of the reference genome for which the
    signals "SNP baf", "SNP maf", "SNP i1" and "SNP i2" exist for the current bin size.
    A warning is logged and nothing is drawn when the reference genome is not set.

    """
    if self.reference_genome is None:
        _logger.warning("Missing reference genome required for baf.")
        return
    io = self.io[self.plot_file]
    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
        FLAG_USEHAP if self.snp_use_phase else 0)
    # "SNP i2" is not plotted, but it stays part of the chromosome selection
    required = ("SNP baf", "SNP maf", "SNP i1", "SNP i2")
    chroms = []
    for c, (l, t) in self.reference_genome["chromosomes"].items():
        snp_chr = io.snp_chromosome_name(c)
        if all(io.signal_exists(snp_chr, self.bin_size, name, snp_flag) for name in required) and \
                (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
            chroms.append((snp_chr, l))

    self.new_figure(panel_count=len(chroms))
    for c, l in chroms:
        baf = io.get_signal(c, self.bin_size, "SNP baf", snp_flag)
        maf = io.get_signal(c, self.bin_size, "SNP maf", snp_flag)
        i1 = io.get_signal(c, self.bin_size, "SNP i1", snp_flag)

        ax = self.next_panel()
        ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                     color='C0')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
        # x axis is in bins; one tick every 10 Mbp
        ax.xaxis.set_ticks(np.arange(0, (l + 10e6) // self.bin_size, 10e6 // self.bin_size), minor=[])
        ax.set_ylim([0, 1])
        n_bins = l // self.bin_size
        ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
        ax.grid()
        ax.step(baf, self.baf_colors[0])
        ax.step(maf, self.baf_colors[1])
        ax.step(i1, self.baf_colors[2])
    self.fig_show(suffix="baf")

def callmap(self, color='frequency', background='white', pixel_size=1700000, max_p_val=1e-20, min_freq=0.01, plot='cmap')

Source code

def callmap(self, color="frequency", background="white", pixel_size=1700000, max_p_val=1e-20, min_freq=0.01,
            plot="cmap"):
    """ Builds an RGB map of combined calls (files x genome pixels) and plots it.

    Chromosomes of the reference genome longer than 10 bins (restricted to self.chrom when it is not
    empty) are laid out one after another; every chromosome gets length // pixel_size + 1 pixels and
    its last pixel is used as a separator. Calls stored under "calls combined" are drawn if they
    have more than self.min_segment_size bins, p_val below ``max_p_val``, a "segment" entry and
    call["models"][0][4] above ``min_freq``.

    Parameters
    ----------
    color : str
        "frequency": channel int(call["type"]) + 1 is set to the largest call["models"][0][4] seen
        in the pixel; "coverage": the same channel accumulates bin_size / pixel_size per bin;
        any other value: pixel color is chosen from the copy number call["models"][0][0].
    background : str
        If "white", pixels are recolored by b2w (no-call pixels become white, separators black).
    pixel_size : int
        Number of base pairs per pixel.
    max_p_val : float
        Upper limit for p-value of plotted calls.
    min_freq : float
        Lower limit for call["models"][0][4] of plotted calls.
    plot : str
        "cmap": plot the map itself; "regions": plot correlation between pixels (three color
        channels stacked); any other value: plot correlation between files and a dendrogram.

    Returns
    -------
    cmap : numpy.ndarray or None
        Integer array of shape (number of plot files, pixels, 3) with values scaled by 255.
        None if the reference genome is missing.

    """
    bin_size = self.bin_size
    if self.reference_genome is None:
        _logger.warning("Missing reference genome required for callmap.")
        return
    n = len(self.plot_files)
    ix = self.plot_files

    if plot:
        self.new_figure(panel_count=n, grid=(1, 1), panel_size=(24, 0.24 * n))

    chroms = []
    starts = []
    ends = []
    pixels = 0
    for c, (l, t) in self.reference_genome["chromosomes"].items():
        if l > 10 * bin_size:
            if len(self.chrom) == 0 or (c in self.chrom) or (self.io[0].snp_chromosome_name(c) in self.chrom):
                chroms.append(c)
                starts.append(pixels)
                pixels += l // pixel_size + 1
                ends.append(pixels - 1)

    cmap = np.zeros((n, pixels, 3))
    # Separator pixels (last pixel of every chromosome) are marked with all channels set
    cmap[:, ends, :] = 1

    for i in range(n):
        io = self.io[ix[i]]
        print(io.filename)
        flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
        flag_rd = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
        for c, start in zip(chroms, starts):
            snp_chr = io.snp_chromosome_name(c)
            if io.signal_exists(snp_chr, bin_size, "calls combined", flag):

                calls = io.read_calls(snp_chr, bin_size, "calls combined", flag)
                segments = io.get_signal(snp_chr, bin_size, "RD mosaic segments 2d", flag_rd)
                segments = segments_decode(segments)

                for call in calls:
                    if call["bins"] > self.min_segment_size and call["p_val"] < max_p_val and "segment" in call and \
                            call["models"][0][4] > min_freq:
                        cix = int(call["type"]) + 1
                        for b in segments[int(call["segment"])]:
                            # Pixel that contains bin b of this chromosome
                            px = start + b * bin_size // pixel_size
                            if color == "frequency":
                                cmap[i, px, cix] = max(cmap[i, px, cix], call["models"][0][4])
                            elif color == "coverage":
                                cmap[i, px, cix] += bin_size / pixel_size
                            else:  # model copy number
                                if call["models"][0][0] == 0:
                                    cmap[i, px, 0] = 1
                                elif call["models"][0][0] == 1:
                                    cmap[i, px, 0] = 1
                                    cmap[i, px, 1] = 1
                                elif call["models"][0][0] == 2:
                                    cmap[i, px, 2] = 1
                                else:
                                    cn = call["models"][0][0]
                                    if cn > 6:
                                        cn = 6
                                    cmap[i, px, 1] = (2 + cn) / 8

    def b2w(pixel):
        # Recolors one pixel in place for white background: separator (all ones) becomes black,
        # otherwise the strongest channel is set to 1 and the other two to 1 - strongest value.
        if np.all(pixel == 1):
            pixel[:] = 0
        elif pixel[0] > pixel[1] and pixel[0] > pixel[2]:
            pixel[1] = pixel[2] = 1 - pixel[0]
            pixel[0] = 1
        elif pixel[1] > pixel[2]:
            pixel[0] = pixel[2] = 1 - pixel[1]
            pixel[1] = 1
        else:
            pixel[0] = pixel[1] = 1 - pixel[2]
            pixel[2] = 1
        return pixel

    if background == "white":
        cmap = cmap.reshape(n * pixels, 3)
        # The result is discarded: this relies on b2w modifying the pixels in place
        np.apply_along_axis(b2w, 1, cmap)
        cmap = cmap.reshape(n, pixels, 3)

    cmap = (255 * cmap).astype("int")
    if plot == "cmap":
        self.new_figure(panel_count=1, grid=(1, 1), panel_size=(24, 0.24 * n))
        ax = self.next_panel()
        plt.imshow(cmap, aspect='auto')
        for i in ends[:-1]:
            plt.axvline(x=i - 0.5, color='red', linewidth=0.5)
        ax.set_yticks([])
        ax.set_yticklabels([])
        ax.set_xticks((np.array(starts) + np.array(ends)) / 2)
        chroms = list(map(Genome.canonical_chrom_name, chroms))
        ax.set_xticklabels(chroms)
        self.fig_show(suffix="callmap")
    elif plot == "regions":
        self.new_figure(panel_count=1, grid=(1, 1), panel_size=(24, 24))
        ax = self.next_panel()
        corr = np.corrcoef(
            np.concatenate((cmap[:, :, 0].transpose(), cmap[:, :, 1].transpose(), cmap[:, :, 2].transpose()),
                           axis=0))
        plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
        plt.colorbar()
        # Each stacked channel holds ``pixels`` rows, so the second and the third copy of the
        # chromosome layout are shifted by pixels and 2 * pixels.
        starts3 = np.concatenate((np.array(starts), np.array(starts) + pixels, np.array(starts) + 2 * pixels))
        ends3 = np.concatenate((np.array(ends), np.array(ends) + pixels, np.array(ends) + 2 * pixels))
        for i in ends3[:-1]:
            plt.axvline(x=i - 0.5, color='red', linewidth=0.5)
            plt.axhline(y=i - 0.5, color='red', linewidth=0.5)

        ax.set_xticks((starts3 + ends3) / 2)
        ax.set_yticks((starts3 + ends3) / 2)
        chroms = list(map(Genome.canonical_chrom_name, chroms))
        ax.set_xticklabels(chroms + chroms + chroms)
        ax.set_yticklabels(chroms + chroms + chroms)
        self.fig_show(suffix="callmap")
    else:
        self.new_figure(panel_count=2, panel_size=(12, 12))
        ax = self.next_panel()
        x = np.concatenate((cmap[:, :, 0], cmap[:, :, 1], cmap[:, :, 2]),
                           axis=1)
        corr = np.corrcoef(x)
        plt.imshow(corr, aspect='auto', vmin=-1, vmax=1)
        plt.colorbar()
        ax = plt.gca()

        ax.set_xticks(range(n))
        ax.set_yticks(range(n))
        ax = self.next_panel()
        Z = hierarchy.linkage(x, 'average', 'correlation')
        dn = hierarchy.dendrogram(Z)

        self.fig_show(suffix="callmap")
    return cmap

def circular(self)

Source code

def circular(self):
    """ Plots RD and MAF signals of all plot files in polar panels, one panel per file.

    Autosomes and sex chromosomes of the reference genome that have both "SNP maf" and "RD"
    signals for the current bin size (restricted to self.chrom when it is not empty) are placed
    clockwise around the circle, starting at the top. MAF is drawn as a band filled from
    1 - maf / 2 to the outer radius 1 and RD as points between radius 0.1 and 0.8. MAF is set to
    zero in bins with fewer than bin_size / 10000 heterozygous SNPs. A file without any such
    chromosome is skipped with a warning and its panel stays empty.

    """
    # np.warnings was only an alias of the standard module and is missing in recent NumPy releases
    import warnings

    chroms = self.chrom
    bin_size = self.bin_size
    n = len(self.plot_files)
    ix = self.plot_files
    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
        FLAG_USEHAP if self.snp_use_phase else 0)
    rd_flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
    self.new_figure(panel_count=n)
    for i in range(n):
        ax = self.next_polar_panel()
        ax.set_theta_zero_location("N")
        ax.set_theta_direction(-1)
        # NOTE(review): _get_lines is a private matplotlib attribute, used here as the color cycle
        rainbow = ax._get_lines
        io = self.io[ix[i]]
        plot_len = 0
        plot_chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            rd_chr = io.rd_chromosome_name(c)
            if rd_chr is not None and (len(chroms) == 0 or (rd_chr in chroms) or (c in chroms)) and (
                    Genome.is_autosome(c) or Genome.is_sex_chrom(c)
            ) and io.signal_exists(rd_chr, bin_size, "SNP maf", snp_flag) and io.signal_exists(
                rd_chr, bin_size, "RD", rd_flag):
                plot_chroms.append((rd_chr, l))
                plot_len += l // bin_size + 1
        if plot_len == 0:
            # Nothing to draw; the angle step below would be a division by zero
            _logger.warning("No chromosomes with RD and SNP maf signals to plot in file '%s'.", io.filename)
            continue
        rd_mean, stdev = io.rd_normal_level(bin_size, rd_flag)
        tl = 0
        # One angle per bin; every chromosome takes l // bin_size + 1 consecutive angles
        dt = 2.0 * np.pi / plot_len
        theta = np.arange(0, 2.0 * np.pi, dt)
        angles = []
        labels = []
        for j in range(len(plot_chroms)):
            c, l = plot_chroms[j]
            next_color = rainbow.get_next_color()
            rd = io.get_signal(c, bin_size, "RD", rd_flag)
            maf = io.get_signal(c, bin_size, "SNP maf", snp_flag)
            c01 = io.get_signal(c, bin_size, "SNP bin count 0|1", snp_flag)
            c10 = io.get_signal(c, bin_size, "SNP bin count 1|0", snp_flag)
            hets = c01 + c10
            # Silences all warnings for the rest of the process (same effect as the former
            # np.warnings.filterwarnings call)
            warnings.filterwarnings('ignore')
            maf[hets < (bin_size / 10000)] = 0
            plt.polar(theta[tl:tl + maf.size], 1 - maf / 2, linewidth=0.3, color=next_color)
            plt.fill_between(theta[tl:tl + maf.size], 1 - maf / 2, np.ones_like(maf), alpha=1, color=next_color)
            markersize = 5
            if self.markersize != "auto":
                markersize = self.markersize
            # RD scaled so that self.rd_range[1] * rd_mean / 2 maps to radius 0.8
            ax.scatter(theta[tl:tl + rd.size], np.ones_like(rd) / 10. + 0.7 * rd / (self.rd_range[1] * rd_mean / 2),
                       s=markersize, alpha=0.7, color=next_color)

            labels.append(Genome.canonical_chrom_name(c))
            angles.append(180 * theta[tl + rd.size // 2] / np.pi)
            tl += l // bin_size + 1
        # Thin guide circles, one per integer level below self.rd_range[1]
        for cn in range(int(self.rd_range[1])):
            plt.polar(theta, np.ones_like(theta) * (0.1 + 0.7 * (cn / self.rd_range[1])), color="k", linewidth=0.1)
        ax.set_rmax(1.0)
        ax.set_rticks([])
        ax.set_thetagrids(angles, labels=labels, fontsize=10, weight="bold", color="black")
        ax.set_title(self.file_title(ix[i]), loc="left", fontsize=10, weight="bold", color="black")
        ax.grid(False)
    self.fig_show(suffix="circular")

def compare(self, region1, region2, n_bins=21, plot=False, stdout=True, legend=True)

Source code

def compare(self, region1, region2, n_bins=21, plot=False, stdout=True, legend=True):
    """ Compares the RD signal of two regions in every plotted file.

    For each file in `self.plot_files` the finite RD values of all bins
    overlapping each region are collected, values at or below the 1st and at
    or above the 99th percentile are dropped, both samples are histogrammed
    on common bin edges and fitted with `fit_normal`, and the fitted
    parameters are compared with `t_test_2_samples`.

    Parameters
    ----------
    region1 : str
        First region specification (decoded with `decode_region`).
    region2 : str
        Second region specification (decoded with `decode_region`).
    n_bins : int
        Number of histogram bin edges (i.e. n_bins - 1 histogram bins).
    plot : bool
        If True, draw the fitted curves and histogram points, one panel per file.
    stdout : bool
        If True, print one tab separated line per file.
    legend : bool
        If True (and plot is True), add a legend to every panel.

    Returns
    -------
    list of list
        One row per file: [filename, region1, region2, fitted mean 1,
        fitted sigma 1, fitted mean 2, fitted sigma 2, p-value,
        ratio of means, error of the ratio].

    """
    file_ids = self.plot_files
    n_files = len(file_ids)
    results = []

    def collect_rd(io, regions):
        # Finite RD values of every bin overlapped by the given regions.
        values = []
        for chrom, (start, end) in regions:
            flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
            signal = io.get_signal(chrom, self.bin_size, "RD", flag_rd)
            first_bin = (start - 1) // self.bin_size
            last_bin = (end - 1) // self.bin_size
            window = signal[first_bin:last_bin + 1]
            values += list(window[np.isfinite(window)])
        return values

    def trim_tails(values):
        # Drop both 1% tails (strict comparisons) and return sample, mean, stdev.
        sample = np.array(values)
        low = np.percentile(sample, 1)
        high = np.percentile(sample, 99)
        sample = sample[sample > low]
        sample = sample[sample < high]
        return sample, np.mean(sample), np.std(sample)

    if plot:
        plt.clf()
        plt.rcParams["font.size"] = 8
        sx, sy = self._panels_shape(n_files) if self.grid == "auto" else tuple(self.grid)
        self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
        if self.output_filename != "":
            self.fig.set_figheight(3 * sy)
            self.fig.set_figwidth(4 * sx)
        grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

    for panel, file_id in enumerate(file_ids):
        io = self.io[file_id]
        if plot:
            ax = self.fig.add_subplot(grid[panel])
            ax.set_title(self.file_title(file_id), position=(0.01, 1.07),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
        regs1 = decode_region(region1)
        regs2 = decode_region(region2)
        raw1 = collect_rd(io, regs1)
        raw2 = collect_rd(io, regs2)

        data1, mean1, std1 = trim_tails(raw1)
        data2, mean2, std2 = trim_tails(raw2)

        # Common histogram range: +/- 5 sigma around both sample means.
        rd_min = min(mean1 - 5 * std1, mean2 - 5 * std2)
        rd_max = max(mean1 + 5 * std1, mean2 + 5 * std2)
        bins = np.linspace(rd_min, rd_max, n_bins)

        hist1, _ = np.histogram(data1, bins=bins)
        hist2, _ = np.histogram(data2, bins=bins)

        fitn1, fitm1, fits1 = fit_normal(bins[:-1], hist1)[0]
        fitn2, fitm2, fits2 = fit_normal(bins[:-1], hist2)[0]

        count1 = sum(hist1)
        count2 = sum(hist2)
        pval = t_test_2_samples(fitm1, fits1, count1, fitm2, fits2, count2)

        ratio = fitm1 / fitm2
        ratio_err = ratio * (fits1 / fitm1 / np.sqrt(count1) + fits2 / fitm2 / np.sqrt(count2))
        row = [io.filename, region1, region2, fitm1, fits1, fitm2, fits2, pval, ratio, ratio_err]

        if stdout:
            print("%s\t%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t%e\t%.4f\t%.4f" % tuple(row))
        results.append(row)

        if plot:
            x = np.linspace(bins[0], bins[-1], 1001)
            plt.plot(x, normal(x, fitn1, fitm1, fits1), "g-", label=region1)
            plt.plot(x, normal(x, fitn2, fitm2, fits2), "b-", label=region2)
            plt.plot(bins[:-1], hist1, "g*")
            plt.plot(bins[:-1], hist2, "b*")
            if legend:
                plt.legend()

    if plot:
        if self.output_filename != "":
            plt.savefig(self._image_filename("comp"), dpi=200)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    return results

def compare_baf(self, region1, region2, plot=False, stdout=True, legend=True)

Source code

def compare_baf(self, region1, region2, plot=False, stdout=True, legend=True):
    """ Compares the BAF likelihood of two regions in every plotted file.

    For each file in `self.plot_files` the per-bin "SNP likelihood" rows of
    all bins overlapping a region are multiplied together (rows summing to
    zero are skipped) and renormalised after every bin. The combined
    likelihood of each region is then passed to `likelihood_baf_pval`.

    Parameters
    ----------
    region1 : str
        First region specification (decoded with `decode_region`).
    region2 : str
        Second region specification (decoded with `decode_region`).
    plot : bool
        If True, draw both combined likelihood curves, one panel per file.
    stdout : bool
        If True, print one tab separated line per file.
    legend : bool
        If True (and plot is True), label the curves with the region strings
        and add a legend to every panel.

    Returns
    -------
    list of list
        One row per file: [filename, region1, region2, b1, p1, b2, p2] where
        (b, p) is the pair returned by `likelihood_baf_pval` for the region.

    """
    ret = []
    n = len(self.plot_files)

    def combined_likelihood(io, regions):
        # Product of the per-bin likelihood rows over all bins of the regions.
        flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
        rows = []
        for c, (pos1, pos2) in regions:
            his_p = io.get_signal(c, self.bin_size, "SNP likelihood", flag)
            bin1 = (pos1 - 1) // self.bin_size
            bin2 = (pos2 - 1) // self.bin_size
            rows += list(his_p[bin1:bin2 + 1])
        rows = np.array(rows)
        lh = np.ones_like(rows[0])
        for row in rows:
            if sum(row) != 0:
                lh *= row
            # Renormalise on every step so the running product keeps unit sum.
            lh /= sum(lh)
        return lh

    if plot:
        plt.clf()
        plt.rcParams["font.size"] = 8
        if self.grid == "auto":
            sx, sy = self._panels_shape(n)
        else:
            sx, sy = tuple(self.grid)
        self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
        if self.output_filename != "":
            self.fig.set_figheight(3 * sy)
            self.fig.set_figwidth(4 * sx)
        grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)

    for panel, file_ix in enumerate(self.plot_files):
        io = self.io[file_ix]
        if plot:
            ax = self.fig.add_subplot(grid[panel])
            ax.set_title(self.file_title(file_ix), position=(0.01, 1.07),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})

        h1 = combined_likelihood(io, decode_region(region1))
        h2 = combined_likelihood(io, decode_region(region2))

        b1, p1 = likelihood_baf_pval(h1)
        b2, p2 = likelihood_baf_pval(h2)

        if stdout:
            print("%s\t%s\t%s\t%.4f\t%e\t%.4f\t%e" % (
                io.filename, region1, region2, b1, p1, b2, p2))
        ret.append([io.filename, region1, region2, b1, p1, b2, p2])

        if plot:
            # Label the curves so the `legend` option has something to show.
            plt.plot(h1, "g", label=region1)
            plt.plot(h2, "b", label=region2)
            if legend:
                plt.legend()

    if plot:
        if self.output_filename != "":
            plt.savefig(self._image_filename("comp_baf"), dpi=200)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

    return ret

def compare_rd_dist(self, regions)

Source code

def compare_rd_dist(self, regions):
    """ Plots the distribution of per-bin copy number differences between
    the first two plotted files.

    For every chromosome named in `regions` the GC corrected RD signal of
    both files is scaled by 2 / (normal RD level of the file) and the
    bin-wise difference is histogrammed on 100 edges between -0.5 and 0.5.
    Only the chromosome of each decoded region is used here; all bins of
    that chromosome's signal contribute.

    Parameters
    ----------
    regions : str
        Region specification (decoded with `decode_region`).

    """
    self.new_figure(panel_count=1)
    panel = self.next_panel()
    panel.set_ylabel("Normalised distribution")
    panel.set_xlabel("Difference in copy number")
    decoded = decode_region(regions)
    first = self.io[self.plot_files[0]]
    second = self.io[self.plot_files[1]]
    bin_size = self.bin_size
    differences = []
    for chrom, (_start, _end) in decoded:
        flags = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
        level1, _ = first.rd_normal_level(bin_size, flags)
        level2, _ = second.rd_normal_level(bin_size, flags)
        rd1 = first.get_signal(chrom, bin_size, "RD", flags)
        rd2 = second.get_signal(chrom, bin_size, "RD", flags)
        differences.extend(rd1[k] * 2 / level1 - rd2[k] * 2 / level2 for k in range(len(rd1)))

    panel.hist(differences, bins=np.linspace(-0.5, 0.5, 100))
    panel.set_yticklabels([])
    panel.grid()
    self.fig_show(suffix="compare_rd")

def denovo_calls(self, sample, reference, call_type='mosaic')

Source code

def denovo_calls(self, sample, reference, call_type="mosaic"):
    """ Prints to stdout calls of the sample file that look de novo with
    respect to the reference files.

    Calls stored under the "calls" signal are filtered by the viewer's size,
    p-value, pN and Q0 ranges. A call is printed when every reference file
    genotypes within 0.5 of copy number 2 and the sample is either above 2.5
    and more than 0.5 above every reference, or below 1.5 and more than 0.5
    below every reference. Nothing is done for call types other than
    "mosaic".

    Parameters
    ----------
    sample : int
        Index (into `self.io`) of the file whose calls are examined.
    reference : list of int
        Indices (into `self.io`) of the files used as reference.
    call_type : str
        Only "mosaic" is handled.

    """
    bin_size = self.bin_size
    io = self.io[sample]
    if call_type != "mosaic":
        return
    for c in io.rd_chromosomes():
        if not ((c in self.chrom) or len(self.chrom) == 0):
            continue
        flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
        if not io.signal_exists(c, bin_size, "calls", flag):
            continue
        for call in io.read_calls(c, bin_size, "calls", flag):
            selected = in_interval(call["size"], self.size_range) \
                and in_interval(call["p_val"], self.p_range) \
                and in_interval(call["pN"], self.pN_range) \
                and in_interval(call["Q0"], self.Q0_range)
            if not selected:
                continue
            kind = "duplication" if call["type"] == 1 else "deletion"
            region = "%s:%d-%d" % (c, call["start"], call["end"])

            # Genotyped copy number of the sample and of every reference file.
            cn0 = self.genotype([bin_size], region, file_index=sample)[0][3]
            cref = [self.genotype([bin_size], region, file_index=ref)[0][3] for ref in reference]

            gained = all((cn0 - x) > 0.5 for x in cref) and cn0 > 2.5
            lost = all((x - cn0) > 0.5 for x in cref) and cn0 < 1.5
            refs_normal = all(np.abs(x - 2.) < 0.5 for x in cref)
            if (gained or lost) and refs_normal:
                print(kind, region, call["cnv"], cn0, cref)

def dispersion(self, legend=True)

Source code

def dispersion(self, legend=True):
    """ Plots stdev of RD against mean RD over all available bin sizes.

    Two log-log panels are drawn: the left one uses the "RD stat" signal
    read with FLAG_AUTO, the right one with FLAG_AUTO | FLAG_GC_CORR. Each
    opened file contributes one line per panel, with one point per bin size
    for which the statistics signal exists (elements 4 and 5 of the signal
    are used as mean and stdev).

    Parameters
    ----------
    legend : bool
        If True, a legend with file names is added to both panels.

    """
    plt.clf()
    plt.rcParams["font.size"] = 8
    self.fig = plt.figure(1, facecolor='w', edgecolor='k')
    if self.output_filename != "":
        self.fig.set_figheight(8)
        self.fig.set_figwidth(12)
    grid = gridspec.GridSpec(1, 2, wspace=0.2, hspace=0.2)

    def draw_panel(cell, flags, label_suffix):
        # Draw one mean-vs-stdev panel using RD statistics read with `flags`.
        ax = self.fig.add_subplot(cell)
        ax.set_yscale("log")
        ax.set_xscale("log")
        ax.grid(True)
        ax.set_xlabel("mean RD" + label_suffix)
        ax.set_ylabel("stdev RD" + label_suffix)
        for f in self.io:
            bin_sizes = sorted({int(x[1]) for x in f.chromosomes_bin_sizes_with_signal("RD")})
            rd = []
            drd = []
            for bs in bin_sizes:
                if f.signal_exists(None, bs, "RD stat", flags=flags):
                    stat = f.get_signal(None, bs, "RD stat", flags=flags)
                    rd.append(stat[4])
                    drd.append(stat[5])
            ax.plot(rd, drd, "*-", label=f.filename)
        # The legend is built from the artists present when it is created,
        # so it has to be requested after all lines have been plotted.
        if legend and self.io:
            ax.legend(loc="upper left")

    draw_panel(grid[0], FLAG_AUTO, "")
    draw_panel(grid[1], FLAG_AUTO | FLAG_GC_CORR, " (GC corr)")

    if self.output_filename != "":
        plt.savefig(self._image_filename("dispersion"), dpi=200)
        plt.close(self.fig)
    elif self.interactive:
        plt.show(block=False)
        plt.draw()
    else:
        plt.show()

def file_title(self, ix)

Source code

def file_title(self, ix):
    """ Returns the title used for the file with index ix.

    Parameters
    ----------
    ix : int
        Index of the file in `self.io`.

    Returns
    -------
    str
        `self.file_titles[ix]` when that list is long enough, otherwise the
        last "/"-separated component of the file name with every occurrence
        of ".pytor" removed.

    """
    titles = self.file_titles
    if ix >= len(titles):
        base_name = self.io[ix].filename.split("/")[-1]
        return base_name.replace(".pytor", "")
    return titles[ix]

def genotype(self, bin_sizes, region, p_val=False, interactive=False, file_index=None)

Source code

def genotype(self, bin_sizes, region, p_val=False, interactive=False, file_index=None):
    """ Genotypes (estimates copy number of) regions using the RD signal.

    For every decoded region and every bin size the RD signal is summed over
    the region (first and last bin weighted by the covered fraction) and
    expressed as 2 * (mean RD in region) / (normal RD level), where the
    normal level is element 4 of the "RD stat" signal (autosomal statistics,
    falling back to sex chromosome statistics when those are missing or
    empty).

    Parameters
    ----------
    bin_sizes : list of int
        Bin sizes to genotype with.
    region : str
        Region specification (decoded with `decode_region`). An end position
        equal to 1000000000 is replaced by the chromosome length when known.
    p_val : bool
        If True, e-values are calculated with `getEValue` and
        `gaussianEValue`; the latter is appended to the result.
    interactive : bool
        If True, results are also printed to stdout, one line per region.
    file_index : int or None
        Index of the file in `self.io`; `self.plot_file` is used if None.

    Returns
    -------
    list of list
        One row per region: [chrom, start, end] followed, for each bin size,
        by the copy number and (if p_val) the gaussian e-value.

    """
    if file_index is None:
        file_index = self.plot_file
    io = self.io[file_index]
    # Does not depend on region or bin size.
    flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
    ret = []
    regs = decode_region(region, max_size=1000000000)
    for c, (pos1, pos2) in regs:
        chr_len = io.get_chromosome_length(c)
        if chr_len is not None and pos2 == 1000000000:
            pos2 = chr_len
        if interactive:
            print(c + ":" + str(pos1) + "-" + str(pos2), end="")
        row = [c, pos1, pos2]
        ret.append(row)
        for bs in bin_sizes:
            stat = io.get_signal(c, bs, "RD stat", flag_rd | FLAG_AUTO)
            if stat is None or len(stat) == 0:
                stat = io.get_signal(c, bs, "RD stat", flag_rd | FLAG_SEX)
            his_p = io.get_signal(c, bs, "RD", flag_rd)
            bin1 = (pos1 - 1) // bs
            bin2 = (pos2 - 1) // bs
            rc = 0
            if bin1 == bin2:
                # Region inside one bin: scale the bin by the covered fraction.
                try:
                    rc = (pos2 - pos1 + 1) * his_p[bin1] / bs
                except IndexError:
                    pass
            else:
                # Partially covered first and last bin ...
                try:
                    rc += (bin1 * bs - pos1 + 1 + bs) * his_p[bin1] / bs
                    rc += (pos2 - bin2 * bs) * his_p[bin2] / bs
                except IndexError:
                    pass
                # ... plus the fully covered bins in between. Bins past the end
                # of the signal contribute nothing, so the range is clamped
                # instead of raising and swallowing IndexError for each of them.
                for ix in range(bin1 + 1, min(bin2, len(his_p))):
                    try:
                        rc += his_p[ix]
                    except IndexError:
                        pass
            e2 = 0
            if p_val:
                e1 = getEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9 / bs
                e2 = gaussianEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9
            copy_number = 2. * rc / (stat[4] * (pos2 - pos1 + 1) / bs)
            if interactive:
                print("\t%f" % copy_number, end="")
                if p_val:
                    print("\t%e\t%e" % (e1, e2), end="")

            row.append(copy_number)
            if p_val:
                row.append(e2)
        if interactive:
            print()

    return ret

def genotype_all(self, bin_sizes, regions, interactive=False, file_index=None)

Source code

def genotype_all(self, bin_sizes, regions, interactive=False, file_index=None):
    """ Genotypes regions using RD and SNP data and reports additional
    quality information.

    Parameters
    ----------
    bin_sizes : list of int
        Bin sizes to genotype with.
    regions : list of str
        Region specifications; only the first interval decoded from each
        string is used. An end position equal to 1000000000 is replaced by
        the chromosome length when known.
    interactive : bool
        If True, one line per region is printed to stdout with the values
        for all bin sizes side by side.
    file_index : int or None
        Index of the file in `self.io`; `self.plot_file` is used if None.

    Returns
    -------
    dict
        For each bin size a list with one row per region:
        [chrom, start, end, copy number, e-value (getEValue),
        e-value (gaussianEValue), q0, pN, dG, fraction of the region covered
        by non-NaN RD bins, homs, hets, baf, baf p-value].
        q0 is (sp - su) / sp for the summed "RD" (sp) and "RD unique" (su)
        signals; pN is one minus the GC+AT content per base of the region and
        dG the minimal tabulated distance to a bin with neither GC nor AT
        content (both -1 when no GC/AT signal is available). The last four
        values are [0, 0, 0, 1] for chromosomes without SNP data.

    """
    if file_index is None:
        file_index = self.plot_file
    # Map RD chromosome names to the names used in the GC file.
    rd_gc_chromosomes = {}
    for c in self.io_gc.gc_chromosomes():
        rd_name = self.io[file_index].rd_chromosome_name(c)
        if rd_name is not None:
            rd_gc_chromosomes[rd_name] = c
    ret = {}
    for bs in bin_sizes:
        oc = ""
        ret[bs] = []
        for region in regions:
            regs = decode_region(region, max_size=1000000000)
            c, (pos1, pos2) = regs[0]
            if oc != c:
                # New chromosome: (re)load all per-chromosome signals.
                chr_len = self.io[file_index].get_chromosome_length(c)
                if chr_len is not None and pos2 == 1000000000:
                    pos2 = chr_len
                flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
                stat = self.io[file_index].get_signal(c, bs, "RD stat", flag_rd | FLAG_AUTO)
                if stat is None or len(stat) == 0:
                    stat = self.io[file_index].get_signal(c, bs, "RD stat", flag_rd | FLAG_SEX)
                his_p = self.io[file_index].get_signal(c, bs, "RD", flag_rd)
                qrd_p = self.io[file_index].get_signal(c, bs, "RD")
                qrd_u = self.io[file_index].get_signal(c, bs, "RD unique")
                gc, at, distN = False, False, False
                if c in rd_gc_chromosomes and self.io_gc.signal_exists(rd_gc_chromosomes[c], None, "GC/AT"):
                    gcat = self.io_gc.get_signal(rd_gc_chromosomes[c], None, "GC/AT")
                    gc, at = gc_at_decompress(gcat)
                    NN = 100 - np.array(gc) - np.array(at)
                    # distN: 0 for bins with NN == 100, otherwise distance in
                    # steps of 100 to the nearest such bin (or to the start of
                    # the array), found by a forward and a backward sweep.
                    distN = np.zeros_like(NN, dtype="long") - 1
                    distN[NN == 100] = 0
                    prev = 0
                    for Ni in range(0, distN.size):
                        if distN[Ni] == -1:
                            prev += 100
                            distN[Ni] = prev
                        else:
                            prev = 0
                    prev = 0
                    for Ni in range(distN.size - 1, -1, -1):
                        if distN[Ni] > 0:
                            prev += 100
                            if prev < distN[Ni]:
                                distN[Ni] = prev
                        else:
                            prev = 0
                snp = c in self.io[file_index].snp_chromosomes()
                snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                    FLAG_USEHAP if self.snp_use_phase else 0)
                if snp:
                    snp_likelihood = list(
                        self.io[file_index].get_signal(c, bs, "SNP likelihood", snp_flag).astype("float64"))
                    snp_hets = self.io[file_index].get_signal(c, bs, "SNP bin count 0|1", snp_flag)
                    snp_hets += self.io[file_index].get_signal(c, bs, "SNP bin count 1|0", snp_flag)
                    snp_homs = self.io[file_index].get_signal(c, bs, "SNP bin count 1|1", snp_flag)
            else:
                # Same chromosome as the previous region: signals are reused.
                if chr_len is not None and pos2 == 1000000000:
                    pos2 = chr_len
            oc = c
            ret[bs].append([c, pos1, pos2])

            bin1 = (pos1 - 1) // bs
            bin2 = (pos2 - 1) // bs
            rc = 0
            sp = 0
            su = 0
            nansize = 0  # number of bases of the region in bins with non-NaN RD
            if bin1 == bin2:
                try:
                    if not np.isnan(his_p[bin1]):
                        rc = (pos2 - pos1 + 1) * his_p[bin1] / bs
                        sp = (pos2 - pos1 + 1) * qrd_p[bin1] / bs
                        su = (pos2 - pos1 + 1) * qrd_u[bin1] / bs
                        nansize = (pos2 - pos1 + 1)
                except IndexError:
                    pass
            else:
                try:
                    if not np.isnan(his_p[bin1]):
                        rc += (bin1 * bs - pos1 + 1 + bs) * his_p[bin1] / bs
                        sp += (bin1 * bs - pos1 + 1 + bs) * qrd_p[bin1] / bs
                        su += (bin1 * bs - pos1 + 1 + bs) * qrd_u[bin1] / bs
                        nansize += (bin1 * bs - pos1 + 1 + bs)
                    if not np.isnan(his_p[bin2]):
                        rc += (pos2 - bin2 * bs) * his_p[bin2] / bs
                        sp += (pos2 - bin2 * bs) * qrd_p[bin2] / bs
                        su += (pos2 - bin2 * bs) * qrd_u[bin2] / bs
                        nansize += (pos2 - bin2 * bs)

                except IndexError:
                    pass
                for ix in range(bin1 + 1, bin2):
                    try:
                        if not np.isnan(his_p[ix]):
                            rc += his_p[ix]
                            sp += qrd_p[ix]
                            su += qrd_u[ix]
                            nansize += bs
                    except IndexError:
                        pass
            if gc:
                # GC/AT content is stored per 100 bp bin, independent of bs.
                sbin1 = (pos1 - 1) // 100
                sbin2 = (pos2 - 1) // 100
                pN = 0
                # The single-bin shortcut must be decided on the 100 bp bins:
                # a region inside one RD bin can still span several of them.
                if sbin1 == sbin2:
                    try:
                        pN = (pos2 - pos1 + 1) * (gc[sbin1] + at[sbin1]) / 100
                    except IndexError:
                        pass
                else:
                    try:
                        pN += (sbin1 * 100 - pos1 + 101) * (gc[sbin1] + at[sbin1]) / 100
                        pN += (pos2 - sbin2 * 100) * (gc[sbin2] + at[sbin2]) / 100

                    except IndexError:
                        pass
                    for ix in range(sbin1 + 1, sbin2):
                        try:
                            pN += gc[ix] + at[ix]
                        except IndexError:
                            pass

            e1 = getEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9 / bs
            e2 = gaussianEValue(stat[4], stat[5], his_p, bin1, bin2 + 1) * 2.9e9
            dG = -1
            if gc:
                pN = 1 - pN / (pos2 - pos1 + 1)
                # Include the last 100 bp bin of the region; np.min raises on
                # an empty slice, which the exclusive end produced for regions
                # lying within a single 100 bp bin.
                gap_dist = distN[sbin1:sbin2 + 1]
                dG = np.min(gap_dist) if gap_dist.size else -1
            else:
                pN = -1
                dG = -1
            if nansize == 0:
                rc = np.nan
            else:
                rc = 2 * rc / (stat[4] * nansize / bs)
            ret[bs][-1].append(rc)
            ret[bs][-1].append(e1)
            ret[bs][-1].append(e2)
            q0 = 0
            if sp != 0:
                q0 = (sp - su) / sp
            ret[bs][-1].append(q0)
            ret[bs][-1].append(pN)
            ret[bs][-1].append(dG)
            ret[bs][-1].append(nansize / (pos2 - pos1 + 1))
            if snp:
                homs = np.sum(snp_homs[bin1:bin2 + 1])
                hets = np.sum(snp_hets[bin1:bin2 + 1])
                # Product of the per-bin likelihoods, renormalised at each step.
                lh = np.ones_like(snp_likelihood[0])
                for ix in range(bin1, min(bin2 + 1, len(snp_likelihood))):
                    lh *= snp_likelihood[ix]
                    lh /= np.sum(lh)
                baf, baf_p = likelihood_baf_pval(lh)
                ret[bs][-1] += [homs, hets, baf, baf_p]
            else:
                ret[bs][-1] += [0, 0, 0, 1]
    if interactive:
        plist = []
        for bs in bin_sizes:
            if len(plist) == 0:
                # Copy the rows so that merging the other bin sizes for
                # printing does not modify the rows that are returned.
                plist = [list(r) for r in ret[bs]]
            else:
                for ix in range(len(ret[bs])):
                    plist[ix] += ret[bs][ix][3:]
        for r in plist:
            print(
                ("%s:%d-%d" + (len(bin_sizes) * "\t%.4f\t%e\t%e\t%.4f\t%.4f\t%d\t%.4f\t%d\t%d\t%.4f\t%e")) % tuple(
                    r))
    return ret

def genotype_prompt(self, bin_sizes=[], all=False)

Source code

def genotype_prompt(self, bin_sizes=[], all=False):
    """ Reads regions from standard input, one per line, and genotypes each
    of them in interactive mode until an empty line or end of input.

    Parameters
    ----------
    bin_sizes : list of int
        Bin sizes passed on to the genotyping method.
    all : bool
        If True `genotype_all` is used for each line, otherwise `genotype`.

    """
    while True:
        try:
            # raw_input exists only in Python 2; fall back to input otherwise.
            try:
                entry = raw_input("")
            except NameError:
                entry = input("")
        except EOFError:
            return
        if entry is None or entry == "":
            break
        if all:
            self.genotype_all(bin_sizes, [entry], interactive=True)
        else:
            self.genotype(bin_sizes, entry, interactive=True)

def get_calls(self)

Source code

def get_calls(self):
    """ Collects stored calls of all plotted files for the selected callers.

    Calls are read for every chromosome listed in `self.chrom` (all RD
    chromosomes if that list is empty) and filtered by the viewer's size,
    p-value, pN and Q0 ranges (and the dG range for "rd_mean_shift").

    Returns
    -------
    list of list
        One row per call, starting with [file title, caller, type,
        chromosome]. For "rd_mean_shift" it continues with start, end, size,
        cnv, p_val, p_val_2, p_val_3, p_val_4, Q0, pN, dG; for
        "combined_mosaic" with start, end, size, cnv, p_val, lh_del, lh_loh,
        lh_dup, Q0, pN, pNS, pP, bins, baf, rd_p_val, baf_p_val, segment,
        hets, homs followed by the contents of the first two entries of the
        call's "models". If `self.annotate` is set, the annotation of the
        call region is appended as the last element.

    """
    bin_size = self.bin_size
    n = len(self.plot_files)
    ix = self.plot_files
    if self.annotate:
        annotator = Annotator(self.reference_genome)
    ret = []
    for caller in self.callers:
        if caller == "rd_mean_shift":
            for i in range(n):
                io = self.io[ix[i]]
                for c in io.rd_chromosomes():
                    if not ((c in self.chrom) or len(self.chrom) == 0):
                        continue
                    flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                    if not io.signal_exists(c, bin_size, "calls", flag):
                        continue
                    for call in io.read_calls(c, bin_size, "calls", flag):
                        if in_interval(call["size"], self.size_range) \
                                and in_interval(call["p_val"], self.p_range) \
                                and in_interval(call["pN"], self.pN_range) \
                                and in_interval(call["Q0"], self.Q0_range) \
                                and in_interval(call["dG"], self.dG_range):
                            call_type = "duplication" if call["type"] == 1 else "deletion"

                            row = [self.file_title(ix[i]), caller, call_type, c, call["start"], call["end"],
                                   call["size"], call["cnv"], call["p_val"], call["p_val_2"],
                                   call["p_val_3"], call["p_val_4"], call["Q0"], call["pN"], call["dG"]]
                            if self.annotate:
                                row.append(annotator.get_info("%s:%d-%d" % (c, call["start"], call["end"])))
                            ret.append(row)
        elif caller == "combined_mosaic":
            keys = ["start", "end", "size", "cnv", "p_val", "lh_del", "lh_loh",
                    "lh_dup", "Q0", "pN", "pNS", "pP", "bins", "baf",
                    "rd_p_val", "baf_p_val", "segment", "hets", "homs"]
            type_names = {-1: "deletion", 0: "cnnloh", 1: "duplication"}
            for i in range(n):
                io = self.io[ix[i]]
                for c in io.rd_chromosomes():
                    if not ((c in self.chrom) or len(self.chrom) == 0):
                        continue
                    flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR | \
                           (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                    if not io.signal_exists(c, bin_size, "calls combined", flag):
                        continue
                    for call in io.read_calls(c, bin_size, "calls combined", flag):
                        if in_interval(call["size"], self.size_range) \
                                and in_interval(call["p_val"], self.p_range) \
                                and in_interval(call["pN"], self.pN_range) \
                                and in_interval(call["Q0"], self.Q0_range):
                            call_type = type_names[call["type"]]
                            # Title is looked up by file index (ix[i]), exactly
                            # as in the rd_mean_shift branch.
                            row = [self.file_title(ix[i]), caller, call_type, c] + [call[k] for k in keys]
                            for m in range(2):
                                row += call["models"][m]

                            if self.annotate:
                                row.append(annotator.get_info("%s:%d-%d" % (c, call["start"], call["end"])))
                            ret.append(row)
    return ret

def global_plot(self)

Source code

def global_plot(self):
    """ Plots genome-wide view: for each plot file a stack of panels listed in self.panels.

    Chromosomes are concatenated along the x axis in units of bins (self.bin_size). Only
    autosomes and sex chromosomes of the reference genome that have a RD chromosome name in
    the first plot file are used, optionally restricted to the names listed in self.chrom.

    Recognized panel names are "rd", "snp", "snv" / "snv:<callset>" and "likelihood". For any
    other name the sub-panel is created but left empty.

    """
    chroms = []
    for c, (l, t) in self.reference_genome["chromosomes"].items():
        rd_chr = self.io[self.plot_files[0]].rd_chromosome_name(c)
        if (len(self.chrom) == 0 or (rd_chr in self.chrom) or (c in self.chrom)) and rd_chr is not None:
            if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                chroms.append((rd_chr, l))
    panels = self.panels
    bin_size = self.bin_size
    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
        FLAG_USEHAP if self.snp_use_phase else 0)
    rd_flag = (FLAG_USEMASK if self.rd_use_mask else 0) | (FLAG_GC_CORR if self.rd_use_gc_corr else 0)

    def finish_x_axis(ax, width, major, minor, names, is_last, **hidden):
        # Major ticks mark chromosome boundaries and carry no text; chromosome names are
        # minor tick labels and are shown only on the bottom panel of the stack.
        ax.set_xlim([0, width])
        ax.xaxis.set_ticks(major)
        ax.xaxis.set_ticklabels([""] * len(major))
        if is_last:
            ax.xaxis.set_ticks(minor, minor=True)
            ax.xaxis.set_ticklabels(names, minor=True)
        else:
            plt.setp(ax.get_xticklabels(**hidden), visible=False)

    def draw_baf_panel(ax, io, is_last, **read_kwargs):
        # Shared implementation of "snp" and "snv" panels; read_kwargs is passed to
        # io.read_snp (empty for "snp", callset=<name> for "snv").
        start = 0
        xticks = []
        xticks_minor = []
        xticks_labels = []
        size = 0.1 if self.markersize == "auto" else self.markersize
        for c, l in chroms:
            pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, **read_kwargs)
            hpos = []
            color = []
            baf = []
            for k in range(len(pos)):
                if (nref[k] + nalt[k]) != 0 and ((not self.snp_use_id) or (flag[k] & 1)):
                    hpos.append(start + (pos[k] / bin_size))
                    if gt[k] % 4 != 2:
                        baf.append(1.0 * nalt[k] / (nref[k] + nalt[k]))
                    else:
                        baf.append(1.0 * nref[k] / (nref[k] + nalt[k]))
                    if self.snp_alpha_P:
                        color.append(
                            colors.to_rgba(self.snp_colors[(gt[k] % 4) * 2 + 1], (flag[k] >> 1) * 0.4))
                    else:
                        color.append(self.snp_colors[(gt[k] % 4) * 2 + (flag[k] >> 1)])
            # With snp_alpha_P transparency is encoded per point in the RGBA colors,
            # so the global alpha has to be switched off.
            alpha = None if (self.snp_alpha_P and len(hpos) > 0) else 0.7
            ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=size, alpha=alpha)
            xticks_minor.append(start + l // bin_size // 2)
            xticks_labels.append(Genome.canonical_chrom_name(c))
            start += l // bin_size + 1
            xticks.append(start)
        finish_x_axis(ax, start, xticks, xticks_minor, xticks_labels, is_last, minor=True)
        ax.grid()
        ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0])
        ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
        ax.set_ylabel("BAF")
        ax.set_ylim([-0.05, 1.05])
        # NOTE(review): grid() without arguments toggles visibility, so this second call
        # presumably switches the horizontal lines off again - kept as in the original.
        ax.yaxis.grid()
        self.fig.add_subplot(ax)

    self.new_figure(panel_count=len(self.plot_files))
    for file_ix in self.plot_files:
        self.new_subgrid(len(panels), hspace=0.05, wspace=0.05)
        io = self.io[file_ix]
        for i, panel in enumerate(panels):
            is_last = i == (len(panels) - 1)
            ax = self.next_subpanel(sharex=True)
            if i == 0:
                ax.set_title(self.file_title(file_ix), position=(0.01, 0.9),
                             fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                             color='C0')

            if panel == "rd":
                # Normal RD level does not depend on chromosome: read it once per file so
                # that it is defined even when no chromosome is selected.
                mean, _ = io.rd_normal_level(bin_size, rd_flag | FLAG_GC_CORR)
                start = 0
                xticks = [0]
                xticks_minor = []
                xticks_labels = []
                for c, l in chroms:
                    his_p = io.get_signal(c, bin_size, "RD", rd_flag)
                    pos = range(start, start + len(his_p))
                    plt.plot(pos, his_p, ls='', marker='.',
                             markersize=1 if self.markersize == "auto" else self.markersize)
                    xticks_minor.append(start + len(his_p) // 2)
                    xticks_labels.append(Genome.canonical_chrom_name(c))
                    start += l // bin_size + 1
                    xticks.append(start)

                finish_x_axis(ax, start, xticks, xticks_minor, xticks_labels, is_last, which="both")
                yticks = np.arange(self.rd_manhattan_range[0], self.rd_manhattan_range[1], 0.5)
                # Tick positions first, labels second: labels are attached to fixed positions.
                # Labels are copy numbers, i.e. twice the RD level relative to the mean.
                ax.yaxis.set_ticks(yticks * mean)
                ax.yaxis.set_ticklabels([str(int(2 * t)) for t in yticks])
                ax.set_ylabel("RD [CN]")
                ax.set_ylim([self.rd_manhattan_range[0] * mean, self.rd_manhattan_range[1] * mean])
                ax.grid()
                self.fig.add_subplot(ax)

            elif panel == "snp":
                draw_baf_panel(ax, io, is_last)

            elif panel == "snv" or panel[:4] == "snv:":
                callset = "default"
                if panel[:4] == "snv:":
                    callset = panel.split(":")[1]
                draw_baf_panel(ax, io, is_last, callset=callset)

            elif panel == "likelihood":
                start = 0
                xticks = [0]
                xticks_minor = []
                xticks_labels = []
                gl = []
                for c, l in chroms:
                    lh = list(io.get_signal(c, bin_size, "SNP likelihood", snp_flag))
                    # Pad each chromosome to its full width in bins by repeating the last
                    # available likelihood row, so image columns match the x axis layout.
                    missing = (l // bin_size + 1) - len(lh)
                    if missing > 0:
                        if len(lh) > 0:
                            lh.extend([lh[-1]] * missing)
                        elif len(gl) > 0:
                            lh.extend([gl[-1]] * missing)

                    gl.extend(lh)
                    xticks_minor.append(start + l // bin_size // 2)
                    xticks_labels.append(Genome.canonical_chrom_name(c))
                    start += l // bin_size + 1
                    xticks.append(start)

                # Nothing to draw (and nothing to index) when no likelihood rows were collected.
                if len(gl) > 0:
                    img = np.array(gl).transpose()
                    img[0, :] = 0
                    img[-1, :] = 0
                    ax.imshow(img, aspect='auto')
                    ax.yaxis.set_ticks(
                        [0, img.shape[0] / 4, img.shape[0] / 2, 3 * img.shape[0] / 4, img.shape[0] - 1],
                        minor=False)
                    ax.yaxis.set_ticklabels(["1", "3/4", "1/2", "1/4", "0"])
                ax.set_ylabel("BAF")
                finish_x_axis(ax, start, xticks, xticks_minor, xticks_labels, is_last, minor=True)
                ax.xaxis.grid()
                self.fig.add_subplot(ax)

    self.fig_show(suffix="global")

def help(self, param)

Source code

def help(self, param):
    """ Prints to stdout help text for a parameter, or a notice if the parameter is unknown.

    Parameters
    ----------
    param : str
        Key looked up in self.param_help.

    """
    known = self.param_help
    message = known[param] if param in known else "\nUnknown parameter !\n"
    print(message)

def likelihood(self)

Source code

def likelihood(self):
    """ Plots SNP likelihood image for each chromosome of the current plot file.

    One panel is created per autosome / sex chromosome of the reference genome for which
    the "SNP likelihood" signal exists at self.bin_size. If self.snp_call is set, calls of
    callers "baf_mosaic" and "combined_mosaic" (when listed in self.callers) are overlaid
    as two rows of markers per called segment.

    Requires reference genome: logs a warning and returns without plotting if it is missing.

    """
    bin_size = self.bin_size
    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
        FLAG_USEHAP if self.snp_use_phase else 0)
    if self.reference_genome is None:
        _logger.warning("Missing reference genome required for likelihood.")
        return
    io = self.io[self.plot_file]
    chroms = []
    for c, (l, t) in self.reference_genome["chromosomes"].items():
        snp_chr = io.snp_chromosome_name(c)
        if io.signal_exists(snp_chr, bin_size, "SNP likelihood", snp_flag) and (
                Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
            chroms.append(snp_chr)

    def overlay_calls(c, call_signal, segments_signal, color_ix):
        # Draws one caller's segments on the current panel: for every bin of a called
        # segment two markers are placed at the pixel rows returned by likelihood_pixels_pval.
        call_lh = io.get_signal(c, bin_size, call_signal, snp_flag)
        segments = segments_decode(io.get_signal(c, bin_size, segments_signal, snp_flag))
        call_pos = []
        call_i1 = []
        call_i2 = []
        call_c = []
        for s, lh in zip(segments, call_lh):
            i1, i2, p = likelihood_pixels_pval(lh)
            if i1 != i2 and len(s) > self.min_segment_size:
                # Marker opacity grows with -log(p) scaled by self.contrast, capped at 1.
                alpha = min(-np.log(p + 1e-40) / self.contrast, 1)
                color = colors.to_rgb(self.lh_colors[color_ix]) + (alpha,)
                lower, upper = min(i1, i2), max(i1, i2)
                for pos in s:
                    call_pos.append(pos)
                    call_i1.append(lower)
                    call_i2.append(upper)
                    call_c.append(color)
        for rows in (call_i1, call_i2):
            plt.scatter(call_pos, rows, s=self.lh_markersize, color=np.array(call_c), edgecolors='face',
                        marker=self.lh_marker)

    self.new_figure(panel_count=len(chroms))
    for c in chroms:
        lh_signal = io.get_signal(c, bin_size, "SNP likelihood", snp_flag)
        img = np.array(lh_signal).transpose()
        ax = self.next_panel()
        ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                     color='C0')
        ax.imshow(img, aspect='auto')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.xaxis.set_ticks(np.arange(0, lh_signal.shape[0], 50), minor=False)
        ax.set_xlim([0, lh_signal.shape[0]])
        if self.snp_call and ("baf_mosaic" in self.callers):
            overlay_calls(c, "SNP likelihood call", "SNP likelihood segments", 0)
        if self.snp_call and ("combined_mosaic" in self.callers):
            overlay_calls(c, "SNP likelihood call 2d", "SNP likelihood segments 2d", 1)
    self.fig_show(suffix="likelihood")

def manhattan(self, plot_type='rd')

Source code

def manhattan(self, plot_type="rd"):
    """ Plots manhattan style genome-wide plot, one panel per plot file.

    Chromosomes are concatenated along the x axis in units of bins (self.bin_size). Only
    autosomes and sex chromosomes of the reference genome are used, optionally restricted
    to the names listed in self.chrom.

    Requires reference genome: logs a warning and returns without plotting if it is missing.

    Parameters
    ----------
    plot_type : str
        "rd" - GC corrected RD signal, with caller results overlaid if self.rd_manhattan_call is set,
        "baf_mosaic" - BAF level of segments stored in "SNP likelihood call" signal,
        "rd_mean_shift" - copy number level of calls stored in "calls" signal,
        "combined_mosaic" - level of calls stored in "calls combined" signal.
        For any other value panels are created but left empty.

    """
    bin_size = self.bin_size
    if self.reference_genome is None:
        _logger.warning("Missing reference genome required for manhattan.")
        return
    n = len(self.plot_files)

    def selected(name, c):
        # True if there is no chromosome filter or the file-specific / canonical name is listed.
        return len(self.chrom) == 0 or (name in self.chrom) or (c in self.chrom)

    def call_rgba(call_type, alpha):
        # Green for type 1, red for type -1, blue for anything else.
        if call_type == 1:
            return (0, 1, 0, alpha)
        if call_type == -1:
            return (1, 0, 0, alpha)
        return (0, 0, 1, alpha)

    def finish_axes(ax, yticks, ylim, xticks, n_bins):
        # Unlabeled ticks: y ticks at given levels, x ticks at chromosome boundaries.
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.yaxis.set_ticks(yticks, minor=False)
        ax.xaxis.set_ticks(xticks, minor=False)
        ax.set_ylim(ylim)
        ax.set_xlim([0, n_bins])
        ax.grid()

    self.new_figure(panel_count=n, grid=(1, n), panel_size=(24, 2))
    for file_ix in self.plot_files:
        ax = self.next_panel()
        io = self.io[file_ix]
        ax.set_title(self.file_title(file_ix), position=(0.01, 1.01),
                     fontdict={'verticalalignment': 'bottom', 'horizontalalignment': 'left'})

        if plot_type == "rd":
            chroms = []
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                rd_chr = io.rd_chromosome_name(c)
                if selected(rd_chr, c):
                    if io.signal_exists(rd_chr, bin_size, "RD", 0) and \
                            io.signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and \
                            (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                        chroms.append((rd_chr, l))

            apos = 0
            xticks = [0]

            max_m, _ = io.rd_normal_level(bin_size, FLAG_GC_CORR)
            flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0)
            flag_corr = flag_rd | FLAG_GC_CORR
            for c, l in chroms:
                his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                his_p_corr = io.get_signal(c, bin_size, "RD", flag_corr)
                if self.rd_manhattan_call:
                    his_p_call = io.get_signal(c, bin_size, "RD call", flag_corr)
                    his_p_mosaic_seg = io.get_signal(c, bin_size, "RD mosaic segments", flag_corr)
                    his_p_mosaic_seg = segments_decode(his_p_mosaic_seg)
                    his_p_mosaic_call = io.get_signal(c, bin_size, "RD mosaic call", flag_corr)
                    his_p_mosaic_seg_2d = io.get_signal(c, bin_size, "RD mosaic segments 2d", flag_corr)
                    his_p_mosaic_seg_2d = segments_decode(his_p_mosaic_seg_2d)
                    his_p_mosaic_call_2d = io.get_signal(c, bin_size, "RD mosaic call 2d", flag_corr)
                    # Per-bin level arrays, NaN everywhere except inside called segments.
                    his_p_mosaic = np.zeros_like(his_p) * np.nan
                    if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and (
                            "rd_mosaic" in self.callers):
                        for seg, lev in zip(list(his_p_mosaic_seg), list(his_p_mosaic_call[0])):
                            for segi in seg:
                                his_p_mosaic[segi] = lev
                    his_p_mosaic_2d = np.zeros_like(his_p) * np.nan
                    if his_p_mosaic_call_2d is not None and len(
                            his_p_mosaic_call_2d) > 0 and ("combined_mosaic" in self.callers):
                        for seg, lev in zip(list(his_p_mosaic_seg_2d), list(his_p_mosaic_call_2d[0])):
                            for segi in seg:
                                his_p_mosaic_2d[segi] = lev
                pos = range(apos, apos + len(his_p))
                ax.text(apos + len(his_p) // 2, max_m // 10, Genome.canonical_chrom_name(c),
                        fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                if self.markersize == "auto":
                    plt.plot(pos, his_p_corr, ls='', marker='.')
                else:
                    plt.plot(pos, his_p_corr, ls='', marker='.', markersize=self.markersize)
                if self.rd_manhattan_call:
                    if his_p_call is not None and len(his_p_call) > 0 and ("rd_mean_shift" in self.callers):
                        plt.step(pos, his_p_call, "r")
                    if his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and (
                            "rd_mosaic" in self.callers):
                        plt.plot(pos, his_p_mosaic, "k")
                    if his_p_mosaic_call_2d is not None and len(
                            his_p_mosaic_call_2d) > 0 and ("combined_mosaic" in self.callers):
                        plt.plot(pos, his_p_mosaic_2d, "k")
                apos += len(his_p)
                xticks.append(apos)
            finish_axes(ax, np.arange(0, 15, 0.5) * max_m,
                        [self.rd_manhattan_range[0] * max_m, self.rd_manhattan_range[1] * max_m],
                        xticks, apos)

        elif plot_type == "baf_mosaic":
            chroms = []
            snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                FLAG_USEHAP if self.snp_use_phase else 0)
            for c, (l, t) in self.reference_genome["chromosomes"].items():
                snp_chr = io.snp_chromosome_name(c)
                if selected(snp_chr, c):
                    if io.signal_exists(snp_chr, bin_size, "SNP likelihood call", snp_flag) and \
                            io.signal_exists(snp_chr, bin_size, "SNP likelihood segments", snp_flag) and \
                            (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                        chroms.append((snp_chr, l))

            apos = 0
            xticks = [0]

            # Chromosomes are colored by cycling through the default matplotlib color cycle.
            cmap = list(map(colors.to_rgba, plt.rcParams['axes.prop_cycle'].by_key()['color']))
            for cix, (c, l) in enumerate(chroms):
                likelihood = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
                segments = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))
                call_pos = []
                call_baf = []
                call_c = []
                for s, lh in zip(segments, likelihood):
                    b, p = likelihood_baf_pval(lh)
                    if b > 0 and len(s) > self.min_segment_size:
                        # Marker opacity grows with -log(p) scaled by self.contrast, capped at 1.
                        alpha = -np.log(p + 1e-40) / self.contrast
                        if alpha > 1:
                            alpha = 1
                        base = cmap[cix % len(cmap)]
                        color = (base[0], base[1], base[2], alpha)
                        for pos in s:
                            call_pos.append(apos + pos)
                            call_baf.append(b)
                            call_c.append(color)

                ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                        fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                plt.scatter(call_pos, call_baf, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                apos += l // bin_size
                xticks.append(apos)

            finish_axes(ax, np.arange(0, 0.5, 0.1), [0, 0.5], xticks, apos)

        elif plot_type == "rd_mean_shift":
            chroms = []
            flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

            for c, (l, t) in self.reference_genome["chromosomes"].items():
                rd_chr = io.rd_chromosome_name(c)
                # The None check has to guard the whole filter ("and" binds tighter than "or"),
                # otherwise a missing chromosome slips through when c is listed in self.chrom.
                if rd_chr is not None and selected(rd_chr, c):
                    if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                        chroms.append((rd_chr, l))

            apos = 0
            xticks = [0]

            for c, l in chroms:
                call_pos = []
                call_conc = []
                call_c = []
                # Chromosomes without stored calls are skipped and take no space on x axis.
                if io.signal_exists(c, bin_size, "calls", flag):
                    calls = io.read_calls(c, bin_size, "calls", flag)

                    for call in calls:
                        if in_interval(call["size"], self.size_range) and in_interval(call["p_val"], self.p_range) \
                                and in_interval(call["pN"], self.pN_range) \
                                and in_interval(call["Q0"], self.Q0_range):
                            alpha = - np.log(call["p_val"] + 1e-40) / self.contrast
                            if alpha > 1:
                                alpha = 1
                            if alpha < 0:
                                alpha = 0
                            # Plotted level is twice the stored cnv value, capped at 4.
                            level = call["cnv"] * 2
                            if level > 4:
                                level = 4
                            rgba = call_rgba(call["type"], alpha)
                            for pos in range(int(call["start"]) // bin_size, int(call["end"]) // bin_size + 1):
                                call_pos.append(apos + pos)
                                call_conc.append(level)
                                call_c.append(rgba)
                    ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                            fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                    plt.scatter(call_pos, call_conc, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                    apos += l // bin_size
                    xticks.append(apos)

            finish_axes(ax, np.arange(0, 4.0, 1.0), [0, 4.0], xticks, apos)

        elif plot_type == "combined_mosaic":
            chroms = []
            flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
                FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

            for c, (l, t) in self.reference_genome["chromosomes"].items():
                snp_chr = io.snp_chromosome_name(c)
                # Same precedence fix as for "rd_mean_shift": None check guards the whole filter.
                if snp_chr is not None and selected(snp_chr, c):
                    if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                        chroms.append((snp_chr, l))

            apos = 0
            xticks = [0]

            for c, l in chroms:
                call_pos = []
                call_conc = []
                call_c = []
                # Chromosomes without stored calls are skipped and take no space on x axis.
                if io.signal_exists(c, bin_size, "calls combined", flag):
                    calls = io.read_calls(c, bin_size, "calls combined", flag)

                    for call in calls:
                        if call["bins"] > self.min_segment_size:
                            alpha = -np.log(call["p_val"] + 1e-40) / self.contrast
                            if alpha > 1:
                                alpha = 1
                            rgba = call_rgba(call["type"], alpha)
                            for pos in range(int(call["start"]) // bin_size, int(call["end"]) // bin_size + 1):
                                call_pos.append(apos + pos)
                                # NOTE(review): element 4 of the first model is plotted on 0-1 scale,
                                # presumably cell fraction - confirm against the caller output format.
                                call_conc.append(call["models"][0][4])
                                call_c.append(rgba)

                    ax.text(apos + l // bin_size // 2, 0.4, Genome.canonical_chrom_name(c),
                            fontsize=8, verticalalignment='bottom', horizontalalignment='center', )
                    plt.scatter(call_pos, call_conc, s=20, color=np.array(call_c), edgecolors='face', marker='|')
                    apos += l // bin_size
                    xticks.append(apos)

            finish_axes(ax, np.arange(0, 1.0, 0.1), [0, 1.0], xticks, apos)

    self.fig_show(suffix="manhattan" if plot_type == "rd" else "snp_calls")

def multiple_regions(self, regions)

Source code

def multiple_regions(self, regions):
    """ Plots all regions for all plot files in a single figure.

    Figure is created with one panel per (plot file, region) pair, panels are filled file by
    file by calling self.regions, and the figure is shown with suffix "regions".

    Parameters
    ----------
    regions : list of str
        Region descriptions; each one is passed unchanged to self.regions.

    """
    file_ids = self.plot_files
    self.new_figure(panel_count=len(file_ids) * len(regions))
    for file_id in file_ids:
        for region in regions:
            self.regions(file_id, region)
    self.fig_show(suffix="regions")

def parse(self, command)

Source code

def parse(self, command):
    """ Parses list of plot command tokens and executes the requested plots.

    Tokens are processed in order. A plot name ("rd", "baf", "manhattan", "calls", "stat",
    "regions", "likelihood", "circular") selects the current plot type; the initial type is
    "regions". A token made of digits that is a multiple of 100 is taken as bin size: it is
    stored in self.bin_size and the current plot type is executed. "rdstat" and "snp" are
    executed immediately. While the current type is "regions" any other token is collected
    as a region; otherwise it replaces the current type.

    Parameters
    ----------
    command : list of str
        Command tokens.

    """
    plot_names = ("rd", "baf", "manhattan", "calls", "stat", "regions", "likelihood", "circular")
    # Plot types executed by a method call without arguments: plot type -> method name.
    simple_plots = {"rd": "rd", "baf": "baf", "likelihood": "likelihood",
                    "manhattan": "global_plot", "circular": "circular"}
    mode = "regions"
    pending = []

    for token in command:
        if not (token.isdigit() and (int(token) % 100) == 0):
            if token == "rdstat":
                self.stat()
            elif token == "snp":
                self.snp()
            elif token in plot_names or mode != "regions":
                mode = token
            else:
                pending.append(token)
            continue

        # Bin size token: store it and run the plot type selected so far.
        self.bin_size = int(token)
        if mode in simple_plots:
            getattr(self, simple_plots[mode])()
        elif mode == "calls":
            if len(self.callers) > 0:
                self.manhattan(plot_type=self.callers[0])
        elif mode == "stat":
            self.stat(int(token))
        elif mode == "regions":
            self.multiple_regions(pending)
            pending = []

def phased_baf(self, regions, callset=None, print=False)

Source code

def phased_baf(self, regions, callset=None, print=False):
    """ Calculates B-allele frequency for regions using SNPs with genotype code 5 or 6.

    Read counts are summed over all SNPs of the current plot file that fall in a region and
    have at least one read. For genotype 5 alternative allele counts are added to the "alt"
    total, for genotype 6 reference and alternative counts are swapped.
    NOTE(review): genotype codes 5 and 6 are presumably phased heterozygous 0|1 and 1|0, and
    flag bit 2 presumably marks SNPs in P-region of strict mask - confirm against io.read_snp.

    Parameters
    ----------
    regions : str
        Space separated list of regions; each item is decoded by decode_region.
    callset : str or None
        Passed to read_snp as callset.
    print : bool
        If True, line "region<TAB>baf<TAB>bafP" is written to stdout for each region.

    Returns
    -------
    ret : list of [float, float]
        For each region [baf, bafP]: alt / (ref + alt) over all used SNPs and over SNPs
        with flag bit 2 set. Value is nan if the corresponding total read count is zero.

    """
    ret = []
    for region in regions.split(" "):
        talt = 0
        tref = 0
        taltP = 0
        trefP = 0
        for c, (pos1, pos2) in decode_region(region):
            pos, ref, alt, nref, nalt, gt, flag, qual = self.io[self.plot_file].read_snp(c, callset=callset)
            k = 0
            # Scan stops at the first position beyond the region end.
            while k < len(pos) and pos[k] <= pos2:
                if pos[k] >= pos1 and (nref[k] + nalt[k]) != 0:
                    if gt[k] == 5:
                        talt += nalt[k]
                        tref += nref[k]
                        if flag[k] & 2:
                            taltP += nalt[k]
                            trefP += nref[k]
                    elif gt[k] == 6:
                        tref += nalt[k]
                        talt += nref[k]
                        if flag[k] & 2:
                            trefP += nalt[k]
                            taltP += nref[k]
                k += 1
        # Regions without usable SNPs give nan instead of a division by zero.
        total = tref + talt
        totalP = trefP + taltP
        baf = talt / total if total != 0 else float("nan")
        bafP = taltP / totalP if totalP != 0 else float("nan")
        ret.append([baf, bafP])
        if print:
            # Parameter "print" shadows the builtin function, so write to stdout directly.
            sys.stdout.write("%s\t%f\t%f\n" % (region, baf, bafP))
    return ret

def plot_command(self, command)

Source code

def plot_command(self, command):
    """ Executes plot command in non-interactive mode.

    Parameters
    ----------
    command : list of str
        Command tokens, passed unchanged to parse.

    """
    # Interactive flag is cleared before any plot is produced by parse.
    self.interactive = False
    self.parse(command)

def print_calls(self)

Source code

def print_calls(self):
    """ Prints calls of all plot files to stdout, one tab separated line per call.

    For each caller in self.callers ("rd_mean_shift" and "combined_mosaic" are handled)
    and each plot file, calls are read for the current bin size and filtered by the
    size, p-value, pN and Q0 ranges (and by the dG range for "rd_mean_shift").
    A line is prefixed by the file title if there is more than one plot file and by
    the caller name if there is more than one caller. If self.annotate is set,
    annotation of the region is appended. If self.plot is set, each printed call is
    also plotted together with its flanking regions.

    """
    bin_size = self.bin_size
    n = len(self.plot_files)
    ix = self.plot_files
    if self.annotate:
        annotator = Annotator(self.reference_genome)

    def print_prefix(i, caller):
        # Optional leading columns: file title and caller name.
        if n > 1:
            # The title is looked up by the file index (ix[i]), the same index
            # that is used to read the calls.
            print("%s\t" % self.file_title(ix[i]), end="")
        if len(self.callers) > 1:
            print("%s\t" % caller, end="")

    def plot_call(c, call):
        # Plots the call extended by its own size on both sides (not below 0).
        plot_start = call["start"] - call["size"]
        if plot_start < 0:
            plot_start = 0
        plot_end = call["end"] + call["size"]
        self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])

    for caller in self.callers:
        if caller == "rd_mean_shift":
            for i in range(n):
                io = self.io[ix[i]]
                for c in io.rd_chromosomes():
                    if len(self.chrom) != 0 and c not in self.chrom:
                        continue
                    flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
                    if not io.signal_exists(c, bin_size, "calls", flag):
                        continue
                    for call in io.read_calls(c, bin_size, "calls", flag):
                        if not (in_interval(call["size"], self.size_range)
                                and in_interval(call["p_val"], self.p_range)
                                and in_interval(call["pN"], self.pN_range)
                                and in_interval(call["Q0"], self.Q0_range)
                                and in_interval(call["dG"], self.dG_range)):
                            continue
                        call_type = "duplication" if call["type"] == 1 else "deletion"
                        print_prefix(i, caller)
                        print("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e\t%.4f\t%.4f\t%d\t" % (
                            call_type, c, call["start"], call["end"], call["size"], call["cnv"],
                            call["p_val"],
                            call["p_val_2"], call["p_val_3"], call["p_val_4"], call["Q0"], call["pN"],
                            call["dG"]), end="")
                        if self.annotate:
                            print("\t%s" % annotator.get_info(
                                "%s:%d-%d" % (c, call["start"], call["end"])))
                        else:
                            print()
                        if self.plot:
                            plot_call(c, call)
        elif caller == "combined_mosaic":
            for i in range(n):
                io = self.io[ix[i]]
                for c in io.rd_chromosomes():
                    if len(self.chrom) != 0 and c not in self.chrom:
                        continue
                    flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR | \
                           (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0)
                    if not io.signal_exists(c, bin_size, "calls combined", flag):
                        continue
                    for call in io.read_calls(c, bin_size, "calls combined", flag):
                        if not (in_interval(call["size"], self.size_range)
                                and in_interval(call["p_val"], self.p_range)
                                and in_interval(call["pN"], self.pN_range)
                                and in_interval(call["Q0"], self.Q0_range)):
                            continue
                        print_prefix(i, caller)
                        keys = ["start", "end", "size", "cnv", "p_val", "lh_del", "lh_loh",
                                "lh_dup", "Q0", "pN", "pNS", "pP", "bins", "baf",
                                "rd_p_val", "baf_p_val", "segment", "hets", "homs"]
                        call_type = {-1: "deletion", 0: "cnnloh", 1: "duplication"}[call["type"]]
                        data = [call_type, c] + [call[k] for k in keys]
                        # Values of the first two models are appended to the same row.
                        for m in range(2):
                            data += call["models"][m]

                        print(("%s\t%s:%d-%d\t%d\t%.4f\t%e\t%e\t%e\t%e" + \
                               "\t%.4f\t%.4f\t%.4f\t%.4f\t" + "%d\t%d\t%.4f\t%e\t%e\t%d\t%d\t%d\t" + \
                               "CN%d/CN%d\t%e\t%.4f\t%d\tCN%d/CN%d\t%e\t%.4f") % tuple(data), end="")
                        if self.annotate:
                            print("\t%s" % annotator.get_info("%s:%d-%d" % (data[1], data[2], data[3])))
                        else:
                            print()
                        if self.plot:
                            plot_call(c, call)

def print_calls_file(self)

Source code

    def print_calls_file(self):
        format = self.print_filename.split(".")[-1]
        calls = self.get_calls()
        if self.print_filename == "":
            for call in calls:
                print(*call, sep="\t")
        elif format == "tsv":
            with open(self.print_filename, 'w') as f:
                for call in calls:
                    print(*call, sep="\t", file=f)
        elif format == "xlsx":
            import xlsxwriter
            workbook = xlsxwriter.Workbook(self.print_filename)
            files_callers = []
            sheets = {}
            rix = {}
            for call in calls:
                caller = call[1]
                fc = call[0] + " (" + caller + ")"
                sfc = call[0][:25] + " " + ({"rd_mean_shift": "ms", "combined_mosaic": "2d"}[caller])
                if fc not in files_callers:
                    sheets[fc] = workbook.add_worksheet(sfc)
                    rix[fc] = 0
                    files_callers.append(fc)
            for call in calls:
                caller = call[1]
                fc = call[0] + " (" + caller + ")"
                cix = 0
                for f in call[2:]:
                    sheets[fc].write(rix[fc], cix, f)
                    cix += 1
                rix[fc] += 1
            workbook.close()
        elif format == "vcf":
            samples = []
            for call in calls:
                sample = call[0]
                if sample not in samples:
                    samples.append(sample)
            header = """##fileformat=VCFv4.1
##fileDate={date}
##reference={rg}
##source=CNVpytor
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=pytorRD,Number=1,Type=Float,Description="Normalized RD">
##INFO=<ID=pytorP1,Number=1,Type=Float,Description="e-val by t-test">
##INFO=<ID=pytorP2,Number=1,Type=Float,Description="e-val by Gaussian tail">
##INFO=<ID=pytorP3,Number=1,Type=Float,Description="e-val by t-test (middle)">
##INFO=<ID=pytorP4,Number=1,Type=Float,Description="e-val by Gaussian tail (middle)">
##INFO=<ID=pytorQ0,Number=1,Type=Float,Description="Fraction of reads with 0 mapping quality">
##INFO=<ID=pytorPN,Number=1,Type=Integer,Description="Fraction of N bases">
##INFO=<ID=pytorDG,Number=1,Type=Integer,Description="Distance to nearest gap in reference genome">
##INFO=<ID=pytorCL,Number=1,Type=Integer,Description="Caller method">
##INFO=<ID=SAMPLES,Number=.,Type=String,Description="Sample genotyped to have the variant">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=LOH,Description="Copy number neutral loss of heterozygosity">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">;
##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{samples}"""
            if self.reference_genome:
                rg = self.reference_genome["name"]
            else:
                rg = "unknown"
            header = header.format(date=datetime.date.today().strftime("%Y-%m-%d"), rg=rg, samples="\t".join(samples))
            ii = 0
            with open(self.print_filename, 'w') as f:
                print(header, file=f)
                for call in calls:
                    ii += 1
                    id = "CNVpytor_" + {"deletion": "del", "duplication": "dup", "cnnloh": "loh"}[call[2]] + str(ii)
                    alt = {"deletion": "<DEL>", "duplication": "<DUP>", "cnnloh": "<LOH>"}[call[2]]
                    info = "END=" + str(int(call[5])) + ";IMPRECISE;SVLEN=" + str(int(call[6])) + ";SVTYPE=" + alt[1:4]
                    info += ";pytorRD=" + str(call[7])
                    info += ";pytorP1=" + str(call[8])
                    info += ";pytorP2=" + str(call[9])
                    info += ";pytorP3=" + str(call[10])
                    info += ";pytorP4=" + str(call[11])
                    info += ";pytorQ0=" + str(call[12])
                    info += ";pytorPN=" + str(call[13])
                    info += ";pytorDG=" + str(call[14])
                    info += ";pytorCL=" + call[1]
                    format = "GT:CN"
                    row = [call[3], int(call[4]), id, ".", alt, ".", "PASS", info, format]
                    for sample in samples:
                        if sample == call[0]:
                            if call[2] == "deletion" and call[7] < 0.25:
                                row.append("1/1:0")
                            elif call[2] == "deletion" and call[7] > 0.25:
                                row.append("0/1:0")
                            elif call[2] == "duplication" and call[7] <= 1.75:
                                row.append("0/1:2")
                            elif call[2] == "duplication" and call[7] > 1.75 and call[7] <= 2.25:
                                row.append("1/1:2")
                            elif call[2] == "duplication" and call[7] > 2.25:
                                row.append("./1:%.2f" % call[7])
                            else:
                                row.append("./.:.")
                        else:
                            row.append("./.:.")
                    print(*row, sep="\t", file=f)
        if self.plot:
            for call in calls:
                plot_start = call[4] - call[6]
                if plot_start < 0:
                    plot_start = 0
                plot_end = call[5] + call[6]
                self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])

def print_simple_joint_calls(self)

Source code

def print_simple_joint_calls(self):
    """ Merges "calls" of all plot files into joint calls and prints them.

    For each chromosome, calls of every file are filtered by the size, p-value, pN, Q0
    and dG ranges. Then, repeatedly, the call with the smallest start among the current
    calls of all files is taken as the reference, and the current call of each file
    is merged with it if their overlap is larger than half of the reference size
    and larger than half of its own length. The joint region spans from the largest
    start to the smallest end of merged calls and it is genotyped in every file.

    Output depends on the extension of self.print_filename:
        empty filename - tab separated lines on stdout
        tsv - tab separated file
        xlsx - formatted sheet "merged_calls" (requires xlsxwriter)
    Each row contains type, region, size and copy number per file; text outputs also
    contain the list of file indices where the call was found. Annotation is added if
    self.annotate is set. If self.plot is set, each joint region is plotted together
    with its flanking regions. Nothing is done if there are no plot files.

    """
    bin_size = self.bin_size
    n = len(self.plot_files)
    if n == 0:
        return
    ix = self.plot_files
    out_format = self.print_filename.split(".")[-1]
    f = None
    if out_format == "tsv":
        f = open(self.print_filename, 'w')
    elif out_format == "xlsx":
        import xlsxwriter
        if os.path.exists(self.print_filename):
            os.remove(self.print_filename)
        workbook = xlsxwriter.Workbook(self.print_filename)
        sheet = workbook.add_worksheet("merged_calls")
        header = ["TYPE", "REGION", "SIZE"]
        for i in range(n):
            header.append(self.file_title(ix[i]))
        if self.annotate:
            header.append("GENES")
        styleh = workbook.add_format({'bold': True, 'font_color': 'white'})
        styleh.set_pattern(1)  # This is optional when using a solid fill.
        styleh.set_bg_color('#555555')
        styleh2 = workbook.add_format({'bold': True, 'font_color': 'white'})
        styleh2.set_pattern(1)  # This is optional when using a solid fill.
        styleh2.set_bg_color('#555555')
        styleh2.set_rotation(75)
        style_r = workbook.add_format()
        style_r.set_pattern(1)  # This is optional when using a solid fill.
        style_r.set_bg_color('red')
        style_g = workbook.add_format()
        style_g.set_pattern(1)  # This is optional when using a solid fill.
        style_g.set_bg_color('green')
        style_size = workbook.add_format({'num_format': '#,##0'})
        style_cn = workbook.add_format({'num_format': '0'})
        style_cn_b = workbook.add_format({'num_format': '0', 'bold': True})
        sheet.set_column(0, 0, 10)
        sheet.set_column(1, 1, 22)
        sheet.set_column(2, 2, 10)
        if self.annotate:
            sheet.set_column(len(header) - 1, len(header) - 1, 100)

        # Per file (copy number) columns get the rotated header style.
        for col, val in enumerate(header):
            if col > 2 and col < len(header) - int(self.annotate):
                sheet.write(0, col, val, styleh2)
            else:
                sheet.write(0, col, val, styleh)
        ri = 0
    try:
        if self.annotate:
            annotator = Annotator(self.reference_genome)
        chroms = self.io[ix[0]].rd_chromosomes()
        for c in chroms:
            if len(self.chrom) != 0 and c not in self.chrom:
                continue
            flag = (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR
            calls = [[call for call in self.io[ix[i]].read_calls(c, bin_size, "calls", flag)
                      if in_interval(call["size"], self.size_range)
                      and in_interval(call["p_val"], self.p_range)
                      and in_interval(call["pN"], self.pN_range)
                      and in_interval(call["Q0"], self.Q0_range)
                      and in_interval(call["dG"], self.dG_range)] for i in range(n)]
            # pointers[i] is the index of the first not yet merged call of file i.
            pointers = [0] * n
            while any(pointers[i] < len(calls[i]) for i in range(n)):
                starts = [calls[i][pointers[i]]["start"] if pointers[i] < len(calls[i]) else np.inf
                          for i in range(n)]
                mini = starts.index(min(starts))
                first = calls[mini][pointers[mini]]
                minend = first["end"]
                maxstart = 0
                files = []
                types = []
                for i in range(n):
                    if pointers[i] >= len(calls[i]):
                        continue
                    call = calls[i][pointers[i]]
                    overlap = min(call["end"], first["end"]) - call["start"]
                    if overlap > 0.5 * first["size"] and overlap > 0.5 * (call["end"] - call["start"]):
                        files.append(i)
                        if call["end"] < minend:
                            minend = call["end"]
                        if call["start"] > maxstart:
                            maxstart = call["start"]
                        types.append("duplication" if call["type"] == 1 else "deletion")
                # Joint type is the most frequent type among merged calls.
                joint_type = max(set(types), key=types.count)
                region = "%s:%d-%d" % (c, maxstart, minend)
                data = [joint_type, c, maxstart, minend, minend - maxstart + 1]
                genotypes = [self.genotype([bin_size], region, file_index=ix[i], p_val=True)[0]
                             for i in range(n)]
                # Loop variable is not named c: on Python 2 a list comprehension variable
                # leaks and would overwrite the chromosome name.
                copynumbers = [g[3] for g in genotypes]
                if f is not None or self.print_filename == "":
                    # file=None prints to stdout.
                    print(("%s\t%s:%d-%d\t%d" + n * "\t%.2f") % tuple(data + copynumbers), end="", file=f)
                    print("\t%s" % str(files), end="", file=f)
                    if self.annotate:
                        print("\t%s" % annotator.get_info(region), file=f)
                    else:
                        print(file=f)
                elif out_format == "xlsx":
                    ri += 1
                    if joint_type == "deletion":
                        sheet.write(ri, 0, data[0], style_r)
                    else:
                        sheet.write(ri, 0, data[0], style_g)
                    sheet.write(ri, 1, region)
                    sheet.write(ri, 2, data[4], style_size)
                    # Copy numbers of files where the call was found are written in bold.
                    for col, val in enumerate(copynumbers):
                        if col in files:
                            sheet.write(ri, 3 + col, val, style_cn_b)
                        else:
                            sheet.write(ri, 3 + col, val, style_cn)
                    if self.annotate:
                        sheet.write(ri, 3 + len(copynumbers), annotator.get_info(region))

                if self.plot:
                    # Plot joint region extended by its own length on both sides (not below 0).
                    plot_start = maxstart - (minend - maxstart)
                    if plot_start < 0:
                        plot_start = 0
                    plot_end = minend + (minend - maxstart)
                    self.multiple_regions(["%s:%d-%d" % (c, plot_start, plot_end)])
                # Only files whose current call was merged move to their next call.
                for i in files:
                    pointers[i] += 1
    finally:
        # The tsv file is closed even if reading or genotyping fails.
        if f is not None:
            f.close()
    if out_format == "xlsx":
        # Copy number columns are 3 .. 3 + n - 1 (last_col is inclusive).
        sheet.conditional_format(1, 3, ri, 3 + n - 1, {'type': '3_color_scale',
                                                       'min_color': "#FF0000",
                                                       'mid_color': "#FFFFFF",
                                                       'max_color': "#00FF00",
                                                       'min_type': 'num',
                                                       'min_value': 0,
                                                       'mid_type': 'num',
                                                       'mid_value': 2,
                                                       'max_type': 'num',
                                                       'max_value': 4
                                                       })
        workbook.close()

def prompt(self)

Source code

def prompt(self):
    """ Starts the command prompt and processes commands until quit/exit or end of input.

    Chromosome names from all files and available matplotlib styles are added to
    the command tree used for tab completion. History is read from and, if
    self.save_history is set, written to the file "history" in self.cnvpytor_dir.
    If stdin is not a terminal, no prompt string is shown and the viewer is switched
    to non-interactive mode.

    Empty lines and lines starting with "#" are skipped. A line starting with "|"
    is executed as Python code. Text after ">" in a plotting command is used as
    the name of the file where the figure is saved.

    """
    self.interactive = True

    chromosomes = set()
    for io in self.io:
        chromosomes.update(io.rd_chromosomes())
        chromosomes.update(io.snp_chromosomes())
    for c in chromosomes:
        self.command_tree[c] = None
    self.command_tree["set"]["style"] = dict(zip(plt.style.available, [None] * len(plt.style.available)))
    history_file = self.cnvpytor_dir+"/history"
    if os.path.exists(history_file):
        readline.read_history_file(history_file)

    readline.parse_and_bind("tab: complete")
    completer = PromptCompleter(self.command_tree)
    readline.set_completer(completer.complete)
    done = False
    try:
        while not done:
            prompt_str = ""
            if os.isatty(sys.stdin.fileno()):
                prompt_str = "cnvpytor> "
            else:
                self.interactive = False
            # raw_input exists only in Python 2.
            try:
                line = raw_input(prompt_str)
            except NameError:
                line = input(prompt_str)

            # Blank lines are tested first: indexing an empty line would raise IndexError.
            if line.strip() == "" or line[0] == "#":
                continue

            if self.save_history and self.interactive:
                readline.set_history_length(self.history_file_size)
                readline.write_history_file(history_file)

            pre = line.split(">")
            f = pre[0].strip().split(" ")
            n = len(f)
            if f[0] == "quit" or f[0] == "exit":
                done = True
            elif line[0] == "|":
                # NOTE: executes arbitrary Python code typed by the user.
                try:
                    eval(compile(line[1:], '<string>', 'single'))
                except Exception as e:
                    print(traceback.format_exc())
            elif f[0] == "save":
                if n > 1:
                    try:
                        plt.savefig(f[1])
                    except ValueError:
                        _logger.warning("File extension should be: .jpg, .png, .svg, .eps or .pdf")
                    except Exception:
                        _logger.warning("Figure is not saved due to an error!")

            elif f[0] in ["draw", "repaint", "update"]:
                if n == 1:
                    self.fig.canvas.draw()
            elif f[0] == "ls":
                self.ls()
            elif f[0] == "meta":
                self.meta()
            elif f[0] == "show":
                if n == 1:
                    self.show()
            elif f[0] == "set":
                if n > 1:
                    self.set(f[1], f[2:])
            elif f[0] == "help":
                self.help(f[1] if n > 1 else "help")
            elif f[0] == "unset":
                if n > 1:
                    self.unset(f[1])
            elif f[0] == "genotype":
                if n > 1:
                    self.genotype_all([self.bin_size], f[1:], interactive=True)
            elif f[0] == "snv":
                if n == 2:
                    self.snp(callset=f[1])
                elif n == 1:
                    self.snp(callset="default")
            elif f[0] == "compare":
                if n == 3:
                    self.compare(f[1], f[2], plot=self.plot)
                elif n == 4:
                    self.compare(f[1], f[2], n_bins=int(f[3]), plot=self.plot)
            elif f[0] == "info":
                if n > 1:
                    self.info(list(map(binsize_type, f[1:])))
            elif f[0] == "print":
                # "print" without an argument is ignored, like other incomplete commands.
                if n > 1:
                    if f[1] == "calls":
                        if self.print_filename == "":
                            self.print_calls()
                        else:
                            self.print_calls_file()
                    elif f[1] == "joint_calls":
                        self.print_simple_joint_calls()

            else:
                try:
                    # Current bin size is appended to all commands except rdstat and snp.
                    if f[0] not in ["rdstat", "snp"]:
                        self.parse(f + [str(self.bin_size)])
                    else:
                        self.parse(f)
                    if len(pre) > 1:
                        fns = pre[1].strip().split(" ")
                        if fns[0] != "":
                            plt.savefig(fns[0], dpi=200)
                except Exception as e:
                    print(traceback.format_exc())
    except (EOFError, KeyboardInterrupt):
        print()
        return

def rd(self)

Source code

def rd(self):
    """ Plots read depth signals of the current plot file, one panel per chromosome.

    Panels are created for autosomes and sex chromosomes of the reference genome that
    have both raw and GC corrected RD signal for the current bin size. Depending on
    the viewer parameters, raw RD, GC corrected RD, partition, call and mosaic call
    levels ("rd_mosaic" and "combined_mosaic" callers) are drawn.
    A warning is logged and nothing is plotted if the reference genome is missing.

    """
    bin_size = self.bin_size
    if self.reference_genome is None:
        _logger.warning("Missing reference genome required for gview.")
        return
    panels = []
    for name, (length, _) in self.reference_genome["chromosomes"].items():
        io = self.io[self.plot_file]
        rd_name = io.rd_chromosome_name(name)
        if io.signal_exists(rd_name, bin_size, "RD", 0) and \
                io.signal_exists(rd_name, bin_size, "RD", FLAG_GC_CORR) and \
                (Genome.is_autosome(name) or Genome.is_sex_chrom(name)):
            panels.append((rd_name, length))
    self.new_figure(panel_count=len(panels))
    for chrom, length in panels:
        io = self.io[self.plot_file]
        mask_flag = FLAG_USEMASK if self.rd_use_mask else 0
        corr_flag = mask_flag | FLAG_GC_CORR
        mean, stdev = io.rd_normal_level(bin_size, corr_flag)
        raw = io.get_signal(chrom, bin_size, "RD", mask_flag)
        corrected = io.get_signal(chrom, bin_size, "RD", corr_flag)
        partition = io.get_signal(chrom, bin_size, "RD partition", corr_flag)
        call = io.get_signal(chrom, bin_size, "RD call", corr_flag)
        mosaic_segments = segments_decode(io.get_signal(chrom, bin_size, "RD mosaic segments", corr_flag))
        mosaic_call = io.get_signal(chrom, bin_size, "RD mosaic call", corr_flag)
        mosaic_segments_2d = segments_decode(io.get_signal(chrom, bin_size, "RD mosaic segments 2d", corr_flag))
        mosaic_call_2d = io.get_signal(chrom, bin_size, "RD mosaic call 2d", corr_flag)

        # Mosaic levels start as nan and are filled only for bins that belong to a segment.
        mosaic_level = np.zeros_like(raw) * np.nan
        show_mosaic = mosaic_call is not None and len(mosaic_call) > 0 and self.rd_call and (
                "rd_mosaic" in self.callers)
        if show_mosaic:
            for segment, level in zip(list(mosaic_segments), list(mosaic_call[0])):
                for bin_ix in segment:
                    mosaic_level[bin_ix] = level
        mosaic_level_2d = np.zeros_like(raw) * np.nan
        show_mosaic_2d = mosaic_call_2d is not None and len(mosaic_call_2d) > 0 and self.rd_call and (
                "combined_mosaic" in self.callers)
        if show_mosaic_2d:
            for segment, level in zip(list(mosaic_segments_2d), list(mosaic_call_2d[0])):
                for bin_ix in segment:
                    mosaic_level_2d[bin_ix] = level

        ax = self.next_panel()
        ax.set_title(chrom, position=(0.01, 0.9),
                     fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                     color='C0')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        # One x tick per 10 Mbp.
        ax.xaxis.set_ticks(np.arange(0, (length + 10e6) // bin_size, 10e6 // bin_size), minor=[])
        # rd_range is scaled by mean / 2 on the y axis.
        if (self.rd_range[1] - self.rd_range[0]) < 30:
            ax.yaxis.set_ticks(np.arange(int(self.rd_range[0]), int(self.rd_range[1] + 1), 1) * mean / 2,
                               minor=[])
        ax.set_ylim([self.rd_range[0] * mean / 2, self.rd_range[1] * mean / 2])
        n_bins = length // bin_size
        ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
        ax.grid()
        if self.rd_raw:
            plt.step(raw, self.rd_colors[0])
        if self.rd_corrected:
            plt.step(corrected, self.rd_colors[1])
        if partition is not None and len(partition) > 0 and self.rd_partition:
            plt.step(partition, self.rd_colors[2])
        if call is not None and len(call) > 0 and self.rd_call:
            plt.step(call, self.rd_colors[3])
        if show_mosaic:
            plt.step(mosaic_level, self.rd_colors[4])
        if show_mosaic_2d:
            plt.step(mosaic_level_2d, self.rd_colors[5])
    self.fig_show(suffix="rd")

def rd_baf(self, hist=True)

Source code

def rd_baf(self, hist=True):
    """ Plots RD mosaic call level against BAF-derived value, one panel per plot file.

    For each autosome that has SNP likelihood calls/segments and RD mosaic
    calls/segments, every bin that belongs to both a likelihood segment and an RD
    mosaic segment adds one point: x is 0.5 - b, where b is the first value returned
    by likelihood_baf_pval for the segment, and y is the RD mosaic call level.
    The figure is saved if self.output_filename is set, otherwise it is shown.

    Parameters
    ----------
    hist : bool
        If True, points are shown as 2D histogram with logarithmic color scale,
        otherwise as scatter plot. The histogram is left empty when there are no
        points or the largest RD level is not positive.

    """
    plt.clf()
    plt.rcParams["font.size"] = 8
    self.fig = plt.figure(1, figsize=(12, 8), facecolor='w', edgecolor='k')
    n = len(self.plot_files)
    ix = self.plot_files
    if self.grid == "auto":
        sx, sy = self._panels_shape(n)
    else:
        sx, sy = tuple(self.grid)
    grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)
    bin_size = self.bin_size
    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
        FLAG_USEHAP if self.snp_use_phase else 0)
    rd_flag = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
    for i in range(n):
        ax = self.fig.add_subplot(grid[i])
        io = self.io[ix[i]]

        chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            snp_chr = io.snp_chromosome_name(c)
            if len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom):
                # NOTE(review): RD signals are looked up by the SNP chromosome name -- confirm
                # that it matches the RD chromosome name in all files.
                if io.signal_exists(snp_chr, bin_size, "SNP likelihood call", snp_flag) and \
                        io.signal_exists(snp_chr, bin_size, "SNP likelihood segments", snp_flag) and \
                        io.signal_exists(snp_chr, bin_size, "RD mosaic call", rd_flag) and \
                        io.signal_exists(snp_chr, bin_size, "RD mosaic segments", rd_flag) and \
                        Genome.is_autosome(c):
                    chroms.append((snp_chr, l))
        x = []
        y = []
        for c, l in chroms:
            likelihood = io.get_signal(c, bin_size, "SNP likelihood call", snp_flag)
            segments_baf = segments_decode(io.get_signal(c, bin_size, "SNP likelihood segments", snp_flag))
            rd_call = io.get_signal(c, bin_size, "RD mosaic call", rd_flag)
            segments_rd = segments_decode(io.get_signal(c, bin_size, "RD mosaic segments", rd_flag))

            # Per bin values; all bins of a segment share the value of the segment.
            mbaf = {}
            mrd = {}
            for s, lh in zip(segments_baf, likelihood):
                b, _ = likelihood_baf_pval(lh)
                for pos in s:
                    mbaf[pos] = 0.5 - b
            for s, r in zip(segments_rd, rd_call[0]):
                for pos in s:
                    mrd[pos] = r
            # Only bins present in both segmentations are plotted.
            for pos in mbaf:
                if pos in mrd:
                    x.append(mbaf[pos])
                    y.append(mrd[pos])

        if hist:
            from matplotlib.colors import LogNorm
            # Bin edges on y axis are derived from max(y): without points or with
            # non-positive maximum there is no valid range, so the panel stays empty.
            if len(y) > 0 and max(y) > 0:
                ax.hist2d(x, y, bins=[np.arange(0, 0.51, 0.01), np.arange(0, max(y), max(y) / 100.)],
                          norm=LogNorm())
        else:
            ax.scatter(x, y, marker=".", alpha=0.5)

    if self.output_filename != "":
        plt.savefig(self._image_filename("rd_baf"), dpi=150)
        plt.close(self.fig)
    elif self.interactive:
        plt.show(block=False)
        plt.draw()
    else:
        plt.show()

def rd_baf_call_models(self, maxcn=10)

Source code

def rd_baf_call_models(self, maxcn=10):
    """ Plots combined CNV calls on top of theoretical RD/BAF model curves.

    One panel is created per file listed in ``self.plot_files``. In every panel a curve is drawn
    for each copy number ``cn`` in ``0..maxcn`` and each haplotype split ``h1/h2`` with
    ``h1 <= h2`` and ``h1 + h2 == cn``. Curves are parametrised by ``x`` in [0, 1] (presumably the
    fraction of cells carrying the event - TODO confirm):

        relative RD = 2 - 2x + cn * x
        |dBAF|      = 0.5 - (1 - x + h1 * x) / (2 - 2x + cn * x)

    Calls stored as "calls combined" with more than ``self.min_segment_size`` bins are scattered
    over the curves, one scatter series per chromosome (autosomes and sex chromosomes of the
    reference genome, restricted to ``self.chrom`` when that list is not empty).

    Parameters
    ----------
    maxcn : int
        Largest copy number for which model curves are drawn; also the upper y limit.

    """
    bin_size = self.bin_size
    n = len(self.plot_files)
    ix = self.plot_files
    self.new_figure(panel_count=n)

    # The model curves do not depend on the file, so they are computed once.
    # For cn == 0 the denominator reaches 0 at x == 1 (0/0); the warnings are silenced locally
    # instead of permanently changing numpy's global error state.
    x = np.linspace(0, 1, 1000)
    models = []
    with np.errstate(divide='ignore', invalid='ignore'):
        for cn in range(maxcn, -1, -1):
            mrd = 2 - 2 * x + x * cn
            for h1 in range(cn // 2 + 1):
                h2 = cn - h1
                mbaf = 0.5 - (1 - x + x * h1) / mrd
                models.append((mbaf, mrd, "%d: %d/%d" % (cn, h1, h2), 6 - cn))

    for i in range(n):
        ax = self.next_panel()
        io = self.io[ix[i]]
        ax.set_title(self.file_title(ix[i]), position=(0.1, 0.1),
                     fontdict={'verticalalignment': 'bottom', 'horizontalalignment': 'left'})

        flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0) | (FLAG_USEMASK if self.rd_use_mask else 0) | FLAG_GC_CORR

        chroms = []
        for c, (l, t) in self.reference_genome["chromosomes"].items():
            snp_chr = io.snp_chromosome_name(c)
            if len(self.chrom) == 0 or (snp_chr in self.chrom) or (c in self.chrom):
                if Genome.is_autosome(c) or Genome.is_sex_chrom(c):
                    chroms.append(snp_chr)

        for mbaf, mrd, label, zorder in models:
            ax.plot(mbaf, mrd, "-", label=label, zorder=zorder)

        for c in chroms:
            call_rd = []
            call_baf = []
            if io.signal_exists(c, bin_size, "calls combined", flag):
                for call in io.read_calls(c, bin_size, "calls combined", flag):
                    if call["bins"] > self.min_segment_size:
                        call_rd.append(call["cnv"] * 2)
                        call_baf.append(call["baf"])
            # Scatter is issued for every chromosome, even without calls, exactly as before.
            ax.scatter(call_baf, call_rd, s=20, edgecolors='face', marker='.')

        ax.set_xlabel("|ΔBAF|")
        ax.set_ylabel("Relative RD level")

        ax.legend()

        ax.set_ylim([0, maxcn])
        ax.set_xlim([-0.02, 0.5])
        ax.grid()

    self.fig_show(suffix="models")

def rd_diff(self, file1, file2)

Source code

def rd_diff(self, file1, file2):
    """ Plots absolute difference of normalised GC corrected RD signal between two files.

    One panel is drawn per chromosome (autosomes and sex chromosomes of the reference genome for
    which the file ``self.plot_file`` contains both raw and GC corrected RD signal). Each signal
    is divided by element 4 of the corresponding "RD stat" record (presumably the mean RD level -
    TODO confirm) before the difference is taken.

    Parameters
    ----------
    file1 : int
        Index of the first file in ``self.io``.
    file2 : int
        Index of the second file in ``self.io``.

    """
    bin_size = self.bin_size
    if self.reference_genome is None:
        _logger.warning("Missing reference genome required for gview.")
        return
    # NOTE(review): chromosomes are discovered in self.plot_file, not in file1/file2 - confirm intent.
    ref_io = self.io[self.plot_file]
    chroms = []
    for c, (l, t) in self.reference_genome["chromosomes"].items():
        rd_chr = ref_io.rd_chromosome_name(c)
        if ref_io.signal_exists(rd_chr, bin_size, "RD", 0) and \
                ref_io.signal_exists(rd_chr, bin_size, "RD", FLAG_GC_CORR) and \
                (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
            chroms.append((rd_chr, l))
    self.new_figure(panel_count=len(chroms))
    for c, l in chroms:
        flag = FLAG_MT if Genome.is_mt_chrom(c) else FLAG_SEX if Genome.is_sex_chrom(c) else FLAG_AUTO
        stat1 = self.io[file1].get_signal(None, bin_size, "RD stat", flag)
        stat2 = self.io[file2].get_signal(None, bin_size, "RD stat", flag)
        if stat1 is None:
            _logger.error(
                "Data for bin size %d is missing in file '%s'!" % (bin_size, self.io[file1].filename))
            return
        if stat2 is None:
            _logger.error(
                "Data for bin size %d is missing in file '%s'!" % (bin_size, self.io[file2].filename))
            return
        flag_rd = (FLAG_USEMASK if self.rd_use_mask else 0)
        his_p_corr1 = self.io[file1].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
        his_p_corr2 = self.io[file2].get_signal(c, bin_size, "RD", flag_rd | FLAG_GC_CORR)
        ax = self.next_panel()
        ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                     color='C0')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        # The former rd_range dependent y ticks referenced an undefined name ("mean") and were
        # overwritten by the fixed ticks below in any case, so only the fixed ticks are set.
        ax.yaxis.set_ticks(np.arange(0, 2, 0.25), minor=[])
        ax.xaxis.set_ticks(np.arange(0, (l + 10e6) // bin_size, 10e6 // bin_size), minor=[])
        ax.set_ylim([0, 1])
        n_bins = l // bin_size
        ax.set_xlim([-n_bins * 0.05, n_bins * 1.05])
        ax.grid()

        if his_p_corr1 is None or his_p_corr2 is None:
            # The signal existence was checked in self.plot_file only; leave the panel empty
            # rather than failing on a file that lacks this chromosome.
            _logger.warning("GC corrected RD signal for chromosome '%s' is missing in one of compared files!" % c)
            continue

        ax.step(np.abs(his_p_corr1 / stat1[4] - his_p_corr2 / stat2[4]), "k")
    self.fig_show(suffix="rd_diff")

def region_rd_stat(self, region, n_bins=21, plot=False, legend=True)

Source code

def region_rd_stat(self, region, n_bins=21, plot=False, legend=True):
    """ Prints RD statistics of a region for every file in ``self.plot_files``.

    For each file the finite RD bins covering the region(s) are collected, values outside of the
    open interval (1st percentile, 99th percentile) are removed, and a histogram spanning
    mean +/- 5 std of the remaining values is fitted with ``fit_normal``. One tab separated line
    is printed to stdout per file:

        filename, region, fitted mean, fitted sigma, min, max, 1st percentile, 99th percentile,
        mean, std

    where min, max and the percentiles refer to the untrimmed data and mean, std to the trimmed
    data. Files with a missing RD signal or without finite bins in the region are skipped with a
    warning.

    Parameters
    ----------
    region : str
        Region description parsed by ``decode_region``.
    n_bins : int
        Number of histogram bin edges.
    plot : bool
        If True, histogram points and the fitted curve are plotted, one panel per file.
    legend : bool
        If True (and ``plot`` is True), a legend is added to every panel.

    """
    n = len(self.plot_files)
    ix = self.plot_files
    if plot:
        plt.clf()
        plt.rcParams["font.size"] = 8
        if self.grid == "auto":
            sx, sy = self._panels_shape(n)
        else:
            sx, sy = tuple(self.grid)
        self.fig = plt.figure(1, dpi=200, facecolor='w', edgecolor='k')
        if self.output_filename != "":
            self.fig.set_figheight(3 * sy)
            self.fig.set_figwidth(4 * sx)
        grid = gridspec.GridSpec(sy, sx, wspace=0.2, hspace=0.2)
    for i in range(n):
        io = self.io[ix[i]]
        if plot:
            ax = self.fig.add_subplot(grid[i])
            ax.set_title(self.file_title(ix[i]), position=(0.01, 1.07),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'})
        flag_rd = (FLAG_GC_CORR if self.rd_use_gc_corr else 0) | (FLAG_USEMASK if self.rd_use_mask else 0)
        data = []
        missing_chrom = None
        for c, (pos1, pos2) in decode_region(region):
            his_p = io.get_signal(c, self.bin_size, "RD", flag_rd)
            if his_p is None:
                missing_chrom = c
                break
            bin1 = (pos1 - 1) // self.bin_size
            bin2 = (pos2 - 1) // self.bin_size
            window = his_p[bin1:bin2 + 1]
            data += list(window[np.isfinite(window)])

        if missing_chrom is not None:
            # Statistics over a partially available region would be misleading - skip the file.
            _logger.warning("RD signal for chromosome '%s' and bin size %d is missing in file '%s'!" % (
                missing_chrom, self.bin_size, io.filename))
            continue
        if len(data) == 0:
            # np.min / np.percentile raise on empty input.
            _logger.warning("No finite RD bins in region '%s' in file '%s'!" % (region, io.filename))
            continue

        data = np.array(data)
        dmin = np.min(data)
        dmax = np.max(data)
        p1 = np.percentile(data, 1)
        p99 = np.percentile(data, 99)
        # Trim the tails (strict inequalities on both sides) before estimating mean and std.
        data = data[data > p1]
        data = data[data < p99]
        mean = np.mean(data)
        std = np.std(data)

        rd_min = mean - 5 * std
        rd_max = mean + 5 * std
        bins = np.linspace(rd_min, rd_max, n_bins)

        hist, binsr = np.histogram(data, bins=bins)

        # The fit uses the left bin edges as x coordinates.
        fitn, fitm, fits = fit_normal(bins[:-1], hist)[0]

        print("%s\t%s\t%.4f\t%.4f\t%e\t%e\t%.4f\t%.4f\t%.4f\t%.4f" % (
            io.filename, region, fitm, fits, dmin, dmax, p1, p99, mean, std))

        if plot:
            x = np.linspace(bins[0], bins[-1], 1001)
            ax.plot(x, normal(x, fitn, fitm, fits), "g-", label=region)
            ax.plot(bins[:-1], hist, "b*")
            if legend:
                ax.legend()

    if plot:
        if self.output_filename != "":
            plt.savefig(self._image_filename("comp"), dpi=200)
            plt.close(self.fig)
        elif self.interactive:
            plt.show(block=False)
            plt.draw()
        else:
            plt.show()

def regions(self, ix, region)

Source code

def regions(self, ix, region):
    """ Draws all configured panels for one file over the requested region(s).

    A sub-grid with one sub-panel per entry of ``self.panels`` is created (x axis shared between
    sub-panels). Recognised panel names are "rd", "snp", "snv" / "snv:<callset>", "baf",
    "likelihood" and "CN"; any other name leaves its sub-panel empty. When ``region`` decodes
    into several regions they are concatenated along the x axis and separated by green vertical
    lines. Tick labels with genomic positions are set on the last sub-panel only.

    Parameters
    ----------
    ix : int
        Index of the file in ``self.io``.
    region : str
        Region description parsed by ``decode_region``.

    """
    panels = self.panels
    bin_size = self.bin_size
    # Passed to decode_region as max_size; a region end equal to this value is replaced by the
    # chromosome length whenever the file provides one.
    open_end = 1000000000
    snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
        FLAG_USEHAP if self.snp_use_phase else 0)
    self.new_subgrid(len(panels), hspace=0.05, wspace=0.1)
    r = decode_region(region, max_size=open_end)
    io = self.io[ix]

    def region_end(chrom, pos2):
        """ Replaces the open-end sentinel by the chromosome length if it is known. """
        if pos2 == open_end:
            length = io.get_chromosome_length(chrom)
            if length is not None:
                return length
        return pos2

    def bin_window(pos1, pos2):
        """ Returns (start_bin, end_bin) used to slice binned signals for positions pos1..pos2. """
        return (pos1 - 1) // bin_size, pos2 // bin_size

    def position_formatter(pos_x):
        """ Builds a tick formatter translating an x value (index into pos_x) to a position label.

        The list is bound per panel, so a formatter never reads coordinates of another panel.
        """

        def format_func(value, tick_number):
            k = int(value)
            if k < 0:
                # A negative index would silently wrap around to the end of the list.
                return ""
            if k + 1 < len(pos_x):
                # Linear interpolation between neighbouring coordinates.
                p = pos_x[k] + (pos_x[k + 1] - pos_x[k]) * (value - k)
            elif k < len(pos_x):
                p = pos_x[k]
            else:
                return ""
            return "{0} Mbp".format(int(p / 100) / 10000)

        return plt.FuncFormatter(format_func)

    def setup_x_axis(ax, is_last, n_points, pos_x, grid=True, hide_labels=True):
        """ Configures x axis: position labels on the last panel, optionally hidden labels elsewhere. """
        if is_last:
            ax.xaxis.set_major_formatter(position_formatter(pos_x))
            ax.xaxis.set_major_locator(plt.MaxNLocator(5))
            ax.set_xlim([0.0, (n_points - 1) * 1.0])
            if grid:
                ax.xaxis.grid()
        elif hide_labels:
            plt.setp(ax.get_xticklabels(), visible=False)

    def draw_borders(ax, borders, shift=0):
        """ Draws a vertical line at every border between consecutive regions. """
        for border in borders[:-1]:
            ax.axvline(border + shift, color="g", lw=1)

    def set_frequency_y_axis(ax):
        """ Sets allele frequency ticks, tick labels and axis label. """
        ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=False)
        ax.yaxis.set_ticklabels(["0", "1/4", "1/2", "3/4", "1"])
        ax.set_ylabel("Allele frequency")

    def paint_segments(template, segments, levels):
        """ Returns NaN filled array shaped like template with every segment set to its level. """
        painted = np.zeros_like(template) * np.nan
        for seg, lev in zip(segments, levels):
            for b in seg:
                painted[b] = lev
        return painted

    def collect_snps(use_id_filter, **read_kwargs):
        """ Reads variants of all regions; returns (hpos, baf, color, alpha, borders, pos_x).

        hpos is the x coordinate in units of bins, baf the plotted allele frequency.
        """
        hpos, baf, color, borders, pos_x = [], [], [], [], []
        alpha = 0.7
        start_pos = 0
        for c, (pos1, pos2) in r:
            pos2 = region_end(c, pos2)
            pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, **read_kwargs)
            k = 0
            while k < len(pos) and pos[k] <= pos2:
                depth = nref[k] + nalt[k]
                if pos[k] >= pos1 and depth != 0 and ((not use_id_filter) or (flag[k] & 1)):
                    hpos.append((start_pos + pos[k] - pos1) / bin_size)
                    if gt[k] % 4 != 2:
                        baf.append(1.0 * nalt[k] / depth)
                    else:
                        baf.append(1.0 * nref[k] / depth)
                    if self.snp_alpha_P:
                        # Transparency is carried by the per-point colors instead.
                        alpha = None
                        color.append(
                            colors.to_rgba(self.snp_colors[(gt[k] % 4) * 2 + 1], (flag[k] >> 1) * 0.4))
                    else:
                        color.append(self.snp_colors[(gt[k] % 4) * 2 + (flag[k] >> 1)])
                k += 1
            start_pos += pos2 - pos1
            pos_x.extend(range(pos1, pos2 + bin_size, bin_size))
            borders.append(start_pos / bin_size)
        return hpos, baf, color, alpha, borders, pos_x

    def scatter_variants(ax, hpos, baf, color, alpha):
        """ Scatters variants using the configured marker size ("auto" means 10). """
        size = 10 if self.markersize == "auto" else self.markersize
        ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=size, alpha=alpha)

    def likelihood_calls(chrom, call_name, segments_name, start_bin, end_bin, offset, base_color):
        """ Returns (x, lower pixel, upper pixel, rgba) for each called bin in [start_bin, end_bin). """
        likelihood_call = io.get_signal(chrom, bin_size, call_name, snp_flag)
        segments = segments_decode(io.get_signal(chrom, bin_size, segments_name, snp_flag))
        found = []
        for s, lh in zip(segments, likelihood_call):
            i1, i2, p = likelihood_pixels_pval(lh)
            if i1 == i2 or len(s) <= self.min_segment_size:
                continue
            # Lower p-value gives a more opaque marker, saturating at 1.
            alpha = -np.log(p + 1e-40) / self.contrast
            if alpha > 1:
                alpha = 1
            rgba = colors.to_rgb(base_color) + (alpha,)
            for pos in s:
                if start_bin <= pos < end_bin:
                    found.append((pos - start_bin + offset, min(i1, i2), max(i1, i2), rgba))
        return found

    def scatter_likelihood_calls(ax, found):
        """ Marks both pixel levels of every collected likelihood call. """
        call_pos = [item[0] for item in found]
        call_c = np.array([item[3] for item in found])
        for level in (1, 2):
            ax.scatter(call_pos, [item[level] for item in found], s=self.lh_markersize, color=call_c,
                       edgecolors='face', marker=self.lh_marker)

    last_panel = len(panels) - 1
    for i in range(len(panels)):
        panel = panels[i]
        is_last = i == last_panel
        ax = self.next_subpanel(sharex=True)
        if i == 0 and self.title:
            ax.set_title(self.file_title(ix) + ": " + region, position=(0.01, 0.9),
                         fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                         color='C0')

        if panel == "rd":
            # Every series starts with a single 0, so len(series) > 1 means that data was added.
            g_p = [0]
            g_p_corr = [0]
            g_p_seg = [0]
            g_p_call = [0]
            g_p_call_mosaic = [0]
            g_p_call_mosaic_2d = [0]
            mean = 0
            borders = []
            pos_x = []
            flag_rd = FLAG_USEMASK if self.rd_use_mask else 0
            flag_gc = flag_rd | FLAG_GC_CORR
            for c, (pos1, pos2) in r:
                pos2 = region_end(c, pos2)
                mean, _stdev = io.rd_normal_level(bin_size, flag_gc)
                his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                his_p_corr = io.get_signal(c, bin_size, "RD", flag_gc)
                his_p_seg = io.get_signal(c, bin_size, "RD partition", flag_gc)
                his_p_call = io.get_signal(c, bin_size, "RD call", flag_gc)
                his_p_mosaic_seg = segments_decode(io.get_signal(c, bin_size, "RD mosaic segments", flag_gc))
                his_p_mosaic_call = io.get_signal(c, bin_size, "RD mosaic call", flag_gc)
                his_p_mosaic_seg_2d = segments_decode(
                    io.get_signal(c, bin_size, "RD mosaic segments 2d", flag_gc))
                his_p_mosaic_call_2d = io.get_signal(c, bin_size, "RD mosaic call 2d", flag_gc)

                has_mosaic = his_p_mosaic_call is not None and len(his_p_mosaic_call) > 0 and (
                        "rd_mosaic" in self.callers)
                has_mosaic_2d = his_p_mosaic_call_2d is not None and len(his_p_mosaic_call_2d) > 0 and (
                        "combined_mosaic" in self.callers)

                start_bin, end_bin = bin_window(pos1, pos2)
                window = his_p[start_bin:end_bin]
                pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:len(window)])

                g_p.extend(list(window))
                g_p_corr.extend(list(his_p_corr[start_bin:end_bin]))
                if his_p_seg is not None and len(his_p_seg) > 0 and self.rd_partition:
                    g_p_seg.extend(list(his_p_seg[start_bin:end_bin]))
                if his_p_call is not None and len(his_p_call) > 0 and self.rd_call and (
                        "rd_mean_shift" in self.callers):
                    g_p_call.extend(list(his_p_call[start_bin:end_bin]))
                if has_mosaic and self.rd_call:
                    his_p_mosaic = paint_segments(his_p, his_p_mosaic_seg, his_p_mosaic_call[0])
                    g_p_call_mosaic.extend(list(his_p_mosaic[start_bin:end_bin]))
                if has_mosaic_2d and self.rd_call:
                    his_p_mosaic_2d = paint_segments(his_p, his_p_mosaic_seg_2d, his_p_mosaic_call_2d[0])
                    g_p_call_mosaic_2d.extend(list(his_p_mosaic_2d[start_bin:end_bin]))
                borders.append(len(g_p) - 1)

            setup_x_axis(ax, is_last, len(g_p), pos_x)

            if (self.rd_range[1] - self.rd_range[0]) < 30:
                # Ticks are labelled in copy number units: mean RD level corresponds to 2.
                cn_low, cn_high = int(self.rd_range[0]), int(self.rd_range[1] + 1)
                ax.yaxis.set_ticks(np.arange(cn_low, cn_high, 1) * mean / 2, minor=False)
                ax.yaxis.set_ticklabels([str(cn) for cn in range(cn_low, cn_high)])
            ax.set_ylim([self.rd_range[0] * mean / 2, self.rd_range[1] * mean / 2])
            ax.set_ylabel("Read depth")
            ax.yaxis.grid()

            if self.rd_raw:
                ax.step(g_p, self.rd_colors[0], label="raw")
            if self.rd_corrected:
                ax.step(g_p_corr, self.rd_colors[1], label="GC corrected")
            if len(g_p_seg) > 1:
                ax.step(g_p_seg, self.rd_colors[2], label="partitioning")
            if len(g_p_call) > 1:
                ax.step(g_p_call, self.rd_colors[3], label="cnv calls")
            if len(g_p_call_mosaic) > 1:
                ax.step(g_p_call_mosaic, self.rd_colors[4], label="mosaic cnv calls")
            if len(g_p_call_mosaic_2d) > 1:
                ax.step(g_p_call_mosaic_2d, self.rd_colors[5], label="combined cnv calls")
            draw_borders(ax, borders)
            if self.legend:
                ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=2)
            self.fig.add_subplot(ax)

        elif panel == "snp":
            hpos, baf, color, alpha, borders, pos_x = collect_snps(self.snp_use_id)

            setup_x_axis(ax, is_last, len(pos_x), pos_x)

            set_frequency_y_axis(ax)
            ax.set_ylim([-0.05, 1.05])
            ax.yaxis.grid()
            # An empty region simply gives an empty scatter plot.
            scatter_variants(ax, hpos, baf, color, alpha)

            draw_borders(ax, borders)
            self.fig.add_subplot(ax)

        elif panel == "snv" or panel[:4] == "snv:":
            callset = "default"
            if panel[:4] == "snv:":
                callset = panel.split(":")[1]
            hpos, baf, color, alpha, borders, pos_x = collect_snps(False, callset=callset)

            setup_x_axis(ax, is_last, len(pos_x), pos_x, grid=False)
            ax.xaxis.grid()
            set_frequency_y_axis(ax)
            ax.set_ylim([0., 1.])
            ax.yaxis.grid()
            scatter_variants(ax, hpos, baf, color, alpha)

            draw_borders(ax, borders)
            self.fig.add_subplot(ax)

        elif panel == "baf":
            g_baf, g_maf, g_i1 = [0], [0], [0]
            borders = []
            pos_x = []

            for c, (pos1, pos2) in r:
                pos2 = region_end(c, pos2)
                baf = io.get_signal(c, bin_size, "SNP baf", snp_flag)
                maf = io.get_signal(c, bin_size, "SNP maf", snp_flag)
                i1 = io.get_signal(c, bin_size, "SNP i1", snp_flag)

                start_bin, end_bin = bin_window(pos1, pos2)
                window = baf[start_bin:end_bin]
                pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:len(window)])

                g_baf.extend(list(window))
                g_maf.extend(list(maf[start_bin:end_bin]))
                g_i1.extend(list(i1[start_bin:end_bin]))
                borders.append(len(g_baf) - 1)

            # NOTE(review): unlike rd/snp/snv/likelihood, tick labels of a non-last "baf" panel
            # were never hidden; kept as is.
            setup_x_axis(ax, is_last, len(g_baf), pos_x, hide_labels=False)

            set_frequency_y_axis(ax)
            ax.set_ylim([0, 1])
            ax.yaxis.grid()
            ax.step(g_baf, self.baf_colors[0], label="BAF")
            ax.step(g_maf, self.baf_colors[1], label="MAF")
            ax.step(g_i1, self.baf_colors[2], label="I1")
            if self.legend:
                ax.legend()
            draw_borders(ax, borders)
            self.fig.add_subplot(ax)

        elif panel == "likelihood":
            borders = []
            gl = []
            pos_x = []
            found = []
            found_2d = []
            tlen = 0
            tlen_2d = 0
            show_calls = self.snp_call and ("baf_mosaic" in self.callers)
            show_calls_2d = self.snp_call and ("combined_mosaic" in self.callers)
            for c, (pos1, pos2) in r:
                pos2 = region_end(c, pos2)
                likelihood = io.get_signal(c, bin_size, "SNP likelihood", snp_flag)
                start_bin, end_bin = bin_window(pos1, pos2)
                window = likelihood[start_bin:end_bin]
                pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:len(window)])
                gl.extend(list(window))
                borders.append(len(gl) - 1)
                if show_calls:
                    found.extend(likelihood_calls(c, "SNP likelihood call", "SNP likelihood segments",
                                                  start_bin, end_bin, tlen, self.lh_colors[0]))
                    tlen += end_bin - start_bin
                if show_calls_2d:
                    found_2d.extend(likelihood_calls(c, "SNP likelihood call 2d", "SNP likelihood segments 2d",
                                                     start_bin, end_bin, tlen_2d, self.lh_colors[1]))
                    tlen_2d += end_bin - start_bin

            # Rows of the image are likelihood pixels, columns are bins.
            img = np.array(gl).transpose()
            setup_x_axis(ax, is_last, img.shape[1], pos_x, grid=False)

            ax.imshow(img, aspect='auto')
            ax.yaxis.set_ticks([0, img.shape[0] / 4, img.shape[0] / 2, 3 * img.shape[0] / 4, img.shape[0] - 1],
                               minor=False)
            ax.yaxis.set_ticklabels(["1", "3/4", "1/2", "1/4", "0"])
            ax.set_ylabel("Allele frequency")
            if show_calls:
                scatter_likelihood_calls(ax, found)
            if show_calls_2d:
                scatter_likelihood_calls(ax, found_2d)

            draw_borders(ax, borders, 0.5)
            self.fig.add_subplot(ax)

        elif panel == "CN":
            borders = []
            gh1 = []
            gh2 = []
            pos_x = []
            # Defined before the first use, so the panel no longer depends on a preceding "rd" panel.
            flag_rd = FLAG_GC_CORR | (FLAG_USEMASK if self.rd_use_mask else 0)
            flag = snp_flag | flag_rd
            for c, (pos1, pos2) in r:
                pos2 = region_end(c, pos2)

                his_p = io.get_signal(c, bin_size, "RD", flag_rd)
                start_bin, end_bin = bin_window(pos1, pos2)
                if end_bin > len(his_p):
                    end_bin = len(his_p)
                n_region_bins = max(end_bin - start_bin, 0)
                pos_x.extend(range(pos1, pos2 + bin_size, bin_size)[0:n_region_bins])

                # Default is one copy per haplotype wherever RD is non-zero; the mask is built
                # from the region slice so that its length matches h1 and h2.
                h1 = np.zeros(n_region_bins, dtype=int)
                h2 = np.zeros(n_region_bins, dtype=int)
                covered = np.asarray(his_p[start_bin:end_bin]) != 0
                h1[covered] = 1
                h2[covered] = 1

                if io.signal_exists(c, bin_size, "calls combined", flag):
                    calls = io.read_calls(c, bin_size, "calls combined", flag)
                    segments = segments_decode(io.get_signal(c, bin_size, "RD mosaic segments 2d", flag_rd))

                    for call in calls:
                        for b in segments[int(call["segment"])]:
                            if start_bin <= b < end_bin:
                                h1[b - start_bin] = call["models"][0][1]
                                h2[b - start_bin] = call["models"][0][2]
                gh1.extend(list(h1))
                gh2.extend(list(h2))
                borders.append(len(gh1) - 1)
            ax.get_xaxis().get_major_formatter().set_useOffset(False)
            ax.stackplot(range(len(gh1)), gh1, gh2, baseline='sym')

            # NOTE(review): as for "baf", tick labels of a non-last "CN" panel were never hidden.
            setup_x_axis(ax, is_last, len(gh1), pos_x, hide_labels=False)

            draw_borders(ax, borders, 0.5)
            self.fig.add_subplot(ax)

def show(self)

Source code

def show(self):
    """Print all viewer parameters to stdout, sorted by parameter name.

    Directly below the ``plot_files`` entry, the index and filename of every
    file handle in ``self.io`` is listed as well.
    """
    print("\nParameters")
    for name in sorted(self.params.keys()):
        print("    * %s: %s" % (name, str(self.params[name])))
        if name != "plot_files":
            continue
        # Indices printed here are positions in self.io
        for index, handle in enumerate(self.io):
            print("            %d: %s" % (index, handle.filename))
    print()

def single_cell_allelic_dropout(self, callset=None, res=1000, n_bins=100, threshold=0.1, snp_threshold=0.01, neigh=False, plot=False, stdout=True, title=None)

Function used to identify regions without allelic dropout in the case of single cell amplification. It requires BAF data for the current bin size. It filters out all bins with at least one SNP below snp_threshold and all bins with collective maximum BAF likelihood below the threshold parameter.

Parameters

callset : str or None: Name of callset if not default.
res : int: Resolution in bins used to calculate percentage of dropouts in region.
n_bins : int: Number of bins in histograms.
threshold : float: Collective threshold of AF for allelic dropout
snp_threshold : float: Single SNP threshold of AF for allelic dropout
neigh : bool: Remove neighbouring bins also.
plot : bool: Make plots.
stdout : bool: Print out good regions

Source code

def single_cell_allelic_dropout(self, callset=None, res=1000, n_bins=100, threshold=0.1, snp_threshold=0.01,
                                neigh=False, plot=False, stdout=True, title=None):
    """
    Function used to identify regions without allelic dropout in the case of single cell amplification.
    It requires baf data for bin size. It will filter out all bins with at least one SNP below snp_threshold and
    all bins with collective maximum baf likelihood below threshold parameter.

    Parameters
    ----------
    callset : str or None
        Name of callset if not default.
    res : int
        Resolution in bins used to calculate percentage of dropouts in region.
    n_bins : int
        Number of bins in histograms.
    threshold : float
        Collective threshold of AF for allelic dropout
    snp_threshold : float
        Single SNP threshold of AF for allelic dropout
    neigh : bool
        Remove neighbouring bins also.
    plot : bool
        Make plots.
    stdout : bool
        Print out good regions
    title : str or None
        Figure title handed to ``new_figure`` when ``plot`` is set.

    """
    # The region scan below fills these lists regardless of ``plot``, so they
    # must exist in both modes (they used to be created only when plotting).
    bafG = []
    baf = []
    cpos = 0
    sizeG = []
    sizeB = []
    ax = None
    if plot:
        self.new_figure(panel_count=2, panel_size=(16, 6), title=title)
        ax = self.next_panel()

    io = self.io[self.plot_file]
    for c in io.snp_chromosomes():
        if len(self.chrom) != 0 and c not in self.chrom:
            continue
        snp_flag = (FLAG_USEMASK if self.snp_use_mask else 0) | (FLAG_USEID if self.snp_use_id else 0) | (
            FLAG_USEHAP if self.snp_use_phase else 0)
        i1 = io.get_signal(c, self.bin_size, "SNP i1", snp_flag)
        pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
        c01 = io.get_signal(c, self.bin_size, "SNP bin count 0|1", snp_flag)
        c10 = io.get_signal(c, self.bin_size, "SNP bin count 1|0", snp_flag)
        hets = c01 + c10

        # Per-bin state: 0 = both alleles seen, 1 = allelic dropout,
        # 2 = no heterozygous SNP in the bin (resolved from neighbours below).
        mask = np.zeros_like(i1)
        mask[hets == 0] = 2
        mask[i1 > (0.5 - threshold)] = 1
        for ix in range(len(pos)):
            if (nref[ix] + nalt[ix]) != 0 and ((gt[ix] % 4) in [1, 2]):
                b = 1.0 * nalt[ix] / (nref[ix] + nalt[ix])
                if (b < snp_threshold) or (b > (1 - snp_threshold)):
                    mask[(pos[ix] - 1) // self.bin_size] = 1

        if neigh:
            # Spread every dropout bin to its left and right neighbour;
            # np.roll wraps around, hence the explicit edge resets.
            ada = mask == 1
            ada1 = np.roll(ada, 1)
            ada2 = np.roll(ada, -1)
            ada1[0] = False
            ada2[-1] = False
            mask[ada1] = 1
            mask[ada2] = 1

        # A run of undecided bins becomes dropout if it touches a dropout bin
        # on either side, otherwise it is treated as good.
        ix = 0
        while ix < len(mask):
            if mask[ix] == 2:
                adan = 0
                if ix > 0 and mask[ix - 1] == 1:
                    adan = 1
                jx = ix
                while jx < len(mask) and mask[jx] == 2:
                    jx += 1
                if jx < len(mask) and mask[jx] == 1:
                    adan = 1
                mask[ix:jx] = adan
                ix = jx
            else:
                ix += 1

        # Report maximal runs of good bins and record region sizes. A dropout
        # size is the gap between the end of the previous good run (or the
        # chromosome start) and the start of the current one.
        ix = 0
        ojx = 0
        while ix < len(mask):
            if mask[ix] == 0:
                jx = ix
                while jx < len(mask) and mask[jx] == 0:
                    jx += 1
                if stdout:
                    print("%s\t%d\t%d" % (c, ix * self.bin_size + 1, jx * self.bin_size))
                sizeG.append((jx - ix) * self.bin_size)
                if ix > ojx:
                    sizeB.append((ix - ojx) * self.bin_size)
                ojx = jx
                ix = jx
            else:
                ix += 1

        if plot:
            # Fraction of dropout bins in consecutive windows of ``res`` bins
            density = np.zeros(len(mask) // res)
            for ix in range(len(density)):
                density[ix] = np.mean(mask[res * ix:res * (ix + 1)])
            ax.plot(np.arange(cpos, cpos + len(density)) * res, density)
            cpos += len(density)
            for ix in range(len(pos)):
                if (nref[ix] + nalt[ix]) != 0 and ((gt[ix] % 4) in [1, 2]):
                    baf.append(1.0 * nalt[ix] / (nref[ix] + nalt[ix]))
                    if mask[(pos[ix] - 1) // self.bin_size] == 0:
                        bafG.append(1.0 * nalt[ix] / (nref[ix] + nalt[ix]))

    if not plot:
        return

    # Axis decoration only makes sense (and ``ax`` only exists) when plotting
    ax.set_xlabel("Position in genome [bins]")
    ax.set_ylabel("Percentage of allelic dropout")
    ax.grid(True)

    self.new_subgrid(2, grid="horizontal", hspace=0.05, wspace=0.2)
    ax = self.next_subpanel()
    ms = 5 * max(np.mean(sizeG), np.mean(sizeB))
    ax.hist(sizeB, bins=np.arange(1, ms, self.bin_size), histtype="step", log=True,
            label="Allelic dropout regions", linewidth=3)
    ax.hist(sizeG, bins=np.arange(1, ms, self.bin_size), histtype="step", log=True,
            label="Region with both alleles", linewidth=3)
    ax.legend()
    ax.grid(True)
    ax.set_xlabel("Size [bp]")
    ax.set_ylabel("Number of regions")
    self.fig.add_subplot(ax)

    ax = self.next_subpanel()
    vaf_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
    ax.hist(baf, bins=vaf_bins, label="All heterozygous variants")
    ax.hist(bafG, bins=vaf_bins, label="Region with both alleles")
    ax.legend()
    ax.grid(True)
    ax.set_xlabel("VAF")
    ax.set_ylabel("Distribution")
    self.fig.add_subplot(ax)

    self.fig_show(suffix="allelic_dropout")

def snp(self, plot_gt=None, plot_pmask=None, callset=None)

Source code

def snp(self, plot_gt=None, plot_pmask=None, callset=None):
    """Scatter-plot per-SNP allele frequency, one panel per chromosome.

    Parameters
    ----------
    plot_gt : list of int or None
        Genotype codes (``gt % 4``) of SNPs to draw. Defaults to [0, 1, 2, 3].
    plot_pmask : list of int or None
        Values of ``flag >> 1`` of SNPs to draw. Defaults to [0, 1].
    callset : str or None
        Name of callset if not default. With a reference genome set, a named
        callset is looked up under the "somatic SNP ..." signals.

    """
    if plot_pmask is None:
        plot_pmask = [0, 1]
    if plot_gt is None:
        plot_gt = [0, 1, 2, 3]

    io = self.io[self.plot_file]
    if self.reference_genome is None:
        chroms = io.snp_chromosomes()
    else:
        # Keep autosomes and sex chromosomes for which all four SNP signals
        # are stored; the signal names differ only by prefix for named callsets.
        prefix = "SNP " if callset is None else "somatic SNP "
        extra = {} if callset is None else {"name": callset}
        chroms = []
        for c, (_, _) in self.reference_genome["chromosomes"].items():
            snp_chr = io.snp_chromosome_name(c)
            stored = all(io.signal_exists(snp_chr, None, prefix + part, 0, **extra)
                         for part in ("pos", "desc", "counts", "qual"))
            if stored and (Genome.is_autosome(c) or Genome.is_sex_chrom(c)):
                chroms.append(snp_chr)

    self.new_figure(panel_count=len(chroms))
    size = 10 if self.markersize == "auto" else self.markersize
    for c in chroms:
        pos, ref, alt, nref, nalt, gt, flag, qual = io.read_snp(c, callset=callset)
        hpos = []
        baf = []
        color = []
        for i in range(len(pos)):
            depth = nref[i] + nalt[i]
            if depth == 0:
                continue
            g = gt[i] % 4
            pmask = flag[i] >> 1
            if g not in plot_gt or pmask not in plot_pmask:
                continue
            hpos.append(pos[i])
            # Genotype code 2 is plotted with the reference-allele fraction,
            # every other code with the alternative-allele fraction.
            counted = nref[i] if g == 2 else nalt[i]
            baf.append(1.0 * counted / depth)
            if self.snp_alpha_P:
                # Single colour per genotype; flag >> 1 controls opacity
                color.append(colors.to_rgba(self.snp_colors[g * 2 + 1], pmask * 0.4))
            else:
                color.append(self.snp_colors[g * 2 + pmask])

        ax = self.next_panel()
        ax.set_title(c, position=(0.01, 0.9), fontdict={'verticalalignment': 'top', 'horizontalalignment': 'left'},
                     color='C0')
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.yaxis.set_ticks([0, 0.25, 0.5, 0.75, 1.0], minor=[])
        # max() of an empty sequence raises, so a chromosome without SNP
        # records gets an empty panel instead of aborting the whole figure.
        chrom_len = max(pos) if len(pos) > 0 else 0
        ax.xaxis.set_ticks(np.arange(0, (chrom_len + 10e6), 10e6), minor=[])
        ax.set_ylim([0., 1.])
        if chrom_len > 0:
            ax.set_xlim([-0.05 * chrom_len, 1.05 * chrom_len])
        ax.grid()
        ax.scatter(hpos, baf, marker='.', edgecolor=color, c=color, s=size, alpha=0.7)
    self.fig_show(suffix="snp")

def snp_compare(self, regions, ix1, ix2, callset=None, n_bins=100, titles=None, test_loh=False)

Source code

def snp_compare(self, regions, ix1, ix2, callset=None, n_bins=100, titles=None, test_loh=False):
    """Compare per-SNP allele frequencies of two files, one panel per region.

    For every region the alternative-allele fraction nalt / (nref + nalt) of
    each covered SNP is collected from both files. By default the fractions
    are drawn against position: green (file ``ix1``) and blue (file ``ix2``)
    for SNPs present in both, red for SNPs only in ``ix1``, orange for SNPs
    only in ``ix2``. With ``test_loh`` two histograms of the ``ix1`` fractions
    lying strictly between 0.2 and 0.8 are drawn instead: one for SNPs whose
    ``ix2`` fraction exceeds 0.8, one for all remaining ``ix1`` SNPs; their
    mean, standard deviation and size are printed.

    Parameters
    ----------
    regions : str
        Space separated regions; each one is parsed by ``decode_region`` and
        may consist of several chromosome intervals, which are pooled.
    ix1 : int
        Index in ``self.io`` of the first file.
    ix2 : int
        Index in ``self.io`` of the second file.
    callset : str or None
        Name of callset if not default.
    n_bins : int
        Number of bins in histograms.
    titles : list of str or None
        Panel titles; the region string is used when None.
    test_loh : bool
        Draw the histograms instead of the scatter plot.

    """
    regions = regions.split(" ")
    n = len(regions)
    self.new_figure(panel_count=n)
    title_style = {'verticalalignment': 'top', 'horizontalalignment': 'left'}
    x_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
    for i in range(n):
        ax = self.next_panel()
        ax.set_title(regions[i] if titles is None else titles[i], position=(0.01, 1.07), fontdict=title_style)

        # Panel-level accumulators: created once per region so that every
        # interval of a multi-part region contributes, and so that they exist
        # even if the region decodes to no interval at all.
        xx = []
        yy = []
        cc = []
        hist1 = []
        hist2 = []
        for c, (pos_start, pos_end) in decode_region(regions[i]):
            pos1, ref1, alt1, nref1, nalt1, gt1, flag1, qual1 = self.io[ix1].read_snp(c, callset=callset)
            pos2, ref2, alt2, nref2, nalt2, gt2, flag2, qual2 = self.io[ix2].read_snp(c, callset=callset)

            # position -> alternative-allele fraction, per file. The loops stop
            # at the first position beyond the interval end.
            baf1 = {}
            ix = 0
            while ix < len(pos1) and pos1[ix] <= pos_end:
                if pos1[ix] >= pos_start and (nref1[ix] + nalt1[ix]) != 0:
                    baf1[pos1[ix]] = 1.0 * nalt1[ix] / (nref1[ix] + nalt1[ix])
                ix += 1
            baf2 = {}
            ix = 0
            while ix < len(pos2) and pos2[ix] <= pos_end:
                if pos2[ix] >= pos_start and (nref2[ix] + nalt2[ix]) != 0:
                    baf2[pos2[ix]] = 1.0 * nalt2[ix] / (nref2[ix] + nalt2[ix])
                ix += 1

            for p, t in baf1.items():
                if p in baf2:
                    xx.append(p)
                    yy.append(t)
                    cc.append("green")
                    xx.append(p)
                    yy.append(baf2[p])
                    cc.append("blue")
                    target = hist1 if baf2[p] > 0.8 else hist2
                else:
                    xx.append(p)
                    yy.append(t)
                    cc.append("red")
                    target = hist2
                if t > 0.2 and t < 0.8:
                    target.append(t)
            for p, t in baf2.items():
                if p not in baf1:
                    xx.append(p)
                    yy.append(t)
                    cc.append("orange")

        if test_loh:
            ax.hist(hist1, bins=x_bins, histtype='step')
            ax.hist(hist2, bins=x_bins, histtype='step')
            print("H1:", np.mean(hist1), np.std(hist1), len(hist1))
            print("H2:", np.mean(hist2), np.std(hist2), len(hist2))
            ax.set_xlabel("baf")
            ax.set_ylabel("distribnution")
        else:
            ax.scatter(xx, yy, marker=".", s=0.1, c=cc)
            ax.set_xlabel("position")
            ax.set_ylabel("baf")

    self.fig_show(suffix="snp_dist")

def snp_dist(self, regions, callset=None, n_bins=100, gt_plot=[0, 1, 2, 3], titles=None, beta_distribution=False, log_scale=False)

Source code

def snp_dist(self, regions, callset=None, n_bins=100, gt_plot=[0, 1, 2, 3], titles=None, beta_distribution=False,
             log_scale=False):
    """Plot histograms of SNP allele fraction, one panel per file and region.

    Every panel shows the distribution for all selected SNPs and, on top, for
    the subset with bit 2 set in ``flag`` (labelled "P bases only").

    Parameters
    ----------
    regions : str
        Space separated regions; each one is parsed by ``decode_region``.
    callset : str or None
        Name of callset if not default.
    n_bins : int
        Number of bins in histograms.
    gt_plot : list of int
        Genotype codes (``gt % 4``) of SNPs to include. The default list is
        only read, never modified.
    titles : list of str or None
        Panel titles indexed by region; when None the title is built from
        ``file_title`` and the region string.
    beta_distribution : bool
        Overlay a symmetric beta distribution whose parameters are half the
        mean depth (nref + nalt) of the "P bases only" SNPs.
    log_scale : bool
        Use logarithmic y axis.

    """
    nf = len(self.plot_files)
    regions = regions.split(" ")
    nr = len(regions)
    self.new_figure(panel_count=nf * nr)
    title_style = {'verticalalignment': 'top', 'horizontalalignment': 'left'}
    x_bins = np.arange(0, 1.0 + 1. / (n_bins + 1), 1. / (n_bins + 1))
    for ii in range(nf):
        file_ix = self.plot_files[ii]
        for i in range(nr):
            ax = self.next_panel()
            if titles is None:
                label = self.file_title(file_ix) + ": " + regions[i]
            else:
                label = titles[i]
            ax.set_title(label, position=(0.01, 1.10), fontdict=title_style)

            baf = []
            bafP = []
            depth_sum = 0
            for c, (pos1, pos2) in decode_region(regions[i]):
                pos, ref, alt, nref, nalt, gt, flag, qual = self.io[file_ix].read_snp(c, callset=callset)
                ix = 0
                # Stops at the first position beyond the interval end
                while ix < len(pos) and pos[ix] <= pos2:
                    depth = nref[ix] + nalt[ix]
                    if pos[ix] >= pos1 and depth != 0 and ((gt[ix] % 4) in gt_plot):
                        # Genotype code 2 contributes the reference-allele
                        # fraction, every other code the alternative one.
                        counted = nref[ix] if gt[ix] % 4 == 2 else nalt[ix]
                        value = 1.0 * counted / depth
                        baf.append(value)
                        if flag[ix] & 2:
                            bafP.append(value)
                            depth_sum += depth
                    ix += 1
            # No flagged SNP in the region: avoid dividing by zero
            mean_rd = depth_sum / len(bafP) if bafP else 0

            ax.hist(baf, bins=x_bins, label="All heterozygous variants")
            ax.hist(bafP, bins=x_bins, label="P bases only")
            if log_scale:
                try:
                    plt.yscale('log', nonpositive='clip')
                except (TypeError, ValueError):
                    # Older Matplotlib only knows the former keyword name
                    plt.yscale('log', nonposy='clip')

            # The beta curve is undefined without any flagged SNP
            if beta_distribution and bafP:
                xx = np.linspace(0.2, 0.8, 200)
                ax.plot(xx, beta.pdf(xx, mean_rd / 2, mean_rd / 2) * len(bafP) / n_bins, c="black",
                        label="Beta distribution")
            ax.legend(bbox_to_anchor=(0, 1.02, 1, 0.2), loc="lower left", mode="expand", borderaxespad=0, ncol=3)
            ax.set_xlabel("VAF")
            ax.set_ylabel("Distribution")

    self.fig_show(suffix="snp_dist")

def stat(self, his_bin_size=100, return_image=False)

Source code

def stat(self, his_bin_size=100, return_image=False):
    """Plot read-depth statistics stored for ``his_bin_size``.

    One column is drawn per available chromosome class (autosomes, X/Y,
    mitochondria): the RD/GC histogram with the GC correction curve on top,
    and below it the "RD p dist" and "RD u dist" histograms together with a
    normal curve built from the stored statistics. Mitochondria are skipped
    for bin sizes above 1000. Nothing is drawn if no "RD stat" signal exists.

    Parameters
    ----------
    his_bin_size : int
        Bin size of the stored statistics.
    return_image : bool
        Return the rendered figure as a PIL image instead of showing or
        saving it.

    Returns
    -------
    PIL.Image.Image or None
        The image when ``return_image`` is set, otherwise None.

    """
    plt.clf()
    io = self.io[self.plot_file]
    auto = io.signal_exists(None, his_bin_size, "RD stat", FLAG_AUTO)
    sex = io.signal_exists(None, his_bin_size, "RD stat", FLAG_SEX)
    mt = io.signal_exists(None, his_bin_size, "RD stat", FLAG_MT) and (his_bin_size < 1001)
    if not (auto or sex or mt):
        return
    cond = [auto, sex, mt]
    stat_list = []
    n_cols = sum(map(int, cond))
    ix = 1
    plt.rcParams["font.size"] = 8
    self.fig = plt.figure(1, figsize=(4 * n_cols, 8), dpi=90, facecolor='w', edgecolor='k')
    for t, c, flag in zip(["Autosomes", "X/Y", "Mitochondria"], cond, [FLAG_AUTO, FLAG_SEX, FLAG_MT]):
        if not c:
            continue
        # stat[4] / stat[5] are logged and plotted as RD level and its spread;
        # stat[0], stat[1] give the histogram range and step, stat[3] scales
        # the histograms.
        stat = io.get_signal(None, his_bin_size, "RD stat", flag)
        stat_list.append(stat)
        max_rd = int(stat[0])
        bin_size = int(stat[1])
        lim_rd = int(max(2 * stat[4], stat[4] + 3 * stat[5]))
        _logger.info("RD stat for %s: %.2f +- %.2f" % (t, stat[4], stat[5]))
        if t == "Mitochondria" and auto:
            # stat_list[0] holds the autosomal statistics whenever ``auto`` is set
            _logger.info("RD stat for %s - number of mitochondria per cell: %.2f +- %.2f" % (
                t, 2 * stat[4] / stat_list[0][4],
                2 * stat[5] / stat_list[0][4] + stat_list[0][5] * stat[4] / (
                        stat_list[0][4] * stat_list[0][4])))
        his_p = io.get_signal(None, his_bin_size, "RD p dist", flag)
        his_u = io.get_signal(None, his_bin_size, "RD u dist", flag)
        his_rd_gc = io.get_signal(None, his_bin_size, "RD GC dist", flag)
        gc_corr = io.get_signal(None, his_bin_size, "GC corr", flag)

        # Upper row: RD versus GC content with the GC correction curve
        ax = plt.subplot(2, n_cols, ix)
        ax.set_xlabel("RD")
        ax.set_ylabel("GC [%]")
        ax.xaxis.set_ticklabels([])
        ax.set_title(t)
        his_rd_gc[0][0] = 0
        ax.imshow(his_rd_gc[:lim_rd // bin_size, :].T, aspect="auto", interpolation='nearest', origin='lower')
        ax.plot(gc_corr * stat[4] / bin_size, range(101), "w-")

        # Lower row: RD distributions and the fitted normal curve
        ax = plt.subplot(2, n_cols, ix + n_cols)
        ax.set_ylabel("Normalised distribution")
        ax.set_xlabel("RD")
        ax.set_xlim([0, lim_rd])
        bins = range(0, max_rd, bin_size)
        x = np.arange(0, max_rd // bin_size * bin_size, 0.1 * bin_size)
        plt.plot(x, normal(x, 1, stat[4], stat[5]), "g-")
        x = np.array(bins)
        plt.plot(x[:len(his_u)], his_u / stat[3], "y*")
        plt.plot(x[:len(his_p)], his_p / stat[3], "b*")
        ix += 1
    plt.subplots_adjust(bottom=0.08, top=0.95, wspace=0.25, hspace=0, left=0.05 * 3 / n_cols, right=0.95)
    if return_image:
        # A bare "import PIL" does not guarantee that PIL.Image is loaded
        from PIL import Image
        canvas = self.fig.canvas
        canvas.draw()
        size = canvas.get_width_height()
        if hasattr(canvas, "tostring_rgb"):
            return Image.frombytes('RGB', size, canvas.tostring_rgb())
        # Recent Matplotlib canvases no longer offer tostring_rgb; read the
        # RGBA buffer and drop the alpha channel instead.
        return Image.frombytes('RGBA', size, bytes(canvas.buffer_rgba())).convert('RGB')
    elif self.output_filename != "":
        plt.savefig(self._image_filename("stat"), dpi=150)
        plt.close(self.fig)
    elif self.interactive:
        plt.show(block=False)
        plt.draw()
    else:
        plt.show()

Inherited members

Show:
- info
- ls
- meta
Figure:
- bin_size_f
- fig_show
- get_panel
- new_figure
- next_panel
- next_polar_panel
- next_subpanel