Module kitchen.kitchen

Manipulate .h5ad files and cook scRNA-seq data from command line

Expand source code
# -*- coding: utf-8 -*-
"""
Manipulate .h5ad files and cook scRNA-seq data from command line
"""
import argparse, os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from scipy import sparse
from dropkick import recipe_dropkick

from .ingredients import (
    check_dir_exists,
    cellranger2,
    cellranger3,
    subset_adata,
    cc_score,
    dim_reduce,
    plot_embedding,
    plot_genes,
    plot_genes_cnmf,
    rank_genes_cnmf,
    cluster_pie,
)
from ._version import get_versions


def info(args):
    """Prints information about .h5ad file to console"""
    print("Reading {}\n".format(args.file))
    adata = sc.read(args.file)
    print(adata, "\n")
    print(".X: {} with {}\n".format(type(adata.X), adata.X.dtype))
    print("obs_names: {}".format(adata.obs_names))
    print("var_names: {}".format(adata.var_names))


def to_h5ad(args):
    """Converts counts matrix from flat file (.txt, .csv) to .h5ad"""
    # get basename of file for writing outputs
    name = os.path.splitext(os.path.basename(args.file))[0]
    # check to make sure it's an .h5ad file
    if os.path.splitext(args.file)[1] == ".h5ad":
        raise ValueError("Input file already in .h5ad format")
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        # print information about counts, including names of cells and genes
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
        print("obs_names: {}".format(a.obs_names))
        print("var_names: {}".format(a.var_names))
    # sparsify counts slot
    if args.verbose:
        print("sparsifying counts...")
    a.X = sparse.csr_matrix(a.X, dtype=int)
    # save file as .h5ad
    if args.verbose:
        print("Writing counts to {}/{}.h5ad".format(args.outdir, name))
    check_dir_exists(args.outdir)
    a.write("{}/{}.h5ad".format(args.outdir, name), compression="gzip")
    if args.rm_flat_file:
        # remove original, noncompressed flat file
        if args.verbose:
            print("Removing {}".format(args.file))
        os.remove(args.file)


def mtx_to_h5ad(args):
    """Converts .mtx files from 10x CellRanger or DropEst to .h5ad"""
    if args.dropest:
        if args.verbose:
            print("Reading DropEst .mtx files from {}".format(args.dir))
        mtx_file = [x for x in os.listdir(args.dir) if x.endswith("_counts.mtx")][
            0
        ]  # get name of .mtx
        # define filenames
        mtx = args.dir + "/" + mtx_file
        genes = args.dir + "/" + mtx_file.split("_counts")[0] + "_features.txt"
        barcodes = args.dir + "/" + mtx_file.split("_counts")[0] + "_barcodes.txt"
        # read files
        a = sc.read_mtx(mtx)  # read matrix
        a = a.T  # transpose DropEst matrix to cells x genes
        g = pd.read_csv(genes, delimiter="\t", header=None)  # read genes
        b = pd.read_csv(barcodes, delimiter="\t", header=None)  # read barcodes
        # add gene and barcode names
        a.obs_names = b[0].values
        a.var_names = g[0].values
        if args.verbose:
            print(
                "Writing counts to {}/{}.h5ad - {} cells and {} genes".format(
                    args.outdir, mtx_file.split("_counts")[0], a.shape[0], a.shape[1]
                )
            )
        a.write(
            "{}/{}.h5ad".format(args.outdir, mtx_file.split("_counts")[0]),
            compression="gzip",
        )
    else:
        if args.verbose:
            print("Reading 10x CellRanger .mtx files from {}".format(args.dir))
        a = sc.read_10x_mtx(args.dir)
        name = args.dir.split("_gene_bc_matrices")[
            0
        ]  # extract name from 10x directory name
        if args.verbose:
            print(
                "Writing counts to {}/{}.h5ad - {} cells and {} genes".format(
                    args.outdir, name, a.shape[0], a.shape[1]
                )
            )
        a.write("{}/{}.h5ad".format(args.outdir, name), compression="gzip")


def h5ad_to_csv(args):
    """Converts counts matrix from .h5ad to flat file (.txt, .csv)"""
    # get basename of file for writing outputs
    name = os.path.splitext(os.path.basename(args.file))[0]
    # check to make sure it's an .h5ad file
    if os.path.splitext(args.file)[1] != ".h5ad":
        raise ValueError("Input file must be in .h5ad format")
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        # print information about counts, including names of cells and genes
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # swap to desired layer
    if args.layer is not None:
        if args.verbose:
            print("Using .layers['{}']".format(args.layer))
        a.X = a.layers[args.layer].copy()
    # check for/create output directory
    check_dir_exists(args.outdir)
    if args.obs_only:
        if args.verbose:
            print("Writing .obs dataframe to {}/{}_obs.csv".format(args.outdir, name))
        a.obs.to_csv("{}/{}_obs.csv".format(args.outdir, name))
    elif args.separate_indices:
        if args.verbose:
            print("Writing counts to {}/{}_X.csv".format(args.outdir, name))
        if isinstance(a.X, sparse.csr.csr_matrix):
            df = pd.DataFrame.sparse.from_spmatrix(a.X)
            df.to_csv(
                "{}/{}_X.csv".format(args.outdir, name),
                sep=",",
                header=False,
                index=False,
            )
        elif isinstance(a.X, np.ndarray):
            np.savetxt("{}/{}_X.csv".format(args.outdir, name), a.X, delimiter=",")
        if args.verbose:
            print("Writing obs names to {}/{}_obs.csv".format(args.outdir, name))
        pd.DataFrame(a.obs_names).to_csv(
            "{}/{}_obs.csv".format(args.outdir, name), header=False, index=False
        )
        if args.verbose:
            print("Writing var names to {}/{}_var.csv".format(args.outdir, name))
        pd.DataFrame(a.var_names).to_csv(
            "{}/{}_var.csv".format(args.outdir, name), header=False, index=False
        )
    else:
        if isinstance(a.X, sparse.csr.csr_matrix):
            df = pd.DataFrame.sparse.from_spmatrix(
                a.X, index=a.obs_names, columns=a.var_names
            )
        else:
            df = pd.DataFrame(a.X, index=a.obs_names, columns=a.var_names)
        # save file as .csv
        if args.verbose:
            print("Writing counts to {}/{}.csv".format(args.outdir, name))
        df.to_csv(
            "{}/{}.csv".format(args.outdir, name), sep=",", header=True, index=True
        )
    if args.rm_h5ad_file:
        # remove original, h5ad file
        if args.verbose:
            print("Removing {}".format(args.file))
        os.remove(args.file)


def transpose(args):
    """Transposes anndata object, replacing obs with var, and overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    # transpose file
    if args.verbose:
        print("transposing file and saving...")
    a = a.T
    # save file as .h5ad
    a.write(args.file, compression="gzip")


def to_sparse(args):
    """Converts .X slot of anndata object to scipy CSR format, overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    if isinstance(a.X, sparse.csr.csr_matrix):
        print("{} already in sparse format".format(args.file))
        return
    # sparsify counts slot
    if args.verbose:
        print("sparsifying counts...")
    a.X = sparse.csr_matrix(a.X, dtype=int)
    # save file as .h5ad
    a.write(args.file, compression="gzip")


def to_dense(args):
    """Converts .X slot of anndata object to numpy.matrix format, overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    if isinstance(a.X, np.matrix) or isinstance(a.X, np.ndarray):
        print("{} already in dense format".format(args.file))
        return
    # densify counts slot
    if args.verbose:
        print("densifying counts...")
    a.X = a.X.todense()
    a.X = a.X.astype(int)
    # save file as .h5ad
    a.write(args.file, compression="gzip")


def to_X(args):
    """Swaps a matrix from .layers to .X slot of anndata object, overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    # swap layers
    if args.verbose:
        print("Putting .layers['{}'] in .X and saving".format(args.layer))
    a.X = a.layers[args.layer].copy()
    # save file as .h5ad
    a.write(args.file, compression="gzip")


def rename_obs(args):
    """Renames .obs columns in anndata object, and overwrites .h5ad file"""
    if args.verbose:
        print("Reading {}".format(args.file))
    adata = sc.read(args.file)
    if args.verbose:
        print("Renaming columns {} to {}".format(args.old_names, args.new_names))
    adata.obs.rename(columns=dict(zip(args.old_names, args.new_names)), inplace=True)
    adata.write(args.file, compression="gzip")


def label_info(args):
    """Prints value counts for .obs labels to console"""
    print("Reading {}\n".format(args.file))
    adata = sc.read(args.file)
    print(adata, "\n")
    for l in args.labels:
        print("{}\n{}\n".format(l, adata.obs[l].value_counts()))


def obs_to_categorical(args):
    """Makes .obs label categorical dtype"""
    if args.verbose:
        print("Reading {}".format(args.file))
    adata = sc.read(args.file)
    for label in args.labels:
        if args.verbose:
            print("Converting .obs['{}'] to categorical".format(label))
        if args.to_bool:
            adata.obs[label] = adata.obs[label].astype(bool).astype("category")
        else:
            adata.obs[label] = adata.obs[label].astype("category")
    adata.write(args.file, compression="gzip")


def add_label(args):
    """
    Uses .obs_names from filtered counts matrix to add binary label to a reference
    anndata object, "True" = present in filt, "False" = not present.
    Overwrites reference .h5ad file.
    """
    # read reference file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.ref_file))
    a = sc.read(args.ref_file)
    if args.verbose:
        print("\t", a)
    # read query file into anndata obj
    if args.verbose:
        print("\nReading {}".format(args.filt_file))
    b = sc.read(args.filt_file)
    if args.verbose:
        print("\t", b)
    # add .obs column to ref_file
    a.obs[args.label] = "False"
    a.obs.loc[b.obs_names, args.label] = "True"
    if args.verbose:
        print(
            "\nTransferring labels to {}:\n{}".format(
                args.ref_file, a.obs[args.label].value_counts()
            )
        )
    # save file as .h5ad
    if args.verbose:
        print("\nWriting counts to {}".format(args.ref_file))
    a.write(args.ref_file, compression="gzip")
    if args.rm_orig_file:
        # remove filtered file
        if args.verbose:
            print("\nRemoving {}".format(args.filt_file))
        os.remove(args.filt_file)


def knee_point(args):
    """Labels cells using "knee point" method from CellRanger 2.1"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # add knee_point label to anndata
    cellranger2(
        a,
        expected=args.expected,
        upper_quant=args.upper_quant,
        lower_prop=args.lower_prop,
        label=args.label,
        verbose=args.verbose,
    )
    # save file as .h5ad
    print("Writing counts to {}".format(args.file))
    a.write(args.file, compression="gzip")


def emptydrops(args):
    """Labels cells using "emptydrops" method from CellRanger 3.0"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # add EmptyDrops label to anndata
    cellranger3(
        a,
        init_counts=args.init_counts,
        min_umi_frac_of_median=args.min_umi_frac,
        min_umis_nonambient=args.min_umi,
        max_adj_pvalue=args.max_adj_pval,
    )
    # save file as .h5ad
    if args.verbose:
        print("Writing counts to {}".format(args.file))
    a.write(args.file, compression="gzip")


def subset(args):
    """Subsets anndata object on binary .obs label(s), saves to new .h5ad file"""
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    a = subset_adata(a, subset=args.subset, verbose=args.verbose)
    if args.verbose:
        print("Writing subsetted counts to {}".format(args.out))
    a.write(args.out, compression="gzip")


def concatenate(args):
    """Concatenates list of anndata objects in .h5ad format, keeping union of genes"""
    # read first file
    if args.verbose:
        print("Reading {}".format(args.files[0]))
    adata_0 = sc.read(args.files[0])
    # read the rest of the files into list
    adatas = []
    for f in args.files[1:]:
        # read file into anndata obj
        if args.verbose:
            print("Reading {}".format(f))
        adatas.append(sc.read(f))
    # concatenate all files
    if args.verbose:
        print("Concatenating files...")
    concat = adata_0.concatenate(
        adatas,
        join="outer",
        batch_categories=[os.path.splitext(os.path.basename(x))[0] for x in args.files],
        fill_value=0,
    )
    if args.verbose:
        print(
            "Final shape: {} cells and {} genes".format(
                concat.shape[0], concat.shape[1]
            )
        )
    # save file as .h5ad
    if args.verbose:
        print("Writing counts to {}".format(args.out))
    concat.write(args.out, compression="gzip")


def recipe(args):
    """Full automated processing of scRNA-seq data"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    if args.subset is not None:
        name.append("_".join(args.subset))
    if args.layer is not None:
        name.append(args.layer)
    if args.use_rep is not None:
        name.append(args.use_rep)
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # subset anndata on .obs column if desired
    if args.subset is not None:
        a = subset_adata(a, subset=args.subset)
    if args.process:
        # switch to proper layer
        if args.layer is not None:
            if args.verbose:
                print("Using layer {} to reduce dimensions".format(args.layer))
            a.X = a.layers[args.layer].copy()
        # preprocess with dropkick recipe
        a = recipe_dropkick(
            a,
            X_final="arcsinh_norm",
            verbose=args.verbose,
            filter=True,
            min_genes=args.min_genes,
        )
        # reduce dimensions
        dim_reduce(
            a,
            use_rep=args.use_rep,
            clust_resolution=args.resolution,
            paga=args.paga,
            verbose=args.verbose,
            seed=args.seed,
        )
    # run cell cycle inference
    if args.cell_cycle:
        cc_score(a, verbose=args.verbose)
        args.colors = ["phase"] + args.colors
    # make sure output dir exists before saving plots
    check_dir_exists(args.outdir)
    # if there's DE to do, plot genes
    if args.diff_expr is not None:
        if isinstance(args.diff_expr, str):
            args.diff_expr = [args.diff_expr]
        for de in args.diff_expr:
            plot_genes(
                a,
                de_method=args.de_method,
                plot_type=de,
                groupby="leiden",
                n_genes=5,
                cmap=args.cmap,
                save_to="{}/{}_{}.png".format(args.outdir, de, "_".join(name)),
                verbose=args.verbose,
            )
        # if there's cnmf results, plot those on a heatmap/matrix/dotplot too
        if "cnmf_spectra" in a.varm:
            for de in args.diff_expr:
                plot_genes_cnmf(
                    a,
                    plot_type=de,
                    groupby="leiden",
                    attr="varm",
                    keys="cnmf_spectra",
                    indices=None,
                    n_genes=5,
                    cmap=args.cmap,
                    save_to="{}/{}_cnmf_{}.png".format(args.outdir, de, "_".join(name)),
                )
    # if there's a cnmf flag, try to plot loadings
    if args.cnmf:
        # check for cnmf results in anndata object
        if "cnmf_spectra" in a.varm:
            _ = rank_genes_cnmf(a, show=False)
            if args.verbose:
                print(
                    "Saving cNMF loadings to {}/{}_cnmfspectra.png".format(
                        args.outdir, "_".join(name)
                    )
                )
            plt.savefig("{}/{}_cnmfspectra.png".format(args.outdir, "_".join(name)))
            if args.verbose:
                print(
                    "Saving embeddings to {}/{}_embedding.png".format(
                        args.outdir, "_".join(name)
                    )
                )
            # save embedding plot with cNMF loadings
            if args.colors is None:
                args.colors = []
            plot_embedding(
                a,
                colors=args.colors
                + a.obs.columns[a.obs.columns.str.startswith("usage_")].tolist(),
                show_clustering=True,
                n_cnmf_markers=args.n_cnmf_markers,
                cmap=args.cmap,
                seed=args.seed,
                save_to="{}/{}_embedding.png".format(args.outdir, "_".join(name)),
                verbose=args.verbose,
                size=args.point_size,
            )
        else:
            print(
                "cNMF results not detected in {}. Skipping cNMF overlay for embedding.".format(
                    args.file
                )
            )
            # save embedding plot without cNMF loadings
            if args.verbose:
                print(
                    "Saving embeddings to {}/{}_embedding.png".format(
                        args.outdir, "_".join(name)
                    )
                )
            plot_embedding(
                a,
                colors=args.colors,
                show_clustering=True,
                cmap=args.cmap,
                seed=args.seed,
                save_to="{}/{}_embedding.png".format(args.outdir, "_".join(name)),
                verbose=args.verbose,
                size=args.point_size,
            )
    else:
        # save embedding plot
        if args.verbose:
            print(
                "Saving embeddings to {}/{}_embedding.png".format(
                    args.outdir, "_".join(name)
                )
            )
        plot_embedding(
            a,
            colors=args.colors,
            show_clustering=True,
            cmap=args.cmap,
            seed=args.seed,
            save_to="{}/{}_embedding.png".format(args.outdir, "_".join(name)),
            verbose=args.verbose,
            size=args.point_size,
        )
    # save file as .h5ad
    if args.save_adata:
        if args.verbose:
            print(
                "Saving AnnData object to to {}/{}_processed.h5ad".format(
                    args.outdir, "_".join(name)
                )
            )
        a.write(
            "{}/{}_processed.h5ad".format(args.outdir, "_".join(name)),
            compression="gzip",
        )


def de(args):
    """Performs differential expression analysis on a processed .h5ad file and plots results"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    if isinstance(args.plot_type, str):
        args.plot_type = [args.plot_type]
    # perform DE analysis and plot genes
    for plot in args.plot_type:
        plot_genes(
            a,
            de_method=args.de_method,
            plot_type=plot,
            groupby=args.groupby,
            n_genes=args.n_genes,
            dendrogram=args.dendrogram,
            ambient=args.ambient,
            cmap=args.cmap,
            save_to="{}/{}_{}.png".format(args.outdir, plot, "_".join(name)),
            verbose=args.verbose,
        )


def cnmf_markers(args):
    """Plots heatmap/matrix/dotplot of cNMF loadings for desired groups"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    if isinstance(args.plot_type, str):
        args.plot_type = [args.plot_type]
    # plot cNMF marker genes
    for plot in args.plot_type:
        plot_genes_cnmf(
            a,
            plot_type=plot,
            groupby=args.groupby,
            n_genes=args.n_genes,
            dendrogram=args.dendrogram,
            cmap=args.cmap,
            save_to="{}/{}_cnmf_{}.png".format(args.outdir, plot, "_".join(name)),
        )


def pie(args):
    """Plots populational pie charts for desired groups"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # generate cluster_pie plot
    os.chdir(args.outdir)  # set output directory for scanpy figures
    _ = cluster_pie(a, pie_by=args.pieby, groupby=args.groupby)
    if args.verbose:
        print(
            "Saving cluster pie charts to {}/{}_pie.png".format(
                args.outdir, "_".join(name)
            )
        )
    plt.savefig("{}/{}_pie.png".format(args.outdir, "_".join(name)))


def main():
    parser = argparse.ArgumentParser(prog="kitchen")
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=get_versions()["version"],
    )
    subparsers = parser.add_subparsers()

    info_parser = subparsers.add_parser(
        "info",
        help="Show information about .h5ad file",
    )
    info_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    info_parser.set_defaults(func=info)

    to_h5ad_parser = subparsers.add_parser(
        "to_h5ad",
        help="Convert counts matrix to .h5ad format",
    )
    to_h5ad_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as comma or tab delimited text file",
    )
    to_h5ad_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for writing h5ad. Default './'",
    )
    to_h5ad_parser.add_argument(
        "-rm",
        "--rm-flat-file",
        help="Remove original flat file. Default False",
        action="store_true",
    )
    to_h5ad_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_h5ad_parser.set_defaults(func=to_h5ad)

    mtx_to_h5ad_parser = subparsers.add_parser(
        "mtx_to_h5ad",
        help="Convert .mtx files from DropEst or 10x CellRanger to .h5ad format",
    )
    mtx_to_h5ad_parser.add_argument(
        "dir",
        type=str,
        help="Directory containing counts matrix as .mtx and text files",
    )
    mtx_to_h5ad_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for writing h5ad. Default './'",
    )
    mtx_to_h5ad_parser.add_argument(
        "--dropest",
        help="Directory contains DropEst outputs, rather than 10x CellRanger outputs",
        action="store_true",
    )
    mtx_to_h5ad_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    mtx_to_h5ad_parser.set_defaults(func=mtx_to_h5ad)

    to_csv_parser = subparsers.add_parser(
        "to_csv",
        help="Save .h5ad counts to .csv file(s)",
    )
    to_csv_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    to_csv_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for writing csv file(s). Default './'",
    )
    to_csv_parser.add_argument(
        "-l",
        "--layer",
        type=str,
        default=None,
        help="Key from .layers to save. Default '.X'.",
    )
    to_csv_parser.add_argument(
        "-s",
        "--separate-indices",
        help="Save indices (.obs and .var names) to separate files",
        action="store_true",
    )
    to_csv_parser.add_argument(
        "-obs",
        "--obs-only",
        help="Save .obs dataframe from .h5ad object to csv file, not counts matrix.",
        action="store_true",
    )
    to_csv_parser.add_argument(
        "-rm",
        "--rm-h5ad-file",
        help="Remove original h5ad file. Default False",
        action="store_true",
    )
    to_csv_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_csv_parser.set_defaults(func=h5ad_to_csv)

    transpose_parser = subparsers.add_parser(
        "transpose",
        help="Transpose counts matrix and save as .h5ad",
    )
    transpose_parser.add_argument(
        "file",
        type=str,
        help="Input counts matrix as .h5ad or tab delimited text file",
    )
    transpose_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    transpose_parser.set_defaults(func=transpose)

    to_sparse_parser = subparsers.add_parser(
        "to_sparse",
        help="Convert .X slot of anndata object to scipy CSR format, overwrite .h5ad file",
    )
    to_sparse_parser.add_argument(
        "file",
        type=str,
        help="Input counts matrix as .h5ad or tab delimited text file",
    )
    to_sparse_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_sparse_parser.set_defaults(func=to_sparse)

    to_dense_parser = subparsers.add_parser(
        "to_dense",
        help="Convert .X slot of anndata object to numpy.matrix format, overwrite .h5ad file",
    )
    to_dense_parser.add_argument(
        "file",
        type=str,
        help="Input counts matrix as .h5ad or tab delimited text file",
    )
    to_dense_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_dense_parser.set_defaults(func=to_dense)

    to_X_parser = subparsers.add_parser(
        "to_X",
        help="Swap a matrix from .layers to .X slot of anndata object",
    )
    to_X_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    to_X_parser.add_argument(
        "-l",
        "--layer",
        type=str,
        required=True,
        help="Key from .layers to replace .X with",
    )
    to_X_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_X_parser.set_defaults(func=to_X)

    rename_obs_parser = subparsers.add_parser(
        "rename_obs",
        help="Rename .obs columns in .h5ad file",
    )
    rename_obs_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    rename_obs_parser.add_argument(
        "-o",
        "--old-names",
        type=str,
        nargs="+",
        required=True,
        help="List of existing .obs column names to change",
    )
    rename_obs_parser.add_argument(
        "-n",
        "--new-names",
        type=str,
        nargs="+",
        required=True,
        help="List of new .obs column names corresponding to args.old_names",
    )
    rename_obs_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    rename_obs_parser.set_defaults(func=rename_obs)

    label_info_parser = subparsers.add_parser(
        "label_info",
        help="Print value counts for .obs labels to console",
    )
    label_info_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    label_info_parser.add_argument(
        "-l",
        "--labels",
        type=str,
        nargs="+",
        required=True,
        help="List of .obs column names to print value counts for",
    )
    label_info_parser.set_defaults(func=label_info)

    to_categorical_parser = subparsers.add_parser(
        "to_categorical",
        help="Make .obs label categorical dtype",
    )
    to_categorical_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    to_categorical_parser.add_argument(
        "-l",
        "--labels",
        type=str,
        nargs="+",
        required=True,
        help="List of .obs column names to convert to categorical dtype",
    )
    to_categorical_parser.add_argument(
        "-b",
        "--to-bool",
        required=False,
        help="Convert to boolean first (for columns with values of {0,1})",
        action="store_true",
    )
    to_categorical_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_categorical_parser.set_defaults(func=obs_to_categorical)

    add_label_parser = subparsers.add_parser(
        "add_label",
        help="Add label to .obs of .h5ad reference file using cells in a filtered file",
    )
    add_label_parser.add_argument(
        "ref_file",
        type=str,
        help="Reference counts matrix to add label to, as .h5ad file",
    )
    add_label_parser.add_argument(
        "filt_file",
        type=str,
        help="Filtered counts matrix providing positive labels, as .h5ad file",
    )
    add_label_parser.add_argument(
        "-l",
        "--label",
        type=str,
        required=True,
        help=".obs column name to place final labels in",
    )
    add_label_parser.add_argument(
        "-rm",
        "--rm-orig-file",
        help="Remove filtered file. Default False",
        action="store_true",
    )
    add_label_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    add_label_parser.set_defaults(func=add_label)

    cellranger2_parser = subparsers.add_parser(
        "cellranger2",
        help="Label cells using 'knee point' method from CellRanger 2.1",
    )
    cellranger2_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    cellranger2_parser.add_argument(
        "-e",
        "--expected",
        type=int,
        nargs="?",
        default=3000,
        help="Number of expected cells in the dataset; default 3000",
    )
    cellranger2_parser.add_argument(
        "-u",
        "--upper-quant",
        type=float,
        default=0.99,
        help="Upper quantile of expected cells for knee point; default 0.99",
    )
    cellranger2_parser.add_argument(
        "-lp",
        "--lower-prop",
        type=float,
        default=0.1,
        help="Lower proportion of cells to set threshold at; default 0.1",
    )
    cellranger2_parser.add_argument(
        "-l",
        "--label",
        type=str,
        default="CellRanger_2",
        help=".obs column name to place final labels in",
    )
    cellranger2_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    cellranger2_parser.set_defaults(func=knee_point)

    cellranger3_parser = subparsers.add_parser(
        "cellranger3",
        help="Label cells using 'emptydrops' method from CellRanger 3.0",
    )
    cellranger3_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    cellranger3_parser.add_argument(
        "--init-counts",
        type=int,
        nargs="?",
        default=15000,
        help="Initial total counts threshold for calling cells; default 15000",
    )
    cellranger3_parser.add_argument(
        "--min-umi-frac",
        type=float,
        default=0.01,
        help="Minimum total counts for testing barcodes as fraction of median counts for initially labeled cells; default 0.01",
    )
    cellranger3_parser.add_argument(
        "--min-umi",
        type=int,
        default=500,
        help="Minimum total counts for testing barcodes; default 500",
    )
    cellranger3_parser.add_argument(
        "--max-adj-pval",
        type=float,
        default=0.01,
        help="Maximum p-value for cell calling after B-H correction; default 0.01",
    )
    cellranger3_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    cellranger3_parser.set_defaults(func=emptydrops)

    subset_parser = subparsers.add_parser(
        "subset",
        help="Subset AnnData object on one or more .obs columns",
    )
    subset_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    subset_parser.add_argument(
        "-s",
        "--subset",
        default=None,
        nargs="*",
        help=".obs column(s) to subset cells on",
    )
    subset_parser.add_argument(
        "-o",
        "--out",
        type=str,
        nargs="?",
        default="./subset.h5ad",
        help="Path to output .h5ad file. Default './subset.h5ad'",
    )
    subset_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    subset_parser.set_defaults(func=subset)

    concatenate_parser = subparsers.add_parser(
        "concatenate",
        help="Combine multiple .h5ad files",
    )
    concatenate_parser.add_argument(
        "files", type=str, nargs="*", help="List of .h5ad files to concatenate"
    )
    concatenate_parser.add_argument(
        "-o",
        "--out",
        type=str,
        nargs="?",
        default="./concat.h5ad",
        help="Path to output .h5ad file. Default './concat.h5ad'",
    )
    concatenate_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    concatenate_parser.set_defaults(func=concatenate)

    recipe_parser = subparsers.add_parser(
        "recipe",
        help="Full automated processing of scRNA-seq data from command line",
    )
    recipe_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    recipe_parser.add_argument(
        "-p",
        "--process",
        help="Process AnnData (PCA, PAGA, UMAP). Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "--min-genes",
        required=False,
        type=int,
        default=1,
        help="Minimum number of genes detected to keep cell. Default 1.",
    )
    recipe_parser.add_argument(
        "-s",
        "--subset",
        nargs="*",
        default=None,
        help=".obs column(s) to subset cells on before embedding",
    )
    recipe_parser.add_argument(
        "-l",
        "--layer",
        type=str,
        default=None,
        help="Key from .layers to use for embedding. Default '.X'.",
    )
    recipe_parser.add_argument(
        "-ur",
        "--use-rep",
        type=str,
        default=None,
        help="Key from .obsm to use for neighbors graph and embedding. Default run PCA.",
    )
    recipe_parser.add_argument(
        "-r",
        "--resolution",
        type=float,
        default=1.0,
        help="Resolution between 0.0 and 1.0 for Leiden clustering. Default 1.0.",
    )
    recipe_parser.add_argument(
        "-cc",
        "--cell-cycle",
        help="Calculate cell cycle scores. Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "--paga",
        help="Run PAGA to seed UMAP embedding. Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "-de",
        "--diff_expr",
        type=str,
        nargs="*",
        default=None,
        help="Type(s) of DE gene expression plots ['heatmap', 'dotplot', 'matrixplot']",
    )
    recipe_parser.add_argument(
        "-m",
        "--de-method",
        required=False,
        type=str,
        default="t-test_overestim_var",
        help="DE testing method if -de is not None ['t-test_overestim_var', 't-test', 'wilcoxon']",
    )
    recipe_parser.add_argument(
        "-c",
        "--colors",
        type=str,
        nargs="*",
        default=[],
        help="Colors to plot on embedding. Can be .obs columns or gene names.",
    )
    recipe_parser.add_argument(
        "-cm",
        "--cmap",
        required=False,
        type=str,
        default="viridis",
        help="Color map to use in embedding overlays and genes plot",
    )
    recipe_parser.add_argument(
        "-ps",
        "--point-size",
        required=False,
        type=int,
        default=None,
        help="Size of points in embedding. Default 120000/n_cells.",
    )
    recipe_parser.add_argument(
        "--cnmf",
        help="Plot cNMF usages on embedding. Default False.",
        action="store_true",
    )
    recipe_parser.add_argument(
        "--n-cnmf-markers",
        required=False,
        type=int,
        default=7,
        help="Number of top loaded genes to print on cNMF embeddings. Default 7.",
    )
    recipe_parser.add_argument(
        "--seed",
        type=int,
        help="Random state for generating embeddings. Default 18",
        default=18,
    )
    recipe_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for saving plots. Default './'",
    )
    recipe_parser.add_argument(
        "-sa",
        "--save-adata",
        help="Save updated AnnData to .h5ad file. Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    recipe_parser.set_defaults(func=recipe)

    de_parser = subparsers.add_parser(
        "de",
        help="Perform differential expression analysis on a processed .h5ad file and plot results",
    )
    de_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    de_parser.add_argument(
        "-m",
        "--de-method",
        required=False,
        type=str,
        default="t-test_overestim_var",
        help="DE testing method ['t-test_overestim_var', 't-test', 'wilcoxon']",
    )
    de_parser.add_argument(
        "-p",
        "--plot-type",
        type=str,
        nargs="*",
        default=None,
        help="Type(s) of DE gene expression plot ['heatmap', 'dotplot', 'matrixplot']",
    )
    de_parser.add_argument(
        "-g",
        "--groupby",
        required=False,
        type=str,
        default="leiden",
        help=".obs variable to group cells by for DE analysis",
    )
    de_parser.add_argument(
        "-n",
        "--n-genes",
        type=int,
        default=5,
        help="Number of genes to plot per group. Default 5.",
    )
    de_parser.add_argument(
        "-cm",
        "--cmap",
        required=False,
        type=str,
        default="Reds",
        help="Color map to use in genes plot",
    )
    de_parser.add_argument(
        "-a",
        "--ambient",
        help="Include ambient genes",
        action="store_true",
    )
    de_parser.add_argument(
        "-d",
        "--dendrogram",
        help="Generate dendrogram of group similarities",
        action="store_true",
    )
    de_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        help="Output directory for saving plots. Default './'",
        nargs="?",
        default=".",
    )
    de_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    de_parser.set_defaults(func=de)

    pie_parser = subparsers.add_parser(
        "pie",
        help="Plot populational pie charts for desired groups",
    )
    pie_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    pie_parser.add_argument(
        "-g",
        "--groupby",
        required=False,
        type=str,
        default="leiden",
        help=".obs variable to group cells by for plotting pie charts",
    )
    pie_parser.add_argument(
        "-p",
        "--pieby",
        required=False,
        type=str,
        default="batch",
        help=".obs variable to group cells by for pie chart within each groupby category",
    )
    pie_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for saving plots. Default './'",
    )
    pie_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    pie_parser.set_defaults(func=pie)

    cnmf_markers_parser = subparsers.add_parser(
        "cnmf_markers",
        help="Plot heatmap/matrix/dotplot of cNMF loadings for desired groups",
    )
    cnmf_markers_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    cnmf_markers_parser.add_argument(
        "-p",
        "--plot-type",
        type=str,
        nargs="*",
        default=None,
        help="Type(s) of gene expression plot ['heatmap', 'dotplot', 'matrixplot']",
    )
    cnmf_markers_parser.add_argument(
        "-g",
        "--groupby",
        required=False,
        type=str,
        default="leiden",
        help=".obs variable to group cells by for plotting cNMF genes",
    )
    cnmf_markers_parser.add_argument(
        "-n",
        "--n-genes",
        type=int,
        default=5,
        help="Number of genes to plot per group. Default 5.",
    )
    cnmf_markers_parser.add_argument(
        "-cm",
        "--cmap",
        required=False,
        type=str,
        default="Reds",
        help="Color map to use in genes plot",
    )
    cnmf_markers_parser.add_argument(
        "-d",
        "--dendrogram",
        help="Generate dendrogram of group similarities",
        action="store_true",
    )
    cnmf_markers_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for saving plots. Default './'",
    )
    cnmf_markers_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    cnmf_markers_parser.set_defaults(func=cnmf_markers)

    args = parser.parse_args()

    # if --quietly specified, reverse verbosity
    if hasattr(args, "quietly"):
        if args.quietly:
            args.verbose = False
            del args.quietly
        else:
            args.verbose = True
            del args.quietly

    args.func(args)

Functions

def add_label(args)

Uses .obs_names from filtered counts matrix to add binary label to a reference anndata object, "True" = present in filt, "False" = not present. Overwrites reference .h5ad file.

Expand source code
def add_label(args):
    """
    Uses .obs_names from filtered counts matrix to add binary label to a reference
    anndata object, "True" = present in filt, "False" = not present.
    Overwrites reference .h5ad file.
    """
    # read reference file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.ref_file))
    a = sc.read(args.ref_file)
    if args.verbose:
        print("\t", a)
    # read query file into anndata obj
    if args.verbose:
        print("\nReading {}".format(args.filt_file))
    b = sc.read(args.filt_file)
    if args.verbose:
        print("\t", b)
    # add .obs column to ref_file
    a.obs[args.label] = "False"
    a.obs.loc[b.obs_names, args.label] = "True"
    if args.verbose:
        print(
            "\nTransferring labels to {}:\n{}".format(
                args.ref_file, a.obs[args.label].value_counts()
            )
        )
    # save file as .h5ad
    if args.verbose:
        print("\nWriting counts to {}".format(args.ref_file))
    a.write(args.ref_file, compression="gzip")
    if args.rm_orig_file:
        # remove filtered file
        if args.verbose:
            print("\nRemoving {}".format(args.filt_file))
        os.remove(args.filt_file)
def cnmf_markers(args)

Plots heatmap/matrix/dotplot of cNMF loadings for desired groups

Expand source code
def cnmf_markers(args):
    """Plots heatmap/matrix/dotplot of cNMF loadings for desired groups"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    if isinstance(args.plot_type, str):
        args.plot_type = [args.plot_type]
    # plot cNMF marker genes
    for plot in args.plot_type:
        plot_genes_cnmf(
            a,
            plot_type=plot,
            groupby=args.groupby,
            n_genes=args.n_genes,
            dendrogram=args.dendrogram,
            cmap=args.cmap,
            save_to="{}/{}_cnmf_{}.png".format(args.outdir, plot, "_".join(name)),
        )
def concatenate(args)

Concatenates list of anndata objects in .h5ad format, keeping union of genes

Expand source code
def concatenate(args):
    """Concatenates list of anndata objects in .h5ad format, keeping union of genes"""
    # read first file
    if args.verbose:
        print("Reading {}".format(args.files[0]))
    adata_0 = sc.read(args.files[0])
    # read the rest of the files into list
    adatas = []
    for f in args.files[1:]:
        # read file into anndata obj
        if args.verbose:
            print("Reading {}".format(f))
        adatas.append(sc.read(f))
    # concatenate all files
    if args.verbose:
        print("Concatenating files...")
    concat = adata_0.concatenate(
        adatas,
        join="outer",
        batch_categories=[os.path.splitext(os.path.basename(x))[0] for x in args.files],
        fill_value=0,
    )
    if args.verbose:
        print(
            "Final shape: {} cells and {} genes".format(
                concat.shape[0], concat.shape[1]
            )
        )
    # save file as .h5ad
    if args.verbose:
        print("Writing counts to {}".format(args.out))
    concat.write(args.out, compression="gzip")
def de(args)

Performs differential expression analysis on a processed .h5ad file and plots results

Expand source code
def de(args):
    """Performs differential expression analysis on a processed .h5ad file and plots results"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    if isinstance(args.plot_type, str):
        args.plot_type = [args.plot_type]
    # perform DE analysis and plot genes
    for plot in args.plot_type:
        plot_genes(
            a,
            de_method=args.de_method,
            plot_type=plot,
            groupby=args.groupby,
            n_genes=args.n_genes,
            dendrogram=args.dendrogram,
            ambient=args.ambient,
            cmap=args.cmap,
            save_to="{}/{}_{}.png".format(args.outdir, plot, "_".join(name)),
            verbose=args.verbose,
        )
def emptydrops(args)

Labels cells using "emptydrops" method from CellRanger 3.0

Expand source code
def emptydrops(args):
    """Labels cells using "emptydrops" method from CellRanger 3.0"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # add EmptyDrops label to anndata
    cellranger3(
        a,
        init_counts=args.init_counts,
        min_umi_frac_of_median=args.min_umi_frac,
        min_umis_nonambient=args.min_umi,
        max_adj_pvalue=args.max_adj_pval,
    )
    # save file as .h5ad
    if args.verbose:
        print("Writing counts to {}".format(args.file))
    a.write(args.file, compression="gzip")
def h5ad_to_csv(args)

Converts counts matrix from .h5ad to flat file (.txt, .csv)

Expand source code
def h5ad_to_csv(args):
    """Converts counts matrix from .h5ad to flat file (.txt, .csv)"""
    # get basename of file for writing outputs
    name = os.path.splitext(os.path.basename(args.file))[0]
    # check to make sure it's an .h5ad file
    if os.path.splitext(args.file)[1] != ".h5ad":
        raise ValueError("Input file must be in .h5ad format")
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        # print information about counts, including names of cells and genes
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # swap to desired layer
    if args.layer is not None:
        if args.verbose:
            print("Using .layers['{}']".format(args.layer))
        a.X = a.layers[args.layer].copy()
    # check for/create output directory
    check_dir_exists(args.outdir)
    if args.obs_only:
        if args.verbose:
            print("Writing .obs dataframe to {}/{}_obs.csv".format(args.outdir, name))
        a.obs.to_csv("{}/{}_obs.csv".format(args.outdir, name))
    elif args.separate_indices:
        if args.verbose:
            print("Writing counts to {}/{}_X.csv".format(args.outdir, name))
        if isinstance(a.X, sparse.csr.csr_matrix):
            df = pd.DataFrame.sparse.from_spmatrix(a.X)
            df.to_csv(
                "{}/{}_X.csv".format(args.outdir, name),
                sep=",",
                header=False,
                index=False,
            )
        elif isinstance(a.X, np.ndarray):
            np.savetxt("{}/{}_X.csv".format(args.outdir, name), a.X, delimiter=",")
        if args.verbose:
            print("Writing obs names to {}/{}_obs.csv".format(args.outdir, name))
        pd.DataFrame(a.obs_names).to_csv(
            "{}/{}_obs.csv".format(args.outdir, name), header=False, index=False
        )
        if args.verbose:
            print("Writing var names to {}/{}_var.csv".format(args.outdir, name))
        pd.DataFrame(a.var_names).to_csv(
            "{}/{}_var.csv".format(args.outdir, name), header=False, index=False
        )
    else:
        if isinstance(a.X, sparse.csr.csr_matrix):
            df = pd.DataFrame.sparse.from_spmatrix(
                a.X, index=a.obs_names, columns=a.var_names
            )
        else:
            df = pd.DataFrame(a.X, index=a.obs_names, columns=a.var_names)
        # save file as .csv
        if args.verbose:
            print("Writing counts to {}/{}.csv".format(args.outdir, name))
        df.to_csv(
            "{}/{}.csv".format(args.outdir, name), sep=",", header=True, index=True
        )
    if args.rm_h5ad_file:
        # remove original, h5ad file
        if args.verbose:
            print("Removing {}".format(args.file))
        os.remove(args.file)
def info(args)

Prints information about .h5ad file to console

Expand source code
def info(args):
    """Prints information about .h5ad file to console"""
    print("Reading {}\n".format(args.file))
    adata = sc.read(args.file)
    print(adata, "\n")
    print(".X: {} with {}\n".format(type(adata.X), adata.X.dtype))
    print("obs_names: {}".format(adata.obs_names))
    print("var_names: {}".format(adata.var_names))
def knee_point(args)

Labels cells using "knee point" method from CellRanger 2.1

Expand source code
def knee_point(args):
    """Labels cells using "knee point" method from CellRanger 2.1"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # add knee_point label to anndata
    cellranger2(
        a,
        expected=args.expected,
        upper_quant=args.upper_quant,
        lower_prop=args.lower_prop,
        label=args.label,
        verbose=args.verbose,
    )
    # save file as .h5ad
    print("Writing counts to {}".format(args.file))
    a.write(args.file, compression="gzip")
def label_info(args)

Prints value counts for .obs labels to console

Expand source code
def label_info(args):
    """Prints value counts for .obs labels to console"""
    print("Reading {}\n".format(args.file))
    adata = sc.read(args.file)
    print(adata, "\n")
    for l in args.labels:
        print("{}\n{}\n".format(l, adata.obs[l].value_counts()))
def main()
Expand source code
def main():
    parser = argparse.ArgumentParser(prog="kitchen")
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=get_versions()["version"],
    )
    subparsers = parser.add_subparsers()

    info_parser = subparsers.add_parser(
        "info",
        help="Show information about .h5ad file",
    )
    info_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    info_parser.set_defaults(func=info)

    to_h5ad_parser = subparsers.add_parser(
        "to_h5ad",
        help="Convert counts matrix to .h5ad format",
    )
    to_h5ad_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as comma or tab delimited text file",
    )
    to_h5ad_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for writing h5ad. Default './'",
    )
    to_h5ad_parser.add_argument(
        "-rm",
        "--rm-flat-file",
        help="Remove original flat file. Default False",
        action="store_true",
    )
    to_h5ad_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_h5ad_parser.set_defaults(func=to_h5ad)

    mtx_to_h5ad_parser = subparsers.add_parser(
        "mtx_to_h5ad",
        help="Convert .mtx files from DropEst or 10x CellRanger to .h5ad format",
    )
    mtx_to_h5ad_parser.add_argument(
        "dir",
        type=str,
        help="Directory containing counts matrix as .mtx and text files",
    )
    mtx_to_h5ad_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for writing h5ad. Default './'",
    )
    mtx_to_h5ad_parser.add_argument(
        "--dropest",
        help="Directory contains DropEst outputs, rather than 10x CellRanger outputs",
        action="store_true",
    )
    mtx_to_h5ad_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    mtx_to_h5ad_parser.set_defaults(func=mtx_to_h5ad)

    to_csv_parser = subparsers.add_parser(
        "to_csv",
        help="Save .h5ad counts to .csv file(s)",
    )
    to_csv_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    to_csv_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for writing csv file(s). Default './'",
    )
    to_csv_parser.add_argument(
        "-l",
        "--layer",
        type=str,
        default=None,
        help="Key from .layers to save. Default '.X'.",
    )
    to_csv_parser.add_argument(
        "-s",
        "--separate-indices",
        help="Save indices (.obs and .var names) to separate files",
        action="store_true",
    )
    to_csv_parser.add_argument(
        "-obs",
        "--obs-only",
        help="Save .obs dataframe from .h5ad object to csv file, not counts matrix.",
        action="store_true",
    )
    to_csv_parser.add_argument(
        "-rm",
        "--rm-h5ad-file",
        help="Remove original h5ad file. Default False",
        action="store_true",
    )
    to_csv_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_csv_parser.set_defaults(func=h5ad_to_csv)

    transpose_parser = subparsers.add_parser(
        "transpose",
        help="Transpose counts matrix and save as .h5ad",
    )
    transpose_parser.add_argument(
        "file",
        type=str,
        help="Input counts matrix as .h5ad or tab delimited text file",
    )
    transpose_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    transpose_parser.set_defaults(func=transpose)

    to_sparse_parser = subparsers.add_parser(
        "to_sparse",
        help="Convert .X slot of anndata object to scipy CSR format, overwrite .h5ad file",
    )
    to_sparse_parser.add_argument(
        "file",
        type=str,
        help="Input counts matrix as .h5ad or tab delimited text file",
    )
    to_sparse_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_sparse_parser.set_defaults(func=to_sparse)

    to_dense_parser = subparsers.add_parser(
        "to_dense",
        help="Convert .X slot of anndata object to numpy.matrix format, overwrite .h5ad file",
    )
    to_dense_parser.add_argument(
        "file",
        type=str,
        help="Input counts matrix as .h5ad or tab delimited text file",
    )
    to_dense_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_dense_parser.set_defaults(func=to_dense)

    to_X_parser = subparsers.add_parser(
        "to_X",
        help="Swap a matrix from .layers to .X slot of anndata object",
    )
    to_X_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    to_X_parser.add_argument(
        "-l",
        "--layer",
        type=str,
        required=True,
        help="Key from .layers to replace .X with",
    )
    to_X_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_X_parser.set_defaults(func=to_X)

    rename_obs_parser = subparsers.add_parser(
        "rename_obs",
        help="Rename .obs columns in .h5ad file",
    )
    rename_obs_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    rename_obs_parser.add_argument(
        "-o",
        "--old-names",
        type=str,
        nargs="+",
        required=True,
        help="List of existing .obs column names to change",
    )
    rename_obs_parser.add_argument(
        "-n",
        "--new-names",
        type=str,
        nargs="+",
        required=True,
        help="List of new .obs column names corresponding to args.old_names",
    )
    rename_obs_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    rename_obs_parser.set_defaults(func=rename_obs)

    label_info_parser = subparsers.add_parser(
        "label_info",
        help="Print value counts for .obs labels to console",
    )
    label_info_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    label_info_parser.add_argument(
        "-l",
        "--labels",
        type=str,
        nargs="+",
        required=True,
        help="List of .obs column names to print value counts for",
    )
    label_info_parser.set_defaults(func=label_info)

    to_categorical_parser = subparsers.add_parser(
        "to_categorical",
        help="Make .obs label categorical dtype",
    )
    to_categorical_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    to_categorical_parser.add_argument(
        "-l",
        "--labels",
        type=str,
        nargs="+",
        required=True,
        help="List of .obs column names to convert to categorical dtype",
    )
    to_categorical_parser.add_argument(
        "-b",
        "--to-bool",
        required=False,
        help="Convert to boolean first (for columns with values of {0,1})",
        action="store_true",
    )
    to_categorical_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    to_categorical_parser.set_defaults(func=obs_to_categorical)

    add_label_parser = subparsers.add_parser(
        "add_label",
        help="Add label to .obs of .h5ad reference file using cells in a filtered file",
    )
    add_label_parser.add_argument(
        "ref_file",
        type=str,
        help="Reference counts matrix to add label to, as .h5ad file",
    )
    add_label_parser.add_argument(
        "filt_file",
        type=str,
        help="Filtered counts matrix providing positive labels, as .h5ad file",
    )
    add_label_parser.add_argument(
        "-l",
        "--label",
        type=str,
        required=True,
        help=".obs column name to place final labels in",
    )
    add_label_parser.add_argument(
        "-rm",
        "--rm-orig-file",
        help="Remove filtered file. Default False",
        action="store_true",
    )
    add_label_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    add_label_parser.set_defaults(func=add_label)

    cellranger2_parser = subparsers.add_parser(
        "cellranger2",
        help="Label cells using 'knee point' method from CellRanger 2.1",
    )
    cellranger2_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    cellranger2_parser.add_argument(
        "-e",
        "--expected",
        type=int,
        nargs="?",
        default=3000,
        help="Number of expected cells in the dataset; default 3000",
    )
    cellranger2_parser.add_argument(
        "-u",
        "--upper-quant",
        type=float,
        default=0.99,
        help="Upper quantile of expected cells for knee point; default 0.99",
    )
    cellranger2_parser.add_argument(
        "-lp",
        "--lower-prop",
        type=float,
        default=0.1,
        help="Lower proportion of cells to set threshold at; default 0.1",
    )
    cellranger2_parser.add_argument(
        "-l",
        "--label",
        type=str,
        default="CellRanger_2",
        help=".obs column name to place final labels in",
    )
    cellranger2_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    cellranger2_parser.set_defaults(func=knee_point)

    cellranger3_parser = subparsers.add_parser(
        "cellranger3",
        help="Label cells using 'emptydrops' method from CellRanger 3.0",
    )
    cellranger3_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    cellranger3_parser.add_argument(
        "--init-counts",
        type=int,
        nargs="?",
        default=15000,
        help="Initial total counts threshold for calling cells; default 15000",
    )
    cellranger3_parser.add_argument(
        "--min-umi-frac",
        type=float,
        default=0.01,
        help="Minimum total counts for testing barcodes as fraction of median counts for initially labeled cells; default 0.01",
    )
    cellranger3_parser.add_argument(
        "--min-umi",
        type=int,
        default=500,
        help="Minimum total counts for testing barcodes; default 500",
    )
    cellranger3_parser.add_argument(
        "--max-adj-pval",
        type=float,
        default=0.01,
        help="Maximum p-value for cell calling after B-H correction; default 0.01",
    )
    cellranger3_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    cellranger3_parser.set_defaults(func=emptydrops)

    subset_parser = subparsers.add_parser(
        "subset",
        help="Subset AnnData object on one or more .obs columns",
    )
    subset_parser.add_argument(
        "file",
        type=str,
        help="Counts matrix as .h5ad file",
    )
    subset_parser.add_argument(
        "-s",
        "--subset",
        default=None,
        nargs="*",
        help=".obs column(s) to subset cells on",
    )
    subset_parser.add_argument(
        "-o",
        "--out",
        type=str,
        nargs="?",
        default="./subset.h5ad",
        help="Path to output .h5ad file. Default './subset.h5ad'",
    )
    subset_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    subset_parser.set_defaults(func=subset)

    concatenate_parser = subparsers.add_parser(
        "concatenate",
        help="Combine multiple .h5ad files",
    )
    concatenate_parser.add_argument(
        "files", type=str, nargs="*", help="List of .h5ad files to concatenate"
    )
    concatenate_parser.add_argument(
        "-o",
        "--out",
        type=str,
        nargs="?",
        default="./concat.h5ad",
        help="Path to output .h5ad file. Default './concat.h5ad'",
    )
    concatenate_parser.add_argument(
        "-q",
        "--quietly",
        required=False,
        help="Run without printing processing updates to console",
        action="store_true",
    )
    concatenate_parser.set_defaults(func=concatenate)

    recipe_parser = subparsers.add_parser(
        "recipe",
        help="Full automated processing of scRNA-seq data from command line",
    )
    recipe_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    recipe_parser.add_argument(
        "-p",
        "--process",
        help="Process AnnData (PCA, PAGA, UMAP). Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "--min-genes",
        required=False,
        type=int,
        default=1,
        help="Minimum number of genes detected to keep cell. Default 1.",
    )
    recipe_parser.add_argument(
        "-s",
        "--subset",
        nargs="*",
        default=None,
        help=".obs column(s) to subset cells on before embedding",
    )
    recipe_parser.add_argument(
        "-l",
        "--layer",
        type=str,
        default=None,
        help="Key from .layers to use for embedding. Default '.X'.",
    )
    recipe_parser.add_argument(
        "-ur",
        "--use-rep",
        type=str,
        default=None,
        help="Key from .obsm to use for neighbors graph and embedding. Default run PCA.",
    )
    recipe_parser.add_argument(
        "-r",
        "--resolution",
        type=float,
        default=1.0,
        help="Resolution between 0.0 and 1.0 for Leiden clustering. Default 1.0.",
    )
    recipe_parser.add_argument(
        "-cc",
        "--cell-cycle",
        help="Calculate cell cycle scores. Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "--paga",
        help="Run PAGA to seed UMAP embedding. Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "-de",
        "--diff_expr",
        type=str,
        nargs="*",
        default=None,
        help="Type(s) of DE gene expression plots ['heatmap', 'dotplot', 'matrixplot']",
    )
    recipe_parser.add_argument(
        "-m",
        "--de-method",
        required=False,
        type=str,
        default="t-test_overestim_var",
        help="DE testing method if -de is not None ['t-test_overestim_var', 't-test', 'wilcoxon']",
    )
    recipe_parser.add_argument(
        "-c",
        "--colors",
        type=str,
        nargs="*",
        default=[],
        help="Colors to plot on embedding. Can be .obs columns or gene names.",
    )
    recipe_parser.add_argument(
        "-cm",
        "--cmap",
        required=False,
        type=str,
        default="viridis",
        help="Color map to use in embedding overlays and genes plot",
    )
    recipe_parser.add_argument(
        "-ps",
        "--point-size",
        required=False,
        type=int,
        default=None,
        help="Size of points in embedding. Default 120000/n_cells.",
    )
    recipe_parser.add_argument(
        "--cnmf",
        help="Plot cNMF usages on embedding. Default False.",
        action="store_true",
    )
    recipe_parser.add_argument(
        "--n-cnmf-markers",
        required=False,
        type=int,
        default=7,
        help="Number of top loaded genes to print on cNMF embeddings. Default 7.",
    )
    recipe_parser.add_argument(
        "--seed",
        type=int,
        help="Random state for generating embeddings. Default 18",
        default=18,
    )
    recipe_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for saving plots. Default './'",
    )
    recipe_parser.add_argument(
        "-sa",
        "--save-adata",
        help="Save updated AnnData to .h5ad file. Default False",
        action="store_true",
    )
    recipe_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    recipe_parser.set_defaults(func=recipe)

    de_parser = subparsers.add_parser(
        "de",
        help="Perform differential expression analysis on a processed .h5ad file and plot results",
    )
    de_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    de_parser.add_argument(
        "-m",
        "--de-method",
        required=False,
        type=str,
        default="t-test_overestim_var",
        help="DE testing method ['t-test_overestim_var', 't-test', 'wilcoxon']",
    )
    de_parser.add_argument(
        "-p",
        "--plot-type",
        type=str,
        nargs="*",
        default=None,
        help="Type(s) of DE gene expression plot ['heatmap', 'dotplot', 'matrixplot']",
    )
    de_parser.add_argument(
        "-g",
        "--groupby",
        required=False,
        type=str,
        default="leiden",
        help=".obs variable to group cells by for DE analysis",
    )
    de_parser.add_argument(
        "-n",
        "--n-genes",
        type=int,
        default=5,
        help="Number of genes to plot per group. Default 5.",
    )
    de_parser.add_argument(
        "-cm",
        "--cmap",
        required=False,
        type=str,
        default="Reds",
        help="Color map to use in genes plot",
    )
    de_parser.add_argument(
        "-a",
        "--ambient",
        help="Include ambient genes",
        action="store_true",
    )
    de_parser.add_argument(
        "-d",
        "--dendrogram",
        help="Generate dendrogram of group similarities",
        action="store_true",
    )
    de_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        help="Output directory for saving plots. Default './'",
        nargs="?",
        default=".",
    )
    de_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    de_parser.set_defaults(func=de)

    pie_parser = subparsers.add_parser(
        "pie",
        help="Plot populational pie charts for desired groups",
    )
    pie_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    pie_parser.add_argument(
        "-g",
        "--groupby",
        required=False,
        type=str,
        default="leiden",
        help=".obs variable to group cells by for plotting pie charts",
    )
    pie_parser.add_argument(
        "-p",
        "--pieby",
        required=False,
        type=str,
        default="batch",
        help=".obs variable to group cells by for pie chart within each groupby category",
    )
    pie_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for saving plots. Default './'",
    )
    pie_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    pie_parser.set_defaults(func=pie)

    cnmf_markers_parser = subparsers.add_parser(
        "cnmf_markers",
        help="Plot heatmap/matrix/dotplot of cNMF loadings for desired groups",
    )
    cnmf_markers_parser.add_argument(
        "file",
        type=str,
        help="Counts file as .h5ad or flat (.csv, .txt) in cells x genes format",
    )
    cnmf_markers_parser.add_argument(
        "-p",
        "--plot-type",
        type=str,
        nargs="*",
        default=None,
        help="Type(s) of gene expression plot ['heatmap', 'dotplot', 'matrixplot']",
    )
    cnmf_markers_parser.add_argument(
        "-g",
        "--groupby",
        required=False,
        type=str,
        default="leiden",
        help=".obs variable to group cells by for plotting cNMF genes",
    )
    cnmf_markers_parser.add_argument(
        "-n",
        "--n-genes",
        type=int,
        default=5,
        help="Number of genes to plot per group. Default 5.",
    )
    cnmf_markers_parser.add_argument(
        "-cm",
        "--cmap",
        required=False,
        type=str,
        default="Reds",
        help="Color map to use in genes plot",
    )
    cnmf_markers_parser.add_argument(
        "-d",
        "--dendrogram",
        help="Generate dendrogram of group similarities",
        action="store_true",
    )
    cnmf_markers_parser.add_argument(
        "-o",
        "--outdir",
        type=str,
        nargs="?",
        default=".",
        help="Output directory for saving plots. Default './'",
    )
    cnmf_markers_parser.add_argument(
        "-q",
        "--quietly",
        help="Don't print updates to console",
        action="store_true",
    )
    cnmf_markers_parser.set_defaults(func=cnmf_markers)

    args = parser.parse_args()

    # if --quietly specified, reverse verbosity
    if hasattr(args, "quietly"):
        if args.quietly:
            args.verbose = False
            del args.quietly
        else:
            args.verbose = True
            del args.quietly

    args.func(args)
def mtx_to_h5ad(args)

Converts .mtx files from 10x CellRanger or DropEst to .h5ad

Expand source code
def mtx_to_h5ad(args):
    """Converts .mtx files from 10x CellRanger or DropEst to .h5ad"""
    if args.dropest:
        if args.verbose:
            print("Reading DropEst .mtx files from {}".format(args.dir))
        mtx_file = [x for x in os.listdir(args.dir) if x.endswith("_counts.mtx")][
            0
        ]  # get name of .mtx
        # define filenames
        mtx = args.dir + "/" + mtx_file
        genes = args.dir + "/" + mtx_file.split("_counts")[0] + "_features.txt"
        barcodes = args.dir + "/" + mtx_file.split("_counts")[0] + "_barcodes.txt"
        # read files
        a = sc.read_mtx(mtx)  # read matrix
        a = a.T  # transpose DropEst matrix to cells x genes
        g = pd.read_csv(genes, delimiter="\t", header=None)  # read genes
        b = pd.read_csv(barcodes, delimiter="\t", header=None)  # read barcodes
        # add gene and barcode names
        a.obs_names = b[0].values
        a.var_names = g[0].values
        if args.verbose:
            print(
                "Writing counts to {}/{}.h5ad - {} cells and {} genes".format(
                    args.outdir, mtx_file.split("_counts")[0], a.shape[0], a.shape[1]
                )
            )
        a.write(
            "{}/{}.h5ad".format(args.outdir, mtx_file.split("_counts")[0]),
            compression="gzip",
        )
    else:
        if args.verbose:
            print("Reading 10x CellRanger .mtx files from {}".format(args.dir))
        a = sc.read_10x_mtx(args.dir)
        name = args.dir.split("_gene_bc_matrices")[
            0
        ]  # extract name from 10x directory name
        if args.verbose:
            print(
                "Writing counts to {}/{}.h5ad - {} cells and {} genes".format(
                    args.outdir, name, a.shape[0], a.shape[1]
                )
            )
        a.write("{}/{}.h5ad".format(args.outdir, name), compression="gzip")
def obs_to_categorical(args)

Makes .obs label categorical dtype

Expand source code
def obs_to_categorical(args):
    """Makes .obs label categorical dtype"""
    if args.verbose:
        print("Reading {}".format(args.file))
    adata = sc.read(args.file)
    for label in args.labels:
        if args.verbose:
            print("Converting .obs['{}'] to categorical".format(label))
        if args.to_bool:
            adata.obs[label] = adata.obs[label].astype(bool).astype("category")
        else:
            adata.obs[label] = adata.obs[label].astype("category")
    adata.write(args.file, compression="gzip")
def pie(args)

Plots populational pie charts for desired groups

Expand source code
def pie(args):
    """Plots populational pie charts for desired groups"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # generate cluster_pie plot
    os.chdir(args.outdir)  # set output directory for scanpy figures
    _ = cluster_pie(a, pie_by=args.pieby, groupby=args.groupby)
    if args.verbose:
        print(
            "Saving cluster pie charts to {}/{}_pie.png".format(
                args.outdir, "_".join(name)
            )
        )
    plt.savefig("{}/{}_pie.png".format(args.outdir, "_".join(name)))
def recipe(args)

Full automated processing of scRNA-seq data

Expand source code
def recipe(args):
    """Full automated processing of scRNA-seq data"""
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    if args.subset is not None:
        name.append("_".join(args.subset))
    if args.layer is not None:
        name.append(args.layer)
    if args.use_rep is not None:
        name.append(args.use_rep)
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    # subset anndata on .obs column if desired
    if args.subset is not None:
        a = subset_adata(a, subset=args.subset)
    if args.process:
        # switch to proper layer
        if args.layer is not None:
            if args.verbose:
                print("Using layer {} to reduce dimensions".format(args.layer))
            a.X = a.layers[args.layer].copy()
        # preprocess with dropkick recipe
        a = recipe_dropkick(
            a,
            X_final="arcsinh_norm",
            verbose=args.verbose,
            filter=True,
            min_genes=args.min_genes,
        )
        # reduce dimensions
        dim_reduce(
            a,
            use_rep=args.use_rep,
            clust_resolution=args.resolution,
            paga=args.paga,
            verbose=args.verbose,
            seed=args.seed,
        )
    # run cell cycle inference
    if args.cell_cycle:
        cc_score(a, verbose=args.verbose)
        args.colors = ["phase"] + args.colors
    # make sure output dir exists before saving plots
    check_dir_exists(args.outdir)
    # if there's DE to do, plot genes
    if args.diff_expr is not None:
        if isinstance(args.diff_expr, str):
            args.diff_expr = [args.diff_expr]
        for de in args.diff_expr:
            plot_genes(
                a,
                de_method=args.de_method,
                plot_type=de,
                groupby="leiden",
                n_genes=5,
                cmap=args.cmap,
                save_to="{}/{}_{}.png".format(args.outdir, de, "_".join(name)),
                verbose=args.verbose,
            )
        # if there's cnmf results, plot those on a heatmap/matrix/dotplot too
        if "cnmf_spectra" in a.varm:
            for de in args.diff_expr:
                plot_genes_cnmf(
                    a,
                    plot_type=de,
                    groupby="leiden",
                    attr="varm",
                    keys="cnmf_spectra",
                    indices=None,
                    n_genes=5,
                    cmap=args.cmap,
                    save_to="{}/{}_cnmf_{}.png".format(args.outdir, de, "_".join(name)),
                )
    # if there's a cnmf flag, try to plot loadings
    if args.cnmf:
        # check for cnmf results in anndata object
        if "cnmf_spectra" in a.varm:
            _ = rank_genes_cnmf(a, show=False)
            if args.verbose:
                print(
                    "Saving cNMF loadings to {}/{}_cnmfspectra.png".format(
                        args.outdir, "_".join(name)
                    )
                )
            plt.savefig("{}/{}_cnmfspectra.png".format(args.outdir, "_".join(name)))
            if args.verbose:
                print(
                    "Saving embeddings to {}/{}_embedding.png".format(
                        args.outdir, "_".join(name)
                    )
                )
            # save embedding plot with cNMF loadings
            if args.colors is None:
                args.colors = []
            plot_embedding(
                a,
                colors=args.colors
                + a.obs.columns[a.obs.columns.str.startswith("usage_")].tolist(),
                show_clustering=True,
                n_cnmf_markers=args.n_cnmf_markers,
                cmap=args.cmap,
                seed=args.seed,
                save_to="{}/{}_embedding.png".format(args.outdir, "_".join(name)),
                verbose=args.verbose,
                size=args.point_size,
            )
        else:
            print(
                "cNMF results not detected in {}. Skipping cNMF overlay for embedding.".format(
                    args.file
                )
            )
            # save embedding plot without cNMF loadings
            if args.verbose:
                print(
                    "Saving embeddings to {}/{}_embedding.png".format(
                        args.outdir, "_".join(name)
                    )
                )
            plot_embedding(
                a,
                colors=args.colors,
                show_clustering=True,
                cmap=args.cmap,
                seed=args.seed,
                save_to="{}/{}_embedding.png".format(args.outdir, "_".join(name)),
                verbose=args.verbose,
                size=args.point_size,
            )
    else:
        # save embedding plot
        if args.verbose:
            print(
                "Saving embeddings to {}/{}_embedding.png".format(
                    args.outdir, "_".join(name)
                )
            )
        plot_embedding(
            a,
            colors=args.colors,
            show_clustering=True,
            cmap=args.cmap,
            seed=args.seed,
            save_to="{}/{}_embedding.png".format(args.outdir, "_".join(name)),
            verbose=args.verbose,
            size=args.point_size,
        )
    # save file as .h5ad
    if args.save_adata:
        if args.verbose:
            print(
                "Saving AnnData object to to {}/{}_processed.h5ad".format(
                    args.outdir, "_".join(name)
                )
            )
        a.write(
            "{}/{}_processed.h5ad".format(args.outdir, "_".join(name)),
            compression="gzip",
        )
def rename_obs(args)

Renames .obs columns in anndata object, and overwrites .h5ad file

Expand source code
def rename_obs(args):
    """Renames .obs columns in anndata object, and overwrites .h5ad file"""
    if args.verbose:
        print("Reading {}".format(args.file))
    adata = sc.read(args.file)
    if args.verbose:
        print("Renaming columns {} to {}".format(args.old_names, args.new_names))
    adata.obs.rename(columns=dict(zip(args.old_names, args.new_names)), inplace=True)
    adata.write(args.file, compression="gzip")
def subset(args)

Subsets anndata object on binary .obs label(s), saves to new .h5ad file

Expand source code
def subset(args):
    """Subsets anndata object on binary .obs label(s), saves to new .h5ad file"""
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
    a = subset_adata(a, subset=args.subset, verbose=args.verbose)
    if args.verbose:
        print("Writing subsetted counts to {}".format(args.out))
    a.write(args.out, compression="gzip")
def to_X(args)

Swaps a matrix from .layers to .X slot of anndata object, overwrites .h5ad file

Expand source code
def to_X(args):
    """Swaps a matrix from .layers to .X slot of anndata object, overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    # swap layers
    if args.verbose:
        print("Putting .layers['{}'] in .X and saving".format(args.layer))
    a.X = a.layers[args.layer].copy()
    # save file as .h5ad
    a.write(args.file, compression="gzip")
def to_dense(args)

Converts .X slot of anndata object to numpy.matrix format, overwrites .h5ad file

Expand source code
def to_dense(args):
    """Converts .X slot of anndata object to numpy.matrix format, overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    if isinstance(a.X, np.matrix) or isinstance(a.X, np.ndarray):
        print("{} already in dense format".format(args.file))
        return
    # densify counts slot
    if args.verbose:
        print("densifying counts...")
    a.X = a.X.todense()
    a.X = a.X.astype(int)
    # save file as .h5ad
    a.write(args.file, compression="gzip")
def to_h5ad(args)

Converts counts matrix from flat file (.txt, .csv) to .h5ad

Expand source code
def to_h5ad(args):
    """Converts counts matrix from flat file (.txt, .csv) to .h5ad"""
    # get basename of file for writing outputs
    name = os.path.splitext(os.path.basename(args.file))[0]
    # check to make sure it's an .h5ad file
    if os.path.splitext(args.file)[1] == ".h5ad":
        raise ValueError("Input file already in .h5ad format")
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        # print information about counts, including names of cells and genes
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))
        print("obs_names: {}".format(a.obs_names))
        print("var_names: {}".format(a.var_names))
    # sparsify counts slot
    if args.verbose:
        print("sparsifying counts...")
    a.X = sparse.csr_matrix(a.X, dtype=int)
    # save file as .h5ad
    if args.verbose:
        print("Writing counts to {}/{}.h5ad".format(args.outdir, name))
    check_dir_exists(args.outdir)
    a.write("{}/{}.h5ad".format(args.outdir, name), compression="gzip")
    if args.rm_flat_file:
        # remove original, noncompressed flat file
        if args.verbose:
            print("Removing {}".format(args.file))
        os.remove(args.file)
def to_sparse(args)

Converts .X slot of anndata object to scipy CSR format, overwrites .h5ad file

Expand source code
def to_sparse(args):
    """Converts .X slot of anndata object to scipy CSR format, overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    if isinstance(a.X, sparse.csr.csr_matrix):
        print("{} already in sparse format".format(args.file))
        return
    # sparsify counts slot
    if args.verbose:
        print("sparsifying counts...")
    a.X = sparse.csr_matrix(a.X, dtype=int)
    # save file as .h5ad
    a.write(args.file, compression="gzip")
def transpose(args)

Transposes anndata object, replacing obs with var, and overwrites .h5ad file

Expand source code
def transpose(args):
    """Transposes anndata object, replacing obs with var, and overwrites .h5ad file"""
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file))
    a = sc.read(args.file)
    if args.verbose:
        print(a)
    # transpose file
    if args.verbose:
        print("transposing file and saving...")
    a = a.T
    # save file as .h5ad
    a.write(args.file, compression="gzip")