Package kitchen
Manipulate counts matrix files and cook scRNA-seq data from command line
Expand source code
# -*- coding: utf-8 -*-
"""
Manipulate counts matrix files and cook scRNA-seq data from command line
"""
from .kitchen import (
info,
to_h5ad,
transpose,
rename_obs,
add_label,
knee_point,
subset,
concatenate,
recipe,
)
# Public API of the package: the functions imported from .kitchen above.
__all__ = [
"info",
"to_h5ad",
"transpose",
"rename_obs",
"add_label",
"knee_point",
"subset",
"concatenate",
"recipe",
]
# The version string is read from the "version" key returned by
# _version.get_versions().
from ._version import get_versions
__version__ = get_versions()["version"]
# Remove the helper name so only __version__ remains in the package namespace.
del get_versions
Sub-modules
kitchen.ingredients
-
Functions for manipulating .h5ad objects and automated processing of scRNA-seq data
kitchen.kitchen
-
Manipulate .h5ad files and cook scRNA-seq data from command line
Functions
def add_label(args)
-
Uses the .obs_names of a filtered counts matrix to add a binary label to a reference anndata object: "True" = present in the filtered file, "False" = not present. Overwrites the reference .h5ad file.
Expand source code
def add_label(args):
    """
    Mark which observations of a reference file also occur in a filtered file.

    Every observation in `args.ref_file` receives the string "False" in the
    `.obs` column named `args.label`; observations whose names appear in
    `args.filt_file` are then set to "True". The reference .h5ad file is
    overwritten (gzip-compressed), and the filtered file is deleted when
    `args.rm_orig_file` is truthy.
    """
    verbose = args.verbose

    # reference object that receives the new .obs column
    if verbose:
        print("Reading {}".format(args.ref_file))
    reference = sc.read(args.ref_file)
    if verbose:
        print("\t", reference)

    # filtered object whose observation names define the "True" set
    if verbose:
        print("\nReading {}".format(args.filt_file))
    filtered = sc.read(args.filt_file)
    if verbose:
        print("\t", filtered)

    # start with everything "False", then flip the names found in the filtered file
    reference.obs[args.label] = "False"
    reference.obs.loc[filtered.obs_names, args.label] = "True"

    if verbose:
        label_counts = reference.obs[args.label].value_counts()
        print(
            "\nTransferring labels to {}:\n{}".format(args.ref_file, label_counts)
        )
        print("\nWriting counts to {}".format(args.ref_file))
    reference.write(args.ref_file, compression="gzip")

    # nothing more to do unless the caller asked for the filtered file to go
    if not args.rm_orig_file:
        return
    if verbose:
        print("\nRemoving {}".format(args.filt_file))
    os.remove(args.filt_file)
def concatenate(args)
-
Concatenates list of anndata objects in .h5ad format, keeping union of genes
Expand source code
def concatenate(args):
    """
    Concatenate several .h5ad files into one, keeping the union of genes.

    Files in `args.files` are read in order; the first object's
    `.concatenate` is called with the remaining objects using an outer join
    and `fill_value=0`. Batch categories are the input file basenames
    without extension. The result is written to `args.out` with gzip
    compression.
    """
    # load every input in order, announcing each one when verbose
    loaded = []
    for path in args.files:
        if args.verbose:
            print("Reading {}".format(path))
        loaded.append(sc.read(path))
    first, others = loaded[0], loaded[1:]

    # one batch label per input file: basename stripped of its extension
    batch_names = []
    for path in args.files:
        stem, _ = os.path.splitext(os.path.basename(path))
        batch_names.append(stem)

    if args.verbose:
        print("Concatenating files...")
    concat = first.concatenate(
        others,
        join="outer",
        batch_categories=batch_names,
        fill_value=0,
    )
    if args.verbose:
        n_cells, n_genes = concat.shape[0], concat.shape[1]
        print("Final shape: {} cells and {} genes".format(n_cells, n_genes))
        print("Writing counts to {}".format(args.out))

    concat.write(args.out, compression="gzip")
def info(args)
-
Prints information about .h5ad file to console
Expand source code
def info(args):
    """
    Print a summary of an .h5ad file to the console.

    Shows the object read from `args.file`, the type and dtype of its `.X`
    matrix, and its observation and variable names.
    """
    path = args.file
    print("Reading {}\n".format(path))
    adata = sc.read(path)

    # overall object summary followed by a blank line
    print(adata, "\n")

    # storage class and dtype of the main matrix
    matrix = adata.X
    print(".X: {} with {}\n".format(type(matrix), matrix.dtype))

    # index labels along both axes
    for axis_label, index in (
        ("obs_names", adata.obs_names),
        ("var_names", adata.var_names),
    ):
        print("{}: {}".format(axis_label, index))
def knee_point(args)
-
Labels cells using "knee point" method from CellRanger 2.1
Expand source code
def knee_point(args):
    """
    Labels cells using "knee point" method from CellRanger 2.1.

    Reads `args.file`, hands the object to `cellranger2` together with
    `args.expected`, `args.upper_quant`, `args.lower_prop`, `args.label` and
    `args.verbose`, then overwrites `args.file` as a gzip-compressed .h5ad.
    The return value of `cellranger2` is discarded, so it is presumably
    expected to add the label to the object in place.
    """
    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))

    # add knee_point label to anndata
    cellranger2(
        a,
        expected=args.expected,
        upper_quant=args.upper_quant,
        lower_prop=args.lower_prop,
        label=args.label,
        verbose=args.verbose,
    )

    # save file as .h5ad; the progress message is gated on verbose so this
    # command stays quiet by default like the other commands in this module
    if args.verbose:
        print("Writing counts to {}".format(args.file))
    a.write(args.file, compression="gzip")
def recipe(args)
-
Full automated processing of scRNA-seq data
Expand source code
def recipe(args):
    """
    Full automated processing of scRNA-seq data.

    Steps, in order:

    1. Read `args.file`; optionally subset it on `args.subset`.
    2. If `args.process` is set, optionally switch `.X` to
       `args.layer` and run `recipe_dropkick`.
    3. Reduce dimensions with `dim_reduce`.
    4. If `args.cell_cycle` is set, run `cc_score` and add "phase" to the
       colors shown on the embedding.
    5. For each plot type in `args.diff_expr`, save gene plots (and cNMF
       gene plots when "cnmf_spectra" is present in `.varm`).
    6. Save the embedding plot, with cNMF loadings/usages when `args.cnmf`
       is set and cNMF results are present.
    7. If `args.save_adata` is set, write the processed object.

    All outputs go to `args.outdir` and are named from the input basename
    plus the subset, layer and representation options that were used.

    Note: `args.colors` and `args.diff_expr` may be reassigned on `args`.
    """
    # get basename of file for writing outputs
    name = [os.path.splitext(os.path.basename(args.file))[0]]
    if args.subset is not None:
        name.append("_".join(args.subset))
    if args.layer is not None:
        name.append(args.layer)
    if args.use_rep is not None:
        name.append(args.use_rep)
    stem = "_".join(name)

    # read file into anndata obj
    if args.verbose:
        print("Reading {}".format(args.file), end="")
    a = sc.read(args.file)
    if args.verbose:
        print(" - {} cells and {} genes".format(a.shape[0], a.shape[1]))

    # subset anndata on .obs column if desired
    if args.subset is not None:
        a = subset_adata(a, subset=args.subset)

    if args.process:
        # switch to proper layer
        if args.layer is not None:
            if args.verbose:
                print("Using layer {} to reduce dimensions".format(args.layer))
            a.X = a.layers[args.layer].copy()
        # preprocess with dropkick recipe
        a = recipe_dropkick(
            a,
            X_final="arcsinh_norm",
            verbose=args.verbose,
            filter=True,
            min_genes=args.min_genes,
        )

    # reduce dimensions
    # NOTE(review): run regardless of args.process, since the plots below
    # group by "leiden" and draw an embedding - confirm against the
    # original indentation.
    dim_reduce(
        a,
        use_rep=args.use_rep,
        clust_resolution=args.resolution,
        paga=args.paga,
        verbose=args.verbose,
        seed=args.seed,
    )

    # run cell cycle inference
    if args.cell_cycle:
        cc_score(a, verbose=args.verbose)
        # args.colors may be None (it is checked for None further down), so
        # fall back to an empty list instead of failing on list + None
        args.colors = ["phase"] + list(args.colors or [])

    # make sure output dir exists before saving plots
    check_dir_exists(args.outdir)

    # if there's DE to do, plot genes
    if args.diff_expr is not None:
        if isinstance(args.diff_expr, str):
            args.diff_expr = [args.diff_expr]
        for de in args.diff_expr:
            plot_genes(
                a,
                de_method=args.de_method,
                plot_type=de,
                groupby="leiden",
                n_genes=5,
                cmap=args.cmap,
                save_to="{}/{}_{}.png".format(args.outdir, de, stem),
                verbose=args.verbose,
            )
        # if there's cnmf results, plot those on a heatmap/matrix/dotplot too
        # (kept inside the diff_expr check: the loop needs the plot types)
        if "cnmf_spectra" in a.varm:
            for de in args.diff_expr:
                plot_genes_cnmf(
                    a,
                    plot_type=de,
                    groupby="leiden",
                    attr="varm",
                    keys="cnmf_spectra",
                    indices=None,
                    n_genes=5,
                    cmap=args.cmap,
                    save_to="{}/{}_cnmf_{}.png".format(args.outdir, de, stem),
                )

    # settings shared by every variant of the embedding plot
    embedding_path = "{}/{}_embedding.png".format(args.outdir, stem)
    embedding_kwargs = dict(
        show_clustering=True,
        cmap=args.cmap,
        seed=args.seed,
        save_to=embedding_path,
        verbose=args.verbose,
        size=args.point_size,
    )
    colors = args.colors

    # if there's a cnmf flag, try to plot loadings
    if args.cnmf:
        # check for cnmf results in anndata object
        if "cnmf_spectra" in a.varm:
            _ = rank_genes_cnmf(a, show=False)
            spectra_path = "{}/{}_cnmfspectra.png".format(args.outdir, stem)
            if args.verbose:
                print("Saving cNMF loadings to {}".format(spectra_path))
            plt.savefig(spectra_path)
            # overlay the cNMF usage columns on top of the requested colors
            if args.colors is None:
                args.colors = []
            usage_columns = a.obs.columns[
                a.obs.columns.str.startswith("usage_")
            ].tolist()
            colors = args.colors + usage_columns
            embedding_kwargs["n_cnmf_markers"] = args.n_cnmf_markers
        else:
            # always shown: the user asked for cNMF output that cannot be made
            print(
                "cNMF results not detected in {}. Skipping cNMF overlay for embedding.".format(
                    args.file
                )
            )

    # save embedding plot (with or without cNMF loadings, as decided above)
    if args.verbose:
        print("Saving embeddings to {}".format(embedding_path))
    plot_embedding(a, colors=colors, **embedding_kwargs)

    # save file as .h5ad
    if args.save_adata:
        processed_path = "{}/{}_processed.h5ad".format(args.outdir, stem)
        if args.verbose:
            print("Saving AnnData object to {}".format(processed_path))
        a.write(processed_path, compression="gzip")
def rename_obs(args)
-
Renames .obs columns in anndata object, and overwrites .h5ad file
Expand source code
def rename_obs(args):
    """
    Renames .obs columns in anndata object, and overwrites .h5ad file.

    `args.old_names` and `args.new_names` are paired positionally: the i-th
    old column name is replaced by the i-th new name. The file at
    `args.file` is rewritten with gzip compression.

    Raises
    ------
    ValueError
        If the two name lists differ in length. Pairing them with `zip`
        would otherwise silently skip the unmatched names.
    """
    # validate before touching the file so a bad call has no side effects
    n_old, n_new = len(args.old_names), len(args.new_names)
    if n_old != n_new:
        raise ValueError(
            "Got {} old name(s) and {} new name(s); every column to rename "
            "needs exactly one new name".format(n_old, n_new)
        )

    if args.verbose:
        print("Reading {}".format(args.file))
    adata = sc.read(args.file)
    if args.verbose:
        print("Renaming columns {} to {}".format(args.old_names, args.new_names))
    adata.obs.rename(columns=dict(zip(args.old_names, args.new_names)), inplace=True)
    adata.write(args.file, compression="gzip")
def subset(args)
-
Subsets anndata object on binary .obs label(s), saves to new .h5ad file
Expand source code
def subset(args):
    """
    Subset an anndata object on binary .obs label(s) and save the result.

    Reads `args.file`, passes it to `subset_adata` with `args.subset`, and
    writes the returned object to `args.out` as gzip-compressed .h5ad.
    """
    verbose = args.verbose

    if verbose:
        print("Reading {}".format(args.file), end="")
    adata = sc.read(args.file)
    if verbose:
        n_cells, n_genes = adata.shape[0], adata.shape[1]
        print(" - {} cells and {} genes".format(n_cells, n_genes))

    # keep only the observations selected by the requested label(s)
    selected = subset_adata(adata, subset=args.subset, verbose=verbose)

    if verbose:
        print("Writing subsetted counts to {}".format(args.out))
    selected.write(args.out, compression="gzip")
def to_h5ad(args)
-
Converts counts matrix from flat file (.txt, .csv) to .h5ad
Expand source code
def to_h5ad(args):
    """
    Convert a counts matrix from a flat file (.txt, .csv) to .h5ad.

    The matrix read from `args.file` is stored as an integer CSR sparse
    matrix and written to `<args.outdir>/<input basename>.h5ad` with gzip
    compression. The input file is deleted when `args.rm_flat_file` is
    truthy.

    Raises
    ------
    ValueError
        If `args.file` already has the .h5ad extension.
    """
    verbose = args.verbose

    # basename without extension names the output file
    name = os.path.splitext(os.path.basename(args.file))[0]

    # refuse inputs that are already .h5ad - there is nothing to convert
    extension = os.path.splitext(args.file)[1]
    if extension == ".h5ad":
        raise ValueError("Input file already in .h5ad format")

    if verbose:
        print("Reading {}".format(args.file), end="")
    adata = sc.read(args.file)
    if verbose:
        # report the matrix size plus the cell and gene names
        print(" - {} cells and {} genes".format(adata.shape[0], adata.shape[1]))
        print("obs_names: {}".format(adata.obs_names))
        print("var_names: {}".format(adata.var_names))
        print("sparsifying counts...")

    # replace .X with an integer CSR matrix
    adata.X = sparse.csr_matrix(adata.X, dtype=int)

    destination = "{}/{}.h5ad".format(args.outdir, name)
    if verbose:
        print("Writing counts to {}".format(destination))
    check_dir_exists(args.outdir)
    adata.write(destination, compression="gzip")

    # optionally drop the original, noncompressed flat file
    if not args.rm_flat_file:
        return
    if verbose:
        print("Removing {}".format(args.file))
    os.remove(args.file)
def transpose(args)
-
Transposes anndata object, swapping obs and var, and overwrites the .h5ad file
Expand source code
def transpose(args):
    """
    Transpose an anndata object and overwrite its .h5ad file.

    Reads `args.file`, takes the object's `.T`, and writes the transposed
    object back to the same path with gzip compression.
    """
    path = args.file
    verbose = args.verbose

    if verbose:
        print("Reading {}".format(path))
    adata = sc.read(path)
    if verbose:
        print(adata)
        print("transposing file and saving...")

    # write the transposed object over the input file
    flipped = adata.T
    flipped.write(path, compression="gzip")