Initial cDNA Filtering with Cell Ranger Data (Starvation Data)
In [ ]:
# Print the current date and time via the shell
!date
Download Data
In [ ]:
import requests
from tqdm import tnrange, tqdm_notebook
def download_file(doi,ext):
    """Download the first media file registered for a DataCite DOI.

    Looks up the DOI's media list through the DataCite REST API, then streams
    the first entry's URL into the working directory in chunks of up to
    1024 bytes while advancing a notebook progress bar.

    Parameters
    ----------
    doi : str
        DOI such as '10.22002/D1.1802'; the part after the last '/' is used
        as the file stem.
    ext : str
        Suffix appended to the stem (e.g. '.gz').

    Returns
    -------
    str or None
        Name of the written file, or None when the server answered 403 or
        did not report a content length (a message is printed in both cases).
    """
    url = 'https://api.datacite.org/dois/'+doi+'/media'
    media = requests.get(url).json()
    file_url = media['data'][0]['attributes']['url']
    r = requests.get(file_url,stream=True)
    try:
        #Set file name
        fname = doi.split('/')[-1]+ext
        if r.status_code == 403:
            # Stop here: falling through could save the error body under
            # fname as if it were the requested data.
            print("File Unavailable")
            return None
        if 'content-length' not in r.headers:
            print("Did not get file")
            return None
        #Download file with progress bar
        total_length = int(r.headers.get('content-length'))
        # One tick per chunk; round up so a trailing partial chunk is counted.
        n_chunks = -(-total_length // 1024)
        pbar = tnrange(n_chunks, unit="KiB")
        try:
            with open(fname, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        pbar.update()
                        f.write(chunk)
        finally:
            # Always finalize the bar, even if the transfer fails midway.
            pbar.close()
        return fname
    finally:
        # Release the streamed connection back to the pool.
        r.close()
In [ ]:
#From CellRanger (cDNA) plus the two ClickTag lanes: (description, DOI)
_gz_downloads = [
    ('matrix', '10.22002/D1.1802'),
    ('features', '10.22002/D1.1803'),
    ('barcodes', '10.22002/D1.1801'),
    ('ClickTag lane 1 counts', '10.22002/D1.1799'),
    ('ClickTag lane 2 counts', '10.22002/D1.1800'),
]
#Fetch each archive in the listed order
for _label, _doi in _gz_downloads:
    _last_fname = download_file(_doi, '.gz')
#Keep the last call's result as the value of the cell
_last_fname
Out[ ]:
In [ ]:
# Decompress every .gz file in the working directory; each archive is replaced by its uncompressed file without the .gz suffix
!gunzip *.gz
In [ ]:
# Install the analysis packages quietly; scanpy is pinned to 1.6.0
!pip install --quiet anndata
!pip install --quiet scanpy==1.6.0
# NOTE(review): this line uses pip3 while the others use pip — confirm both resolve to the kernel's interpreter
!pip3 install --quiet leidenalg
!pip install --quiet louvain
Import Packages
In [ ]:
# Data handling and single-cell analysis packages
import pandas as pd
import anndata
import scanpy as sc
import numpy as np
import scipy.sparse
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')
from sklearn.neighbors import (KNeighborsClassifier,NeighborhoodComponentsAnalysis)
from sklearn.pipeline import Pipeline
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
import random
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook
%matplotlib inline
# Set scanpy's figure resolution
sc.set_figure_params(dpi=125)
import seaborn as sns
sns.set(style="whitegrid")
# Load the rpy2 IPython extension (not used by the cells visible here)
%load_ext rpy2.ipython
Select Cells for Downstream Analysis by Associated ClickTag Counts
Filtering of CellRanger cDNA data by ClickTag counts
In [ ]:
# Prefix prepended to the input file names in the reads below (empty string here)
path = ""
# Give the decompressed downloads the extensions used by the reads below
# NOTE(review): these mv commands do not use `path`, so a non-empty `path` would not match where the files are renamed
!mv D1.1802 D1.1802.mtx
!mv D1.1803 D1.1803.tsv
!mv D1.1801 D1.1801.tsv
# Read the count matrix and transpose it
# NOTE(review): presumably the transpose puts cells on rows, since barcodes become obs_names below — confirm
jelly3trin = sc.read(path+'D1.1802.mtx', cache=True).T
# Variable names: second column of the tab-separated features file
jelly3trin.var_names = pd.read_csv(path+'D1.1803.tsv', header=None, sep='\t')[1]
# Observation names: first column of the barcodes file
jelly3trin.obs_names = pd.read_csv(path+'D1.1801.tsv', header=None)[0]
# Display the loaded object
jelly3trin
Out[ ]:
In [ ]:
# Make duplicated variable names unique
jelly3trin.var_names_make_unique()
# Drop observations with fewer than 1 total count; the next line reads the obs['n_counts'] column this leaves behind
sc.pp.filter_cells(jelly3trin,min_counts=1)
# Store log10 of the per-observation total counts
jelly3trin.obs['n_countslog']=np.log10(jelly3trin.obs['n_counts'])
In [ ]:
# Display the object again after filtering
jelly3trin
Out[ ]:
Select well-tagged cells by their ClickTag counts, using an embedding of the previously calculated cell × ClickTag counts matrix
In [ ]:
# Rename the decompressed ClickTag lane 1 download to a descriptive CSV name
!mv D1.1799 tag1_counts.csv
adata = sc.read_csv("tag1_counts.csv")
# NOTE(review): presumably transposed so cells are observations and tags are variables — confirm against the CSV layout
adata=adata.transpose()
# Keep only observations with at least 100 total counts
sc.pp.filter_cells(adata, min_counts=100)
# NOTE(review): natural log here, whereas n_countslog on jelly3trin uses log10 — confirm this difference is intended
adata.obs['n_countslog'] = np.log(adata.obs['n_counts'])
# Log-transform the count matrix itself
sc.pp.log1p(adata)
#For visualization of tag counts
sc.tl.pca(adata)
sc.tl.tsne(adata)
sc.pp.neighbors(adata)
# Louvain clustering at resolution 0.35
sc.tl.louvain(adata, resolution=0.35)
In [ ]:
# NOTE(review): repeats the louvain call that ends the previous cell with the same resolution — looks redundant
sc.tl.louvain(adata, resolution=0.35)
In [ ]:
# Plot the t-SNE embedding colored by Louvain cluster label
sc.pl.tsne(adata, color=['louvain'])
In [ ]:
#One t-SNE panel per tag column BC_21 through BC_40, then one for the log total counts
tag_columns = ['BC_%d' % tag_id for tag_id in range(21, 41)]
sc.pl.tsne(adata, color=tag_columns + ['n_countslog'])