{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Query and Download API\n",
"\n",
"For this example assume you have created a top level *Work* directory. Under the *Work* directory create a *notebooks* and *output* directory. Then unzip the Immport Download Tool zip package and place the *bin* and\n",
"*aspera* directories included in this zip under the *Work* directory. Change directories to the *notebooks*\n",
"directory then run the command *jupyter notebook*.\n",
"\n",
"The immport_download.py file contains convience functions to obtain ImmPort and Aspera tokens and a\n",
"download_file method that simplifies the downloading of a file."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sys\n",
"import os\n",
"import pandas as pd\n",
"import urllib\n",
"\n",
"# Set the Python path to the location of the directory containing \"immport_download.py\"\n",
"immport_download_code = \"../bin/\"\n",
"sys.path.insert(0,immport_download_code)\n",
"os.chdir(immport_download_code)\n",
"\n",
"import immport_download"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example configuration properties - Replace with real user_name and password"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"user_name = \"REPLACE\"\n",
"password = \"REPLACE\"\n",
"output_directory = \"../output\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download the HAI results for SDY222 using API"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Request a token, then make API call, then load into Pandas's DataFrame\n",
"token = immport_download.request_immport_token(user_name, password)\n",
"r = immport_download.api(\"https://api.immport.org/data/query/result/hai?studyAccession=SDY212\",token)\n",
"df = pd.read_json(r)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['ageEvent', 'ageEventSpecify', 'ageUnit', 'ancestralPopulation',\n",
" 'armAccession', 'armName', 'biosampleAccession', 'biosampleSubtype',\n",
" 'biosampleType', 'clinical', 'comments', 'ethnicity',\n",
" 'experimentAccession', 'expsampleAccession', 'gender', 'maxSubjectAge',\n",
" 'measurementTechnique', 'minSubjectAge', 'plannedVisitAccession',\n",
" 'race', 'raceSpecify', 'repositoryAccession', 'repositoryName',\n",
" 'resultId', 'species', 'strain', 'studyAccession', 'studyTimeCollected',\n",
" 'studyTimeCollectedUnit', 'studyTimeT0Event', 'studyTimeT0EventSpecify',\n",
" 'studyTitle', 'subjectAccession', 'subjectPhenotype',\n",
" 'treatmentAccession', 'unitPreferred', 'unitReported', 'valuePreferred',\n",
" 'valueReported', 'virusStrainPreferred', 'virusStrainReported'],\n",
" dtype='object')\n",
"['Hemagglutination Inhibition']\n",
"['Cohort_2' 'Cohort_1']\n"
]
}
],
"source": [
"# Print out a sample of the information in the DataFrame\n",
"print(df.columns)\n",
"print(df.measurementTechnique.unique())\n",
"print(df.armName.unique())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rename the armName values to more descriptive values. When reviewing the ARM information we determined that \n",
"Cohort_1 represents the Young participants and Cohort_2 represents the Older participants\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jcampbell/opt/python/anaconda3-4.3.0/lib/python3.6/site-packages/pandas/core/indexing.py:477: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self.obj[item] = s\n"
]
}
],
"source": [
"df_clean = df[['subjectAccession','armAccession','armName','gender','race','minSubjectAge','studyTimeCollected', \\\n",
" 'valuePreferred','virusStrainReported']]\n",
"df_clean.loc[df_clean['armName'] == 'Cohort_1','armName'] = \"Young\"\n",
"df_clean.loc[df_clean['armName'] == 'Cohort_2','armName'] = \"Old\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" subjectAccession | \n",
" armAccession | \n",
" armName | \n",
" gender | \n",
" race | \n",
" minSubjectAge | \n",
" studyTimeCollected | \n",
" valuePreferred | \n",
" virusStrainReported | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" SUB134240 | \n",
" ARM895 | \n",
" Old | \n",
" Female | \n",
" White | \n",
" 62.86 | \n",
" 0 | \n",
" 20 | \n",
" B | \n",
"
\n",
" \n",
" 1 | \n",
" SUB134251 | \n",
" ARM894 | \n",
" Young | \n",
" Female | \n",
" White | \n",
" 29.23 | \n",
" 0 | \n",
" 40 | \n",
" H3N2 | \n",
"
\n",
" \n",
" 2 | \n",
" SUB134258 | \n",
" ARM895 | \n",
" Old | \n",
" Female | \n",
" White | \n",
" 85.41 | \n",
" 0 | \n",
" 10 | \n",
" H1N1 | \n",
"
\n",
" \n",
" 3 | \n",
" SUB134264 | \n",
" ARM895 | \n",
" Old | \n",
" Female | \n",
" White | \n",
" 68.08 | \n",
" 0 | \n",
" 40 | \n",
" B | \n",
"
\n",
" \n",
" 4 | \n",
" SUB134271 | \n",
" ARM895 | \n",
" Old | \n",
" Female | \n",
" White | \n",
" 86.61 | \n",
" 0 | \n",
" 640 | \n",
" H3N2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subjectAccession armAccession armName gender race minSubjectAge \\\n",
"0 SUB134240 ARM895 Old Female White 62.86 \n",
"1 SUB134251 ARM894 Young Female White 29.23 \n",
"2 SUB134258 ARM895 Old Female White 85.41 \n",
"3 SUB134264 ARM895 Old Female White 68.08 \n",
"4 SUB134271 ARM895 Old Female White 86.61 \n",
"\n",
" studyTimeCollected valuePreferred virusStrainReported \n",
"0 0 20 B \n",
"1 0 40 H3N2 \n",
"2 0 10 H1N1 \n",
"3 0 40 B \n",
"4 0 640 H3N2 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_clean.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simple descriptive statistics"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subjectAccession | \n",
"
\n",
" \n",
" armName | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Old | \n",
" 60 | \n",
"
\n",
" \n",
" Young | \n",
" 29 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subjectAccession\n",
"armName \n",
"Old 60\n",
"Young 29"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_clean[['armName','subjectAccession']].drop_duplicates().groupby('armName').count()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subjectAccession | \n",
"
\n",
" \n",
" gender | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Female | \n",
" 54 | \n",
"
\n",
" \n",
" Male | \n",
" 35 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subjectAccession\n",
"gender \n",
"Female 54\n",
"Male 35"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_clean[['gender','subjectAccession']].drop_duplicates().groupby('gender').count()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subjectAccession | \n",
"
\n",
" \n",
" race | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" American Indian or Alaska Native | \n",
" 1 | \n",
"
\n",
" \n",
" Asian | \n",
" 8 | \n",
"
\n",
" \n",
" Other | \n",
" 7 | \n",
"
\n",
" \n",
" White | \n",
" 73 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subjectAccession\n",
"race \n",
"American Indian or Alaska Native 1\n",
"Asian 8\n",
"Other 7\n",
"White 73"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_clean[['race','subjectAccession']].drop_duplicates().groupby('race').count()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0, 28}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set(df_clean.studyTimeCollected)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'B', 'H1N1', 'H3N2'}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set(df_clean.virusStrainReported)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Retrieve FCS files for Cohort_1\n",
"\n",
"For section we use the Query API to a sample of the FCS files available for this study. Then we will use the\n",
"Download API to retrieve 5 of the files."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"token = immport_download.request_immport_token(user_name, password)\n",
"r = immport_download.api(\"https://api.immport.org/data/query/result/filePath?studyAccession=SDY212&armName=Cohort_1&measurementTechnique=Flow%20cytometry\",token)\n",
"df = pd.read_json(r)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = df[df['fileDetail'] == \"Flow cytometry result\"]\n",
"df_fcs = df[['subjectAccession','armAccession','armName','gender','race','minSubjectAge','studyTimeCollected', \\\n",
" 'fileDetail','filePath']]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subjectAccession | \n",
" armAccession | \n",
" armName | \n",
" gender | \n",
" race | \n",
" minSubjectAge | \n",
" studyTimeCollected | \n",
" fileDetail | \n",
" filePath | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" SUB134242 | \n",
" ARM894 | \n",
" Cohort_1 | \n",
" Male | \n",
" White | \n",
" 26.86 | \n",
" 0 | \n",
" Flow cytometry result | \n",
" /SDY212/ResultFiles/Flow_cytometry_result/pFlo... | \n",
"
\n",
" \n",
" 1 | \n",
" SUB134249 | \n",
" ARM894 | \n",
" Cohort_1 | \n",
" Female | \n",
" Other | \n",
" 26.01 | \n",
" 0 | \n",
" Flow cytometry result | \n",
" /SDY212/ResultFiles/Flow_cytometry_result/pFlo... | \n",
"
\n",
" \n",
" 24 | \n",
" SUB134242 | \n",
" ARM894 | \n",
" Cohort_1 | \n",
" Male | \n",
" White | \n",
" 26.86 | \n",
" 0 | \n",
" Flow cytometry result | \n",
" /SDY212/ResultFiles/Flow_cytometry_result/PHOS... | \n",
"
\n",
" \n",
" 25 | \n",
" SUB134242 | \n",
" ARM894 | \n",
" Cohort_1 | \n",
" Male | \n",
" White | \n",
" 26.86 | \n",
" 0 | \n",
" Flow cytometry result | \n",
" /SDY212/ResultFiles/Flow_cytometry_result/PHOS... | \n",
"
\n",
" \n",
" 26 | \n",
" SUB134249 | \n",
" ARM894 | \n",
" Cohort_1 | \n",
" Female | \n",
" Other | \n",
" 26.01 | \n",
" 0 | \n",
" Flow cytometry result | \n",
" /SDY212/ResultFiles/Flow_cytometry_result/PHOS... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subjectAccession armAccession armName gender race minSubjectAge \\\n",
"0 SUB134242 ARM894 Cohort_1 Male White 26.86 \n",
"1 SUB134249 ARM894 Cohort_1 Female Other 26.01 \n",
"24 SUB134242 ARM894 Cohort_1 Male White 26.86 \n",
"25 SUB134242 ARM894 Cohort_1 Male White 26.86 \n",
"26 SUB134249 ARM894 Cohort_1 Female Other 26.01 \n",
"\n",
" studyTimeCollected fileDetail \\\n",
"0 0 Flow cytometry result \n",
"1 0 Flow cytometry result \n",
"24 0 Flow cytometry result \n",
"25 0 Flow cytometry result \n",
"26 0 Flow cytometry result \n",
"\n",
" filePath \n",
"0 /SDY212/ResultFiles/Flow_cytometry_result/pFlo... \n",
"1 /SDY212/ResultFiles/Flow_cytometry_result/pFlo... \n",
"24 /SDY212/ResultFiles/Flow_cytometry_result/PHOS... \n",
"25 /SDY212/ResultFiles/Flow_cytometry_result/PHOS... \n",
"26 /SDY212/ResultFiles/Flow_cytometry_result/PHOS... "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_fcs.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"387"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique_file_paths = df_fcs.filePath.unique()\n",
"unique_fcs_paths = [path for path in unique_file_paths if path.endswith(\".fcs\")]\n",
"len(unique_fcs_paths)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading: /SDY212/ResultFiles/Flow_cytometry_result/PHOSPHOFLOW SPECIMEN_FLU 010V1.390100.fcs\n",
"Downloading: /SDY212/ResultFiles/Flow_cytometry_result/PHOSPHOFLOW SPECIMEN_FLU 018V1.390104.fcs\n",
"Downloading: /SDY212/ResultFiles/Flow_cytometry_result/PHOSPHOFLOW SPECIMEN_022V1.390144.fcs\n",
"Downloading: /SDY212/ResultFiles/Flow_cytometry_result/PHOSPHOFLOW SPECIMEN_047V1.390160.fcs\n",
"Downloading: /SDY212/ResultFiles/Flow_cytometry_result/PHOSPHOFLOW SPECIMEN_039V1.391052.fcs\n"
]
}
],
"source": [
"for i in range(0,5):\n",
" print(\"Downloading: \",unique_fcs_paths[i])\n",
" immport_download.download_file(user_name,password,\n",
" unique_fcs_paths[i],output_directory)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}