Source code for BrainNetworksInPython.scripts.make_demo_table

#!/usr/bin/env python

#=============================================================================
# Created by Kirstie Whitaker
# 27th July 2015
# Contact: kw401@cam.ac.uk
#
# This code creates the tables of demographics comparing the different
# cohorts for the NSPN Cortical Myelination manuscript
#=============================================================================

#=============================================================================
# IMPORTS
#-----------------------------------------------------------------------------
import os
import pandas as pd
import datetime
import itertools as it
from glob import glob
import csv
import matplotlib.pylab as plt
import numpy as np
import sys

#=============================================================================
# FUNCTIONS
#-----------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Define your latex add in caption function
#------------------------------------------------------------------------------
def add_caption(latex_table, caption):
    '''
    Insert the caption (title) row for this table directly beneath the
    first line of latex_table and return the resulting string.

    latex_table : the latex source of the table, as a single string
    caption     : the text placed inside \\caption*{...}
    '''
    # Everything up to the first line break stays on top; the caption row
    # is slotted in between it and whatever followed.
    first_line, line_break, remainder = latex_table.partition('\n')

    caption_row = '\\caption*{{{}}} \\\\'.format(caption)

    return first_line + '\n' + caption_row + line_break + remainder
#------------------------------------------------------------------------------
# And here's the main code
#------------------------------------------------------------------------------
# Two-row placeholder written when a measure is not reported for a cohort
_NA_CELLS = ['\\multirow{2}{*}{NA}', '']

# Cohorts with more participants than this show NA for IQ and handedness.
# NOTE(review): presumably this singles out the "Remaining 2K" cohort, for
# which those measures are not reported - confirm against the data.
_NA_COHORT_SIZE = 2000

# Row labels (the index of the table). The empty strings are the
# continuation rows underneath each \multirow label.
_ROW_LABELS = (['\\textbf{Number of participants}',
                '\\textbf{Gender}',
                '\\multirow{2}{*}{\\textbf{Age (years)}}', '',
                '\\multirow{2}{*}{\\textbf{IQ}}', '',
                '\\multirow{2}{*}{\\textbf{Handedness}}', '',
                '\\multirow{2}{*}{\\textbf{IMD}}', '',
                '\\multirow{6}{*}{\\textbf{Ethnicity}}']
               + [''] * 5)

# (label, code in the psq_a4_ethnic_group column), in the order the rows
# appear in the table
_ETHNICITY_ROWS = [('White', 1),
                   ('Asian', 3),
                   ('Black', 4),
                   ('Mixed', 2),
                   ('Other', 5),
                   ('Declined to state', 6)]


def _median_iqr_cells(data, column):
    '''
    Return the two table cells (median, then "(IQR: lower-upper)") for
    the non-missing values of data[column].

    The same column is used both to drop missing values and to compute the
    percentiles. If there are no non-missing values the NA placeholder
    cells are returned instead.
    '''
    values = data[column].dropna()

    if values.empty:
        return list(_NA_CELLS)

    lower, median, upper = np.percentile(values, [25, 50, 75])

    return ['{:2.1f}'.format(median),
            '(IQR: {:2.1f}-{:2.1f})'.format(lower, upper)]


def _percent_cell(n_matching, n_total, label):
    '''
    Return a table cell such as "12.3\\% male" giving n_matching as a
    percentage of n_total.
    '''
    return '{:2.1f}\\% {}'.format(n_matching * 100.0 / n_total, label)


def make_demo_table(data_dir, paper_dir):
    '''
    Create the latex table of participant demographics, with one column
    for each of the Discovery, Validation and Remaining 2K cohorts.

    data_dir  : directory containing DemographicData.csv. The columns read
                are discovery, validation, nspn_id, sex, age_hqp, age_scan,
                wasi_zz_iq_full2_iq, ehi_handedness_score, imd_2007 and
                psq_a4_ethnic_group.
    paper_dir : directory containing a STATS_TABLES sub-directory, into
                which two files are written:
                  DemographicsTable.tex - the table wrapped in a document
                                          header and footer
                  DemographicsTable.txt - the table on its own

    Participants are grouped by their (discovery, validation) values. A
    combination other than (0, 0), (1, 0) or (0, 1) raises a KeyError.

    All percentages are out of the number of participants in the cohort
    with a non-missing nspn_id.

    Nothing is returned.
    '''
    demographics_file = os.path.join(data_dir, 'DemographicData.csv')
    caption = 'Participant Demographics'
    table_file = os.path.join(paper_dir,
                              'STATS_TABLES',
                              'DemographicsTable.tex')

    df = pd.read_csv(demographics_file)

    name_dict = { (0.0, 0.0) : 'Remaining 2K',
                  (1.0, 0.0) : 'Discovery',
                  (0.0, 1.0) : 'Validation'}

    table_dict = {}

    for name, data in df.groupby(['discovery', 'validation']):

        #=======================================================
        # NUMBER OF PARTICIPANTS
        #=======================================================
        n = data['nspn_id'].count()
        data_list = [n]

        #=======================================================
        # PERCENTAGE MALE
        #=======================================================
        n_male = (data['sex'] == 'Male').sum()
        data_list += [_percent_cell(n_male, n, 'male')]

        #=======================================================
        # AGES (MEDIAN, IQR)
        #=======================================================
        # The cohort in neither the discovery nor the validation group
        # uses age_hqp; the other cohorts use age_scan
        if name[0] + name[1] == 0:
            age_var = 'age_hqp'
        else:
            age_var = 'age_scan'

        data_list += _median_iqr_cells(data, age_var)

        #=======================================================
        # FULL IQ and HANDEDNESS (MEDIAN, IQR)
        #=======================================================
        for column in ['wasi_zz_iq_full2_iq', 'ehi_handedness_score']:
            if n > _NA_COHORT_SIZE:
                data_list += _NA_CELLS
            else:
                data_list += _median_iqr_cells(data, column)

        #=======================================================
        # INDEX MULTIPLE DEPRIVATION (MEDIAN, IQR)
        #=======================================================
        data_list += _median_iqr_cells(data, 'imd_2007')

        #=======================================================
        # ETHNICITY (% IN EACH GROUP)
        #=======================================================
        ethnicity = data['psq_a4_ethnic_group']
        data_list += [_percent_cell((ethnicity == code).sum(), n, label)
                      for label, code in _ETHNICITY_ROWS]

        table_dict['\\textbf{{{}}}'.format(name_dict[name])] = data_list

    table_df = pd.DataFrame(table_dict, index=_ROW_LABELS)

    # Fix the left-to-right order of the cohorts
    table_df = table_df.loc[:, ['\\textbf{Discovery}',
                                '\\textbf{Validation}',
                                '\\textbf{Remaining 2K}']]

    latex_table = table_df.to_latex(longtable=True,
                                    index=True,
                                    escape=False)

    # Center the three cohort columns, keep the row labels left aligned
    latex_table = latex_table.replace('llll', 'lccc')
    latex_table = add_caption(latex_table, caption)

    latex_header = '\n'.join([ '\\documentclass{article}',
                               '\\usepackage{booktabs}',
                               '\\usepackage[a4paper, left={1cm}, right={1cm}, top={1.5cm}, bottom={1.5cm}, portrait]{geometry}',
                               '\\usepackage{longtable}',
                               '\\usepackage{array}',
                               '\\usepackage{multirow}',
                               '\\begin{document}',
                               '' ])

    latex_footer = '\n\\end{document}\n'

    # Write the file to the table_filename
    with open(table_file, 'w') as f:
        f.write(latex_header)
        f.write(latex_table)
        f.write(latex_footer)

    # Write the file to the table_filename without the latex header and
    # footer. Only the extension is swapped so that a ".tex" elsewhere in
    # the path is left alone.
    text_file = os.path.splitext(table_file)[0] + '.txt'
    with open(text_file, 'w') as f:
        f.write(latex_table)