Source code for BrainNetworksInPython.scripts.useful_functions

#!/usr/bin/env python


def _load_first_scan(csv_file):
    '''
    Read a behavmerge style csv file, keep only the rows from the first
    scan (occ == 0) and strip "_thicknessstd" / "_thickness" from the
    column names so that they match the aparc names.
    '''
    import pandas as pd

    df = pd.read_csv(csv_file, sep=',')

    # Only keep the first scan!
    # Take an explicit copy so that the columns added later are written to
    # an independent data frame rather than to a selection of the one
    # that was read in
    df = df.loc[df.occ == 0, :].copy()

    # Remove the longer text first so that "_thicknessstd" never leaves a
    # stray "std" behind
    df.columns = [col.replace('_thicknessstd', '').replace('_thickness', '')
                  for col in df.columns]

    return df


def read_in_df(data_file, aparc_names):
    '''
    A very useful command for NSPN behavmerge data frames

    Beware though - this is quite specific and there are a few versions
    floating around! Be careful

    The steps are:

    * read data_file and keep only the rows where occ is 0
    * strip "_thicknessstd" and "_thickness" from the column names
    * add the columns "ones", "age", "Global" and "Global_std"
    * if a matching standard deviation file exists, fill "Global_std"
      from it
    * convert the regional columns to floats
    * divide implausibly large rows by 1000 for MT, synthetic, R2s, L1,
      L23 and MD files (decided by the file name)

    Parameters
    ----------
    data_file : str
        Path to a comma separated file that contains an "occ" column, an
        "age_scan" column and one column for each entry in aparc_names
        (optionally suffixed with "_thickness" or "_thicknessstd").
    aparc_names : sequence of str
        The regional column names, as they appear after the suffix has
        been stripped.

    Returns
    -------
    pandas.DataFrame
        The first scan rows of data_file with the extra columns added.
    '''
    import os

    import numpy as np

    # Work with a real list so that it can be used as a column selector
    # and concatenated with the extra column names below
    aparc_names = list(aparc_names)

    # Read in the data file
    df = _load_first_scan(data_file)

    # Define a few variables you'll want in the data frame
    df['ones'] = df['age_scan'] * 0 + 1
    df['age'] = df['age_scan']
    df['Global'] = df[aparc_names].mean(axis=1)
    # NOTE(review): when no standard deviation file is found Global_std
    # keeps these values, which are the same as Global - confirm that
    # this fallback is what is wanted
    df['Global_std'] = df[aparc_names].mean(axis=1)

    # If there is a corresponding standard deviation
    # file then read in the standard deviation
    if 'mean' in data_file:
        std_data_file = data_file.replace('mean', 'std')
    else:
        std_data_file = data_file.replace('thickness', 'thicknessstd')

    # If neither word appears in the path then the replacement gives back
    # data_file itself. Skip that case, otherwise the measures would be
    # read in again and treated as their own standard deviations.
    if std_data_file != data_file and os.path.isfile(std_data_file):
        df_std = _load_first_scan(std_data_file)

        # Now write the std across all aparc names into the original
        # data frame by averaging the variances
        df['Global_std'] = np.sqrt(np.average(df_std[aparc_names]**2,
                                              axis=1))

    # Convert the values to floats
    df[aparc_names] = df[aparc_names].astype('float')

    # If this is an MT, R2s, synthetic, MD, L1 or L23 file
    # then you have to divide the values by 1000
    # However there have been problems here in the past with
    # mixing multiplied with non-multiplied values
    # so we'll actually just check for values greater than a
    # reasonable maximum and divide those ones.
    cols_list = aparc_names + ['Global', 'Global_std']

    # (text to look for in the file name, largest believable Global value)
    # The rules are applied one after another, in this order, and the
    # rows to change are worked out afresh for each rule
    rescale_rules = (
        ('MT', 50),
        ('synthetic', 50),
        ('R2s', 1),
        ('L1', 0.01),
        ('L23', 0.01),
        ('MD', 0.01),
    )

    file_name = os.path.basename(data_file)
    for tag, max_global in rescale_rules:
        if tag in file_name:
            too_big = df['Global'] > max_global
            df.loc[too_big, cols_list] = df.loc[too_big, cols_list]/1000.0

    return df
def read_in_data(regional_measures_file,
                 names_file,
                 covars_file=None,
                 names_308_style=True):
    '''
    Load the three input files and hand back their contents.

    * regional_measures_file is read as a csv file into a data frame
    * names_file is read as one name per line (whitespace stripped)
    * covars_file, if given, is read as one covariate per line
      (whitespace stripped), otherwise the covariates list is empty

    When names_308_style is True the first 41 names are dropped, and
    every data frame column name is cut at its last "_thickness" (that
    text and anything after it is removed).

    Returns the tuple (df, names, covars_list).
    '''
    import pandas as pd

    def stripped_lines(path):
        # One entry per line of the file, surrounding whitespace removed
        with open(path) as handle:
            return [entry.strip() for entry in handle]

    # Load the input files
    df = pd.read_csv(regional_measures_file)
    names = stripped_lines(names_file)
    covars_list = stripped_lines(covars_file) if covars_file else []

    if names_308_style:
        # The 308 style names list starts with 41 entries that are
        # not wanted
        names = names[41:]
        # The data frame columns carry a "_thickness" ending that has
        # to go so they line up with the names
        df.columns = [col.rsplit('_thickness', 1)[0] for col in df.columns]

    return df, names, covars_list
def residuals(x, y):
    '''
    A useful little function that regresses y on x (with an intercept)
    and gives back the residual values. These can then be used to
    calculate partial correlation values

    Parameters
    ----------
    x : array-like
        Either a single covariate with one value per observation, or a
        2D array with one row per covariate and one column per
        observation.
    y : array-like
        The values to be explained, one entry (or one row) per
        observation.

    Returns
    -------
    The result of subtracting the least squares prediction from y.
    '''
    import numpy as np

    # Accept lists and other sequences as well as numpy arrays
    x = np.asarray(x)
    if x.ndim == 1:
        x = x[np.newaxis, :]

    # Design matrix: one column per covariate plus a column of ones
    # for the intercept
    A = np.vstack([x, np.ones(x.shape[-1])]).T

    # rcond is given explicitly so that the cut-off for small singular
    # values does not depend on the numpy version (and so that older
    # versions do not raise a FutureWarning)
    B = np.linalg.lstsq(A, y, rcond=None)[0]

    # Slopes and intercept are applied in one matrix product, which
    # also works when y has more than one column
    pre = A.dot(B)
    res = y - pre

    return res