From 154c2e718b42c5820bcb36ad4a657804b0707291 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor
Date: Mon, 10 Jun 2013 11:38:38 -0700
Subject: [PATCH] ds105 fmri example with single runs being fit

---
 examples/ds105/ds105_example.py     | 525 ++++++++++++++++++++++++++++
 examples/ds105/ds105_util.py        | 292 ++++++++++++++++
 examples/ds105/parallel_run.py      | 136 +++++++
 examples/ds105/view_contrasts_3d.py |  74 ++++
 nipy/modalities/fmri/design.py      | 130 ++++++-
 5 files changed, 1153 insertions(+), 4 deletions(-)
 create mode 100644 examples/ds105/ds105_example.py
 create mode 100644 examples/ds105/ds105_util.py
 create mode 100644 examples/ds105/parallel_run.py
 create mode 100755 examples/ds105/view_contrasts_3d.py

diff --git a/examples/ds105/ds105_example.py b/examples/ds105/ds105_example.py
new file mode 100644
index 0000000000..365dd08797
--- /dev/null
+++ b/examples/ds105/ds105_example.py
@@ -0,0 +1,525 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+"""Example analyzing the ds105 dataset with NIPY.
+
+* Single run models with per-voxel AR(1).
+* Cross-run, within-subject models with optimal effect estimates.
+* Cross-subject models using fixed / random effects variance ratios.
+* Permutation testing for inference on cross-subject result.
+
+See ``parallel_run.py`` for a rig to run these analyses in parallel using the
+IPython parallel machinery.
+
+This script needs the pre-processed ds105 data.  See ``README.txt`` and
+``ds105_util.py`` for details.
+
+See ``examples/labs/need_data/first_level_fiac.py`` for an alternative
+approach to some of these analyses.
+"""
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+from __future__ import print_function # Python 2/3 compatibility
+
+# Stdlib
+from tempfile import NamedTemporaryFile
+from os.path import join as pjoin
+from copy import copy
+import warnings
+
+# Third party
+import numpy as np
+
+# From NIPY
+from nipy.algorithms.statistics.api import (OLSModel, ARModel, make_recarray,
+                                            isestimable)
+from nipy.modalities.fmri.fmristat import hrf as delay
+from nipy.modalities.fmri import design, hrf
+from nipy.io.api import load_image, save_image
+from nipy.core import api
+from nipy.core.api import Image
+from nipy.core.image.image import rollimg
+
+from nipy.algorithms.statistics import onesample
+
+# Local
+import ds105_util as futil
+#reload(futil)  # uncomment while developing interactively (Python 2 only)
+
+#-----------------------------------------------------------------------------
+# Globals
+#-----------------------------------------------------------------------------
+
+SUBJECTS = tuple(range(1, 7))
+RUNS = tuple(range(1, 13))
+DESIGNS = ('standard',)
+CONTRASTS = ('face_vs_scrambled_t', 'house_vs_scrambled_t',
+             'chair_vs_scrambled_t', 'face_vs_house_t')
+
+# XXX: this mask was copied by hand from one of the subjects
+# we should have a function to create this mask from the ds105 data
+GROUP_MASK = futil.load_image_ds105('group', 'mask.nii.gz')
+TINY_MASK = np.zeros(GROUP_MASK.shape, np.bool)
+TINY_MASK[30:32, 40:42, 30:32] = 1
+
+#-----------------------------------------------------------------------------
+# Public functions
+#-----------------------------------------------------------------------------
+
+# For group analysis
+
+def run_model(subj, run):
+    """
+    Single subject fitting of ds105 model
+    """
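+    # Overview of the steps below: build the design and contrasts, do an
+    # initial OLS fit to estimate a per-voxel AR(1) coefficient, refit with
+    # AR(1) whitening in batches of voxels sharing the same (truncated)
+    # coefficient, then write t / sd / effect / F images to disk.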
+    #----------------------------------------------------------------------
+    # Set initial parameters of the ds105 dataset
+    #----------------------------------------------------------------------
+    # Number of volumes in the fMRI data
+    nvol = 121
+    # The TR of the experiment
+    TR = 2.5
+    # The time of the first volume
+    Tstart = 0.0
+    # The array of times corresponding to each volume in the fMRI data
+    volume_times = np.arange(nvol) * TR + Tstart
+    # This recarray of times has one column named 't'.  It is used in the
+    # function design.block_design to create the design matrices.
+    volume_times_rec = make_recarray(volume_times, 't')
+    # Get a path description dictionary that contains all the path data
+    # relevant to this subject/run
+    path_info = futil.path_info_run(subj, run)
+
+    #----------------------------------------------------------------------
+    # Experimental design
+    #----------------------------------------------------------------------
+
+    # Load the experimental description from disk.  We have utilities in
+    # futil that reformat the original ds105-supplied format into something
+    # where the factorial structure of the design is more explicit.  This
+    # has already been run once, and get_experiment() will simply load the
+    # newly-formatted design description files (.csv) into record arrays.
+    experiment = futil.get_experiment(path_info)
+
+    # Create the design matrix for the "experiment" factor, saving the
+    # default contrasts.
+
+    # The function block_design will create a design matrix, which here will
+    # have num_columns = (# levels of object) * len(delay.spectral) =
+    # 8 * 2 = 16 for the 8 object categories.
+
+    # Here, delay.spectral is a sequence of 2 symbolic HRFs that are
+    # described in:
+    #
+    # Liao, C.H., Worsley, K.J., Poline, J-B., Aston, J.A.D., Duncan, G.H.,
+    # Evans, A.C. (2002). 'Estimating the delay of the response in fMRI
+    # data.' NeuroImage, 16:593-606.
+
+    # The contrast definitions in ``cons_exper`` are a dictionary with keys
+    # ['constant_0', 'constant_1'] plus, because level_contrasts=True, one
+    # contrast per factor level, e.g. 'object_face_0', 'object_face_1'.
+    # The trailing _0 / _1 says which of the two HRF basis functions in
+    # delay.spectral the term was convolved with; for example,
+    # object_face_0 is the 'face' regressor convolved with the first (=0)
+    # of the two basis functions.
+
+    # XXX use the hrf __repr__ for naming contrasts
+    X_exper, cons_exper = design.block_design(experiment, volume_times_rec,
+                                              hrfs=delay.spectral,
+                                              level_contrasts=True)
+
+    # In addition to factors, there is typically a "drift" term.  In this
+    # case, the drift is a natural cubic spline with a knot at the midpoint
+    # (volume_times.mean())
+    vt = volume_times  # shorthand
+    drift = np.array([vt**i for i in range(4)] +
+                     [(vt - vt.mean())**3 * (np.greater(vt, vt.mean()))])
+    for i in range(drift.shape[0]):
+        drift[i] /= drift[i].max()
+
+    # We transpose the drift so that its shape is (nvol, 5) so that it will
+    # have the same number of rows as X_exper.
+    drift = drift.T
+
+    # There are helper functions to create these drifts:
+    # design.fourier_basis, design.natural_spline.
+    # Therefore, the above is equivalent (except for the normalization by
+    # max for numerical stability) to
+    #
+    # >>> drift = design.natural_spline(volume_times, [volume_times.mean()])
+
+    # Stack all the designs, keeping the new contrasts, which have the same
+    # keys as cons_exper, but whose values are arrays with one column per
+    # column of X, the non-zero entries matching the columns of X
+    # corresponding to X_exper
+    X, cons = design.stack_designs((X_exper, cons_exper),
+                                   (drift, {}))
+
+    # Sanity check: delete any non-estimable contrasts
+    # (list() so that we can delete keys while iterating)
+    for k in list(cons):
+        if not isestimable(cons[k], X):
+            del(cons[k])
+            warnings.warn("contrast %s not estimable for this run" % k)
+
+    # The default contrasts are all t-statistics.  We may also want to
+    # output F-statistics for the object comparisons, based on the two
+    # coefficients, one for each HRF in delay.spectral.
+
+    # We reproduce the same contrasts as in the database, outputting an F
+    # using both HRFs, as well as the t using only the first HRF
+
+    for obj1, obj2 in [('face', 'scrambled'),
+                       ('house', 'scrambled'),
+                       ('chair', 'scrambled'),
+                       ('face', 'house')]:
+        cons['%s_vs_%s_F' % (obj1, obj2)] = \
+            np.vstack([cons['object_%s_0' % obj1] -
+                       cons['object_%s_0' % obj2],
+                       cons['object_%s_1' % obj1] -
+                       cons['object_%s_1' % obj2]])
+
+        cons['%s_vs_%s_t' % (obj1, obj2)] = (cons['object_%s_0' % obj1] -
+                                             cons['object_%s_0' % obj2])
+
+    #----------------------------------------------------------------------
+    # Data loading
+    #----------------------------------------------------------------------
+
+    # Load in the fMRI data, saving it as an array.  It is transposed to
+    # have time as the first dimension, i.e. fmri[t] gives the t-th volume.
+    fmri_im = futil.get_fmri(path_info)  # an Image
+    fmri_im = rollimg(fmri_im, 't')
+    fmri = fmri_im.get_data()  # now, it's an ndarray
+
+    nvol, volshape = fmri.shape[0], fmri.shape[1:]
+    nx, sliceshape = volshape[0], volshape[1:]
+
+    #----------------------------------------------------------------------
+    # Model fit
+    #----------------------------------------------------------------------
+
+    # The model is a two-stage model, the first stage being an OLS (ordinary
+    # least squares) fit, whose residuals are used to estimate an AR(1)
+    # parameter for each voxel.
+    m = OLSModel(X)
+    ar1 = np.zeros(volshape)
+
+    # Fit the model, storing an estimate of an AR(1) parameter at each voxel
+    for s in range(nx):
+        d = np.array(fmri[:, s])
+        flatd = d.reshape((d.shape[0], -1))
+        result = m.fit(flatd)
+        ar1[s] = ((result.resid[1:] * result.resid[:-1]).sum(0) /
+                  (result.resid**2).sum(0)).reshape(sliceshape)
+
+    # We truncate ar1 to the nearest one-hundredth and group voxels by
+    # their truncated ar1 value, fitting an AR(1) model to each batch of
+    # voxels.
+
+    # XXX smooth here?
+    # ar1 = smooth(ar1, 8.0)
+    ar1 *= 100
+    ar1 = ar1.astype(np.int) / 100.
+
+    # We split the contrasts into F-tests and t-tests.
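+    # (A 1-D contrast array is treated as a t contrast; a 2-D array, with
+    # one row per constraint, as an F contrast.)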
+    # XXX helper function should do this
+    fcons = {}; tcons = {}
+    for n, v in cons.items():
+        v = np.squeeze(v)
+        if v.ndim == 1:
+            tcons[n] = v
+        else:
+            fcons[n] = v
+
+    # Setup a dictionary to hold all the output
+    # XXX ideally these would be memmap'ed Image instances
+    output = {}
+    for n in tcons:
+        tempdict = {}
+        for v in ['sd', 't', 'effect']:
+            tempdict[v] = np.memmap(NamedTemporaryFile(prefix='%s%s.nii'
+                                                       % (n, v)),
+                                    dtype=np.float,
+                                    shape=volshape, mode='w+')
+        output[n] = tempdict
+
+    for n in fcons:
+        output[n] = np.memmap(NamedTemporaryFile(prefix='%sF.nii' % n),
+                              dtype=np.float,
+                              shape=volshape, mode='w+')
+
+    # Loop over the unique values of ar1
+    for val in np.unique(ar1):
+        armask = np.equal(ar1, val)
+        m = ARModel(X, val)
+        d = fmri[:, armask]
+        results = m.fit(d)
+
+        # Output the results for each contrast
+        for n in tcons:
+            resT = results.Tcontrast(tcons[n])
+            output[n]['sd'][armask] = resT.sd
+            output[n]['t'][armask] = resT.t
+            output[n]['effect'][armask] = resT.effect
+        for n in fcons:
+            output[n][armask] = results.Fcontrast(fcons[n]).F
+
+    # Dump output to disk
+    odir = futil.output_dir(path_info, tcons, fcons)
+    # The coordmap for a single volume in the time series
+    vol0_map = fmri_im[0].coordmap
+    for n in tcons:
+        for v in ['t', 'sd', 'effect']:
+            im = Image(output[n][v], vol0_map)
+            save_image(im, pjoin(odir, n, '%s.nii' % v))
+    for n in fcons:
+        im = Image(output[n], vol0_map)
+        save_image(im, pjoin(odir, n, "F.nii"))
+
+
+def fixed_effects(subj, design):
+    """ Fixed effects (within subject) for ds105 model
+
+    Finds run by run estimated model results, creates fixed effects results
+    image per subject.
+
+    Parameters
+    ----------
+    subj : int
+        subject number 1..6 inclusive
+    design : {'standard'}
+        design type
+    """
+    # First, find all the effect and standard deviation images
+    # for the subject and this design type
+    path_dict = futil.path_info_design(subj, design)
+    rootdir = path_dict['rootdir']
+    # The output directory
+    fixdir = pjoin(rootdir, "fixed")
+    # Fetch results images from run estimations
+    results = futil.results_table(path_dict)
+    # Get our hands on the relevant coordmap to save our results
+    # XXX check the anatomical image name against the ds105 layout
+    coordmap = futil.load_image_ds105("sub%03d" % subj,
+                                      "wanatomical.nii").coordmap
+    # Compute the "fixed" effects for each type of contrast
+    for con in results:
+        fixed_effect = 0
+        fixed_var = 0
+        for effect, sd in results[con]:
+            effect = load_image(effect).get_data()
+            sd = load_image(sd).get_data()
+            var = sd ** 2
+
+            # The optimal, in terms of minimum variance, combination of the
+            # effects has weights 1 / var
+            #
+            # XXX regions with 0 variance are set to 0
+            # XXX do we want this or np.nan?
+            ivar = np.nan_to_num(1. / var)
+            fixed_effect += effect * ivar
+            fixed_var += ivar
+
+        # Now, compute the fixed effects variance and t statistic
+        fixed_sd = np.sqrt(fixed_var)
+        isd = np.nan_to_num(1. / fixed_sd)
+        fixed_t = fixed_effect * isd
+
+        # Save the results
+        odir = futil.ensure_dir(fixdir, con)
+        for a, n in zip([fixed_effect, fixed_sd, fixed_t],
+                        ['effect', 'sd', 't']):
+            im = api.Image(a, copy(coordmap))
+            save_image(im, pjoin(odir, '%s.nii' % n))
+
+
+def group_analysis(design, contrast):
+    """ Compute group analysis effect, t, sd for `design` and `contrast`
+
+    Saves to disk in 'group' analysis directory
+
+    Parameters
+    ----------
+    design : {'standard'}
+        design type
+    contrast : str
+        contrast name
+    """
+    array = np.array  # shorthand
+    # Directory where output will be written
+    odir = futil.ensure_dir(futil.DATADIR, 'group', design, contrast)
+
+    # Which subjects have this (contrast, design) pair?
+    subj_con_dirs = futil.subj_des_con_dirs(design, contrast)
+    if len(subj_con_dirs) == 0:
+        raise ValueError('No subjects for %s, %s' % (design, contrast))
+
+    # Assemble effects and sds into 4D arrays
+    sds = []
+    Ys = []
+    for s in subj_con_dirs:
+        sd_img = load_image(pjoin(s, "sd.nii"))
+        effect_img = load_image(pjoin(s, "effect.nii"))
+        sds.append(sd_img.get_data())
+        Ys.append(effect_img.get_data())
+    sd = array(sds)
+    Y = array(Ys)
+
+    # This function estimates the ratio of the fixed effects variance
+    # (sum(1/sd**2, 0)) to the estimated random effects variance
+    # (sum(1/(sd+rvar)**2, 0)) where rvar is the random effects variance.
+
+    # The EM algorithm used is described in:
+    #
+    # Worsley, K.J., Liao, C., Aston, J., Petre, V., Duncan, G.H.,
+    # Morales, F., Evans, A.C. (2002). 'A general statistical
+    # analysis for fMRI data'. NeuroImage, 15:1-15
+    varest = onesample.estimate_varatio(Y, sd)
+    random_var = varest['random']
+
+    # XXX - if we have a smoother, use
+    # random_var = varest['fixed'] * smooth(varest['ratio'])
+
+    # Having estimated the random effects variance (and possibly smoothed
+    # it), the corresponding estimate of the effect and its variance is
+    # computed and saved.
+
+    # This is the coordmap we will use; the group mask is in the same
+    # space as the group results
+    coordmap = futil.load_image_ds105('group', 'mask.nii.gz').coordmap
+
+    adjusted_var = sd**2 + random_var
+    adjusted_sd = np.sqrt(adjusted_var)
+
+    results = onesample.estimate_mean(Y, adjusted_sd)
+    for n in ['effect', 'sd', 't']:
+        im = api.Image(results[n], copy(coordmap))
+        save_image(im, pjoin(odir, "%s.nii" % n))
+
+
+def group_analysis_signs(design, contrast, mask, signs=None):
+    """ Refit the EM model with a vector of signs.
+
+    Used in the permutation tests.
+
+    Returns the maximum of the T-statistic within mask
+
+    Parameters
+    ----------
+    design : {'standard'}
+        design type
+    contrast : str
+        name of contrast to estimate
+    mask : ``Image`` instance or array-like
+        image containing mask, or array-like
+    signs : ndarray, optional
+        Defaults to np.ones.  Should have shape (*, nsubj)
+        where nsubj is the number of effects combined in the group analysis.
+
+    Returns
+    -------
+    minT : np.ndarray
+        minima of T statistic within mask, one for each vector of signs
+    maxT : np.ndarray
+        maxima of T statistic within mask, one for each vector of signs
+    """
+    if api.is_image(mask):
+        maska = mask.get_data()
+    else:
+        maska = np.asarray(mask)
+    maska = maska.astype(np.bool)
+
+    # Which subjects have this (contrast, design) pair?
+    subj_con_dirs = futil.subj_des_con_dirs(design, contrast)
+
+    # Assemble effects and sds into 4D arrays
+    sds = []
+    Ys = []
+    for s in subj_con_dirs:
+        sd_img = load_image(pjoin(s, "sd.nii"))
+        effect_img = load_image(pjoin(s, "effect.nii"))
+        sds.append(sd_img.get_data()[maska])
+        Ys.append(effect_img.get_data()[maska])
+    sd = np.array(sds)
+    Y = np.array(Ys)
+
+    if signs is None:
+        signs = np.ones((1, Y.shape[0]))
+
+    maxT = np.empty(signs.shape[0])
+    minT = np.empty(signs.shape[0])
+
+    for i, sign in enumerate(signs):
+        signY = sign[:, np.newaxis] * Y
+        varest = onesample.estimate_varatio(signY, sd)
+        random_var = varest['random']
+
+        adjusted_var = sd**2 + random_var
+        adjusted_sd = np.sqrt(adjusted_var)
+
+        # Note that the sign-flipped effects are the ones averaged here
+        results = onesample.estimate_mean(signY, adjusted_sd)
+        T = results['t']
+        minT[i], maxT[i] = np.nanmin(T), np.nanmax(T)
+    return minT, maxT
+
+
+def permutation_test(design, contrast, mask=GROUP_MASK, nsample=1000):
+    """
+    Perform a permutation (sign) test for a given design type and
+    contrast.  It is a Monte Carlo test because we only sample nsample
+    possible sign arrays.
+
+    Parameters
+    ----------
+    design : {'standard'}
+        design type
+    contrast : str
+        name of contrast to estimate
+    mask : ``Image`` instance or array-like, optional
+        image containing mask, or array-like
+    nsample : int, optional
+        number of permutations
+
+    Returns
+    -------
+    min_vals : np.ndarray
+    max_vals : np.ndarray
+    """
+    subj_con_dirs = futil.subj_des_con_dirs(design, contrast)
+    nsubj = len(subj_con_dirs)
+    if nsubj == 0:
+        raise ValueError('No subjects have %s, %s' % (design, contrast))
+    signs = 2*np.greater(np.random.sample(size=(nsample, nsubj)), 0.5) - 1
+    min_vals, max_vals = group_analysis_signs(design, contrast, mask, signs)
+    return min_vals, max_vals
+
+
+def run_run_models(subject_nos=SUBJECTS, run_nos=RUNS):
+    """ Simple serial run of all the within-run models """
+    for subj in subject_nos:
+        for run in run_nos:
+            try:
+                run_model(subj, run)
+            except IOError:
+                print('Skipping subject %d, run %d' % (subj, run))
+
+
+def run_fixed_models(subject_nos=SUBJECTS, designs=DESIGNS):
+    """ Simple serial run of all the within-subject models """
+    for subj in subject_nos:
+        for design in designs:
+            try:
+                fixed_effects(subj, design)
+            except IOError:
+                print('Skipping subject %d, design %s' % (subj, design))
+
+
+def run_group_models(designs=DESIGNS, contrasts=CONTRASTS):
+    """ Simple serial run of all the across-subject models """
+    for design in designs:
+        for contrast in contrasts:
+            group_analysis(design, contrast)
+
+
+if __name__ == '__main__':
+    pass
+    # Sanity check while debugging
+    #permutation_test('standard', 'house_vs_scrambled_t',
+    #                 mask=TINY_MASK, nsample=3)
diff --git a/examples/ds105/ds105_util.py b/examples/ds105/ds105_util.py
new file mode 100644
index 0000000000..124b1a3dba
--- /dev/null
+++ b/examples/ds105/ds105_util.py
@@ -0,0 +1,292 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+"""Support utilities for ds105 example, mostly path management.
+
+The purpose of separating these is to keep the main example code as readable
+as possible and focused on the experimental modeling and analysis, rather
+than on local file management issues.
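+
+All paths are constructed relative to the ``DATADIR`` directory defined
+below; keep a copy of the ds105 data there, or symlink the real location.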
+
+Requires matplotlib.
+"""
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+from __future__ import print_function # Python 2/3 compatibility
+
+# Stdlib
+import os
+from os import makedirs, listdir
+from os.path import exists, abspath, isdir, join as pjoin, splitext
+import csv
+try:
+    from StringIO import StringIO # Python 2
+except ImportError:
+    from io import StringIO # Python 3
+
+# Third party
+import numpy as np
+from matplotlib.mlab import csv2rec, rec2csv
+
+# From NIPY
+from nipy.io.api import load_image
+
+#-----------------------------------------------------------------------------
+# Globals
+#-----------------------------------------------------------------------------
+
+# We assume that there is a directory holding the data and it's local to
+# this code.  Users can either keep a copy here or a symlink to the real
+# location on disk of the data.
+DATADIR = 'ds105_data'
+
+# Sanity check
+if not os.path.isdir(DATADIR):
+    e = "The data directory %s must exist and contain the ds105 data." % DATADIR
+    raise IOError(e)
+
+#-----------------------------------------------------------------------------
+# Classes and functions
+#-----------------------------------------------------------------------------
+
+# Path management utilities
+def load_image_ds105(*path):
+    """Return a NIPY image from a set of path components.
+    """
+    return load_image(pjoin(DATADIR, *path))
+
+
+def subj_des_con_dirs(design, contrast, subjects=range(1, 7)):
+    """Return a list of subject directories with this `design` and `contrast`
+
+    Parameters
+    ----------
+    design : {'standard'}
+        design type
+    contrast : str
+        contrast name
+    subjects : sequence of int, optional
+        which subjects
+
+    Returns
+    -------
+    con_dirs : list
+        list of directories matching `design` and `contrast`
+    """
+    rootdir = DATADIR
+    con_dirs = []
+    for s in subjects:
+        f = pjoin(rootdir, "sub%03d" % s, "model", design, "fixed", contrast)
+        if isdir(f):
+            con_dirs.append(f)
+    return con_dirs
+
+
+def path_info_run(subj, run, design='standard'):
+    """Construct path information dict for current subject/run.
+
+    Parameters
+    ----------
+    subj : int
+        subject number (1..6 inclusive)
+    run : int
+        run number (1..12 inclusive).
+    design : str, optional
+        which design to use, defaults to 'standard'
+
+    Returns
+    -------
+    path_dict : dict
+        a dict with all the necessary path-related keys, including
+        'rootdir' and 'design' (here always 'standard')
+    """
+    path_dict = {'subj': subj, 'run': run, 'design': design}
+    rootdir = pjoin(DATADIR, "sub%(subj)03d", "model", "%(design)s") % path_dict
+    path_dict['rootdir'] = rootdir
+    path_dict['fsldir'] = pjoin(DATADIR, "sub%(subj)03d", "model", "model001") % path_dict
+    return path_dict
+
+
+def path_info_design(subj, design):
+    """Construct path information dict for subject and design.
+
+    Parameters
+    ----------
+    subj : int
+        subject number (1..6 inclusive)
+    design : {'standard'}
+        type of design
+
+    Returns
+    -------
+    path_dict : dict
+        having keys 'rootdir', 'fsldir', 'subj', 'design'
+    """
+    path_dict = {'subj': subj, 'design': design}
+    rootdir = pjoin(DATADIR, "sub%(subj)03d", "model", "%(design)s") % path_dict
+    path_dict['rootdir'] = rootdir
+    path_dict['fsldir'] = pjoin(DATADIR, "sub%(subj)03d", "model", "model001") % path_dict
+    return path_dict
+
+
+def results_table(path_dict):
+    """ Return precalculated results images for subject info in `path_dict`
+
+    Parameters
+    ----------
+    path_dict : dict
+        containing key 'rootdir'
+
+    Returns
+    -------
+    rtab : dict
+        dict with keys given by run directories for this subject, values
+        being a list with filenames of effect and sd images.
+    """
+    # Which runs correspond to this design type?
+    rootdir = path_dict['rootdir']
+    runs = filter(lambda f: isdir(pjoin(rootdir, f)),
+                  ['results_run%03d' % i for i in range(1, 13)])
+
+    # Find out which contrasts have t-statistics,
+    # storing the filenames for reading below
+
+    results = {}
+
+    for rundir in runs:
+        rundir = pjoin(rootdir, rundir)
+        for condir in listdir(rundir):
+            fname_effect = abspath(pjoin(rundir, condir, 'effect.nii'))
+            fname_sd = abspath(pjoin(rundir, condir, 'sd.nii'))
+            if exists(fname_effect) and exists(fname_sd):
+                results.setdefault(condir, []).append([fname_effect,
+                                                       fname_sd])
+    return results
+
+
+def get_experiment(path_dict):
+    """Get the record array for the experimental design.
+
+    Parameters
+    ----------
+    path_dict : dict
+        containing keys 'rootdir', 'run', 'subj'
+
+    Returns
+    -------
+    experiment : np.recarray
+        the experimental design, with fields including 'start', 'end' and
+        the 'object' factor
+    """
+    # The following reads in the .csv file and returns a recarray
+    rootdir = path_dict['rootdir']
+    if not exists(pjoin(rootdir, "experiment_run%(run)03d.csv") % path_dict):
+        e = "can't find design for subject=%(subj)d, run=%(run)d" % path_dict
+        raise IOError(e)
+
+    experiment = csv2rec(pjoin(rootdir, "experiment_run%(run)03d.csv") % path_dict)
+
+    return experiment
+
+
+def get_fmri(path_dict):
+    """Get the fMRI image for a given subject/run.
+
+    Parameters
+    ----------
+    path_dict : dict
+        containing keys 'fsldir', 'run'
+
+    Returns
+    -------
+    fmri : NIPY Image
+    """
+    fmri_im = load_image(
+        pjoin("%(fsldir)s/task001_run%(run)03d.feat/filtered_func_data.nii.gz"
+              % path_dict))
+    return fmri_im
+
+
+def ensure_dir(*path):
+    """Ensure a directory exists, making it if necessary.
+
+    Returns the full path."""
+    dirpath = pjoin(*path)
+    if not isdir(dirpath):
+        makedirs(dirpath)
+    return dirpath
+
+
+def output_dir(path_dict, tcons, fcons):
+    """Get (and make if necessary) directory to write output into.
+
+    Parameters
+    ----------
+    path_dict : dict
+        containing keys 'rootdir', 'run'
+    tcons : sequence of str
+        t contrasts
+    fcons : sequence of str
+        F contrasts
+    """
+    rootdir = path_dict['rootdir']
+    odir = pjoin(rootdir, "results_run%(run)03d" % path_dict)
+    ensure_dir(odir)
+    for n in tcons:
+        ensure_dir(odir, n)
+    for n in fcons:
+        ensure_dir(odir, n)
+    return odir
+
+
+def compare_results(subj, run, other_root, mask_fname):
+    """ Find and compare calculated results images from a previous run
+
+    This function checks that another directory containing results of this
+    same analysis is similar in the sense of numpy ``allclose`` within a
+    brain mask.
+
+    Parameters
+    ----------
+    subj : int
+        subject number (1..6)
+    run : int
+        run number (1..12)
+    other_root : str
+        path to previous run estimation
+    mask_fname : str
+        path to a mask image defining area in which to compare differences
+    """
+    # Get information for this subject and run
+    path_dict = path_info_run(subj, run)
+    # Get mask
+    msk = load_image(mask_fname).get_data().copy().astype(bool)
+    # Get results directories for this run
+    rootdir = path_dict['rootdir']
+    res_dir = pjoin(rootdir, 'results_run%03d' % run)
+    if not isdir(res_dir):
+        return
+    for dirpath, dirnames, filenames in os.walk(res_dir):
+        for fname in filenames:
+            froot, ext = splitext(fname)
+            if froot in ('effect', 'sd', 'F', 't'):
+                this_fname = pjoin(dirpath, fname)
+                other_fname = this_fname.replace(DATADIR, other_root)
+                if not exists(other_fname):
+                    print(this_fname, 'present but', other_fname, 'missing')
+                    continue
+                this_arr = load_image(this_fname).get_data()
+                other_arr = load_image(other_fname).get_data()
+                ok = np.allclose(this_arr[msk], other_arr[msk])
+                if not ok and froot in ('effect', 'sd', 't'):
+                    # Maybe a sign flip
+                    ok = np.allclose(this_arr[msk], -other_arr[msk])
+                if not ok:
+                    print('Difference between', this_fname, other_fname)
+
+
+def compare_all(other_root, mask_fname):
+    """ Run results comparison for all subjects and runs """
+    for subj in range(1, 7):
+        for run in range(1, 13):
+            compare_results(subj, run, other_root, mask_fname)
diff --git a/examples/ds105/parallel_run.py b/examples/ds105/parallel_run.py
new file mode 100644
index 0000000000..c92dd6898d
--- /dev/null
+++ b/examples/ds105/parallel_run.py
@@ -0,0 +1,136 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+"""Script to run the main analyses in parallel, using the IPython machinery.
+
+See ``ds105_example.py``.
+"""
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+import os
+
+import numpy as np
+
+from IPython import parallel
+
+#-----------------------------------------------------------------------------
+# Utility functions
+#-----------------------------------------------------------------------------
+
+_client = None
+def setup_client():
+    """Get a Client and initialize it.
+
+    This assumes that all nodes see a shared filesystem.
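+
+    An IPython cluster must already be running; one can be started from the
+    shell with, e.g., ``ipcluster start -n 4``.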
+    """
+    global _client
+    if _client is None:
+        _client = parallel.Client()
+        mydir = os.path.split(os.path.abspath(__file__))[0]
+        def cd(path):
+            import os
+            os.chdir(path)
+        _client[:].apply_sync(cd, mydir)
+    return _client
+
+
+def getruns():
+    """Yield all (subject, run) pairs"""
+    for i in range(1, 7):
+        for j in range(1, 13):
+            yield i, j
+
+
+def getvals():
+    """Yield all (design, contrast) pairs for the group analysis"""
+    for con in ['house_vs_scrambled_t',
+                'chair_vs_scrambled_t',
+                'face_vs_scrambled_t',
+                'face_vs_house_t']:
+        for design in ['standard']:
+            yield design, con
+
+#-----------------------------------------------------------------------------
+# Main analysis functions
+#-----------------------------------------------------------------------------
+
+def fitruns():
+    """Run the basic model fit."""
+    rc = setup_client()
+    view = rc.load_balanced_view()
+    i_s, j_s = zip(*getruns())
+
+    def _fit(subj, run):
+        import ds105_example
+        try:
+            return ds105_example.run_model(subj, run)
+        except IOError:
+            pass
+
+    return view.map(_fit, i_s, j_s)
+
+
+def fitfixed():
+    """Run the fixed effects analysis for all subjects."""
+    rc = setup_client()
+    view = rc.load_balanced_view()
+    subjects = range(1, 7)
+
+    def _fit(subject):
+        import ds105_example
+        try:
+            ds105_example.fixed_effects(subject, "standard")
+        except IOError:
+            pass
+
+    return view.map(_fit, subjects)
+
+
+def fitgroup():
+    """Run the group analysis"""
+    rc = setup_client()
+    view = rc.load_balanced_view()
+    d_s, c_s = zip(*getvals())
+
+    def _fit(d, c):
+        import ds105_example
+        return ds105_example.group_analysis(d, c)
+
+    return view.map(_fit, d_s, c_s)
+
+
+def run_permute_test(design, contrast, nsample=1000):
+    rc = setup_client()
+    dview = rc[:]
+    nnod = len(dview)
+    # Samples per node.  Round up
+    ns_nod = int(np.ceil(nsample / float(nnod)))
+
+    def _run_test(n, des, con):
+        import ds105_example
+        from ds105_example import GROUP_MASK
+        min_vals, max_vals = ds105_example.permutation_test(des, con,
+                                                            GROUP_MASK, n)
+        return min_vals, max_vals
+
+    ar = dview.apply_async(_run_test, ns_nod, design, contrast)
+    min_vals, max_vals = zip(*[r for r in ar])
+    return np.concatenate(min_vals), np.concatenate(max_vals)
+
+
+#-----------------------------------------------------------------------------
+# Script entry point
+#-----------------------------------------------------------------------------
+if __name__ == '__main__':
+    pass
diff --git a/examples/ds105/view_contrasts_3d.py b/examples/ds105/view_contrasts_3d.py
new file mode 100755
index 0000000000..a6af4f73a0
--- /dev/null
+++ b/examples/ds105/view_contrasts_3d.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+"""A quick and dirty example of using Mayavi to overlay anatomy and
+activation.
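+
+Requires the group-level images (mask, average anatomical, and
+t-statistics from ``ds105_example.py``) in the ``group`` data directory
+loaded below.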
+"""
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+from __future__ import print_function # Python 2/3 compatibility
+
+import numpy as np
+
+try:
+    from mayavi import mlab
+except ImportError:
+    try:
+        from enthought.mayavi import mlab
+    except ImportError:
+        raise RuntimeError('Need mayavi for this module')
+
+from ds105_util import load_image_ds105
+
+#-----------------------------------------------------------------------------
+# Globals
+#-----------------------------------------------------------------------------
+
+MASK = load_image_ds105('group', 'mask.nii.gz')
+AVGANAT = load_image_ds105('group', 'avganat.nii')
+
+#-----------------------------------------------------------------------------
+# Functions
+#-----------------------------------------------------------------------------
+
+def view_thresholdedT(design, contrast, threshold, inequality=np.greater):
+    """
+    A mayavi isosurface view of thresholded t-statistics
+
+    Parameters
+    ----------
+    design : {'standard'}
+    contrast : str
+    threshold : float
+    inequality : {np.greater, np.less}, optional
+    """
+    maska = np.asarray(MASK)
+    tmap = np.array(load_image_ds105('group', design, contrast, 't.nii'))
+    test = inequality(tmap, threshold)
+    tval = np.zeros(tmap.shape)
+    tval[test] = tmap[test]
+
+    # XXX make the array axes agree with mayavi2
+    avganata = np.array(AVGANAT)
+    avganat_iso = mlab.contour3d(avganata * maska, opacity=0.3,
+                                 contours=[3600], color=(0.8, 0.8, 0.8))
+
+    avganat_iso.actor.property.backface_culling = True
+    avganat_iso.actor.property.ambient = 0.3
+
+    tval_iso = mlab.contour3d(tval * maska, color=(0.8, 0.3, 0.3),
+                              contours=[threshold])
+    return avganat_iso, tval_iso
+
+
+#-----------------------------------------------------------------------------
+# Script entry point
+#-----------------------------------------------------------------------------
+if __name__ == '__main__':
+    # A simple example use case
+    design = 'standard'
+    contrast = 'house_vs_scrambled_t'
+    threshold = 0.3
+    print('Starting thresholded view with:')
+    print('Design=', design, 'contrast=', contrast, 'threshold=', threshold)
+    view_thresholdedT(design, contrast, threshold)
diff --git a/nipy/modalities/fmri/design.py b/nipy/modalities/fmri/design.py
index 5692fa54b4..0f06582a80 100644
--- a/nipy/modalities/fmri/design.py
+++ b/nipy/modalities/fmri/design.py
@@ -11,7 +11,8 @@
 from nipy.algorithms.statistics.formula.formulae import (
     Formula, Factor, Term, make_recarray)
-from .utils import events, fourier_basis as fourier_basis_sym
+from .utils import (events, blocks, fourier_basis as fourier_basis_sym,
+                    convolve_functions, T)
 from .hrf import glover
@@ -79,7 +80,8 @@ def natural_spline(tvals, knots=None, order=3, intercept=True):
     return f.design(tvals, return_float=True)
 
-def event_design(event_spec, t, order=2, hrfs=[glover]):
+def event_design(event_spec, t, order=2, hrfs=[glover],
+                 level_contrasts=False):
     """
     Create a design matrix for a GLM analysis based
    on an event specification, evaluating
@@ -102,6 +104,9 @@
     hrfs : seq
         A sequence of (symbolic) HRF that will be convolved with each event.
         If empty, glover is used.
+    level_contrasts : bool
+        If true, generate contrasts for each individual level
+        of each factor.
 
     Returns
     -------
@@ -121,6 +126,7 @@
     e_factors = [Factor(n, np.unique(event_spec[n])) for n in fields]
     e_formula = np.product(e_factors)
     e_contrasts = {}
+
     if len(e_factors) > 1:
         for i in range(1, order+1):
             for comb in combinations(zip(fields, e_factors), i):
@@ -142,8 +148,11 @@
     t_terms = []
     t_contrasts = {}
     for l, h in enumerate(hrfs):
-        t_terms += [events(event_spec['time'], \
-            amplitudes=e_X[n], f=h) for i, n in enumerate(e_dtype.names)]
+        for n in e_dtype.names:
+            term = events(event_spec['time'], amplitudes=e_X[n], f=h)
+            t_terms += [term]
+            if level_contrasts:
+                t_contrasts['%s_%d' % (n, l)] = Formula([term])
         for n, c in e_contrasts.items():
             t_contrasts["%s_%d" % (n, l)] = Formula([ \
                 events(event_spec['time'], amplitudes=c[nn], f=h)
@@ -155,6 +164,119 @@
     return X_t, c_t
 
 
+def block_design(block_spec, t, order=2, hrfs=[glover],
+                 convolution_padding=5.,
+                 convolution_dt=0.02,
+                 hrf_interval=[0., 30.],
+                 level_contrasts=False):
+    """
+    Create a design matrix for a GLM analysis based on a block
+    specification, evaluating it at a sequence of time values.  Each
+    column in the design matrix will be convolved with each HRF in hrfs.
+
+    Parameters
+    ----------
+    block_spec : np.recarray
+        A recarray having at least fields named 'start' and 'end',
+        signifying the start and end times of each block; all other fields
+        will be treated as factors in an ANOVA-type model.
+    t : np.ndarray
+        An array of np.float values at which to evaluate the design.
+        Common examples would be the acquisition times of an fMRI image.
+    order : int
+        The highest order interaction to be considered in constructing
+        the contrast matrices.
+    hrfs : seq
+        A sequence of (symbolic) HRFs that will be convolved with each
+        block.  If empty, glover is used.
+    convolution_padding : float
+        A padding for the convolution with the HRF.  The interval
+        used for the convolution is the smallest 'start' minus this
+        padding to the largest 'end' plus this padding.
+    convolution_dt : float
+        Time step for use in convolving the blocks with each HRF.
+    hrf_interval : sequence of floats
+        Interval over which the HRF is assumed supported, used in the
+        convolution.
+    level_contrasts : bool
+        If true, generate contrasts for each individual level
+        of each factor.
+
+    Returns
+    -------
+    X : np.ndarray
+        The design matrix with X.shape[0] == t.shape[0].  The number of
+        columns will depend on the other fields of block_spec.
+    contrasts : dict
+        Dictionary of contrasts that are expected to be of interest from
+        the block specification.  Each interaction / effect up to the
+        given order is returned, and a contrast is generated for each
+        interaction / effect for each HRF specified in hrfs.
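+
+    Examples
+    --------
+    A sketch, not run here, assuming a recarray ``spec`` with fields
+    'start', 'end' and one factor field (e.g. 'object')::
+
+        X, cons = block_design(spec, np.arange(121) * 2.5,
+                               level_contrasts=True)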
+ """ + fields = list(block_spec.dtype.names) + if 'start' not in fields or 'end' not in fields: + raise ValueError('expecting fields called "start" and "end"') + fields.pop(fields.index('start')) + fields.pop(fields.index('end')) + e_factors = [Factor(n, np.unique(block_spec[n])) for n in fields] + e_formula = np.product(e_factors) + e_contrasts = {} + if len(e_factors) > 1: + for i in range(1, order+1): + for comb in combinations(zip(fields, e_factors), i): + names = [c[0] for c in comb] + fs = [c[1].main_effect for c in comb] + e_contrasts[":".join(names)] = np.product(fs).design(block_spec) + + e_contrasts['constant'] = formulae.I.design(block_spec) + + # Design and contrasts in block space + # TODO: make it so I don't have to call design twice here + # to get both the contrasts and the e_X matrix as a recarray + + e_X = e_formula.design(block_spec) + e_dtype = e_formula.dtype + + # Now construct the design in time space + + block_times = np.array([(s,e) for s, e in zip(block_spec['start'], + block_spec['end'])]) + convolution_interval = (block_times.min() - convolution_padding, + block_times.max() + convolution_padding) + + t_terms = [] + t_names = [] + t_contrasts = {} + for l, h in enumerate(hrfs): + for n in e_dtype.names: + B = blocks(block_times, amplitudes=e_X[n]) + term = convolve_functions(B, h(T), + convolution_interval, + hrf_interval, + convolution_dt) + t_terms += [term] + if level_contrasts: + t_contrasts['%s_%d' % (n, l)] = Formula([term]) + for n, c in e_contrasts.items(): + F = [] + for i, nn in enumerate(c.dtype.names): + B = blocks(block_times, amplitudes=c[nn]) + F.append(convolve_functions(B, h(T), + convolution_interval, + hrf_interval, + convolution_dt)) + t_contrasts["%s_%d" % (n, l)] = Formula(F) + t_formula = Formula(t_terms) + + tval = make_recarray(t, ['t']) + X_t, c_t = t_formula.design(tval, contrasts=t_contrasts) + return X_t, c_t + + def stack2designs(old_X, new_X, old_contrasts={}, new_contrasts={}): """ Add some columns to a design matrix that has contrasts matrices