import warnings
import sys 
import os    
import glob
from functools import reduce
import numpy as np
from brainiak.eventseg.event import EventSegment
from scipy.stats import norm

from matplotlib import pyplot as plt
import matplotlib.patches as patches
%matplotlib inline

smallsize=14; mediumsize=16; largesize=18
plt.rc('xtick', labelsize=smallsize); plt.rc('ytick', labelsize=smallsize); plt.rc('legend', fontsize=mediumsize)
plt.rc('figure', titlesize=largesize); plt.rc('axes', labelsize=mediumsize); plt.rc('axes', titlesize=mediumsize)


if not os.path.exists('Sherlock_AG_movie.npy'):
    !wget https://ndownloader.figshare.com/files/22927253 -O Sherlock_AG_movie.npy
if not os.path.exists('Sherlock_AG_recall.npy'):
    !wget https://ndownloader.figshare.com/files/22927256 -O Sherlock_AG_recall.npy

movie = np.load('Sherlock_AG_movie.npy')
recall = np.load('Sherlock_AG_recall.npy')
movie_group = np.mean(movie, axis=0)

--2020-11-10 13:56:28--  https://ndownloader.figshare.com/files/22927253
Resolving ndownloader.figshare.com (ndownloader.figshare.com)... 54.246.227.97, 52.17.195.112, 52.212.255.91, ...
Connecting to ndownloader.figshare.com (ndownloader.figshare.com)|54.246.227.97|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/22927253/Sherlock_AG_movie.npy [following]
--2020-11-10 13:56:29--  https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/22927253/Sherlock_AG_movie.npy
Resolving s3-eu-west-1.amazonaws.com (s3-eu-west-1.amazonaws.com)... 52.218.112.91
Connecting to s3-eu-west-1.amazonaws.com (s3-eu-west-1.amazonaws.com)|52.218.112.91|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 256101248 (244M) [application/octet-stream]
Saving to: ‘Sherlock_AG_movie.npy’

Sherlock_AG_movie.n 100%[===================>] 244.24M  9.00MB/s    in 36s     

2020-11-10 13:57:05 (6.83 MB/s) - ‘Sherlock_AG_movie.npy’ saved [256101248/256101248]

--2020-11-10 13:57:05--  https://ndownloader.figshare.com/files/22927256
Resolving ndownloader.figshare.com (ndownloader.figshare.com)... 34.250.14.20, 52.17.195.112, 52.212.255.91, ...
Connecting to ndownloader.figshare.com (ndownloader.figshare.com)|34.250.14.20|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/22927256/Sherlock_AG_recall.npy [following]
--2020-11-10 13:57:06--  https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/22927256/Sherlock_AG_recall.npy
Resolving s3-eu-west-1.amazonaws.com (s3-eu-west-1.amazonaws.com)... 52.218.24.75
Connecting to s3-eu-west-1.amazonaws.com (s3-eu-west-1.amazonaws.com)|52.218.24.75|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8240168 (7.9M) [application/octet-stream]
Saving to: ‘Sherlock_AG_recall.npy’

Sherlock_AG_recall. 100%[===================>]   7.86M  6.44MB/s    in 1.2s    

2020-11-10 13:57:07 (6.44 MB/s) - ‘Sherlock_AG_recall.npy’ saved [8240168/8240168]


plt.figure(figsize=(10,8))
plt.imshow(np.corrcoef(movie_group))
plt.xlabel('Timepoint')
plt.ylabel('Timepoint')
plt.colorbar()
plt.title('Spatial pattern correlation');


movie_HMM = EventSegment(n_events = 29)
movie_HMM.fit(movie_group);


# Plotting the log-likelihood (measuring overall model fit)
plt.figure(figsize = (12, 4))
plt.plot(movie_HMM.ll_)
plt.title('Log likelihood during training')
plt.xlabel('Model fitting steps')

Text(0.5, 0, 'Model fitting steps')


# Plotting mean activity in each event for some example voxels
plt.figure(figsize = (12, 4))
example_vox = np.arange(0,movie_HMM.event_pat_.shape[0],100)
plt.imshow(movie_HMM.event_pat_[example_vox,:], aspect='auto')
plt.xlabel('Event number')
plt.ylabel('Voxels')

Text(0, 0.5, 'Voxels')


# Plot probability of being in each event at each timepoint
plt.figure(figsize = (12, 6))
plt.matshow(movie_HMM.segments_[0].T, aspect='auto')
plt.gca().xaxis.tick_bottom()
plt.colorbar()
plt.title('Event probability')

Text(0.5, 1.05, 'Event probability')

<Figure size 864x432 with 0 Axes>


# Identify event boundaries as timepoints when max probability switches events
event_bounds = np.where(np.diff(np.argmax(movie_HMM.segments_[0], axis = 1)))[0]
nTRs = movie_group.shape[0]


# Plot boundaries as boxes on top of timepoint correlation matrix
def plot_tt_similarity_matrix(ax, data_matrix, bounds, n_TRs, title_text):
    
    ax.imshow(np.corrcoef(data_matrix), cmap = 'viridis')
    ax.set_title(title_text)
    ax.set_xlabel('TR')
    ax.set_ylabel('TR')
    
    # plot the boundaries 
    bounds_aug = np.concatenate(([0], bounds, [n_TRs]))
    
    for i in range(len(bounds_aug) - 1):
        rect = patches.Rectangle(
            (bounds_aug[i], bounds_aug[i]),
            bounds_aug[i+1] - bounds_aug[i],
            bounds_aug[i+1] - bounds_aug[i],
            linewidth = 2, edgecolor = 'w',facecolor = 'none'
        )
        ax.add_patch(rect)


f, ax = plt.subplots(1,1, figsize = (10,8))
title_text = '''
Overlay the HMM-predicted event boundaries
on top of the TR-TR correlation matrix
'''
plot_tt_similarity_matrix(ax, movie_group, event_bounds, nTRs, title_text)


k_array = np.arange(20, 61, 10)
test_ll = np.zeros(len(k_array))

for i, k in enumerate(k_array):
    print('Trying %d events' % k)
    
    print('   Fitting model on training subjects...')
    movie_train = np.mean(movie[:8], axis = 0)
    movie_HMM = EventSegment(k)
    movie_HMM.fit(movie_train)
    
    print('   Testing model fit on held-out subjects...')
    movie_test = np.mean(movie[8:], axis = 0)
    _, test_ll[i] = movie_HMM.find_events(movie_test)

Trying 20 events
   Fitting model on training subjects...
   Testing model fit on held-out subjects...
Trying 30 events
   Fitting model on training subjects...
   Testing model fit on held-out subjects...
Trying 40 events
   Fitting model on training subjects...
   Testing model fit on held-out subjects...
Trying 50 events
   Fitting model on training subjects...
   Testing model fit on held-out subjects...
Trying 60 events
   Fitting model on training subjects...
   Testing model fit on held-out subjects...


plt.plot(k_array, test_ll)
plt.xlabel('Number of events')
plt.ylabel('Log-likelihood')

movie_dur = nTRs * 1.5  # Data acquired every 1.5 seconds
secax = plt.gca().secondary_xaxis('top',
                                  functions=(lambda x: movie_dur / (x + sys.float_info.epsilon),
                                             lambda x: movie_dur / (x + sys.float_info.epsilon)))
secax.set_xlabel('Average event length (sec)')

Text(0.5, 0, 'Average event length (sec)')


print('Fitting HMM with 40 events...')
HMM40 = EventSegment(n_events = 40)
HMM40.fit(movie_group)
HMM40_bounds = np.where(np.diff(np.argmax(HMM40.segments_[0], axis = 1)))[0]

print('Fitting split-merge HMM with 40 events...')
HMM40_SM = EventSegment(n_events = 40, split_merge = True)
HMM40_SM.fit(movie_group)
HMM40_SM_bounds = np.where(np.diff(np.argmax(HMM40_SM.segments_[0], axis = 1)))[0]

Fitting HMM with 40 events...
Fitting split-merge HMM with 40 events...


# Timepoints of event boundaries annotated by human raters
human_bounds = [
    26, 35, 56, 72, 86, 108, 131, 143, 157, 173, 192, 204, 
    226, 313, 362, 398, 505, 526, 533, 568, 616, 634, 678,
    696, 747, 780, 870, 890
]


# Computes fraction of ground truth bounds that are covered by a set of proposed bounds
# Returns z score relative to a null distribution via permutation
def match_z(proposed_bounds, gt_bounds, num_TRs):
    nPerm = 1000
    threshold = 3
    np.random.seed(0)

    gt_lengths = np.diff(np.concatenate(([0],gt_bounds,[num_TRs])))
    match = np.zeros(nPerm + 1)
    for p in range(nPerm + 1):
        gt_bounds = np.cumsum(gt_lengths)[:-1]
        for b in gt_bounds:
            if np.any(np.abs(proposed_bounds - b) <= threshold):
                match[p] += 1
        match[p] /= len(gt_bounds)
        gt_lengths = np.random.permutation(gt_lengths)
    
    return (match[0]-np.mean(match[1:]))/np.std(match[1:])

z = [match_z(HMM40_bounds, human_bounds, nTRs),
     match_z(HMM40_SM_bounds, human_bounds, nTRs)]

print('HMM40:    z = %f, p = %f' % (z[0], norm.sf(z[0])))
print('HMM_SM40: z = %f, p = %f' % (z[1], norm.sf(z[1])))

HMM40:    z = 2.989227, p = 0.001398
HMM_SM40: z = 3.454808, p = 0.000275


movie_recall_HMM = EventSegment(40)
movie_recall_HMM.fit([movie_group, recall]);


plt.imshow(movie_recall_HMM.segments_[0] @ movie_recall_HMM.segments_[1].T)
plt.xlabel('Timepoints during recall')
plt.ylabel('Timepoints during movie')
plt.colorbar()
plt.title('Prob of being in the same event');

Event segmentation and alignment in fMRI data¶

Overview¶

Annotated Bibliography¶

Table of Contents¶

Loading data ¶

Finding event boundaries during perception ¶

Warm-up: Event structure in activity patterns¶

Fitting the HMM¶

Determining the number of events with the HMM¶

Optimal segmentation with the HMM¶

Comparing model and human-labeled boundaries ¶

Aligning movie and recall data ¶

Summary ¶

Event segmentation and alignment in fMRI data¶

Overview¶

Annotated Bibliography¶

Table of Contents¶

Loading data¶

Finding event boundaries during perception¶

Warm-up: Event structure in activity patterns¶

Fitting the HMM¶

Determining the number of events with the HMM¶

Optimal segmentation with the HMM¶

Comparing model and human-labeled boundaries¶

Aligning movie and recall data¶

Summary¶

Loading data ¶

Finding event boundaries during perception ¶

Comparing model and human-labeled boundaries ¶

Aligning movie and recall data ¶

Summary ¶