import fnmatch # filename matching
import os # navigating directories
import numpy as np
from scipy import stats
from scipy.optimize import curve_fit
import pandas as pd
pd.set_option('max_columns', 100)
pd.set_option('max_rows', 1000)
import seaborn as sns
%pylab inline
plt.rcParams['lines.linewidth'] = 5 # default line width for figures
For the generation of the logistic data, please refer to S4.
def get_list(where, what):
    """Return the names of files in directory *where* that match glob *what*.

    Side effect: changes the process working directory to *where*, so that
    the returned (bare) file names can be opened directly by later cells.

    The directory is listed BEFORE chdir: the original listed it after,
    which breaks for a relative *where* (e.g. './data/') because the path
    would then resolve against the new working directory.
    """
    fileList = fnmatch.filter(os.listdir(where), what)
    os.chdir(where)  # later cells open the bare file names relative to cwd
    return fileList
# collect and sort all participant data files, then print them with their
# indices; subsequent cells load the files by these index positions
fileList = get_list('./data/','*Pt*.txt')
fileList.sort()
for i in range(len(fileList)):
    print i,':\t',fileList[i]
def load_this(csv):
    """Read *csv* into a DataFrame: first column becomes the index, the
    first two rows form a two-level (MultiIndex) column header."""
    return pd.read_csv(csv, index_col=[0], header=[0, 1])
# experiment 1: split-half raw data, divided by 100 (percent -> proportion)
exp1RawPt1 = load_this(fileList[0])/100
exp1RawPt2 = load_this(fileList[2])/100
# corresponding logistic-fit tables (no rescaling applied here)
exp1LogPt1 = load_this(fileList[8])
exp1LogPt2 = load_this(fileList[9])
Example of the loaded data:
# notebook display of the loaded tables (raw data, then logistic fits)
exp1RawPt1
exp1LogPt1
def getSqrdErrors(fit1, fit2, raw1, raw2, cond, thisName):
    """Cross-validated squared errors for one condition *cond*.

    Squared differences are taken between each fitted half and the OTHER
    raw half (fit1 vs raw2, fit2 vs raw1); the per-participant row sums of
    the two pairings are then averaged.

    Returns a one-column DataFrame (column *thisName*) indexed by a
    (cond, participant) MultiIndex.
    """
    firstDf = (fit1[cond] - raw2[cond]) ** 2
    scndDf = (fit2[cond] - raw1[cond]) ** 2
    allSums = {}
    for p in firstDf.index:
        # .loc replaces the deprecated/removed .ix (purely label-based here)
        allSums[p] = [firstDf.loc[p].sum()]
    for p in scndDf.index:
        allSums[p].append(scndDf.loc[p].sum())
    meanSums = {}
    for p in allSums:
        # np.mean made explicit (previously relied on the %pylab global `mean`)
        meanSums[p] = np.mean(allSums[p])
    meanDf = pd.DataFrame(meanSums, index=[thisName]).T
    # prepend the condition as an outer index level
    meanDf.index = [[cond] * len(meanDf), meanDf.index]
    return meanDf
def allSqrdConds(fit1, fit2, raw1, raw2, thisName):
    """Stack getSqrdErrors results for every top-level column condition.

    Returns one DataFrame with a (condition, participant) MultiIndex and a
    single column named *thisName*.
    """
    conds = raw1.columns.levels[0]
    # .equals compares the actual labels; the original chained
    # `.all() == .all()` only compared truthiness and could never catch
    # mismatched condition sets
    assert conds.equals(raw2.columns.levels[0]) \
        and conds.equals(fit1.columns.levels[0]) \
        and conds.equals(fit2.columns.levels[0]), "comparing wrong df's!"
    # build every per-condition piece first, then concat once — replaces the
    # bare `except:` accumulator pattern on an unbound name
    pieces = [getSqrdErrors(fit1, fit2, raw1, raw2, cond, thisName)
              for cond in conds]
    return pd.concat(pieces, axis=0)
def allSqrdFits(fit1, fit2, raw1, raw2):
    """Build the combined error table: raw-vs-raw and logistic-vs-raw
    cross-validated squared errors, side by side per condition."""
    raw_part = allSqrdConds(raw1, raw2, raw1, raw2, 'raw').unstack(0)
    log_part = allSqrdConds(fit1, fit2, raw1, raw2, 'log').unstack(0)
    return pd.concat([raw_part, log_part], axis=1)
# experiment 1: cross-validated squared errors for raw data and logistic fits
exp1CvFits = allSqrdFits(exp1LogPt1,exp1LogPt2,exp1RawPt1,exp1RawPt2)
exp1CvFits
def ssStats(fitDf):
    """Compare logistic-fit vs raw cross-validation errors.

    Plots the per-participant differences (log - raw) as violin plots with
    a zero reference line, then prints a paired t-test and a Wilcoxon test
    per condition ('***' marks p < .001 on both).  Returns the difference
    DataFrame for further use.
    """
    # get differences between logistic and raw fit
    diffDf = fitDf['log']-fitDf['raw']
    # plot results
    sns.violinplot(diffDf)
    axhline(0,linewidth=1,color='k')  # zero line = no difference between fits
    sns.despine()
    show()
    # inf statistics
    for cond in diffDf.columns:
        t2,p2 = stats.ttest_rel(fitDf['log'][cond],fitDf['raw'][cond])
        t,p = stats.ttest_1samp(diffDf[cond],0)
        # a paired t-test equals a one-sample t-test on the differences;
        # the assert documents (and checks) that equivalence
        assert t2 == t and p2 == p
        w,wp = stats.wilcoxon(fitDf['log'][cond],fitDf['raw'][cond])
        print "--------",cond,"--------"
        print "ttest: t:",t,"\tp:",p
        print "wilco: w:",w, "\tp:",wp
        if p < 0.001 and wp <0.001:
            print "***"
    # return df for further use
    return diffDf
# experiment 1: plot/print the logistic-vs-raw comparison and inspect it
exp1CvFitsDiff = ssStats(exp1CvFits)
exp1CvFitsDiff
exp1CvFitsDiff.describe()
# experiment 2: load split-half raw data (percent -> proportion) and fits,
# then run the same comparison
exp2RawPt1 = load_this(fileList[4])/100
exp2RawPt2 = load_this(fileList[6])/100
exp2LogPt1 = load_this(fileList[10])
exp2LogPt2 = load_this(fileList[11])
exp2CvFits = allSqrdFits(exp2LogPt1,exp2LogPt2,exp2RawPt1,exp2RawPt2)
ssStats(exp2CvFits)
def logistic(x, a, b):
    """Two-parameter logistic function.

    *a* is the inflection point (intercept), *b* the steepness (slope).
    """
    return 1.0 / (1.0 + np.exp(-b * (x - a)))
def fit_func(func, ydata):
    """Fit *func* to *ydata* on an x-grid of 0, 0.1, 0.2, ...

    For all designs in this study there are 11 morphing steps, scaled
    between 0 and 1.  Returns (x, fitted y-values, intercept, slope).
    """
    # np.arange made explicit (previously relied on the %pylab global);
    # same function, so the x-grid is numerically unchanged
    x = np.arange(0, len(ydata) / 10., 0.1)
    # scipy's curve_fit does the non-linear least-squares fit
    popt, pcov = curve_fit(func, x, ydata, maxfev=100000)
    intercept = popt[0]  # position of the inflection point
    slope = popt[1]      # steepness
    y = func(x, intercept, slope)  # y-data of the fitted function
    return x, y, intercept, slope
def get_cv_ss2(func, ytrain, ytest):
    """Cross-validated sum of squared errors.

    A function (e.g. the logistic) is fitted to the training data *ytrain*;
    the y-values of the fitted function are then compared to the left-out
    data *ytest* by a simple sum-of-squares criterion, measuring how well
    the fit generalises to new data.  If *func* is the string 'raw', the
    two raw data sets are compared directly instead.
    """
    # instead of fitting a function we might also just compare two raw data sets
    if func == 'raw':
        # np.sum/np.square made explicit (previously %pylab globals)
        ss2 = np.sum(np.square(ytest - ytrain))
    # fitting a function
    else:
        x, y, intercept, slope = fit_func(func, ytrain)
        ss2 = np.sum(np.square(ytest - y))
    return ss2
Get ss2 for a certain pairing of conditions:
def cond_cv_ss2(func, train_df, test_df, train_cond, test_cond):
    """Per-case cross-validated sum of squares between *train_cond* in
    *train_df* and *test_cond* in *test_df*.

    Returns a list with one ss2 value per row (case) of the tables.
    """
    # .equals compares the actual index labels; the original
    # `index.all() == index.all()` only compared truthiness
    assert train_df.index.equals(test_df.index), "comparing incompatible tables!"
    ss2 = []
    for entry in train_df.index:
        # .loc replaces the deprecated/removed .ix (label-based lookup)
        ytrain = np.array(train_df.loc[entry][train_cond])
        ytest = np.array(test_df.loc[entry][test_cond])
        ss2.append(get_cv_ss2(func, ytrain, ytest))
    return ss2
Get ss2 for all pairings of conditions:
def cond_cv_table(test_df, train_df, func, conds=('whole', 'eyes', 'mouth')):
    """Cross-validated ss2 for every ordered pairing of *conds*.

    Columns are named 'c1_c2'.  *conds* defaults to the experiment-1
    masking conditions, so existing callers are unaffected; passing a
    different sequence generalises the table to other designs.
    """
    d = {}
    for c1 in conds:
        for c2 in conds:
            d[c1 + '_' + c2] = cond_cv_ss2(func, test_df, train_df, c1, c2)
    return pd.DataFrame(d)
Average results for both split-half variants:
# average the cross-validated ss2 tables over both split-half directions
exp1_cond_cv = (cond_cv_table(exp1RawPt1,exp1RawPt2,'raw') + cond_cv_table(exp1RawPt2,exp1RawPt1,'raw'))/2.
exp1_cond_cv
def make_cv_lineplot(df, cond, start):
    """Line plot of per-case ss2 values across the columns listed in *cond*.

    Each case is drawn as line segments between adjacent conditions —
    black for an increase, blue for a decrease/tie — plus one red 95%-CI
    error bar per condition.  *start* is the x-offset of the first
    condition; adjacent conditions are 0.25 apart on the x axis.
    """
    for j in df.index:
        # values for this case in the requested condition order
        # (.loc replaces the deprecated/removed .ix)
        vals = [df.loc[j][c] for c in cond]
        for n in range(len(vals) - 1):
            # the two original branches differed only in colour; the blue
            # branch also ended segments at start+0.255 instead of 0.25 —
            # a typo, unified to 0.25 here
            colour = 'k' if vals[n] < vals[n + 1] else 'b'
            plt.plot(
                [start + n / 4., start + 0.25 + n / 4.],
                [vals[n], vals[n + 1]],
                color=colour,
                alpha=0.2,
                zorder=0,
            )
    x = start
    for c in cond:
        plt.errorbar(
            x,
            df[c].mean(),
            # 95% CI of the mean: SEM * 1.96
            yerr=df[c].std(ddof=1) / np.sqrt(len(df.index)) * 1.96,
            ecolor='r',
            capsize=5,
            capthick=5,
            zorder=1,
        )
        x += 0.25
def make_all_cv_plots(pt1,pt2,cond,pltname):
    """Cross-validation line plots for raw data and for logistic fits.

    For 'raw' and the logistic function in turn: averages the two
    split-half cv tables, draws one line-plot group per masking condition
    (3x3 pairings) and saves the figure as <pltname><func>.png.
    """
    ax = plt.subplot(111)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')
    # do this for raw data or with fitted functions
    for func in ['raw',logistic]:
        # average over both split-half directions
        df = (cond_cv_table(pt1,pt2,func) + cond_cv_table(pt2,pt1,func))/2.
        print func
        # make list with conditions names (could also be achieved using cond_cv.columns()
        # but the order would not be custom
        all_conds = []
        for c1 in cond:
            for c2 in cond:
                all_conds.append(str(c1+'_'+c2))
        # line plot
        # plot separately for each masking condition (3x3)
        make_cv_lineplot(df,all_conds[0:3],0)
        make_cv_lineplot(df,all_conds[3:6],0.75)
        make_cv_lineplot(df,all_conds[6:9],1.5)
        plt.xticks(arange(0,2.01,0.25),all_conds,rotation=90)
        plt.xlim(-0.1,2.1)
        plt.title("Cross-Validation with "+str(func)+" between conditions")
        plt.xlabel("Face Conditions\n(Raw Data with 95% CI)")
        plt.ylabel("Sums of Squared Errors")
        plt.savefig(pltname+str(func)+'.png',dpi=600)
        plt.show()
# draw and save the experiment-1 cross-validation figures
make_all_cv_plots(exp1RawPt1,exp1RawPt2,
                  ['whole','eyes','mouth'],
                  'exp1 cv plot')
def cv_stats(df,sig_level):
for c1 in df.columns:
for c2 in df.columns:
if c1 != c2 and c1[:4] == c2[:4]:
t,p = stats.ttest_rel(df[c1],df[c2])
W,wp = stats.wilcoxon(df[c1],df[c2])
if p < sig_level and wp <sig_level:
x = '*'
else:
x = ''
print c1,'\t',c2,x
print 't=',t,'\tp=',p
print 'W=',W,'\tp=',wp,'\n'
# pairwise condition statistics for experiment 1 (alpha = .05)
cv_stats(exp1_cond_cv,0.05)
Inferential statistics for the difference of differences, testing whether the whole-face and the eyes conditions are more similar to each other than either is to the mouth condition.
# difference-of-differences tests: for each reference condition, compare
# how much better one masking condition predicts it than the other
print "\nwhole-eyes vs whole-mouth"
print "t-test\t",stats.ttest_rel( exp1_cond_cv['whole_whole']-exp1_cond_cv['whole_eyes'] , exp1_cond_cv['whole_whole']-exp1_cond_cv['whole_mouth'] )
print "Wilcoxon\t",stats.wilcoxon( exp1_cond_cv['whole_whole']-exp1_cond_cv['whole_eyes'] , exp1_cond_cv['whole_whole']-exp1_cond_cv['whole_mouth'] )
print "t-test\t",stats.ttest_rel( exp1_cond_cv['whole_eyes'] -exp1_cond_cv['whole_whole'] , exp1_cond_cv['whole_eyes'] -exp1_cond_cv['whole_mouth'] )
print "Wilcoxon\t",stats.wilcoxon( exp1_cond_cv['whole_eyes'] -exp1_cond_cv['whole_whole'] , exp1_cond_cv['whole_eyes'] -exp1_cond_cv['whole_mouth'] )
# same comparison with the eyes condition as reference
print "\neyes-whole vs eyes-mouth"
print "t-test\t",stats.ttest_rel( exp1_cond_cv['eyes_eyes']-exp1_cond_cv['eyes_whole'] , exp1_cond_cv['eyes_eyes']-exp1_cond_cv['eyes_mouth'] )
print "Wilcoxon\t",stats.wilcoxon( exp1_cond_cv['eyes_eyes']-exp1_cond_cv['eyes_whole'] , exp1_cond_cv['eyes_eyes']-exp1_cond_cv['eyes_mouth'] )
print "t-test\t",stats.ttest_rel( exp1_cond_cv['eyes_whole'] -exp1_cond_cv['eyes_eyes'] , exp1_cond_cv['eyes_whole'] -exp1_cond_cv['eyes_mouth'] )
print "Wilcoxon\t",stats.wilcoxon( exp1_cond_cv['eyes_whole'] -exp1_cond_cv['eyes_eyes'] , exp1_cond_cv['eyes_whole'] -exp1_cond_cv['eyes_mouth'] )
# same comparison with the mouth condition as reference
print "\nmouth-whole vs mouth-eyes"
print "t-test\t",stats.ttest_rel( exp1_cond_cv['mouth_mouth']-exp1_cond_cv['mouth_whole'] , exp1_cond_cv['mouth_mouth']-exp1_cond_cv['mouth_eyes'] )
print "Wilcoxon\t",stats.wilcoxon( exp1_cond_cv['mouth_mouth']-exp1_cond_cv['mouth_whole'] , exp1_cond_cv['mouth_mouth']-exp1_cond_cv['mouth_eyes'] )
print "t-test\t",stats.ttest_rel( exp1_cond_cv['mouth_whole'] -exp1_cond_cv['mouth_mouth'] , exp1_cond_cv['mouth_whole'] -exp1_cond_cv['mouth_eyes'] )
print "Wilcoxon\t",stats.wilcoxon( exp1_cond_cv['mouth_whole'] -exp1_cond_cv['mouth_mouth'] , exp1_cond_cv['mouth_whole'] -exp1_cond_cv['mouth_eyes'] )
def cond_cv_table2(test_df, train_df, func):
    """Cross-validated ss2 table for the four emotion/mask conditions of
    experiment 2; columns are named 'c1_c2' for every ordered pairing."""
    conditions = ['fearLOW', 'angerLOW', 'fearUP', 'angerUP']
    table = {a + '_' + b: cond_cv_ss2(func, test_df, train_df, a, b)
             for a in conditions
             for b in conditions}
    return pd.DataFrame(table)
# experiment 2: averaged split-half cross-validation table (raw data)
exp2_cond_cv = (cond_cv_table2(exp2RawPt1,exp2RawPt2,'raw') + cond_cv_table2(exp2RawPt2,exp2RawPt1,'raw'))/2.
exp2_cond_cv
# remove extreme outlier for visualisation
# NOTE(review): the slicing drops rows 4 and 6 — presumably the outlying
# cases; verify these row positions against the data above
exp2_cond_cv_rev = pd.concat([exp2_cond_cv[:4], exp2_cond_cv[5:6], exp2_cond_cv[7:]])
# experiment 2 figure: 4x4 cross-validation grid on the raw data
ax = plt.subplot(111)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.yaxis.set_ticks_position('none')
ax.xaxis.set_ticks_position('none')
# make custom condition order
cond = ['fearLOW','angerLOW','fearUP','angerUP']
all_conds = []
for c in cond:
    all_conds = all_conds + ['fearLOW_'+c,'angerLOW_'+c,'fearUP_'+c,'angerUP_'+c]
print all_conds
# one line-plot group of four pairings per test condition, 1.0 apart in x
make_cv_lineplot(exp2_cond_cv_rev,all_conds[0:4],0)
make_cv_lineplot(exp2_cond_cv_rev,all_conds[4:8],1)
make_cv_lineplot(exp2_cond_cv_rev,all_conds[8:12],2)
make_cv_lineplot(exp2_cond_cv_rev,all_conds[12:16],3)
# customise plot
plt.xlim(-0.2,3.9)
#plt.ylim(0,2.5)
plt.xticks(arange(0,3.76,0.25),all_conds,rotation=90)
plt.title("Cross-Validation with Raw Data between Conditions")
plt.xlabel("Face Conditions\n(Raw Data with 95% CI)")
plt.ylabel("Sums of Squared Errors")
plt.savefig('Experiment2_CV_rawcond_line.png',dpi=600)
plt.show()
# pairwise statistics within each block of four columns (columns i..i+3
# share the same test condition); alpha = .01 for the '*' marker
for i in [0,4,8,12]:
    print '\n'
    for c1 in exp2_cond_cv.columns[i:i+4]:
        for c2 in exp2_cond_cv.columns[i:i+4]:
            # only proceed when c1 is a same-condition pairing such as
            # 'fearLOW_fearLOW' — NOTE(review): the guard tests c1 against
            # itself on both sides; confirm that restricting to the diagonal
            # reference column is the intended behaviour
            if c1.split('_')[0] == c1.split('_')[1]:
                # skip identical columns (zero differences would break Wilcoxon)
                if (exp2_cond_cv[c1] == exp2_cond_cv[c2] ).all() == False:
                    t,p = stats.ttest_rel(exp2_cond_cv[c1],exp2_cond_cv[c2])
                    W,wp = stats.wilcoxon(exp2_cond_cv[c1],exp2_cond_cv[c2])
                    if p < 0.01 and wp <0.01:
                        x = '*'
                    else:
                        x = ''
                    print c1,'\t',c2,x
                    print round(t,2),'\t',p,'\t',round(W,2),'\t',wp