/*
 * Copyright (c) 1995 Giovanni Flammia
 *
 * Massachusetts Institute of Technology
 * Laboratory for Computer Science
 * Spoken Language Systems Group
 * http://www.sls.lcs.mit.edu
 *
 * Contact: gflammia AT alum.mit.edu
 *
 * File: kappa.c
 *
 * Compute the generalized kappa coefficient from
 * data classified by multiple coders into a mutually
 * exclusive and exhaustive set of categories.
 *
 * For more information about the formulae used,
 * read the appendix of Giovanni Flammia's PhD thesis
 * "Discourse Segmentation of Spoken Dialogue: An Empirical Approach"
 * PDF document located at: http://www.theredesign.com/Technology/Dialogue
 *
 * Directly implements the computations described in the paper:
 * Uebersax, J.S.: "A Generalized Kappa Coefficient"
 * Educational and Psychological Measurement
 * 1982, Vol. 42, pp. 181--183.
 *
 * By using this software, you ("Licensee") agree to the following license:
 *
 * 1. Redistribution and use in source and binary forms, with or without
 *    modification, are permitted provided that the following conditions
 *    are met:
 *
 * 2. You are granted a non-exclusive, non-transferable limited license
 *    to use this Software free of charge for
 *    non-commercial purposes only.
 *
 * 3. Redistributions of source or binary form of this code must
 *    retain the above copyright notice, this list of conditions
 *    and the following disclaimer.
 *
 * 4. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 *    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 *    NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 *    DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 *    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 *    USE OR OTHER DEALINGS IN THE SOFTWARE.
* */ /* To compile: just use cc or gcc Usage: kappa <r_matrix_text_file> Required input file format for the r_matrix_text_file: ------------------------------------------------------------------------- input_directory = /where-all-the-data-comes-from/ num_categories = 3 categories = [ Initiative Response Other ] (optional) annotated_files = [ (optional) aa_30 40 aa_5 58 aa_55 38 st1t1_51 43 st1t4_18 66 st2t1_32 119 ] num_coders = 5 coders = [ (optional) christine jwc laura raylau rob ] num_points = 364 (i.e. number of lines for the R matrix ) R_matrix = [ 0 1 0 1 0 1 0 1 0 1 1 0 1 0 0 1 1 0 0 1 ] Format of the R matrix: The R matrix tells how each line in the data has been coded by each one of the coders. It is a 3-dimensional matrix i.e. num_points * num_coders * num_categories Each line is divided into num_coders * num_categories columns: For example, if coder 1 has assigned cat 2 to line m and coder 2 has assigned cat 1 to line m, then the corresponding line in R_matrix should look like this: coder[1] coder[2] cat[1] cat[2] ... cat[num_categ] cat[1] cat[2] ... cat[num_categ] line[m] 0 1 ... 0 1 0 0 R[c][k][m] = 1 if coder c assigned class k to data point m, else R[c][k][m] = 0 (also if coder c has not assigned any category to line m). */ #include #include #include #include #include #include #include int num_categories,num_coders,num_points; double compute_term1(R,i,j,k) int ***R; int i; int j; int k; { int ii,jj; /* range over 0 .. num_coders-1 */ int kk; /* range over 0 .. num_categories-1 */ int l,ll; /* range over 0 .. 
num_points-1 */ int denom_i=0; int denom_j=0; int num_i=0; int num_j=0; for(kk=0;kk\n",argv[0]); exit(); } if ((file=fopen(argv[1],"r"))==NULL) { fprintf(stderr,"%s: File %s not found.\n",argv[0],argv[1]); exit(); } /* Read R matrix data and put it into info and R data structures Input format: input_directory = /usr/users/flammia/eurospeech_95/data/by_coders/group_1 num_categories = 2 categories = [ Open_Segment Other ] annotated_files = [ aa_30 40 aa_5 58 aa_55 38 st1t1_51 43 st1t4_18 66 st2t1_32 119 ] num_coders = 5 coders = [ christine jwc laura raylau rob ] num_points = 364 R_matrix = [ 0 1 0 1 0 1 0 1 0 1 1 0 1 0 0 1 1 0 0 1 */ is_data=0; while(fgets(line, 2048, file) != NULL && strcmp(line,"\n")!=0) { x=0; if ((nd = sscanf(line,"%s %s %d",word1,word2,&x))==3 && strcmp(word2,"=")==0 && x>0) { /* read num_categories: */ if (strcmp(word1,"num_categories")==0) { num_categories = x; } else if (strcmp(word1,"num_coders")==0) { /* read num_coders: */ num_coders = x; } else if (strcmp(word1,"num_points")==0) { /* read num_points */ num_points = x; } } else if (nd>=2 && strcmp(word1,"R_matrix")==0) { is_data=1; R = (int ***)calloc(num_coders,sizeof(int **)); for(i=0;i=1 && strcmp(line,"\n")!=0) { strcat(info,"\n"); strcat(info,line); } else if (is_data==1) { /* Read data: for each line: coder[1] coder[2] ... cat[1]...cat[num_categ] cat[1]...cat[num_categ] ... 0 0 ... 1 ..... 0 0 1 .... 0 0 ... R[c][k][m] = 1 if coder c assigned class k to data point m, */ line_ptr=line; i=0; k=0; if (line_ptr && (nd=sscanf(line_ptr,"%d",&x))==1) { m++; /* fprintf(stderr,"R[][][%d] %s",m,line_ptr); */ } while (m>=0 && m