dojo.declare( 'JBrowse.Model.TrackMetaData', null,
/**
 * @lends JBrowse.Model.TrackMetaData.prototype
 */
{

    // facet value under which items lacking a value for a facet are indexed
    _noDataValue: '(no data)',

    /**
     * Data store for track metadata, supporting faceted
     * (parameterized) searching.  Keeps all of the track metadata,
     * and the indexes thereof, in memory.
     * @constructs
     * @param args.trackConfigs {Array} array of track configuration
     *   objects; each one's label, key, category, and metadata are
     *   indexed, and the configuration itself is kept under 'conf'
     * @param args.indexFacets {Function|Array|String} which facets to
     *   index: a predicate called with the facet name, or one or more
     *   facet names.  Defaults to indexing all facets.
     * @param args.onReady {Function|Array} callback(s) to call once
     *   all metadata has been loaded and indexed
     * @param args.metadataStores {Array[dojox.data]} additional
     *   stores whose items supplement the track configurations
     */
    constructor: function( args ) {
        // set up our facet name discrimination: what facets we will
        // actually provide search on
        var non_facet_attrs = ['conf'];
        this._filterFacet = (function() {
            var filter = args.indexFacets || function() { return true; };
            // if we have a non-function filter, coerce to an array,
            // then convert that array to a function
            if( typeof filter === 'string' )
                filter = [filter];
            if( Array.isArray( filter ) ) {
                var allowedNames = filter;
                filter = function( facetName ) {
                    return dojo.some( allowedNames, function(fn) {
                        return facetName == fn.toLowerCase();
                    });
                };
            }
            var ident_facets = this.getIdentityAttributes();
            return function( facetName ) {
                return (
                    // always index ident facets
                    dojo.some( ident_facets, function(n) { return n == facetName; } )
                    // otherwise, must pass the user filter AND not be one of our explicitly-blocked attrs
                    || filter( facetName )
                       && ! dojo.some( non_facet_attrs, function(a) { return a == facetName; } )
                );
            };
        }).call(this);

        // set up our onReady callbacks to fire once the data is
        // loaded
        if( ! dojo.isArray( args.onReady ) ) {
            this.onReadyFuncs = args.onReady ? [ args.onReady ] : [];
        } else {
            this.onReadyFuncs = dojo.clone( args.onReady );
        }

        // interpret the track configurations as a metadata store
        this._indexItems(
            {
                store: this,
                items: dojo.map( args.trackConfigs, function(conf) {
                    var metarecord = dojo.clone( conf.metadata || {} );
                    metarecord.label = conf.label;
                    metarecord.key = conf.key;
                    metarecord.conf = conf;
                    if( conf.category )
                        metarecord.category = conf.category;
                    return metarecord;
                }, this )
            }
        );

        // fetch and index all the items from each of the stores,
        // ignoring empty metadata store entries.  The filtered list is
        // kept local so that the caller's args object is not modified.
        var metadataStores = dojo.filter( args.metadataStores, function(s) { return s; } );
        if( ! metadataStores.length ) {
            // if we don't actually have any stores besides the track
            // confs, we're ready now.
            this._finishLoad();
        } else {
            // index the track metadata from each of the stores
            var stores_fetched_count = 0;
            var storeFetchFinished = dojo.hitch( this, function() {
                // when the last store has been fetched (or has
                // failed), finish loading
                if( ++stores_fetched_count == metadataStores.length )
                    this._finishLoad();
            });
            dojo.forEach( metadataStores, function(store) {
                store.fetch({
                    scope: this,
                    onComplete: dojo.hitch( this, function(items) {
                        // build our indexes
                        this._indexItems({ store: store, items: items, supplementalOnly: true });
                        storeFetchFinished();
                    }),
                    onError: function(e) {
                        // a failed store is logged and then counted
                        // as finished, so loading still completes
                        console.error( ''+e );
                        storeFetchFinished();
                    }
                });
            }, this );
        }
    },

    /**
     * Set the store's state to be ready (i.e. loaded), and calls all
     * our onReady callbacks.
     * @private
     */
    _finishLoad: function() {

        // sort the facet names
        this.facets.sort();

        // calculate the average bucket size for each facet index.  A
        // facet with no buckets gets 0 rather than NaN (0/0), so that
        // the rank comparator below stays well-defined.
        dojo.forEach( dojof.values( this.facetIndexes.byName ), function(facetIndex) {
            facetIndex.avgBucketSize = facetIndex.bucketCount
                ? facetIndex.itemCount / facetIndex.bucketCount
                : 0;
        });

        // calculate the rank of the facets: an array of facet names
        // sorted by average bucket size.  Note that this comparator
        // orders them ascending (smallest average bucket first).
        var byName = this.facetIndexes.byName;
        this.facetIndexes.facetRank = dojo.clone( this.facets ).sort( function(a,b) {
            return byName[a].avgBucketSize - byName[b].avgBucketSize;
        });

        // sort the facet indexes by ident, so that we can do our
        // kind-of-efficient N-way merging when querying
        var itemSortFunction = dojo.hitch( this, '_itemSortFunc' );
        dojo.forEach( dojof.values( this.facetIndexes.byName ), function( facetIndex ) {
            dojo.forEach( dojof.values( facetIndex.byValue ), function( bucket ) {
                bucket.items.sort( itemSortFunction );
            });
        });

        this.ready = true;
        this._onReady();
    },

    /**
     * Comparator that orders two items by their identity.
     * @private
     * @returns {Number} -1, 0, or 1
     */
    _itemSortFunc: function(a,b) {
        var ai = this.getIdentity(a),
            bi = this.getIdentity(b);
        return ai == bi ?  0 :
               ai  > bi ?  1 :
               ai  < bi ? -1 : 0;
    },

    /**
     * Merge the given items into this store and index them.
     * @private
     * @param args.store {Object} store the items came from; its
     *   getAttributes() and getValue() are used to read them
     * @param args.items {Array} the items to index
     * @param args.supplementalOnly {Boolean} if true, items whose
     *   identity is not already in this store are ignored
     */
    _indexItems: function( args ) {
        var store = args.store,
            items = args.items;

        // lower-cased names of every attribute seen in this store
        var storeAttributes = {};

        // convert the items to a uniform format: plain objects with
        // lower-cased attribute names
        items = dojo.map( items, function( item ) {
            var newitem = {};
            dojo.forEach( store.getAttributes(item), function(attr) {
                var lcattr = attr.toLowerCase();
                storeAttributes[lcattr] = true;
                newitem[lcattr] = store.getValue( item, attr );
            });
            return newitem;
        });

        // merge them with any existing records, filtering out ones
        // that should be ignored if we were passed
        // 'supplementalOnly', and update the identity index
        this.identIndex = this.identIndex || {};
        var seenInThisStore = {};
        items = dojo.map( items, function(item) {
            var ident = this.getIdentity(item);
            var existingItem = this.identIndex[ ident ];

            // skip this item if we have already seen it from this
            // store, or if we are supplementalOnly and it does not
            // already exist
            if( seenInThisStore[ident] || ( args.supplementalOnly && !existingItem ) ) {
                return null;
            }
            seenInThisStore[ident] = true;

            // merge the new item attributes with any existing
            // record for this item
            return this.identIndex[ ident ] = dojo.mixin( existingItem || {}, item );
        }, this );

        // filter out the skipped items
        items = dojo.filter( items, function(i) { return i; } );

        // update our facet list to include any new attrs these
        // items have
        var new_facets = this._addFacets( dojof.keys( storeAttributes ) );

        // initialize indexes for any new facets
        this.facetIndexes = this.facetIndexes || { itemCount: 0, bucketCount: 0, byName: {} };
        dojo.forEach( new_facets, function(facet) {
            if( ! this.facetIndexes.byName[facet] ) {
                this.facetIndexes.bucketCount++;
                this.facetIndexes.byName[facet] = { itemCount: 0, bucketCount: 0, byValue: {} };
            }
        }, this );

        // now update the indexes with the new facets
        if( new_facets.length ) {
            // facet name => { item identity => 1 } for every item of
            // this batch that has a value for that facet
            var gotDataForItem = {};
            dojo.forEach( new_facets, function(f) { gotDataForItem[f] = {}; } );

            dojo.forEach( items, function( item ) {
                this.facetIndexes.itemCount++;
                dojo.forEach( new_facets, function( facet ) {
                    var value = this.getValue( item, facet, undefined );
                    if( typeof value == 'undefined' )
                        return;
                    gotDataForItem[facet][ this.getIdentity(item) ] = 1;
                    this._indexItem( facet, value, item );
                }, this );
            }, this );

            // index every item in the store that has no data for a
            // new facet under the 'no data' value
            dojo.forEach( new_facets, function(facet) {
                dojo.forEach( dojof.values( this.identIndex ), function(item) {
                    if( ! gotDataForItem[facet][ this.getIdentity(item) ] ) {
                        this._indexItem( facet, this._noDataValue, item );
                    }
                }, this );
            }, this );
        }
    },

    /**
     * Add an item to the indexes for the given facet name and value.
     * @private
     */
    _indexItem: function( facet, value, item ) {
        var facetValues = this.facetIndexes.byName[facet];
        var bucket = facetValues.byValue[value];
        if( !bucket ) {
            bucket = facetValues.byValue[value] = { itemCount: 0, items: [] };
            facetValues.bucketCount++;
        }
        bucket.itemCount++;
        facetValues.itemCount++;
        bucket.items.push( item );
    },

    /**
     * Given an array of string facet names, add records for them,
     * initializing the necessary data structures.
     * @private
     * @returns {Array[String]} facet names that did not already exist
     */
    _addFacets: function( facetNames ) {
        var old_facets = this.facets || [];
        var seen = {};
        // existing facets come first and are all kept, so everything
        // past old_facets.length in the result is new
        this.facets = dojo.filter(
            old_facets.concat( facetNames ),
            function( facetName ) {
                var take = this._filterFacet( facetName ) && !seen[facetName];
                seen[facetName] = true;
                return take;
            },
            this
        );
        return this.facets.slice( old_facets.length );
    },

    /**
     * Get the number of items that matched the most recent query.
     * @returns {Number} the item count, or undefined if there has not
     * been any query so far.
     */
    getCount: function() {
        return this._fetchCount;
    },

    /**
     * Get the per-value item counts for a facet, as calculated by the
     * most recent query.
     * @param facetName {String} facet name
     * @returns {Object} mapping of facet value to item count, or
     *   undefined if no counts are available (including when there
     *   has not been any query so far)
     */
    getFacetCounts: function( facetName ) {
        var counts = this._fetchFacetCounts;
        if( !counts )
            return undefined;
        // facets that were part of the query have their own
        // 'leave-one-out' counts; all others share '__other__'
        var context = counts[ facetName ] || counts[ '__other__' ];
        return context ? context[facetName] : undefined;
    },

    /**
     * Get an array of the text names of the facets that are defined
     * in this track metadata.
     * @param callback {Function} currently unused; the names are
     *   returned directly
     * @returns {Array[String]} the facet names
     */
    getFacetNames: function( callback ) {
        return this.facets;
    },

    /**
     * Get an Array of the distinct values for a given facet name.
     * @param facetName {String} the facet name
     * @returns {Array} distinct values for that facet
     */
    getFacetValues: function( facetName ) {
        var index = this.facetIndexes.byName[facetName];
        if( !index )
            return [];

        return dojof.keys( index.byValue );
    },

    /**
     * Get statistics about the facet with the given name.
     * @returns {Object} as: { itemCount: ##, bucketCount: ##, avgBucketSize: ## },
     *   or an empty object if there is no such facet
     */
    getFacetStats: function( facetName ) {
        var index = this.facetIndexes.byName[facetName];
        if( !index ) return {};

        var stats = {};
        dojo.forEach( ['itemCount','bucketCount','avgBucketSize'],
                      function(attr) { stats[attr] = index[attr]; }
                    );
        return stats;
    },

    // dojo.data.api.Read support

    getValue: function( i, attr, defaultValue ) {
        var v = i[attr];
        return typeof v == 'undefined' ? defaultValue : v;
    },
    getValues: function( i, attr ) {
        var a = [ i[attr] ];
        return typeof a[0] == 'undefined' ? [] : a;
    },

    getAttributes: function(item) {
        return dojof.keys( item );
    },

    hasAttribute: function(item,attr) {
        return Object.prototype.hasOwnProperty.call( item, attr );
    },

    containsValue: function(item, attribute, value) {
        return item[attribute] == value;
    },

    isItem: function(item) {
        // typeof null is 'object', so null must be excluded
        // explicitly before looking at item.label
        return typeof item == 'object' && item !== null && typeof item.label == 'string';
    },

    isItemLoaded: function() {
        return true;
    },

    loadItem: function( args ) {
    },

    // used by the dojo.data.util.simpleFetch mixin to implement fetch()
    _fetchItems: function( keywordArgs, findCallback, errorCallback ) {
        // if the data is not loaded yet, re-run this fetch once it is
        if( ! this.ready ) {
            this.onReady( this, dojo.hitch( this, '_fetchItems', keywordArgs, findCallback, errorCallback ) );
            return;
        }

        var query = dojo.clone( keywordArgs.query || {} );
        // coerce query arguments to arrays if they are not already arrays
        dojo.forEach( dojof.keys( query ), function(qattr) {
            if( ! Array.isArray( query[qattr] ) ) {
                query[qattr] = [ query[qattr] ];
            }
        });

        // re-use the previous results if the query has not changed
        var results;
        var queryFingerprint = Util.objectFingerprint( query );
        if( queryFingerprint == this.previousQueryFingerprint ) {
            results = this.previousResults;
        } else {
            this.previousQueryFingerprint = queryFingerprint;
            this.previousResults = results = this._doQuery( query );
        }

        // and finally, hand them to the finding callback
        findCallback( results, keywordArgs );
        this.onFetchSuccess();
    },

    /**
     * Run a faceted query, recording the match count and the facet
     * counts for getCount() and getFacetCounts().
     * @private
     * @param query {Object} facet name => array of selected values,
     *   plus an optional 'text' search entry.  The 'text' entry is
     *   deleted from the passed object.
     * @returns {Array} the items that match all of the criteria
     * @throws {String} if the query names a facet that is not indexed
     */
    _doQuery: function( /**Object*/ query ) {

        var textFilter = this._compileTextFilter( query.text );
        delete query.text;

        // algorithm pseudocode:
        //
        //    * for each individual facet, get a set of tracks that
        //      matches its selected values.  sort each set by the
        //      track's unique identifier.
        //    * while still need to go through all the items in the filtered sets:
        //          - if all the facets have the same track first in their sorted set:
        //                 add it to the core result set.
        //                 count it in the global counts
        //          - if all the facets *but one* have the same track first:
        //                 this track will need to be counted in the
        //                 'leave-out' counts for the odd facet out.  count it.
        //          - shift the lowest-labeled track off of whatever facets have it at the front

        var results = []; // array of items that completely match the query
        var itemSort = dojo.hitch( this, '_itemSortFunc' );

        // marker used in place of an identity for a set that has no
        // more items left
        var endOfSet = '(at end of set)';

        // construct the filtered sets (arrays of items) for each of
        // our search criteria
        var filteredSets = [];
        if( textFilter ) {
            filteredSets.push(
                dojo.filter( dojof.values( this.identIndex ), textFilter )
                    .sort( itemSort )
            );
            filteredSets[0].facetName = 'Contains text';
        }
        filteredSets.push.apply( filteredSets,
                dojo.map( dojof.keys( query ), function( facetName ) {
                    var values = query[facetName];
                    var items = [];
                    if( ! this.facetIndexes.byName[facetName] ) {
                        console.error( "No facet defined with name '"+facetName+"'." );
                        throw "No facet defined with name '"+facetName+"', faceted search failed.";
                    }
                    dojo.forEach( values, function(value) {
                        var idx = this.facetIndexes.byName[facetName].byValue[value] || {};
                        items.push.apply( items, idx.items || [] );
                    }, this );
                    items.facetName = facetName;
                    items.sort( itemSort );
                    return items;
                }, this )
        );

        // give each set a cursor, so that 'shifting' an item off the
        // front of it does not need to modify the array
        dojo.forEach( filteredSets, function(s) {
            s.myOffset = 0;
            s.topItem = function() { return this[this.myOffset]; };
            s.shift = function() { this.myOffset++; };
        });

        // init counts
        var facetMatchCounts = {};

        if( ! filteredSets.length ) {
            // no criteria at all: everything matches
            results = dojof.values( this.identIndex );
        } else {
            // calculate how many item records total we need to go through
            var leftToProcess = 0;
            dojo.forEach( filteredSets,
                          function(s) { leftToProcess += s.length; } );

            // do a sort of N-way merge of the filtered sets
            while( leftToProcess ) {

                // look at the top of each of our sets, seeing what items
                // we have there.  group the sets by the identity of their
                // topmost item.
                var setsByTopIdent = {}, uniqueIdents = [], ident, item;
                dojo.forEach( filteredSets, function(set) {
                    item = set.topItem();
                    ident = item ? this.getIdentity( item ) : endOfSet;
                    if( setsByTopIdent[ ident ] ) {
                        setsByTopIdent[ ident ].push( set );
                    } else {
                        setsByTopIdent[ ident ] = [set];
                        uniqueIdents.push( ident );
                    }
                }, this );

                if( uniqueIdents.length == 1 ) {
                    // each of our matched sets has the same item at the
                    // top.  this means it is part of the core result set.
                    results.push( item );
                } else {

                    // ident we are operating on is always the
                    // lexically-first one that is not the end-of-set
                    // marker
                    uniqueIdents.sort();
                    var leftOutIndex;
                    if( uniqueIdents[0] == endOfSet ) {
                        ident = uniqueIdents[1];
                        leftOutIndex = 0;
                    } else {
                        ident = uniqueIdents[0];
                        leftOutIndex = 1;
                    }

                    if( uniqueIdents.length == 2
                        && setsByTopIdent[ ident ].length == filteredSets.length - 1 ) {
                        // all of the matched sets except one has the same
                        // item on top, and it is the lowest-labeled item

                        var leftOutSet = setsByTopIdent[ uniqueIdents[ leftOutIndex ] ][0];
                        this._countItem( facetMatchCounts, setsByTopIdent[ident][0].topItem(), leftOutSet.facetName );
                    }
                }

                // advance every set that had the current item on top
                dojo.forEach( setsByTopIdent[ ident ], function(s) { s.shift(); leftToProcess--; } );
            }
        }

        // each of the leave-one-out count sets needs to also have the
        // core result set counted in it, and also make a counting set
        // for the core result set (used by __other__ facets not
        // involved in the query)
        dojo.forEach( dojof.keys( facetMatchCounts ).concat( ['__other__'] ), function(category) {
            dojo.forEach( results, function(item) {
                this._countItem( facetMatchCounts, item, category );
            }, this );
        }, this );

        // in the case of just one filtered set, the 'leave-one-out'
        // count for it is actually the count of all results, so we
        // need to make a special little count of that attribute for
        // the global result set.
        if( filteredSets.length == 1 ) {
            dojo.forEach( dojof.values( this.identIndex ), function(item) {
                this._countItem( facetMatchCounts, item, filteredSets[0].facetName );
            }, this );
        }

        this._fetchFacetCounts = facetMatchCounts;
        this._fetchCount = results.length;
        return results;
    },

    /**
     * Count the given item in the given counting category.
     * @private
     * @param facetMatchCounts {Object} the counts being accumulated,
     *   as category => facet name => facet value => count
     * @param item {Object} the item to count
     * @param facetName {String} the counting category: a facet name,
     *   in which case the item's value for only that facet is
     *   counted, or '__other__', in which case its values for all
     *   facets are counted
     */
    _countItem: function( facetMatchCounts, item, facetName ) {
        var facetEntry = facetMatchCounts[facetName];
        if( !facetEntry ) facetEntry = facetMatchCounts[facetName] = {};
        var facets = facetName == '__other__' ? this.facets : [facetName];
        dojo.forEach( facets, function(attrName) {
            var value = this.getValue( item, attrName, this._noDataValue );
            var attrEntry = facetEntry[attrName];
            if( !attrEntry ) {
                attrEntry = facetEntry[attrName] = {};
            }
            attrEntry[value] = ( attrEntry[value] || 0 ) + 1;
        }, this );
    },

    /**
     * Run the given function once this store is ready: right away if
     * it already is, otherwise when loading finishes.
     * @param scope {Object} object to run the function on; defaults
     *   to dojo.global
     * @param func {Function|String} the function, or the name of a
     *   method of scope.  May also be passed as the only argument.
     */
    onReady: function( scope, func ) {
        // support being called with just a function
        if( typeof func == 'undefined' && typeof scope == 'function' ) {
            func = scope;
            scope = null;
        }
        scope = scope || dojo.global;
        func = dojo.hitch( scope, func );
        if( ! this.ready ) {
            this.onReadyFuncs.push( func );
            return;
        } else {
            func();
        }
    },

    /**
     * Event hook called once when the store is initialized and has
     * an initial set of data loaded.
     */
    _onReady: function() {
        dojo.forEach( this.onReadyFuncs || [], function(func) {
            func.call();
        });
    },

    /**
     * Event hook called after a fetch has been successfully completed
     * on this store.
     */
    onFetchSuccess: function() {
    },

    /**
     * Compile a text search string into a function that tests whether
     * a given item matches that search string.
     *
     * The string is split into words, where a quoted phrase counts
     * as one word.  In a word, '*' stands for one or more characters
     * and '?' for exactly one.  An item matches if at least one of
     * its facet values contains all of the words, case-insensitively.
     * @private
     * @param textString {String|Array} the search text
     * @returns {Function} function taking an item and returning
     *   whether it matches, or null if textString is undefined
     */
    _compileTextFilter: function( textString ) {
        if( textString === undefined )
            return null;

        // _fetchItems hands us the text wrapped in an array; this is
        // the same conversion that passing it to RegExp exec() did
        textString = String( textString );

        // parse out words and quoted words in the order in which they
        // appear, and convert each into a regexp.  A single regexp is
        // used so that plain words in front of a quoted phrase are
        // not skipped over.
        var rWord = /["']([^"']+)["']|(\S+)/g;
        var wordREs = [];
        var match;
        while( (match = rWord.exec( textString )) ) {
            var currentWord = match[1] !== undefined ? match[1] : match[2];
            // escape regex control chars, and convert glob-like chars to
            // their regex equivalents
            currentWord = dojo.regexp.escapeString( currentWord, '*?' )
                .replace(/\*/g,'.+')
                .replace(/ /g,'\\s+')
                .replace(/\?/g,'.');
            wordREs.push( new RegExp( currentWord, 'i' ) );
        }

        // return a function that takes an item and returns true if it
        // matches the text filter
        return dojo.hitch( this, function(item) {
            return dojo.some( this.facets, function(facetName) {
                var text = this.getValue( item, facetName );
                // a missing value must not be searched as the
                // literal text "undefined" or "null"
                if( text === undefined || text === null )
                    return false;
                return dojof.every( wordREs, function(re) { return re.test(text); } );
            }, this );
        });
    },

    getFeatures: function() {
        return {
            'dojo.data.api.Read': true,
            'dojo.data.api.Identity': true
        };
    },
    close: function() {},

    getLabel: function(i) {
        return this.getValue( i, 'key', undefined );
    },
    getLabelAttributes: function(i) {
        return ['key'];
    },

    // dojo.data.api.Identity support
    getIdentityAttributes: function() {
        return ['label'];
    },
    getIdentity: function(i) {
        return this.getValue( i, 'label', undefined );
    },
    fetchItemByIdentity: function(id) {
        return this.identIndex[id];
    }
});
dojo.require('dojo.data.util.simpleFetch');
dojo.extend( JBrowse.Model.TrackMetaData, dojo.data.util.simpleFetch );