1 dojo.declare( 'JBrowse.Model.TrackMetaData', null,
  2 /**
  3  * @lends JBrowse.Model.TrackMetaData.prototype
  4  */
  5 {
  6 
    /**
     * Placeholder facet value: items that have no value for a facet
     * are indexed and counted under this value.
     */
    _noDataValue: '(no data)',
  8 
  9     /**
 10      * Data store for track metadata, supporting faceted
 11      * (parameterized) searching.  Keeps all of the track metadata,
 12      * and the indexes thereof, in memory.
 13      * @constructs
 14      * @param args.trackConfigs {Array} array of track configuration
 15      * @param args.indexFacets {Function|Array|String}
 16      * @param args.onReady {Function}
 17      * @param args.metadataStores {Array[dojox.data]}
 18      */
 19     constructor: function( args ) {
 20         // set up our facet name discrimination: what facets we will
 21         // actually provide search on
 22         var non_facet_attrs = ['conf'];
 23         this._filterFacet = (function() {
 24             var filter = args.indexFacets || function() {return true;};
 25             // if we have a non-function filter, coerce to an array,
 26             // then convert that array to a function
 27             if( typeof filter == 'string' )
 28                 filter = [filter];
 29             if( Array.isArray( filter ) ) {
 30                 var oldfilter = filter;
 31                 filter = function( facetName) {
 32                     return dojo.some( oldfilter, function(fn) {
 33                                          return facetName == fn.toLowerCase();
 34                                      });
 35                };
 36             }
 37             var ident_facets = this.getIdentityAttributes();
 38             return function(facetName) {
 39                 return (
 40                     // always index ident facets
 41                     dojo.some( ident_facets, function(n) { return n == facetName; } )
 42                     // otherwise, must pass the user filter AND not be one of our explicitly-blocked attrs
 43                  || filter(facetName)
 44                     && ! dojo.some( non_facet_attrs, function(a) { return a == facetName;})
 45                 );
 46             };
 47         }).call(this);
 48 
 49         // set up our onReady callbacks to fire once the data is
 50         // loaded
 51         if( ! dojo.isArray( args.onReady ) ){
 52             this.onReadyFuncs = args.onReady ? [ args.onReady ] : [];
 53         } else {
 54             this.onReadyFuncs = dojo.clone(args.onReady);
 55         }
 56 
 57         // interpret the track configurations as a metadata store
 58         this._indexItems(
 59             {
 60                 store: this,
 61                 items: dojo.map( args.trackConfigs, function(conf) {
 62                     var metarecord = dojo.clone( conf.metadata || {} );
 63                     metarecord.label = conf.label;
 64                     metarecord.key = conf.key;
 65                     metarecord.conf = conf;
 66                     if( conf.category )
 67                         metarecord.category = conf.category;
 68                     return metarecord;
 69                 },this)
 70             }
 71         );
 72 
 73         // fetch and index all the items from each of the stores
 74         var stores_fetched_count = 0;
 75         // filter out empty metadata store entries
 76         args.metadataStores = dojo.filter( args.metadataStores, function(s) { return s; } );
 77         if( ! args.metadataStores || ! args.metadataStores.length ) {
 78             // if we don't actually have any stores besides the track
 79             // confs, we're ready now.
 80             this._finishLoad();
 81         } else  {
 82             // index the track metadata from each of the stores
 83 
 84             var storeFetchFinished = dojo.hitch( this, function() {
 85                 if( ++stores_fetched_count == args.metadataStores.length )
 86                     this._finishLoad();
 87             });
 88             dojo.forEach( args.metadataStores, function(store) {
 89                 store.fetch({
 90                     scope: this,
 91                     onComplete: dojo.hitch( this, function(items) {
 92                         // build our indexes
 93                         this._indexItems({ store: store, items: items, supplementalOnly: true });
 94 
 95                         // if this is the last store to be fetched, call
 96                         // our onReady callbacks
 97                         storeFetchFinished();
 98                     }),
 99                     onError: function(e) {
100                         console.error(''+e);
101                         storeFetchFinished();
102                     }
103                 });
104             },this);
105         }
106      },
107 
108     /**
109      * Set the store's state to be ready (i.e. loaded), and calls all
110      * our onReady callbacks.
111      * @private
112      */
113     _finishLoad: function() {
114 
115         // sort the facet names
116         this.facets.sort();
117 
118         // calculate the average bucket size for each facet index
119         dojo.forEach( dojof.values( this.facetIndexes.byName ), function(bucket) {
120             bucket.avgBucketSize = bucket.itemCount / bucket.bucketCount;
121         });
122         // calculate the rank of the facets: make an array of
123         // facet names sorted by bucket size, descending
124         this.facetIndexes.facetRank = dojo.clone(this.facets).sort(dojo.hitch(this,function(a,b){
125             return this.facetIndexes.byName[a].avgBucketSize - this.facetIndexes.byName[b].avgBucketSize;
126         }));
127 
128         // sort the facet indexes by ident, so that we can do our
129         // kind-of-efficient N-way merging when querying
130         var itemSortFunction = dojo.hitch( this, '_itemSortFunc' );
131         dojo.forEach( dojof.values( this.facetIndexes.byName ), function( facetIndex ) {
132             dojo.forEach( dojof.keys( facetIndex.byValue ), function( value ) {
133                 facetIndex.byValue[value].items = facetIndex.byValue[value].items.sort( itemSortFunction );
134             });
135         },this);
136 
137         this.ready = true;
138         this._onReady();
139     },
140 
141     _itemSortFunc: function(a,b) {
142             var ai = this.getIdentity(a),
143                 bi = this.getIdentity(b);
144             return ai == bi ?  0 :
145                    ai  > bi ?  1 :
146                    ai  < bi ? -1 : 0;
147     },
148 
149     _indexItems: function( args ) {
150         // get our (filtered) list of facets we will index for
151         var store = args.store,
152             items = args.items;
153 
154         var storeAttributes = {};
155 
156         // convert the items to a uniform format
157         items = dojo.map( items, function( item ) {
158                               var itemattrs = store.getAttributes(item);
159 
160                               //convert the item into a uniform data format of plain objects
161                               var newitem = {};
162                               dojo.forEach( itemattrs, function(attr) {
163                                                 var lcattr = attr.toLowerCase();
164                                                 storeAttributes[lcattr] = true;
165                                                 newitem[lcattr] = store.getValue(item,attr);
166                                             });
167                               return newitem;
168                           },
169                           this
170                         );
171 
172         // merge them with any existing records, filtering out ones
173         // that should be ignored if we were passed
174         // 'supplementalOnly', and update the identity index
175         this.identIndex = this.identIndex || {};
176         items = (function() {
177             var seenInThisStore = {};
178             return dojo.map( items, function(item) {
179                                  // merge the new item attributes with any existing
180                                  // record for this item
181                                  var ident = this.getIdentity(item);
182                                  var existingItem = this.identIndex[ ident ];
183 
184                                  // skip this item if we have already
185                                  // seen it from this store, or if we
186                                  // are supplementalOnly and it
187                                  // does not already exist
188                                  if( seenInThisStore[ident] || args.supplementalOnly && !existingItem) {
189                                      return null;
190                                  }
191                                  seenInThisStore[ident] = true;
192 
193                                  return this.identIndex[ ident ] = dojo.mixin( existingItem || {}, item );
194                              },
195                              this
196                            );
197         }).call(this);
198 
199         // filter out nulls
200         items = dojo.filter( items, function(i) { return i;});
201 
202         // update our facet list to include any new attrs these
203         // items have
204         var new_facets = this._addFacets( dojof.keys( storeAttributes ) );
205 
206         // initialize indexes for any new facets
207         this.facetIndexes = this.facetIndexes || { itemCount: 0, bucketCount: 0, byName: {} };
208         dojo.forEach( new_facets, function(facet) {
209             if( ! this.facetIndexes.byName[facet] ) {
210                 this.facetIndexes.bucketCount++;
211                 this.facetIndexes.byName[facet] = { itemCount: 0, bucketCount: 0, byValue: {} };
212             }
213         }, this);
214 
215         // now update the indexes with the new facets
216         if( new_facets.length ) {
217             var gotDataForItem = {};
218             dojo.forEach( new_facets, function(f){ gotDataForItem[f] = {};});
219 
220             dojo.forEach( items, function( item ) {
221                 this.facetIndexes.itemCount++;
222                 dojo.forEach( new_facets, function( facet ) {
223                     var value = this.getValue( item, facet, undefined );
224                     if( typeof value == 'undefined' )
225                         return;
226                     gotDataForItem[facet][this.getIdentity(item)] = 1;
227                     this._indexItem( facet, value, item );
228                 },this);
229             }, this);
230 
231             // index the items that do not have data for this facet
232             dojo.forEach( new_facets, function(facet) {
233                 var gotSomeWithNoData = false;
234                 dojo.forEach( dojof.values( this.identIndex ), function(item) {
235                     if( ! gotDataForItem[facet][this.getIdentity(item)] ) {
236                         gotSomeWithNoData = true;
237                         this._indexItem( facet, this._noDataValue, item );
238                     }
239                 },this);
240             },this);
241         }
242     },
243 
244     /**
245      * Add an item to the indexes for the given facet name and value.
246      * @private
247      */
248     _indexItem: function( facet, value, item ) {
249         var facetValues = this.facetIndexes.byName[facet];
250         var bucket = facetValues.byValue[value];
251         if( !bucket ) {
252             bucket = facetValues.byValue[value] = { itemCount: 0, items: [] };
253             facetValues.bucketCount++;
254         }
255         bucket.itemCount++;
256         facetValues.itemCount++;
257         bucket.items.push(item);
258     },
259 
260     /**
261      * Given an array of string facet names, add records for them,
262      * initializing the necessary data structures.
263      * @private
264      * @returns {Array[String]} facet names that did not already exist
265      */
266     _addFacets: function( facetNames ) {
267         var old_facets = this.facets || [];
268         var seen = {};
269         this.facets = dojo.filter(
270             old_facets.concat( facetNames ),
271             function(facetName) {
272                 var take = this._filterFacet(facetName) && !seen[facetName];
273                 seen[facetName] = true;
274                 return take;
275             },
276             this
277         );
278         return this.facets.slice( old_facets.length );
279     },
280 
    /**
     * Get the number of items that matched the most recent query
     * (the value stored in _fetchCount when a query is run).
     * @returns {Number} the item count, or undefined if there has not
     * been any query so far.
     */
    getCount: function() {
        return this._fetchCount;
    },
289 
290 
291     /**
292      * @param facetName {String} facet name
293      * @returns {Object}
294      */
295     getFacetCounts: function( facetName ) {
296         var context = this._fetchFacetCounts[ facetName ] || this._fetchFacetCounts[ '__other__' ];
297         return context ? context[facetName] : undefined;
298     },
299 
300     /**
301      * Get an array of the text names of the facets that are defined
302      * in this track metadata.
303      * @param callback {Function} called as callback( [facet,facet,...] )
304      */
305     getFacetNames: function( callback ) {
306         return this.facets;
307     },
308 
309     /**
310      * Get an Array of the distinct values for a given facet name.
311      * @param facetName {String} the facet name
312      * @returns {Array} distinct values for that facet
313      */
314     getFacetValues: function( facetName ) {
315         var index = this.facetIndexes.byName[facetName];
316         if( !index )
317             return [];
318 
319         return dojof.keys( index.byValue );
320     },
321 
322     /**
323      * Get statistics about the facet with the given name.
324      * @returns {Object} as: <code>{ itemCount: ##, bucketCount: ##, avgBucketSize: ## }</code>
325      */
326     getFacetStats: function( facetName ) {
327         var index = this.facetIndexes.byName[facetName];
328         if( !index ) return {};
329 
330         var stats = {};
331         dojo.forEach( ['itemCount','bucketCount','avgBucketSize'],
332                       function(attr) { stats[attr] = index[attr]; }
333                     );
334         return stats;
335     },
336 
337     // dojo.data.api.Read support
338 
339     getValue: function( i, attr, defaultValue ) {
340         var v = i[attr];
341         return typeof v == 'undefined' ? defaultValue : v;
342     },
343     getValues: function( i, attr ) {
344         var a = [ i[attr] ];
345         return typeof a[0] == 'undefined' ? [] : a;
346     },
347 
    /**
     * List the attribute names of an item.
     * @param item {Object} the item
     * @returns the item's keys, as given by dojof.keys
     */
    getAttributes: function(item)  {
        return dojof.keys( item );
    },
351 
    /**
     * Test whether an item has the given attribute as an own property.
     * @param item {Object} the item
     * @param attr {String} attribute name
     * @returns {Boolean}
     */
    hasAttribute: function(item,attr) {
        return item.hasOwnProperty(attr);
    },
355 
    /**
     * Test whether an item's attribute equals the given value.  The
     * comparison is a loose (==) one.
     * @returns {Boolean}
     */
    containsValue: function(item, attribute, value) {
        return item[attribute] == value;
    },
359 
360     isItem: function(item) {
361         return typeof item == 'object' && typeof item.label == 'string';
362     },
363 
    /**
     * Always reports items as loaded.
     * @returns {Boolean} true
     */
    isItemLoaded: function() {
        return true;
    },
367 
    /**
     * Does nothing; isItemLoaded() always returns true for this store.
     */
    loadItem: function( args ) {
    },
370 
371     // used by the dojo.data.util.simpleFetch mixin to implement fetch()
372     _fetchItems: function( keywordArgs, findCallback, errorCallback ) {
373         if( ! this.ready ) {
374             this.onReady( dojo.hitch( this, '_fetchItems', keywordArgs, findCallback, errorCallback ) );
375             return;
376         }
377 
378         var query = dojo.clone( keywordArgs.query || {} );
379         // coerce query arguments to arrays if they are not already arrays
380         dojo.forEach( dojof.keys( query ), function(qattr) {
381             if( ! Array.isArray( query[qattr] ) ) {
382                 query[qattr] = [ query[qattr] ];
383             }
384         },this);
385 
386         var results;
387         var queryFingerprint = Util.objectFingerprint( query );
388         if( queryFingerprint == this.previousQueryFingerprint ) {
389             results = this.previousResults;
390         } else {
391             this.previousQueryFingerprint = queryFingerprint;
392             this.previousResults = results = this._doQuery( query );
393         }
394 
395         // and finally, hand them to the finding callback
396         findCallback(results,keywordArgs);
397         this.onFetchSuccess();
398     },
399 
400     /**
401      * @private
402      */
403     _doQuery: function( /**Object*/ query ) {
404 
405         var textFilter = this._compileTextFilter( query.text );
406         delete query.text;
407 
408         // algorithm pseudocode:
409         //
410         //    * for each individual facet, get a set of tracks that
411         //      matches its selected values.  sort each set by the
412         //      track's unique identifier.
413         //    * while still need to go through all the items in the filtered sets:
414         //          - if all the facets have the same track first in their sorted set:
415         //                 add it to the core result set.
416         //                 count it in the global counts
417         //          - if all the facets *but one* have the same track first:
418         //                 this track will need to be counted in the
419         //                 'leave-out' counts for the odd facet out.  count it.
420         //          - shift the lowest-labeled track off of whatever facets have it at the front
421 
422         var results = []; // array of items that completely match the query
423 
424         // construct the filtered sets (arrays of items) for each of
425         // our search criteria
426         var filteredSets = [];
427         if( textFilter ) {
428             filteredSets.push(
429                 dojo.filter( dojof.values( this.identIndex ), textFilter )
430                     .sort( dojo.hitch(this,'_itemSortFunc') )
431             );
432             filteredSets[0].facetName = 'Contains text';
433         }
434         filteredSets.push.apply( filteredSets,
435                 dojo.map( dojof.keys( query ), function( facetName ) {
436                     var values = query[facetName];
437                     var items = [];
438                     if( ! this.facetIndexes.byName[facetName] ) {
439                         console.error( "No facet defined with name '"+facetName+"'." );
440                         throw "No facet defined with name '"+facetName+"', faceted search failed.";
441                     }
442                     dojo.forEach( values, function(value) {
443                         var idx = this.facetIndexes.byName[facetName].byValue[value] || {};
444                         items.push.apply( items, idx.items || [] );
445                     },this);
446                     items.facetName = facetName;
447                     items.sort( dojo.hitch( this, '_itemSortFunc' ));
448                     return items;
449                 },this)
450         );
451         dojo.forEach( filteredSets, function(s) {
452             s.myOffset = 0;
453             s.topItem = function() { return this[this.myOffset]; };
454             s.shift   = function() { this.myOffset++; };
455         });
456 
457         // init counts
458         var facetMatchCounts   = {};
459 
460         if( ! filteredSets.length ) {
461             results = dojof.values( this.identIndex );
462         } else {
463             // calculate how many item records total we need to go through
464             var leftToProcess = 0;
465             dojo.forEach( filteredSets,
466                           function(s) { leftToProcess += s.length;} );
467 
468             // do a sort of N-way merge of the filtered sets
469             while( leftToProcess ) {
470 
471                 // look at the top of each of our sets, seeing what items
472                 // we have there.  group the sets by the identity of their
473                 // topmost item.
474                 var setsByTopIdent = {}, uniqueIdents = [], ident, item;
475                 dojo.forEach(filteredSets, function(set,i) {
476                     item = set.topItem();
477                     ident = item ? this.getIdentity( item ) : '(at end of set)';
478                     if( setsByTopIdent[ ident ] ) {
479                         setsByTopIdent[ ident ].push( set );
480                     } else {
481                         setsByTopIdent[ ident ] = [set];
482                         uniqueIdents.push( ident );
483                     }
484                 },this);
485                 if( uniqueIdents.length == 1 ) {
486                     // each of our matched sets has the same item at the
487                     // top.  this means it is part of the core result set.
488                     results.push( item );
489                 } else {
490 
491                     // ident we are operating on is always the
492                     // lexically-first one that is not the end-of-set
493                     // marker
494                     uniqueIdents.sort();
495                     var leftOutIndex;
496                     if( uniqueIdents[0] == '(at end of set)' ) {
497                         ident = uniqueIdents[1];
498                         leftOutIndex = 0;
499                     } else {
500                         ident = uniqueIdents[0];
501                         leftOutIndex = 1;
502                     }
503                     ident = uniqueIdents[0] == '(at end of set)' ? uniqueIdents[1] : uniqueIdents[0];
504 
505                     if( uniqueIdents.length == 2
506                         && setsByTopIdent[ ident ].length == filteredSets.length - 1 ) {
507                         // all of the matched sets except one has the same
508                         // item on top, and it is the lowest-labeled item
509 
510                         var leftOutSet = setsByTopIdent[ uniqueIdents[ leftOutIndex ] ][0];
511                         this._countItem( facetMatchCounts, setsByTopIdent[ident][0].topItem(), leftOutSet.facetName );
512                     }
513                 }
514 
515                 dojo.forEach( setsByTopIdent[ ident ], function(s) { s.shift(); leftToProcess--; });
516             }
517         }
518 
519         // each of the leave-one-out count sets needs to also have the
520         // core result set counted in it, and also make a counting set
521         // for the core result set (used by __other__ facets not
522         // involved in the query)
523         dojo.forEach( dojof.keys(facetMatchCounts).concat( ['__other__'] ), function(category) {
524             dojo.forEach( results, function(item) {
525                  this._countItem( facetMatchCounts, item, category);
526             },this);
527         },this);
528 
529         // in the case of just one filtered set, the 'leave-one-out'
530         // count for it is actually the count of all results, so we
531         // need to make a special little count of that attribute for
532         // the global result set.
533         if( filteredSets.length == 1 ) {
534             dojo.forEach( dojof.values( this.identIndex ), function(item) {
535                 this._countItem( facetMatchCounts, item, filteredSets[0].facetName );
536             },this);
537         }
538 
539         this._fetchFacetCounts = facetMatchCounts;
540         this._fetchCount = results.length;
541         return results;
542     },
543 
544     _countItem: function( facetMatchCounts, item, facetName ) {
545         var facetEntry = facetMatchCounts[facetName];
546         if( !facetEntry ) facetEntry = facetMatchCounts[facetName] = {};
547         var facets = facetName == '__other__' ? this.facets : [facetName];
548         dojo.forEach( facets, function(attrName) {
549             var value = this.getValue( item, attrName, this._noDataValue );
550             var attrEntry = facetEntry[attrName];
551             if( !attrEntry ) {
552                 attrEntry = facetEntry[attrName] = {};
553                 attrEntry[value] = 0;
554             }
555             attrEntry[value] = ( attrEntry[value] || 0 ) + 1;
556         },this);
557     },
558 
559     onReady: function( scope, func ) {
560         scope = scope || dojo.global;
561         func = dojo.hitch( scope, func );
562         if( ! this.ready ) {
563             this.onReadyFuncs.push( func );
564             return;
565         } else {
566             func();
567         }
568     },
569 
570     /**
571      * Event hook called once when the store is initialized and has
572      * an initial set of data loaded.
573      */
574     _onReady: function() {
575         dojo.forEach( this.onReadyFuncs || [], function(func) {
576             func.call();
577         });
578     },
579 
    /**
     * Event hook called after a fetch has been successfully completed
     * on this store (_fetchItems calls it after handing over the
     * results).  Does nothing by default.
     */
    onFetchSuccess: function() {
    },
586 
587     /**
588      * Compile a text search string into a function that tests whether
589      * a given piece of text matches that search string.
590      * @private
591      */
592     _compileTextFilter: function( textString ) {
593         if( textString === undefined )
594             return null;
595 
596         // parse out words and quoted words, and convert each into a regexp
597         var rQuotedWord = /\s*["']([^"']+)["']\s*/g;
598         var rWord = /(\S+)/g;
599         var parseWord = function() {
600             var word = rQuotedWord.exec( textString ) || rWord.exec( textString );
601             if( word ) {
602                 word = word[1];
603                 var lastIndex = Math.max( rQuotedWord.lastIndex, rWord.lastIndex );
604                 rWord.lastIndex = rQuotedWord.lastIndex = lastIndex;
605             }
606             return word;
607         };
608         var wordREs = [];
609         var currentWord;
610         while( (currentWord = parseWord()) ) {
611             // escape regex control chars, and convert glob-like chars to
612             // their regex equivalents
613             currentWord = dojo.regexp.escapeString( currentWord, '*?' )
614                               .replace(/\*/g,'.+')
615                               .replace(/ /g,'\\s+')
616                               .replace(/\?/g,'.');
617             wordREs.push( new RegExp(currentWord,'i') );
618         }
619 
620         // return a function that takes on item and returns true if it
621         // matches the text filter
622         return dojo.hitch(this, function(item) {
623             return dojo.some( this.facets, function(facetName) {
624                        var text = this.getValue( item, facetName );
625                        return dojof.every( wordREs, function(re) { return re.test(text); } );
626             },this);
627         });
628     },
629 
630     getFeatures: function() {
631         return {
632 	    'dojo.data.api.Read': true,
633 	    'dojo.data.api.Identity': true
634 	};
635     },
    /** Does nothing. */
    close: function() {},
637 
    /**
     * Get the label of an item, which is its 'key' attribute.
     * @returns the 'key' value, or undefined if the item has none
     */
    getLabel: function(i) {
        return this.getValue(i,'key',undefined);
    },
    /**
     * Get the names of the attributes that make up an item's label.
     * @returns {Array} always ['key']
     */
    getLabelAttributes: function(i) {
        return ['key'];
    },
644 
645     // dojo.data.api.Identity support
    /**
     * Get the names of the attributes that make up an item's identity.
     * @returns {Array} always ['label']
     */
    getIdentityAttributes: function() {
        return ['label'];
    },
    /**
     * Get the identity of an item, which is its 'label' attribute.
     * @returns the 'label' value, or undefined if the item has none
     */
    getIdentity: function(i) {
        return this.getValue(i, 'label', undefined);
    },
652     fetchItemByIdentity: function(id) {
653         return this.identIndex[id];
654     }
655 });
// Mix dojo.data.util.simpleFetch into the class; it implements
// fetch() on top of the _fetchItems() method defined above.
dojo.require('dojo.data.util.simpleFetch');
dojo.extend( JBrowse.Model.TrackMetaData, dojo.data.util.simpleFetch );