eXpress “1.5”

src/targets.h

00001 
00009 #ifndef TRANSCRIPTS_H
00010 #define TRANSCRIPTS_H
00011 
00012 #include <boost/scoped_ptr.hpp>
00013 #include "boost/shared_ptr.hpp"
00014 #include <boost/thread.hpp>
00015 #include <boost/unordered_map.hpp>
00016 #include <iostream>
00017 #include <fstream>
00018 #include <map>
00019 #include <string>
00020 #include <vector>
00021 #include "main.h"
00022 #include "bundles.h"
00023 #include "sequence.h"
00024 
00025 class LengthDistribution;
00026 class FragHit;
00027 class BiasBoss;
00028 class MismatchTable;
00029 class Librarian;
00030 class HaplotypeHandler;
00031 class TargetTable;
00032 
00040 struct RoundParams {
00045   double mass;
00050   double ambig_mass;
00055   double tot_ambig_mass;
00059   double mass_var;
00064   double var_sum;
00065   /***
00066    * A shared pointer to the target's HaplotypeHandler.  Null if target has no
00067    * haplotype partner.
00068    **/
00069   boost::shared_ptr<HaplotypeHandler> haplotype;
00073   RoundParams() : mass(LOG_0), ambig_mass(LOG_0), tot_ambig_mass(LOG_0),
00074                   mass_var(LOG_0), var_sum(LOG_0) {}
00075 };
00076 
00077 typedef size_t TargID;
00078 
00090 class Target {
00091   friend class HaplotypeHandler;
00092   friend class TargetTable;
00097   const Librarian* _libs;
00101   TargID _id;
00105   std::string _name;
00109   SequenceFwd _seq_f;
00113   SequenceRev _seq_r;
00117   double _alpha;
00122   RoundParams _curr_params;
00127   RoundParams _last_params;
00132   RoundParams* _ret_params;
00137   size_t _uniq_counts;
00143   size_t _tot_counts;
00147   double _init_pseudo_mass;
00151   Bundle* _bundle;
00156   mutable boost::mutex _mutex;
00161   boost::scoped_ptr<std::vector<float> > _start_bias;
00165   boost::scoped_ptr<std::vector<float> > _start_bias_buffer;
00170   boost::scoped_ptr<std::vector<float> > _end_bias;
00174   boost::scoped_ptr<std::vector<float> > _end_bias_buffer;
00179   double _avg_bias;
00183   double _avg_bias_buffer;
00188   double _cached_eff_len;
00192   double _cached_eff_len_buffer;
00198   bool _solvable;
00199 
00200 public:
00217   Target(TargID id, const std::string& name, const std::string& seq,
00218          bool prob_seq, double alpha, const Librarian* libs,
00219          const BiasBoss* known_bias_boss, const LengthDistribution* known_fld);
00224   void lock() const { _mutex.lock(); }
00228   void unlock() const { _mutex.unlock(); }
00233   const std::string& name() const { return _name; }
00238   TargID id() const { return _id; }
00244   const Sequence& seq(bool rev=false) const {
00245     if (rev) {
00246       return _seq_r;
00247     }
00248     return _seq_f;
00249   }
00255   Sequence& seq(bool rev) {
00256     if (rev) {
00257       return _seq_r;
00258     }
00259     return _seq_f;
00260   }
00265   void haplotype(boost::shared_ptr<HaplotypeHandler> hh) {
00266     _curr_params.haplotype = hh;
00267   }
00272   void alpha(double alpha) { _alpha = log(alpha); }
00277   size_t length() const { return _seq_f.length(); }
00283   double rho() const;
00291   double mass(bool with_pseudo=true) const;
00297   double mass_var() const;
00302   double var_sum() const { return _ret_params->var_sum; }
00309   double tot_ambig_mass() const { return _ret_params->tot_ambig_mass; }
00314   void round_reset();
00320   size_t tot_counts() const { return _tot_counts; }
00326   size_t uniq_counts() const { return _uniq_counts; }
00331   Bundle* bundle() const { return _bundle; }
00336   void bundle(Bundle* b) { _bundle = b; }
00346   void add_hit(const FragHit& h, double v, double mass);
00354   void incr_counts(bool uniq, size_t incr_amt = 1) {
00355     if (uniq) {
00356       _solvable = true;
00357     }
00358     _tot_counts += incr_amt;
00359     _uniq_counts += incr_amt * uniq;
00360   }
00371   double sample_likelihood(bool with_pseudo,
00372                            const std::vector<const Target*>* neighbors = NULL)
00373   const;
00379   double align_likelihood(const FragHit& frag) const;
00390   double est_effective_length(const LengthDistribution* fld = NULL,
00391                               bool with_bias=true) const;
00399   double cached_effective_length(bool with_bias=true) const;
00409   void update_target_bias_buffer(const BiasBoss* bias_table = NULL,
00410                                  const LengthDistribution* fld = NULL);
00415   void swap_bias_parameters();
00422   bool solvable() const { return _solvable; }
00428   void solvable(bool s) { _solvable = s; }
00429 };
00430 
00446 class HaplotypeHandler {
00450   std::vector<const Target*> _targets;
00455   FrequencyMatrix<double> _haplo_taus;
00461   std::string _frag_name_buff;
00466   std::vector<double> _align_likelihoods_buff;
00471   std::vector<double> _masses_buff;
00476   bool _committed;
00481   size_t find_target(const Target* targ);
00486   void commit_buffer();
00487  public:
00492   HaplotypeHandler(std::vector<Target*> targets, double alpha);
00496   double get_mass(const Target* targ, bool with_pseudo);
00501   void update_mass(const Target* targ, const std::string& frag_name,
00502                    double align_likelihood, double mass);
00503 };
00504 
00505 
00506 typedef std::vector<Target*> TransMap;
00507 typedef boost::unordered_map<std::string, size_t> TransIndex;
00508 typedef boost::unordered_map<size_t, float> CovarMap;
00509 typedef boost::unordered_map<std::string, double> AlphaMap;
00510 typedef boost::unordered_set<std::vector<Target*> > HaplotypeSet;
00511 
00520 class TargetTable {
00525   const Librarian* _libs;
00529   TransMap _targ_map;
00533   BundleTable _bundle_table;
00539   CovarTable _covar_table;
00544   HaplotypeSet _haplotype_groups;
00549   double _total_fpb;
00553   mutable boost::mutex _fpb_mut;
00554 
00569   void add_targ(const std::string& name, const std::string& seq, bool prob_seqs,
00570                 bool known_aux_params, double alpha,
00571                 const TransIndex& targ_index, const TransIndex& targ_lengths);
00572 
00573 public:
00592   TargetTable(std::string targ_fasta_file, std::string haplotype_file,
00593               bool prob_seqs, bool known_aux_params, double alpha,
00594               const AlphaMap* alpha_map, const Librarian* libs);
00598   ~TargetTable();
00604   Target* get_targ(TargID id);
00609   void round_reset();
00614   size_t size() const { return _targ_map.size(); }
00619   double total_fpb() const;
00625   void update_total_fpb(double incr_amt);
00635   void update_covar(TargID targ1, TargID targ2, double covar) {
00636     _covar_table.increment(targ1, targ2, covar);
00637   }
00645   double get_covar(TargID targ1, TargID targ2) {
00646     return _covar_table.get(targ1, targ2);
00647   }
00652   size_t covar_size() const { return _covar_table.size(); }
00659   Bundle* merge_bundles(Bundle* b1, Bundle* b2);
00664   size_t num_bundles() const { return _bundle_table.size(); }
00668   void masses_to_counts();
00681   void output_results(std::string output_dir, size_t tot_counts,
00682                       bool output_varcov=false, bool output_rdds=false);
00689   void asynch_bias_update(boost::mutex* mutex);
00690   void enable_bundle_threadsafety() { _bundle_table.threadsafe_mode(true); }
00691   void disable_bundle_threadsafety() { _bundle_table.threadsafe_mode(false); }
00695   void collapse_bundles() { _bundle_table.collapse(); }
00696 };
00697 
00698 #endif
 All Classes Functions Variables