eXpress “1.5”
|
00001 00009 #ifndef TRANSCRIPTS_H 00010 #define TRANSCRIPTS_H 00011 00012 #include <boost/scoped_ptr.hpp> 00013 #include "boost/shared_ptr.hpp" 00014 #include <boost/thread.hpp> 00015 #include <boost/unordered_map.hpp> 00016 #include <iostream> 00017 #include <fstream> 00018 #include <map> 00019 #include <string> 00020 #include <vector> 00021 #include "main.h" 00022 #include "bundles.h" 00023 #include "sequence.h" 00024 00025 class LengthDistribution; 00026 class FragHit; 00027 class BiasBoss; 00028 class MismatchTable; 00029 class Librarian; 00030 class HaplotypeHandler; 00031 class TargetTable; 00032 00040 struct RoundParams { 00045 double mass; 00050 double ambig_mass; 00055 double tot_ambig_mass; 00059 double mass_var; 00064 double var_sum; 00065 /*** 00066 * A shared pointer to the target's HaplotypeHandler. Null if target has no 00067 * haplotype partner. 00068 **/ 00069 boost::shared_ptr<HaplotypeHandler> haplotype; 00073 RoundParams() : mass(LOG_0), ambig_mass(LOG_0), tot_ambig_mass(LOG_0), 00074 mass_var(LOG_0), var_sum(LOG_0) {} 00075 }; 00076 00077 typedef size_t TargID; 00078 00090 class Target { 00091 friend class HaplotypeHandler; 00092 friend class TargetTable; 00097 const Librarian* _libs; 00101 TargID _id; 00105 std::string _name; 00109 SequenceFwd _seq_f; 00113 SequenceRev _seq_r; 00117 double _alpha; 00122 RoundParams _curr_params; 00127 RoundParams _last_params; 00132 RoundParams* _ret_params; 00137 size_t _uniq_counts; 00143 size_t _tot_counts; 00147 double _init_pseudo_mass; 00151 Bundle* _bundle; 00156 mutable boost::mutex _mutex; 00161 boost::scoped_ptr<std::vector<float> > _start_bias; 00165 boost::scoped_ptr<std::vector<float> > _start_bias_buffer; 00170 boost::scoped_ptr<std::vector<float> > _end_bias; 00174 boost::scoped_ptr<std::vector<float> > _end_bias_buffer; 00179 double _avg_bias; 00183 double _avg_bias_buffer; 00188 double _cached_eff_len; 00192 double _cached_eff_len_buffer; 00198 bool _solvable; 00199 00200 public: 00217 Target(TargID id, const std::string& name, const std::string& seq, 00218 bool prob_seq, double alpha, const Librarian* libs, 00219 const BiasBoss* known_bias_boss, const LengthDistribution* known_fld); 00224 void lock() const { _mutex.lock(); } 00228 void unlock() const { _mutex.unlock(); } 00233 const std::string& name() const { return _name; } 00238 TargID id() const { return _id; } 00244 const Sequence& seq(bool rev=false) const { 00245 if (rev) { 00246 return _seq_r; 00247 } 00248 return _seq_f; 00249 } 00255 Sequence& seq(bool rev) { 00256 if (rev) { 00257 return _seq_r; 00258 } 00259 return _seq_f; 00260 } 00265 void haplotype(boost::shared_ptr<HaplotypeHandler> hh) { 00266 _curr_params.haplotype = hh; 00267 } 00272 void alpha(double alpha) { _alpha = log(alpha); } 00277 size_t length() const { return _seq_f.length(); } 00283 double rho() const; 00291 double mass(bool with_pseudo=true) const; 00297 double mass_var() const; 00302 double var_sum() const { return _ret_params->var_sum; } 00309 double tot_ambig_mass() const { return _ret_params->tot_ambig_mass; } 00314 void round_reset(); 00320 size_t tot_counts() const { return _tot_counts; } 00326 size_t uniq_counts() const { return _uniq_counts; } 00331 Bundle* bundle() const { return _bundle; } 00336 void bundle(Bundle* b) { _bundle = b; } 00346 void add_hit(const FragHit& h, double v, double mass); 00354 void incr_counts(bool uniq, size_t incr_amt = 1) { 00355 if (uniq) { 00356 _solvable = true; 00357 } 00358 _tot_counts += incr_amt; 00359 _uniq_counts += incr_amt * uniq; 00360 } 00371 double sample_likelihood(bool with_pseudo, 00372 const std::vector<const Target*>* neighbors = NULL) 00373 const; 00379 double align_likelihood(const FragHit& frag) const; 00390 double est_effective_length(const LengthDistribution* fld = NULL, 00391 bool with_bias=true) const; 00399 double cached_effective_length(bool with_bias=true) const; 00409 void update_target_bias_buffer(const BiasBoss* bias_table = NULL, 00410 const LengthDistribution* fld = NULL); 00415 void swap_bias_parameters(); 00422 bool solvable() const { return _solvable; } 00428 void solvable(bool s) { _solvable = s; } 00429 }; 00430 00446 class HaplotypeHandler { 00450 std::vector<const Target*> _targets; 00455 FrequencyMatrix<double> _haplo_taus; 00461 std::string _frag_name_buff; 00466 std::vector<double> _align_likelihoods_buff; 00471 std::vector<double> _masses_buff; 00476 bool _committed; 00481 size_t find_target(const Target* targ); 00486 void commit_buffer(); 00487 public: 00492 HaplotypeHandler(std::vector<Target*> targets, double alpha); 00496 double get_mass(const Target* targ, bool with_pseudo); 00501 void update_mass(const Target* targ, const std::string& frag_name, 00502 double align_likelihood, double mass); 00503 }; 00504 00505 00506 typedef std::vector<Target*> TransMap; 00507 typedef boost::unordered_map<std::string, size_t> TransIndex; 00508 typedef boost::unordered_map<size_t, float> CovarMap; 00509 typedef boost::unordered_map<std::string, double> AlphaMap; 00510 typedef boost::unordered_set<std::vector<Target*> > HaplotypeSet; 00511 00520 class TargetTable { 00525 const Librarian* _libs; 00529 TransMap _targ_map; 00533 BundleTable _bundle_table; 00539 CovarTable _covar_table; 00544 HaplotypeSet _haplotype_groups; 00549 double _total_fpb; 00553 mutable boost::mutex _fpb_mut; 00554 00569 void add_targ(const std::string& name, const std::string& seq, bool prob_seqs, 00570 bool known_aux_params, double alpha, 00571 const TransIndex& targ_index, const TransIndex& targ_lengths); 00572 00573 public: 00592 TargetTable(std::string targ_fasta_file, std::string haplotype_file, 00593 bool prob_seqs, bool known_aux_params, double alpha, 00594 const AlphaMap* alpha_map, const Librarian* libs); 00598 ~TargetTable(); 00604 Target* get_targ(TargID id); 00609 void round_reset(); 00614 size_t size() const { return _targ_map.size(); } 00619 double total_fpb() const; 00625 void update_total_fpb(double incr_amt); 00635 void update_covar(TargID targ1, TargID targ2, double covar) { 00636 _covar_table.increment(targ1, targ2, covar); 00637 } 00645 double get_covar(TargID targ1, TargID targ2) { 00646 return _covar_table.get(targ1, targ2); 00647 } 00652 size_t covar_size() const { return _covar_table.size(); } 00659 Bundle* merge_bundles(Bundle* b1, Bundle* b2); 00664 size_t num_bundles() const { return _bundle_table.size(); } 00668 void masses_to_counts(); 00681 void output_results(std::string output_dir, size_t tot_counts, 00682 bool output_varcov=false, bool output_rdds=false); 00689 void asynch_bias_update(boost::mutex* mutex); 00690 void enable_bundle_threadsafety() { _bundle_table.threadsafe_mode(true); } 00691 void disable_bundle_threadsafety() { _bundle_table.threadsafe_mode(false); } 00695 void collapse_bundles() { _bundle_table.collapse(); } 00696 }; 00697 00698 #endif