From 75989979ed4b32e3c73d8fa687b9538b661b8862 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 17 Mar 2020 17:46:41 +0100 Subject: [PATCH 01/67] commit with speed up integration / partially modernised caching --- AmpGen/AmplitudeRules.h | 13 +- AmpGen/BinDT.h | 24 ++-- AmpGen/Chi2Estimator.h | 9 +- AmpGen/CoherentSum.h | 8 +- AmpGen/CompiledExpression.h | 15 ++- AmpGen/CompiledExpressionBase.h | 4 +- AmpGen/Event.h | 1 + AmpGen/EventList.h | 41 ++++--- AmpGen/FitFraction.h | 5 +- AmpGen/Integrator.h | 17 ++- AmpGen/Integrator2.h | 31 +++-- AmpGen/LiteSpan.h | 74 ++++++++++++ AmpGen/NamedParameter.h | 2 +- AmpGen/Particle.h | 1 + AmpGen/PolarisedSum.h | 31 ++--- AmpGen/Projection.h | 29 +++-- AmpGen/SumPDF.h | 23 +++- AmpGen/Utilities.h | 9 +- apps/Fitter.cpp | 7 +- apps/Generator.cpp | 4 +- examples/FitterWithPolarisation.cpp | 32 ++--- examples/SignalOnlyFitter.cpp | 39 +++--- examples/SimFit.cpp | 4 +- src/AmplitudeRules.cpp | 2 +- src/BinDT.cpp | 20 +++- src/Chi2Estimator.cpp | 17 +-- src/CoherentSum.cpp | 55 ++++++++- src/CompiledExpressionBase.cpp | 2 +- src/Event.cpp | 2 +- src/EventList.cpp | 50 +++----- src/FitFraction.cpp | 2 - src/Integrator2.cpp | 13 +- src/PolarisedSum.cpp | 178 +++++++++++++++++++--------- src/Projection.cpp | 35 +++++- 34 files changed, 537 insertions(+), 262 deletions(-) create mode 100644 AmpGen/LiteSpan.h diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index e3a18d0f6c1..2dbb64ed0db 100644 --- a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -73,7 +73,7 @@ namespace AmpGen AmplitudeRules( const MinuitParameterSet& mps ); std::vector rulesForDecay(const std::string& head, const std::string& prefix=""); bool hasDecay( const std::string& head ); - std::map> rules(); + const std::map>& rules() const; std::vector> getMatchingRules( const EventType& type, const std::string& prefix="" ); std::vector processesThatProduce(const Particle& particle) const; @@ -113,8 +113,7 @@ namespace AmpGen size_t addressData = 
{999}; }; - template - std::vector processIndex(const std::vector>& tm, const std::string& label) + template std::vector processIndex(const std::vector>& tm, const std::string& label) { std::vector indices; for ( size_t i = 0; i < tm.size(); ++i ) { @@ -123,8 +122,7 @@ namespace AmpGen return indices; } - template - size_t findIndex(const std::vector>& tm, const std::string& decayDescriptor) + template size_t findIndex(const std::vector>& tm, const std::string& decayDescriptor) { for ( size_t i = 0; i < tm.size(); ++i ) { if ( tm[i].decayDescriptor() == decayDescriptor ) return i; @@ -163,12 +161,12 @@ namespace AmpGen amp(decayTree.getExpression(debugThis ? &db : nullptr ), decayTree.decayDescriptor(), evtFormat, db, &mps ) { amp.use_rto();} const std::vector operator()(const Event& event) const { - std::vector rt; + std::vector rt(4); amp(rt.data(), amp.externBuffer().data(), event.address() ); return rt; } const std::vector operator()(const Event& event, const size_t& cacheOffset) const { - std::vector rt; + std::vector rt(4); amp(rt.data(), amp.externBuffer().data(), event.address() + cacheOffset); return rt; } @@ -180,6 +178,7 @@ namespace AmpGen DebugSymbols db; CompiledExpression amp; size_t addressData = {999}; + bool workToDo = {false}; }; } // namespace AmpGen diff --git a/AmpGen/BinDT.h b/AmpGen/BinDT.h index 58795770a6a..5a29f25aefd 100644 --- a/AmpGen/BinDT.h +++ b/AmpGen/BinDT.h @@ -69,7 +69,9 @@ namespace AmpGen void serialize( std::ostream& stream ) const override; void setChildren( std::shared_ptr l, std::shared_ptr r ); void visit( const std::function& visit_function ) override; + friend class BinDT; + private : std::shared_ptr m_left; std::shared_ptr m_right; @@ -87,22 +89,12 @@ namespace AmpGen { m_top = makeNodes( addr ); } - template - BinDT( const EventList& events, const ARGS&... args ) : BinDT( ArgumentPack( args... ) ) + template BinDT( const EventList& events, const ARGS&... args ) : BinDT(ArgumentPack( args... 
) ) { - std::vector data( m_dim * events.size() ); - std::vector addresses( events.size() ); - size_t counter = 0; - for ( auto& evt : events ) { - auto val = m_functors( evt ); - for ( unsigned int i = 0; i < m_dim; ++i ) data[m_dim * counter + i] = val[i]; - addresses[counter] = &( data[m_dim * counter] ); - counter++; - } - INFO( "Making nodes" ); - m_top = makeNodes( addresses ); + m_top = makeNodes( events ); } explicit BinDT( const ArgumentPack& args ); + BinDT( const EventList& events, const ArgumentPack& args ); BinDT() = default; std::shared_ptr top() { return m_top; } @@ -122,19 +114,19 @@ namespace AmpGen std::function( const Event& )> makeDefaultFunctors(); void refreshQueue(const std::vector&, std::queue&, const unsigned&); + std::shared_ptr makeNodes(const EventList&); std::shared_ptr makeNodes(const std::vector&, std::queue, const unsigned&); std::shared_ptr makeNodes(const std::vector&); std::shared_ptr makeNodes(const std::vector&, const std::vector&); std::shared_ptr makeNodes(std::vector, std::vector, std::queue, const unsigned&); void setFunctor(const std::function( const Event& )>& functors) { m_functors = functors; } - private: std::shared_ptr m_top = {nullptr}; unsigned m_dim = {0}; unsigned m_minEvents = {0}; unsigned m_maxDepth = {0}; - std::vector m_queueOrdering = {}; - std::vector> m_endNodes = {}; + std::vector m_queueOrdering = {}; + std::vector> m_endNodes = {}; std::function(const Event&)> m_functors = {}; double getBestPost(const std::vector& source, const std::vector& target, int index, bool verbose = false ); }; diff --git a/AmpGen/Chi2Estimator.h b/AmpGen/Chi2Estimator.h index 9102cbdb7dc..4f59291ca85 100644 --- a/AmpGen/Chi2Estimator.h +++ b/AmpGen/Chi2Estimator.h @@ -16,11 +16,14 @@ namespace AmpGen class Chi2Estimator { public: + template Chi2Estimator( const EventList& dataEvents, const EventList& mcEvents, - const std::function& fcn, const unsigned int& minEvents = 10 ); + const std::function& fcn, + const argument_types&... 
args ) : m_binning(dataEvents, ArgumentPack(args...) ) + { + doChi2(dataEvents, mcEvents, fcn); + } - Chi2Estimator( const EventList& dataEvents, const EventList& mcEvents, - const std::function& fcn, const std::string& filename ); double chi2() const; double nBins() const; void writeBinningToFile( const std::string& filename ); diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 0ceff448770..514c5d0d46f 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -86,13 +86,15 @@ namespace AmpGen std::map> getGroupedAmplitudes(); Bilinears norms() const { return m_normalisations ; } - + + std::function evaluator(const EventList* = nullptr) const; + KeyedView componentEvaluator(const EventList* = nullptr) const; + protected: - typedef Integrator<10> integrator; std::vector> m_matrixElements; ///< Vector of (expanded) matrix elements Bilinears m_normalisations; ///< Normalisation integrals AmplitudeRules m_rules; ///< Ruleset for the selected transition. - integrator m_integrator; ///< Integral dispatch tool (with default unroll = 10) + Integrator2 m_integrator; ///< Integral dispatch tool (with default unroll = 10) TransitionMatrix m_total; ///< Total Matrix Element EventList* m_events = {nullptr}; ///< Data events to evaluate PDF on EventType m_evtType; ///< Final state for this amplitude diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index 8fe90dce51a..d3289d6c4a6 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -23,7 +23,9 @@ namespace AmpGen (i.e. 
returning array types) */ namespace detail { template struct size_of { unsigned operator()(){ return sizeof(T); } }; - template <> struct size_of { unsigned operator()(){ return 0; } }; + template <> struct size_of { unsigned operator()(){ + WARNING("Asking for the size_of the return buffer of an RTO expression"); + return 0; } }; } template @@ -37,6 +39,7 @@ namespace AmpGen bool m_hasExternalsChanged = {false}; public: + typedef RETURN_TYPE return_type; CompiledExpression( const Expression& expression, const std::string& name, @@ -54,7 +57,10 @@ namespace AmpGen std::string returnTypename() const override { return typeof(); } std::string fcnSignature() const override { - return CompiledExpressionBase::fcnSignature(typelist(), m_rto); + return CompiledExpressionBase::fcnSignature(typelist(), use_rto()); + } + bool use_rto() const override { + return std::is_same::value; } std::string args() const override { @@ -145,7 +151,9 @@ namespace AmpGen if ( !m_fdb.isLinked() ) { FATAL( "Function" << name() << " debugging symbols not linked" ); } - auto debug_results = m_fdb( &( m_externals[0] ), event ); + std::vector> debug_results; + if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, &( m_externals[0] ), event ); + else debug_results = m_fdb( &(m_externals[0]), event); for( auto& debug_result : debug_results ){ auto val = debug_result.second; auto label = debug_result.first; @@ -192,7 +200,6 @@ namespace AmpGen make_rto_expression( const Expression& expression, const std::string& name , const bool& verbose=false) { CompiledExpression rt(expression,name); - rt.use_rto(); rt.compile(); rt.prepare(); return rt; diff --git a/AmpGen/CompiledExpressionBase.h b/AmpGen/CompiledExpressionBase.h index 6643d47d97f..d1e26ea9a7a 100644 --- a/AmpGen/CompiledExpressionBase.h +++ b/AmpGen/CompiledExpressionBase.h @@ -57,7 +57,8 @@ namespace AmpGen virtual ~CompiledExpressionBase(); virtual unsigned returnTypeSize() const = 0; static std::string fcnSignature(const 
std::vector& argList, bool rto); - void use_rto(){ m_rto = true; } + virtual bool use_rto() const = 0; + Expression expression() const { return m_obj; } protected: Expression m_obj; std::string m_name; @@ -68,7 +69,6 @@ namespace AmpGen std::vector> m_debugSubexpressions; std::vector> m_cacheTransfers; std::shared_ptr m_resolver; - bool m_rto = {false}; private: void addDebug( std::ostream& stream ) const; void addDependentExpressions( std::ostream& stream, size_t& sizeOfStream ) const; diff --git a/AmpGen/Event.h b/AmpGen/Event.h index 53e2aed3d18..bf168f02cd3 100644 --- a/AmpGen/Event.h +++ b/AmpGen/Event.h @@ -51,6 +51,7 @@ namespace AmpGen { const complex_t& getCache(const unsigned& pos) const { return m_cache[pos]; } const complex_t* getCachePtr(const unsigned& pos=0) const { return &(m_cache[0]) + pos; } + complex_t* getCachePtr(const unsigned& pos=0) { return &(m_cache[0]) + pos; } void setWeight( const real_t& weight ){ m_weight = weight ; } void setGenPdf( const real_t& genPdf ){ m_genPdf = genPdf ; } diff --git a/AmpGen/EventList.h b/AmpGen/EventList.h index 01e739042ba..131af735ea0 100644 --- a/AmpGen/EventList.h +++ b/AmpGen/EventList.h @@ -25,9 +25,7 @@ namespace AmpGen { - - DECLARE_ARGUMENT(Bins, size_t); - + namespace PlotOptions { DECLARE_ARGUMENT(Bins, size_t); } class CompiledExpressionBase; class EventList { @@ -36,7 +34,6 @@ namespace AmpGen EventType m_eventType = {}; std::map m_pdfIndex = {}; std::map m_extensions = {}; - double m_norm = {0}; size_t m_lastCachePosition = {0}; public: typedef Event value_type; @@ -74,7 +71,6 @@ namespace AmpGen const Event& at( const size_t& pos ) const { return m_data[pos]; } size_t size() const { return m_data.size(); } double integral() const; - double norm(); void reserve( const size_t& size ) { m_data.reserve( size ); } void push_back( const Event& evt ) { m_data.push_back( evt ); } @@ -86,7 +82,7 @@ namespace AmpGen void clear(); void erase( const std::vector::iterator& begin, const 
std::vector::iterator& end ); - TTree* tree( const std::string& name, const std::vector& extraBranches = {} ); + TTree* tree( const std::string& name, const std::vector& extraBranches = {} ) const; size_t getCacheIndex( const CompiledExpressionBase& PDF, bool& status ) const; size_t getCacheIndex( const CompiledExpressionBase& PDF ) const; @@ -113,15 +109,25 @@ namespace AmpGen } } - template - void updateCache( const FCN& fcn, const size_t& index ) + template void updateCache( const FCN& fcn, const size_t& index ) { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( unsigned int i = 0; i < size(); ++i ) { - ( *this )[i].setCache(fcn(getEvent(i)), index); + if constexpr( std::is_same< typename FCN::return_type, void >::value ) + { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t i = 0; i < size(); ++i ) { + fcn( m_data[i].getCachePtr(index), fcn.externBuffer().data(), m_data[i].address() ); + } } + else { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t i = 0; i < size(); ++i ) { + m_data[i].setCache(fcn(m_data[i].address()), index); + } + } } void reserveCache(const size_t& index); void resizeCache(const size_t& newCacheSize ); @@ -132,7 +138,7 @@ namespace AmpGen template std::vector makeDefaultProjections( const ARGS&... args ) { auto argPack = ArgumentPack( args... 
); - size_t nBins = argPack.getArg(100); + size_t nBins = argPack.getArg(100); auto proj = eventType().defaultProjections(nBins); return makeProjections( proj , argPack ); } @@ -174,10 +180,6 @@ namespace AmpGen return std::count_if( std::begin(*this), std::end(*this), fcn ); } }; - DECLARE_ARGUMENT(LineColor, int); - DECLARE_ARGUMENT(DrawStyle, std::string); - DECLARE_ARGUMENT(Selection, std::function); - DECLARE_ARGUMENT(WeightFunction, std::function); DECLARE_ARGUMENT(Branches, std::vector); DECLARE_ARGUMENT(EntryList, std::vector); DECLARE_ARGUMENT(GetGenPdf, bool); @@ -185,7 +187,8 @@ namespace AmpGen DECLARE_ARGUMENT(Filter, std::string); DECLARE_ARGUMENT(WeightBranch, std::string); DECLARE_ARGUMENT(ApplySym, bool); - DECLARE_ARGUMENT(Prefix, std::string); + DECLARE_ARGUMENT(WeightFunction, std::function); + } // namespace AmpGen #endif diff --git a/AmpGen/FitFraction.h b/AmpGen/FitFraction.h index cabdb99767c..88efd3af8c5 100644 --- a/AmpGen/FitFraction.h +++ b/AmpGen/FitFraction.h @@ -7,6 +7,7 @@ #include #include "AmpGen/Types.h" #include "AmpGen/ErrorPropagator.h" +#include "AmpGen/AmplitudeRules.h" namespace AmpGen { @@ -24,7 +25,6 @@ namespace AmpGen double val() const; double err() const; std::string name() const; - std::shared_ptr particle() const; private: std::string m_name; @@ -60,7 +60,8 @@ namespace AmpGen pdf(pdf), normSet(normSet), recalculateIntegrals(recalculateIntegrals) {} - std::vector operator()(){ + std::vector operator()() + { if ( recalculateIntegrals ) pdf->prepare(); else pdf->transferParameters(); std::vector rv; diff --git a/AmpGen/Integrator.h b/AmpGen/Integrator.h index 7f16be8d0f8..5312012b13b 100644 --- a/AmpGen/Integrator.h +++ b/AmpGen/Integrator.h @@ -51,7 +51,7 @@ namespace AmpGen Integral(const size_t& i, const size_t& j, TransferFCN t) : i(i), j(j), transfer(t) {} }; - +/* template class Integrator { @@ -87,10 +87,11 @@ namespace AmpGen public: Integrator( EventList* events = nullptr ) : m_events( events ){} - double 
sampleNorm() { return m_events->norm(); } + double norm() { return m_events->norm(); } bool isReady() const { return m_events != nullptr; } EventList& events() { return *m_events; } const EventList& events() const { return *m_events; } + void reserveCache(const unsigned& size){ m_events->reserveCache(size); } template void addIntegral( const T1& f1, const T2& f2, const Integral::TransferFCN& tf ) { @@ -136,7 +137,7 @@ namespace AmpGen return m_events->getCacheIndex(expression); } }; - + */ template class BinnedIntegrator { @@ -149,6 +150,7 @@ namespace AmpGen std::vector m_slice = {0}; std::array, NROLL> m_integrals; EventList* m_events = {nullptr}; + real_t m_norm = {0}; void calculate() { integrateBlock(); @@ -187,17 +189,20 @@ namespace AmpGen } } } - double nv = m_events->norm(); for ( size_t thisIntegral = 0; thisIntegral < m_counter; ++thisIntegral ) { std::vector> tmpBins( NBINS ); size_t offset = thisIntegral * NBINS; for ( size_t nBin = 0; nBin < NBINS; ++nBin ) - tmpBins[nBin] = std::complex( re[offset + nBin], im[offset + nBin] ) / nv; + tmpBins[nBin] = std::complex( re[offset + nBin], im[offset + nBin] ) / m_norm; m_integrals[thisIntegral].transfer( tmpBins ); } } public: - BinnedIntegrator( EventList* events = nullptr ) : m_events( events ) {} + BinnedIntegrator( EventList* events = nullptr ) : m_events( events ) + { + if( m_events == nullptr ) return; + for ( const auto& event : *m_events ) m_norm += event.weight() / event.genPdf(); + } void setView( const std::function& binNumber ) { if ( m_slice.size() == 0 ) { diff --git a/AmpGen/Integrator2.h b/AmpGen/Integrator2.h index d8f7742c37d..f13efd1ad6f 100644 --- a/AmpGen/Integrator2.h +++ b/AmpGen/Integrator2.h @@ -26,7 +26,9 @@ namespace AmpGen { void flush(); void setBuffer( complex_t* pos, const complex_t& value, const size_t& size ); void setBuffer( complex_t* pos, const std::vector& value, const size_t& size); - + void reserveCache(const unsigned int& size){}; + complex_t get(const unsigned& i, 
const unsigned& evt) const { return m_buffer[i][evt]; } + double norm() const { return m_norm; } template size_t getCacheIndex(const T& expression) const { return m_index.find(expression.name())->second; @@ -42,17 +44,28 @@ namespace AmpGen { index = m_buffer.size(); m_index[ expression.name() ] = index; m_buffer.resize(index+vsize); - for(size_t j = 0 ; j != vsize; ++j ) - m_buffer[index+j].resize( m_events->size() ); + for(size_t j = 0 ; j != vsize; ++j ) m_buffer[index+j].resize( m_events->size() ); } else index = it->second; - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t i = 0; i < m_events->size(); ++i ) + if constexpr( std::is_same< typename T::return_type, void >::value ) { - auto v = expression(m_events->at(i).address()); - setBuffer( &(m_buffer[index][i]), v, vsize ); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t i = 0; i < m_events->size(); ++i ) + { + std::vector buf(vsize); + expression(&buf[0], expression.externBuffer().data(), m_events->at(i).address() ); + for( unsigned j = 0; j != vsize; ++j ) m_buffer[index+j][i] = buf[j]; + //expression(&(m_buffer[index][i]), expression.externBuffer().data(), m_events->at(i).address() ); + } + } + else { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t i = 0; i < m_events->size(); ++i ) + setBuffer( &(m_buffer[index][i]), expression(m_events->at(i).address()), vsize ); } } diff --git a/AmpGen/LiteSpan.h b/AmpGen/LiteSpan.h new file mode 100644 index 00000000000..ac1a4548d60 --- /dev/null +++ b/AmpGen/LiteSpan.h @@ -0,0 +1,74 @@ +#ifndef AMPGEN_LITESPAN_H +#define AMPGEN_LITESPAN_H + +#include "AmpGen/MsgService.h" +#include "AmpGen/Utilities.h" + +namespace AmpGen { + // replace with std::span when c++20 becomes widespread + template class LiteSpan + { + public: + LiteSpan( const type* data, unsigned size =1) : m_data(data), m_size(size) {} + const type& operator[](const unsigned index) const { return m_data[index]; } + operator type() const { return 
m_data[0] ; } + unsigned size() const { return m_size; } + private: + const type* m_data = {nullptr}; + unsigned m_size = {0}; + }; + /// functor-like object that documents what is stored in each slot; + /// This mutated into a cache-like object as was I was writing it, so + /// should rename it to something else ... + template class KeyedView + { + typedef typename container_type::value_type value_type; + public: + KeyedView( const container_type& container, const unsigned width ) : + m_begin( &(container[0]) ), + m_cache( width * container.size(),0 ), + m_width(width), + m_size(container.size()), + m_keys( width, "") {} + unsigned index(const value_type& it) const { + if( &it - m_begin < 0 || &it -m_begin >= m_size ) ERROR("Invalid address: " << &it - m_begin ); + return &it -m_begin; } + const std::string& key(const unsigned int& column ) const { return m_keys[column] ; } + const return_type* operator()( const value_type& it ) const { + if( m_width *index(it) >= m_cache.size()) ERROR("Out-of-bounds access : " << index(it) ); + return &m_cache[m_width * index(it)]; } + const cache_type& operator()(const value_type& it, const unsigned entry ) const { + if( m_width * index(it) + entry > m_cache.size() ) ERROR("Invalid cache element: " << m_width * index(it) + entry > m_cache.size() ); + return m_cache[m_width * index(it) + entry] ; } + unsigned width() const { return m_width ; } + + template void set(const functor_type& functor, + unsigned int column, + const std::string& key = "") + { + for( unsigned i = 0 ; i != m_size; ++i ) m_cache[ i*m_width + column] = functor(m_begin[i]); + if( key != "" ) m_keys[column] = key; + } + cache_type& operator()(const value_type& it, const unsigned entry ) { + auto pos = m_width * index(it) + entry; + if( pos >= m_cache.size() ) ERROR("Out-of-bounds access: " << pos << " " << index(it) + entry); + return m_cache[pos] ; } + void setKey(const unsigned& column, const std::string& key ) { m_keys[column] = key ; } + void print() + { + 
INFO( "width = " << m_width << ", size = " << m_size << " begin = " << m_begin << " keys = " << vectorToString( m_keys , " ") << " cache size = " << m_cache.size() ); + for( unsigned int i = 0 ; i != m_width ; ++i ) std::cout << m_cache[i] << " "; + } + private: + const value_type* m_begin; + std::vector m_cache; + unsigned m_width; + unsigned m_size; + std::vector m_keys; + }; + +} + +#endif diff --git a/AmpGen/NamedParameter.h b/AmpGen/NamedParameter.h index 374ff4b3fb5..461b33c4bd3 100644 --- a/AmpGen/NamedParameter.h +++ b/AmpGen/NamedParameter.h @@ -74,7 +74,7 @@ namespace AmpGen setFromOptionsParser(); if ( OptionsParser::printHelp() ) help( defVec.size() > 0 ? defVec[0] : T() ); } - +// ~NamedParameter(){ INFO("Deconstructing: " << m_name ); } void help(const T& def){ std::string type = typeof(); if( type == "std::__cxx11::basic_string, std::allocator >" ) type = "string"; diff --git a/AmpGen/Particle.h b/AmpGen/Particle.h index 15577818f98..9dd3609d1b6 100644 --- a/AmpGen/Particle.h +++ b/AmpGen/Particle.h @@ -276,6 +276,7 @@ namespace AmpGen /// matches Check the matching between two decay chains, according to the MatchState enum. unsigned int matches( const Particle& other ) const; std::string makeUniqueString(); ///< Generate the decay descriptor for this decay. 
+ private: std::string m_name = {""}; ///< Name of the particle const ParticleProperties* m_props = {nullptr}; ///< Particle Properties from the PDG diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index a6c002ac73b..0cc662c728d 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -35,35 +35,36 @@ namespace AmpGen class PolarisedSum { public: - typedef Integrator<10> integrator; + typedef Integrator2 integrator; PolarisedSum() = default; - PolarisedSum(const EventType&, AmpGen::MinuitParameterSet&, const std::vector& = {}); + PolarisedSum(const EventType&, MinuitParameterSet&, const std::vector& = {}); void prepare(); - void setEvents(AmpGen::EventList&); - void setMC(AmpGen::EventList&); + void setEvents(EventList&); + void setMC(EventList&); void reset(const bool& = false); - void debug(const AmpGen::Event&); + void debug(const Event&); void debug_norm(); void setWeight(MinuitProxy); double getWeight() const; - void calculateNorms(const std::vector&); + void calculateNorms(); void generateSourceCode(const std::string&, const double& = 1, bool = false); void build_probunnormalised(); Expression probExpression(const Tensor&, const std::vector&, DebugSymbols* = nullptr) const; size_t size() const; real_t norm() const; complex_t norm(const size_t&, const size_t&, integrator* = nullptr); - inline real_t operator()(const AmpGen::Event& evt) const { return m_weight * prob_unnormalised(evt) / m_norm; } - real_t prob_unnormalised(const AmpGen::Event&) const; - real_t prob(const AmpGen::Event&) const; - real_t getValNoCache(const AmpGen::Event&) ; + inline real_t operator()(const Event& evt) const { return m_weight * prob_unnormalised(evt) / m_norm; } + real_t prob_unnormalised(const Event&) const; + real_t prob(const Event&) const; + real_t getValNoCache(const Event&) ; std::vector fitFractions(const LinearErrorPropagator&); - std::vector>> matrixElements() const; + std::vector> matrixElements() const; void transferParameters(); Tensor 
transitionMatrix(); - TransitionMatrix> operator[](const size_t& i) const { return m_matrixElements[i] ; } - + const TransitionMatrix& operator[](const size_t& i) const { return m_matrixElements[i] ; } + std::function evaluator(const EventList* = nullptr) const; + KeyedView componentEvaluator(const EventList* = nullptr) const; private: size_t m_nCalls = {0}; real_t m_norm = {1}; @@ -81,9 +82,9 @@ namespace AmpGen std::vector m_integIndex; AmplitudeRules m_rules; std::pair m_dim; - std::vector>> m_matrixElements; + std::vector> m_matrixElements; CompiledExpression m_probExpression; - + std::vector> indexProduct(const std::vector>&, const std::vector&) const; std::vector polarisations(const std::string&) const ; }; diff --git a/AmpGen/Projection.h b/AmpGen/Projection.h index 6f14beeaee0..53730e4de3e 100644 --- a/AmpGen/Projection.h +++ b/AmpGen/Projection.h @@ -8,10 +8,10 @@ #include "TH1D.h" #include "TH2D.h" -#include "AmpGen/ArgumentPack.h" +#include "THStack.h" -class TH1D; -class TH2D; +#include "AmpGen/ArgumentPack.h" +#include "AmpGen/LiteSpan.h" namespace AmpGen { @@ -31,18 +31,26 @@ namespace AmpGen const std::string& xAxisTitle, const size_t& nBins, const double& min, const double& max, const std::string& units = "" ); const std::string name() const; - template TH1D* operator()(const EventList& evt, const ARGS... args) const { return projInternal(evt, ArgumentPack(args...) ); } - + template TH1D* operator()(const EventList& evt, const ARGS... args) const + { + return projInternal(evt, ArgumentPack(args...) ); + } + template std::tuple, THStack*> operator()(const EventList& evt, const KeyedView& weightFunction, const ARGS... args ) const + { + return projInternal(evt, weightFunction, ArgumentPack(args...) 
); + } + double operator()( const Event& evt ) const; TH1D* plot(const std::string& prefix="") const; - std::function binFunctor() const; + std::function binFunctor() const; void setRange( const double& min, const double& max ){ m_min = (min); m_max = (max) ; } friend class Projection2D; private: TH1D* projInternal(const EventList&, const ArgumentPack&) const; + std::tuple, THStack*> projInternal(const EventList&, const KeyedView&, const ArgumentPack&) const; std::function m_func; std::string m_name = {""}; std::string m_xAxisTitle = {""}; @@ -66,7 +74,14 @@ namespace AmpGen std::pair operator()( const Event& evt ) const; }; - + namespace PlotOptions { + DECLARE_ARGUMENT(LineColor , int); + DECLARE_ARGUMENT(DrawStyle , std::string); + DECLARE_ARGUMENT(Selection , std::function); + DECLARE_ARGUMENT(Prefix , std::string); + DECLARE_ARGUMENT(Norm , double); + DECLARE_ARGUMENT(AddTo , THStack*); + } } // namespace AmpGen #endif diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index dbce73fcdff..3388560266d 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -5,7 +5,7 @@ #include "AmpGen/MetaUtils.h" #include "AmpGen/MsgService.h" #include "AmpGen/ProfileClock.h" - +#include "AmpGen/LiteSpan.h" #include namespace AmpGen @@ -65,7 +65,7 @@ namespace AmpGen double operator()( const eventValueType& evt ) { double prob = 0; - for_each( this->m_pdfs, [&prob, &evt]( auto& f ) { prob += f(evt); } ); + for_each( this->m_pdfs, [&prob, &evt]( const auto& f ) { prob += f(evt); } ); return prob; } @@ -81,6 +81,25 @@ namespace AmpGen /// Returns the tuple of PDFs used by this function std::tuple pdfs() const { return m_pdfs; } + + std::function evaluator(const eventListType* events) const + { + std::vector values( events->size() ); + for_each( this->m_pdfs, [events, &values](const auto& pdf ) mutable { + auto eval = pdf.evaluator(events); + for( unsigned i = 0; i != events->size(); ++i ) values[i] += eval( events->at(i) ); + } ); + return arrayToFunctor(values, events); + } + 
KeyedView componentEvaluator(const eventListType* events) const + { + KeyedView view(*events, nPDFs() ); + unsigned pdf_counter = 0; + for_each( this->m_pdfs, [&events, &view, &pdf_counter](const auto& pdf) mutable { + view.set(pdf.evaluator(events), pdf_counter++, typeof(pdf) ); + } ); + return view; + } }; /** @function make_pdf diff --git a/AmpGen/Utilities.h b/AmpGen/Utilities.h index b321773df16..929c55c558f 100644 --- a/AmpGen/Utilities.h +++ b/AmpGen/Utilities.h @@ -19,7 +19,6 @@ #include "AmpGen/MsgService.h" #include "AmpGen/MetaUtils.h" - namespace AmpGen { template std::string vectorToString( it_type begin, @@ -162,6 +161,14 @@ namespace AmpGen { } return total; } + template std::function + arrayToFunctor( const std::vector& values, const container_type* container ) + { + return [container, values](const typename container_type::value_type& event) -> return_type { + int addr = &event - &container->at(0); + return *(values.data() + addr); }; + } template void parallel_sort(iterator begin, diff --git a/apps/Fitter.cpp b/apps/Fitter.cpp index d05d1430ba9..a1dedbb40b7 100644 --- a/apps/Fitter.cpp +++ b/apps/Fitter.cpp @@ -129,12 +129,11 @@ FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& if ( makePlots ) { auto ep = fr->getErrorPropagator(); -// const size_t NBins = NamedParameter ("nBins" , 100 , "Number of bins used for plotting."); unsigned int counter = 1; for_each( pdf.pdfs(), [&]( auto& f ) { auto tStartIntegral2 = std::chrono::high_resolution_clock::now(); - auto mc_plot3 = mc.makeProjections( mc.eventType().defaultProjections(100), WeightFunction(f), Prefix("tMC_Category"+std::to_string(counter) ) ); + auto mc_plot3 = mc.makeProjections( mc.eventType().defaultProjections(100), WeightFunction(f), PlotOptions::Prefix("tMC_Category"+std::to_string(counter) ) ); auto tEndIntegral2 = std::chrono::high_resolution_clock::now(); double t2 = std::chrono::duration( tEndIntegral2 - tStartIntegral2 ).count(); INFO( "Time for plots 
= " << t2 ); @@ -146,7 +145,7 @@ FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& counter++; } ); } - Chi2Estimator chi2( data, mc, pdf, 15 ); + Chi2Estimator chi2( data, mc, pdf, MinEvents(15) ); fr->addChi2( chi2.chi2(), chi2.nBins() ); auto twall_end = std::chrono::high_resolution_clock::now(); @@ -279,7 +278,7 @@ int main( int argc, char* argv[] ) fr->writeToFile( logFile ); output->cd(); - auto plots = events.makeDefaultProjections( Prefix( "Data_" ), Bins( NBins ) ); + auto plots = events.makeDefaultProjections( PlotOptions::Prefix( "Data_" ), PlotOptions::Bins( NBins ) ); for ( auto& plot : plots ) plot->Write(); output->Write(); diff --git a/apps/Generator.cpp b/apps/Generator.cpp index fd7ac128e78..89bbb6d2cba 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -160,13 +160,13 @@ int main( int argc, char** argv ) if( accepted.size() == 0 ) return -1; TFile* f = TFile::Open( outfile.c_str(), "RECREATE" ); accepted.tree( "DalitzEventList" )->Write(); - auto plots = accepted.makeDefaultProjections(Bins(nBins), LineColor(kBlack)); + auto plots = accepted.makeDefaultProjections(PlotOptions::Bins(nBins), PlotOptions::LineColor(kBlack)); for ( auto& plot : plots ) plot->Write(); if( NamedParameter("plots_2d",true) == true ){ auto proj = eventType.defaultProjections(nBins); for( size_t i = 0 ; i < proj.size(); ++i ){ for( size_t j = i+1 ; j < proj.size(); ++j ){ - accepted.makeProjection( Projection2D(proj[i], proj[j]), LineColor(kBlack) )->Write(); + accepted.makeProjection( Projection2D(proj[i], proj[j]), PlotOptions::LineColor(kBlack) )->Write(); } } } diff --git a/examples/FitterWithPolarisation.cpp b/examples/FitterWithPolarisation.cpp index 05aa87beadc..8ee80173425 100644 --- a/examples/FitterWithPolarisation.cpp +++ b/examples/FitterWithPolarisation.cpp @@ -112,15 +112,17 @@ int main( int argc, char* argv[] ) fit results + covariance matrix) of the fit result, and write them to a file. 
*/ auto fitFractions = sig.fitFractions( fr->getErrorPropagator() ); - + + INFO("Adding fraction to file..."); fr->addFractions( fitFractions ); + INFO("Writing file ... "); fr->writeToFile( logFile ); output->cd(); /* Write out the data plots. This also shows the first example of the named arguments to functions, emulating python's behaviour in this area */ - auto plots = events.makeDefaultProjections(Prefix("Data"), Bins(100)); + auto plots = events.makeDefaultProjections(PlotOptions::Prefix("Data"), PlotOptions::Bins(100)); for ( auto& plot : plots ) plot->Write(); output->Close(); @@ -143,21 +145,21 @@ FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& /* Make the plots for the different components in the PDF, i.e. the signal and backgrounds. The structure assumed the PDF is some SumPDF. */ - unsigned int counter = 1; - for_each(pdf.pdfs(), [&]( auto& f ){ - auto mc_plot3 = mc.makeDefaultProjections(WeightFunction(f), Prefix("Model_cat"+std::to_string(counter))); - for( auto& plot : mc_plot3 ) - { - plot->Scale( ( data.integral() * f.getWeight() ) / plot->Integral() ); - plot->Write(); - } - counter++; - } ); +// unsigned int counter = 1; +// for_each(pdf.pdfs(), [&]( const auto& f ){ +// auto mc_plot3 = mc.makeDefaultProjections(WeightFunction(f.evaluator()), Prefix("Model_cat"+std::to_string(counter))); +// for( auto& plot : mc_plot3 ) +// { +// plot->Scale( ( data.integral() * f.getWeight() ) / plot->Integral() ); +// plot->Write(); +// } +// counter++; +// } ); /* Estimate the chi2 using an adaptive / decision tree based binning, down to a minimum bin population of 15, and add it to the output. 
*/ - Chi2Estimator chi2( data, mc, pdf, 15 ); - chi2.writeBinningToFile("chi2_binning.txt"); - fr->addChi2( chi2.chi2(), chi2.nBins() ); + // Chi2Estimator chi2( data, mc, pdf, 15 ); + // chi2.writeBinningToFile("chi2_binning.txt"); + // fr->addChi2( chi2.chi2(), chi2.nBins() ); auto twall_end = std::chrono::high_resolution_clock::now(); double time_cpu = ( std::clock() - time ) / (double)CLOCKS_PER_SEC; diff --git a/examples/SignalOnlyFitter.cpp b/examples/SignalOnlyFitter.cpp index e49d43f317b..ce4bc7d5ded 100644 --- a/examples/SignalOnlyFitter.cpp +++ b/examples/SignalOnlyFitter.cpp @@ -103,7 +103,7 @@ int main( int argc, char* argv[] ) /* Generate events to normalise the PDF with. This can also be loaded from a file, which will be the case when efficiency variations are included. Default number of normalisation events is 5 million. */ - EventList eventsMC = intFile == "" ? Generator<>(evtType, &rndm).generate(1e7) : EventList(intFile, evtType, GetGenPdf(true)); + EventList eventsMC = intFile == "" ? Generator<>(evtType, &rndm).generate(2e6) : EventList(intFile, evtType, GetGenPdf(true)); sig.setMC( eventsMC ); @@ -124,7 +124,7 @@ int main( int argc, char* argv[] ) /* Write out the data plots. This also shows the first example of the named arguments to functions, emulating python's behaviour in this area */ - auto plots = events.makeDefaultProjections(Prefix("Data"), Bins(100)); + auto plots = events.makeDefaultProjections(PlotOptions::Prefix("Data"), PlotOptions::Bins(100)); for ( auto& plot : plots ) plot->Write(); output->Close(); @@ -147,27 +147,28 @@ FitResult* doFit( likelihoodType&& likelihood, EventList& data, EventList& mc, M double tWall = std::chrono::duration( twall_end - time_wall ).count(); INFO( "Wall time = " << tWall / 1000. ); INFO( "CPU time = " << time_cpu ); - - /* Make the plots for the different components in the PDF, i.e. the signal and backgrounds. - The structure assumed the PDF is some SumPDF. 
*/ - unsigned int counter = 1; - for_each(likelihood.pdfs(), [&](auto& pdf){ - auto pfx = Prefix("Model_cat"+std::to_string(counter)); - auto mc_plot3 = mc.makeDefaultProjections(WeightFunction(pdf), pfx); - for( auto& plot : mc_plot3 ) - { - plot->Scale( ( data.integral() * pdf.getWeight() ) / plot->Integral() ); - plot->Write(); - } - counter++; - }); - + /* Estimate the chi2 using an adaptive / decision tree based binning, down to a minimum bin population of 15, and add it to the output. */ - Chi2Estimator chi2( data, mc, likelihood, 15 ); + + Chi2Estimator chi2( data, mc, likelihood.evaluator(&mc), MinEvents(15), Dim(data.eventType().dof()) ); chi2.writeBinningToFile("chi2_binning.txt"); fr->addChi2( chi2.chi2(), chi2.nBins() ); - fr->print(); + + /* Make the plots for the different components in the PDF, i.e. the signal and backgrounds. + The structure assumed the PDF is some SumPDF. */ + auto evaluator = likelihood.componentEvaluator(&mc); + auto evaluator_per_component = std::get<0>( likelihood.pdfs() ).componentEvaluator(&mc); + auto projections = data.eventType().defaultProjections(100); + for( const auto& proj : projections ) + { + auto [components, total] = proj(mc, evaluator, PlotOptions::Norm(data.size() ) ); + for( const auto& component : components ) component->Write(); + total->Write(); + auto [signal_components, total_signal_component] = proj(mc, evaluator_per_component, PlotOptions::Norm(data.size()) ); + for( const auto& component : signal_components ) component->Write(); + total_signal_component->Write(); + } return fr; } diff --git a/examples/SimFit.cpp b/examples/SimFit.cpp index dcc7b5b69ac..2671093485c 100644 --- a/examples/SimFit.cpp +++ b/examples/SimFit.cpp @@ -84,12 +84,12 @@ int main(int argc , char* argv[] ){ for( size_t i = 0 ; i < data.size(); ++i ) { INFO("Making figures for sample: " << i << " ..."); - auto dataPlots = data[i].makeDefaultProjections( Prefix("Data_"+std::to_string(i))); + auto dataPlots = 
data[i].makeDefaultProjections( PlotOptions::Prefix("Data_"+std::to_string(i))); for( auto& p : dataPlots ) p->Write(); size_t counter = 0; for_each(pdfs[i].pdfs(), [&]( auto& f ){ auto mc_plots = mcs[i].makeDefaultProjections(WeightFunction(f), - Prefix("Model_sample_"+std::to_string(i)+"_cat"+std::to_string(counter))); + PlotOptions::Prefix("Model_sample_"+std::to_string(i)+"_cat"+std::to_string(counter))); for( auto& plot : mc_plots ) { plot->Scale( ( data[i].integral() * f.getWeight() ) / plot->Integral() ); diff --git a/src/AmplitudeRules.cpp b/src/AmplitudeRules.cpp index 01e1c2737e6..98b66bcd496 100644 --- a/src/AmplitudeRules.cpp +++ b/src/AmplitudeRules.cpp @@ -98,7 +98,7 @@ std::vector AmplitudeRules::rulesForDecay(const std::string& head, con return rt; } -std::map> AmplitudeRules::rules() +const std::map>& AmplitudeRules::rules() const { return m_rules; } diff --git a/src/BinDT.cpp b/src/BinDT.cpp index 911ab247989..e6c15149b0d 100644 --- a/src/BinDT.cpp +++ b/src/BinDT.cpp @@ -80,7 +80,7 @@ std::function( const Event& )> BinDT::makeDefaultFunctors() DEBUG( "Problem has 2 d.o.f.s -> using Dalitz coordinates" ); return []( const Event& evt ) -> std::vector { return {evt.s( 0, 1 ), evt.s( 1, 2 )}; }; } - ERROR( "No functors found for dim = " << m_dim ); + ERROR( "No default functors found for dim = " << m_dim ); return nullptr; } @@ -97,6 +97,10 @@ BinDT::BinDT( const ArgumentPack& args ) } } +BinDT::BinDT( const EventList& events, const ArgumentPack& args ) : BinDT(args) +{ + m_top = makeNodes(events); +} void BinDT::readFromStream( std::istream& stream ) { std::map>> nodes; @@ -363,3 +367,17 @@ void BinDT::Decision::visit( const std::function& visit_fun m_left->visit( visit_function ); m_right->visit( visit_function ); } + +std::shared_ptr BinDT::makeNodes( const EventList& events ) +{ + std::vector data( m_dim * events.size() ); + std::vector addresses( events.size() ); + size_t counter = 0; + for ( auto& evt : events ) { + auto val = m_functors( evt 
); + for ( unsigned int i = 0; i < m_dim; ++i ) data[m_dim * counter + i] = val[i]; + addresses[counter] = &( data[m_dim * counter] ); + counter++; + } + return makeNodes( addresses ); +} diff --git a/src/Chi2Estimator.cpp b/src/Chi2Estimator.cpp index 58047ad5c4c..500210f0489 100644 --- a/src/Chi2Estimator.cpp +++ b/src/Chi2Estimator.cpp @@ -33,21 +33,6 @@ struct Moment { double var() { return N == 0 ? 0 : xx; } }; -Chi2Estimator::Chi2Estimator( const EventList& dataEvents, const EventList& mcEvents, - const std::function& fcn, const unsigned int& minEvents ) : - m_binning( dataEvents, MinEvents( minEvents ), Dim( dataEvents.eventType().dof() ) ) -{ - doChi2( dataEvents, mcEvents, fcn ); -} - -Chi2Estimator::Chi2Estimator( const EventList& dataEvents, const EventList& mcEvents, - const std::function& fcn, const std::string& filename ) : - m_binning( File( filename ) ) -{ - doChi2( dataEvents, mcEvents, fcn ); -} - - double Chi2Estimator::chi2() const { return m_chi2; } double Chi2Estimator::nBins() const { return m_nBins; } void Chi2Estimator::writeBinningToFile( const std::string& filename ) { m_binning.serialize( filename ); } @@ -90,3 +75,5 @@ void Chi2Estimator::doChi2( const EventList& dataEvents, const EventList& mcEv m_nBins = m_binning.size(); } + + diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index e72d0ac3381..3bc4b612121 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -103,7 +103,6 @@ void CoherentSum::prepare() void CoherentSum::updateNorms( const std::vector& changedPdfIndices ) { - //for ( auto& i : changedPdfIndices ) m_integrator.prepareExpression( m_matrixElements[i].amp ); std::vector cacheIndex; std::transform( m_matrixElements.begin(), m_matrixElements.end(), std::back_inserter(cacheIndex), [this](auto& m){ return this->m_integrator.getCacheIndex( m.amp ) ; } ); @@ -282,7 +281,7 @@ void CoherentSum::reset( bool resetEvents ) for ( auto& mE : m_matrixElements ) mE.addressData = 999; if ( resetEvents ){ m_events = nullptr; 
- m_integrator = integrator(); + m_integrator = Integrator2(); } } @@ -297,7 +296,7 @@ void CoherentSum::setMC( EventList& sim ) { if ( m_verbosity ) INFO( "Setting norm. event list with:" << sim.size() << " events for " << this ); reset(); - m_integrator = integrator(&sim); + m_integrator = Integrator2(&sim); } real_t CoherentSum::norm() const @@ -368,3 +367,53 @@ complex_t CoherentSum::getVal( const Event& evt, const std::vector& cach value += m_matrixElements[i].coefficient * evt.getCache( cacheAddresses[i] ); return value; } + +std::function CoherentSum::evaluator(const EventList* events) const +{ + if( events != nullptr && events != &this->m_integrator.events() ) + ERROR("Evaluator only working on the integration sample, fix me!"); + std::vector address_mapping( size() ); + for( const auto& me : m_matrixElements ) address_mapping[me.addressData] = m_integrator.getCacheIndex( me.amp ); + std::vector values( m_integrator.events().size() ); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for( unsigned int i = 0 ; i != m_integrator.events().size(); ++i ) + { + complex_t amp = 0; + for( unsigned j = 0 ; j != address_mapping.size(); ++j ) amp += m_matrixElements[j].coefficient * this->m_integrator.get(address_mapping[j], i); + values[i] = m_weight * std::norm(amp) / m_norm; + } + return arrayToFunctor(values, events); +} + +KeyedView CoherentSum::componentEvaluator(const EventList* events) const +{ + if( events != nullptr && events != &this->m_integrator.events() ) + ERROR("Evaluator only working on the integration sample, fix me!"); + + KeyedView rt(*events, m_matrixElements.size() ); + std::vector address_mapping(m_matrixElements.size()); + for( unsigned i = 0; i != m_matrixElements.size(); ++i ) address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); + + for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) + { + auto& me = m_matrixElements[i]; + rt.setKey(i, programatic_name( me.decayTree.decayDescriptor() ) ); + #ifdef _OPENMP + 
#pragma omp parallel for + #endif + for( unsigned evt = 0 ; evt != m_integrator.events().size(); ++evt ) + { + complex_t total = 0; + for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ + total += this->m_integrator.get( address_mapping[i], evt ) * m_matrixElements[i].coefficient + * std::conj( this->m_integrator.get( address_mapping[j], evt ) * m_matrixElements[j].coefficient ); + } + + rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; + } + } + return rt; +} + diff --git a/src/CompiledExpressionBase.cpp b/src/CompiledExpressionBase.cpp index eb680b55b17..e5a1de4071e 100644 --- a/src/CompiledExpressionBase.cpp +++ b/src/CompiledExpressionBase.cpp @@ -87,7 +87,7 @@ void CompiledExpressionBase::to_stream( std::ostream& stream ) const stream << "extern \"C\" const char* " << progName() << "_name() { return \"" << m_name << "\"; } \n"; bool enable_cuda = NamedParameter("UseCUDA",false); size_t sizeOfStream = 0; - if( m_rto ) + if( use_rto() ) { stream << "extern \"C\" " << returnTypename() << " " << progName() << "(" << fcnSignature() << "){\n"; addDependentExpressions( stream , sizeOfStream ); diff --git a/src/Event.cpp b/src/Event.cpp index 8ca8c171466..6ffa95df0b5 100644 --- a/src/Event.cpp +++ b/src/Event.cpp @@ -1,4 +1,4 @@ -#include + #include #include #include diff --git a/src/EventList.cpp b/src/EventList.cpp index 5068ba0d1b4..c7b90bdd5c5 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -118,7 +118,7 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) INFO("Time to read tree = " << read_time << "[ms]; nEntries = " << size() ); } -TTree* EventList::tree( const std::string& name, const std::vector& extraBranches ) +TTree* EventList::tree( const std::string& name, const std::vector& extraBranches ) const { std::string title = m_eventType.mother(); for( unsigned i = 0 ; i != m_eventType.size(); ++i ) title += " " + m_eventType[i]; @@ -131,16 +131,13 @@ TTree* EventList::tree( const std::string& name, const 
std::vector& double genPdf = 1; double weight = 1; auto format = m_eventType.getEventFormat( true ); - - for ( auto& f : format ){ - outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); - } - for ( auto& f : m_extensions ){ - outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); - } + + for ( const auto& f : format ) outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); + for ( const auto& f : m_extensions ) outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); + outputTree->Branch( "genPdf", &genPdf ); outputTree->Branch( "weight", &weight ); - for ( auto& evt : *this ) { + for ( const auto& evt : *this ) { tmp = evt; genPdf = evt.genPdf(); weight = evt.weight(); @@ -152,21 +149,17 @@ TTree* EventList::tree( const std::string& name, const std::vector& std::vector EventList::makeProjections( const std::vector& projections, const ArgumentPack& args ) { std::vector plots; - for ( auto& proj : projections ) { - TH1D* plot = makeProjection(proj, args ); - DEBUG("Made plot ... 
" << plot->GetName() ); - plots.push_back( plot ); - } + for ( const auto& proj : projections ) plots.push_back( makeProjection(proj, args) ); return plots; } TH1D* EventList::makeProjection( const Projection& projection, const ArgumentPack& args ) const { - auto selection = args.getArg().val; + auto selection = args.getArg().val; auto weightFunction = args.getArg().val; - std::string prefix = args.getArg(std::string("")); + std::string prefix = args.getArg(std::string("")); auto plot = projection.plot(prefix); - plot->SetLineColor(args.getArg(kBlack).val); + plot->SetLineColor(args.getArg(kBlack).val); plot->SetMarkerSize(0); for( auto& evt : m_data ){ if( selection != nullptr && !selection(evt) ) continue; @@ -179,9 +172,9 @@ TH1D* EventList::makeProjection( const Projection& projection, const ArgumentPac TH2D* EventList::makeProjection( const Projection2D& projection, const ArgumentPack& args ) const { - auto selection = args.getArg().val; + auto selection = args.getArg().val; auto weightFunction = args.getArg().val; - std::string prefix = args.getArg().val; + std::string prefix = args.getArg().val; auto plot = projection.plot(prefix); for ( auto& evt : m_data ){ if ( selection != nullptr && !selection(evt) ) continue; @@ -228,12 +221,9 @@ void EventList::resetCache() double EventList::integral() const { - double integral = 0; - for ( auto& evt : *this ) { - integral += evt.weight(); - } - return integral; + return std::accumulate( std::begin(*this), std::end(*this), 0, [](double rv, const auto& evt){ return rv + evt.weight(); } ); } + void EventList::add( const EventList& evts ) { resetCache(); @@ -243,16 +233,6 @@ void EventList::add( const EventList& evts ) rbegin()->resizeCache( 0 ); } } -double EventList::norm() -{ - if ( m_norm == 0 ) { - double totalWeight = 0; -#pragma omp parallel for reduction( + : totalWeight ) - for ( unsigned int i = 0; i < size(); ++i ) totalWeight += ( *this )[i].weight() / ( *this )[i].genPdf(); - m_norm = totalWeight; - } - 
return m_norm; -} void EventList::clear() { @@ -260,7 +240,7 @@ void EventList::clear() } void EventList::erase(const std::vector::iterator& begin, - const std::vector::iterator& end) + const std::vector::iterator& end) { m_data.erase( begin, end ); } diff --git a/src/FitFraction.cpp b/src/FitFraction.cpp index 65a1cec41bf..f064e2d7750 100644 --- a/src/FitFraction.cpp +++ b/src/FitFraction.cpp @@ -19,8 +19,6 @@ FitFraction::FitFraction( const std::string& line ) m_error = stod( tokens[3] ); } -std::shared_ptr FitFraction::particle() const { return std::make_shared( m_name ); } - FitFraction::FitFraction( const std::string& name, const double& frac, const double& err ) : m_name( name ), m_value( frac ), m_error( err ) { diff --git a/src/Integrator2.cpp b/src/Integrator2.cpp index dacc4902bb4..c36df07a789 100644 --- a/src/Integrator2.cpp +++ b/src/Integrator2.cpp @@ -13,16 +13,17 @@ void Integrator2::integrateBlock() addr_i[roll] = m_integrals[roll].i; addr_j[roll] = m_integrals[roll].j; } -#pragma omp parallel for reduction(+: re, im) - for ( size_t i = 0; i < m_events->size(); ++i ) { - for ( size_t roll = 0; roll < N; ++roll ) { - auto c = m_buffer[addr_i[roll]][i] * std::conj(m_buffer[addr_j[roll]][i]); + for ( size_t roll = 0; roll < N; ++roll ) { + auto& b1 = m_buffer[m_integrals[roll].i]; + auto& b2 = m_buffer[m_integrals[roll].j]; + #pragma omp parallel for reduction(+: re, im) + for ( size_t i = 0; i < m_events->size(); ++i ) { + auto c = b1[i] * std::conj(b2[i]); re[roll] += m_weight[i] * std::real(c); im[roll] += m_weight[i] * std::imag(c); } } - for ( size_t j = 0; j < m_counter; ++j ) - m_integrals[j].transfer( complex_t( re[j], im[j] ) / m_norm ); + for ( size_t j = 0; j < m_counter; ++j ) m_integrals[j].transfer( complex_t( re[j], im[j] ) / m_norm ); m_counter = 0; } diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index c4ac3ab2a7a..e63ab654231 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -33,9 +33,7 @@ using namespace 
AmpGen; using namespace std::complex_literals; -namespace AmpGen { - make_enum(spaceType, spin, flavour) -} +namespace AmpGen { make_enum(spaceType, spin, flavour) } PolarisedSum::PolarisedSum(const EventType& type, MinuitParameterSet& mps, @@ -48,8 +46,8 @@ PolarisedSum::PolarisedSum(const EventType& type, , m_rules (mps) , m_dim (m_eventType.dim()) { - std::string objCache = NamedParameter("PolarisedSum::ObjectCache","" ); - spaceType stype = NamedParameter("PolarisedSum::SpaceType", spaceType::spin); + std::string objCache = NamedParameter("PolarisedSum::ObjectCache", "" ); + spaceType stype = NamedParameter( "PolarisedSum::SpaceType" , spaceType::spin); if( stype == spaceType::spin ) { auto prodPols = polarisations(m_eventType.mother()); @@ -73,13 +71,13 @@ PolarisedSum::PolarisedSum(const EventType& type, p.setPolarisationState( polStates[j] ); thisExpression[j] = make_cse( p.getExpression(&syms) ); } - m_matrixElements[i] = TransitionMatrix>( + m_matrixElements[i] = TransitionMatrix( p, coupling, - CompiledExpression< std::vector,const real_t*, const real_t*>( + CompiledExpression( TensorExpression(thisExpression), p.decayDescriptor(), - this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); + this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); CompilerWrapper().compile( m_matrixElements[i].amp ); }); } @@ -99,10 +97,10 @@ PolarisedSum::PolarisedSum(const EventType& type, auto& tm = i < r1.size() ? r1[i] : r2[i-r1.size()]; thisExpression[0] = i < r1.size() ? make_cse( tm.first.getExpression(&syms) ) : 0; thisExpression[1] = i < r1.size() ? 
0 : make_cse( tm.first.getExpression(&syms) ); - m_matrixElements[i] = TransitionMatrix>( + m_matrixElements[i] = TransitionMatrix( tm.first, tm.second, - CompiledExpression< std::vector,const real_t*, const real_t*>( + CompiledExpression( TensorExpression(thisExpression), tm.first.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); @@ -181,7 +179,7 @@ std::vector convertProxies(const std::vector& proxyVect return rt; } -std::vector>> PolarisedSum::matrixElements() const +std::vector> PolarisedSum::matrixElements() const { return m_matrixElements; } @@ -190,24 +188,19 @@ void PolarisedSum::prepare() { DEBUG( "Preparing: " << m_prefix << " " << m_events << " ready = " << m_integrator.isReady() ); transferParameters(); - std::vector hasChanged( m_matrixElements.size(), false); size_t nChanges = 0; ProfileClock tEval; size_t size_of = size() / m_matrixElements.size(); if( m_events != nullptr ) m_events->reserveCache( size() ); - if( m_integrator.isReady() ) m_integrator.events().reserveCache( size() ); - for( size_t i = 0; i < m_matrixElements.size(); ++i ){ - ProfileClock tMEval; - auto& t = m_matrixElements[i]; + if( m_integrator.isReady() ) m_integrator.reserveCache( size() ); + for(auto& t : m_matrixElements){ if( m_nCalls != 0 && !t.amp.hasExternalsChanged() ) continue; - if( t.addressData == 999 ) t.addressData = m_events->registerExpression(t.amp, m_dim.first * m_dim.second ); m_events->updateCache(t.amp, t.addressData); m_integrator.prepareExpression(t.amp, size_of); - tMEval.stop(); t.amp.resetExternals(); - hasChanged[i] = true; + t.workToDo = true; nChanges++; - if( m_nCalls == 0 && m_integrator.isReady() ) m_integIndex.push_back( m_integrator.events().getCacheIndex( t.amp ) ); + if( m_nCalls == 0 && m_integrator.isReady() ) m_integIndex.push_back( m_integrator.getCacheIndex( t.amp ) ); } if( !m_probExpression.isLinked() ) build_probunnormalised(); tEval.stop(); @@ -215,46 +208,52 @@ void 
PolarisedSum::prepare() m_rho = densityMatrix(m_dim.first, m_pVector); if( m_integrator.isReady() ) { - if(nChanges != 0) calculateNorms(hasChanged); + if(nChanges != 0) calculateNorms(); complex_t z = 0; for(size_t i = 0; i < m_matrixElements.size(); ++i){ - for(size_t j = i; j < m_matrixElements.size(); ++j){ - z += ((i==j) ? 1. : 2. ) * m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())*norm(i,j); + for(size_t j = 0; j < m_matrixElements.size(); ++j){ + // z += ((i==j) ? 1. : 2. ) * m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())*norm(i,j); + z += m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())* ( i > j ? std::conj(norm(j,i)) : norm(i,j) ); } } m_norm = std::real(z); if(m_nCalls % 10000 == 0 && m_prefix == "") debug_norm(); } + tIntegral.stop(); if(m_verbosity && nChanges != 0) INFO("Time to evaluate = " << tEval << " ms; " << "norm = " << tIntegral << " ms; " << "pdfs = " << nChanges); m_nCalls++; + for( auto& me : m_matrixElements ) me.workToDo = false; } void PolarisedSum::debug_norm() { double norm_slow = 0; for( auto& evt : m_integrator.events() ) - norm_slow += evt.weight() * prob_unnormalised(evt) / evt.genPdf(); + norm_slow += evt.weight() * getValNoCache(evt) / evt.genPdf(); auto evt = m_integrator.events()[0]; INFO("Event[0]: " << prob_unnormalised(evt) << " " << getValNoCache(evt) ); INFO("Norm : " << std::setprecision(10) << "bilinears=" << m_norm - << "; exact=" << norm_slow / m_integrator.sampleNorm() - << "; d = " << m_norm - norm_slow / m_integrator.sampleNorm() - << "; sample=" << m_integrator.sampleNorm() ); + << "; exact=" << norm_slow / m_integrator.norm() + << "; d = " << m_norm - norm_slow / m_integrator.norm() + << "; sample=" << m_integrator.norm() ); } void PolarisedSum::setEvents( EventList& events ) { reset(); m_events = &events; + for( unsigned i = 0; i != m_matrixElements.size(); ++i ) + m_matrixElements[i].addressData = 
m_events->registerExpression(m_matrixElements[i].amp, m_dim.first * m_dim.second ); } void PolarisedSum::setMC( EventList& events ) { m_nCalls = 0; m_integrator = integrator(&events); + } size_t PolarisedSum::size() const @@ -289,7 +288,7 @@ Tensor PolarisedSum::transitionMatrix() return T_matrix; } -double PolarisedSum::prob_unnormalised( const Event& evt ) const +real_t PolarisedSum::prob_unnormalised( const Event& evt ) const { return m_probExpression( evt.getCachePtr(0) ); } @@ -316,11 +315,11 @@ complex_t PolarisedSum::norm(const size_t& i, const size_t& j, PolarisedSum::int return total; } -void PolarisedSum::calculateNorms(const std::vector& hasChanged) +void PolarisedSum::calculateNorms() { for( unsigned i = 0 ; i < m_matrixElements.size(); ++i ){ for( unsigned j = i; j < m_matrixElements.size(); ++j ){ - if( hasChanged[i] || hasChanged[j] ) norm(i, j, &m_integrator); + if( m_matrixElements[i].workToDo || m_matrixElements[j].workToDo ) norm(i, j, &m_integrator); } } m_integrator.flush(); @@ -366,14 +365,16 @@ void PolarisedSum::generateSourceCode(const std::string& fname, const double& no Expression event = Parameter("x0",0,true); std::vector expressions(size); for( auto& p : m_matrixElements ){ - p.amp.prepare(); - p.amp.to_stream( stream ); - p.amp.compileWithParameters( stream ); + auto expr = CompiledExpression, const real_t*, const real_t*>( + p.amp.expression(), + p.decayDescriptor(), + m_eventType.getEventFormat(), DebugSymbols() ,m_mps ) ; + expr.prepare(); + expr.to_stream( stream ); + expr.compileWithParameters( stream ); Array z( make_cse( Function( programatic_name( p.amp.name()) + "_wParams", {event} ) ), size ); INFO( p.decayDescriptor() << " coupling = " << p.coupling() ); - for( unsigned int j = 0 ; j < size; ++j ){ - expressions[j] = expressions[j] + p.coupling() * z[j]; - } + for( unsigned int j = 0 ; j < size; ++j ) expressions[j] = expressions[j] + p.coupling() * z[j]; } Tensor T_matrix( expressions, {m_dim.first, m_dim.second} ); 
T_matrix.st(); @@ -430,11 +431,12 @@ Expression PolarisedSum::probExpression(const Tensor& T_matrix, const std::vecto std::vector PolarisedSum::fitFractions(const LinearErrorPropagator& prop) { bool recomputeIntegrals = NamedParameter("PolarisedSum::RecomputeIntegrals", false ); + bool interferenceFractions = NamedParameter("PolarisedSum::InterferenceFractions", false ); std::vector outputFractions; - for(auto& rule : m_rules.rules()) + for(const auto& rule : m_rules.rules()) { FitFractionCalculator pCalc(this, findIndices(m_matrixElements, rule.first), recomputeIntegrals); - for(auto& process : rule.second) + for(const auto& process : rule.second) { if(process.head() == m_eventType.mother() && process.prefix() != m_prefix) continue; auto numeratorIndices = processIndex(m_matrixElements, process.name()); @@ -443,26 +445,33 @@ std::vector PolarisedSum::fitFractions(const LinearErrorPropagator& } if( pCalc.calculators.size() == 0 ) continue; auto fractions = pCalc(rule.first, prop); - for( auto& f : fractions ) outputFractions.emplace_back(f); + for( const auto& f : fractions ) outputFractions.emplace_back(f); } - auto head_rules = m_rules.rulesForDecay(m_eventType.mother(), m_prefix); - FitFractionCalculator iCalc(this, findIndices(m_matrixElements, m_eventType.mother()), recomputeIntegrals); - for(size_t i = 0 ; i < head_rules.size(); ++i) + INFO("Fit fractions: "); + for(const auto& p : outputFractions) INFO(p); + + if( interferenceFractions ) { - auto process_i = head_rules[i]; - auto num_i = processIndex(m_matrixElements, process_i.name()); - if( num_i.size() == 0 || num_i == iCalc.normSet ) continue; - for( size_t j = i+1 ; j < head_rules.size(); ++j ){ - auto process_j = head_rules[j]; - auto num_j = processIndex(m_matrixElements, process_j.name()); - if( num_j.size() == 0 || num_j == iCalc.normSet ) continue; - iCalc.emplace_back(process_i.name() + " " + process_j.name() , num_i, num_j); + + auto head_rules = m_rules.rulesForDecay(m_eventType.mother(), 
m_prefix); + FitFractionCalculator iCalc(this, findIndices(m_matrixElements, m_eventType.mother()), recomputeIntegrals); + for(size_t i = 0 ; i < head_rules.size(); ++i) + { + auto process_i = head_rules[i]; + auto num_i = processIndex(m_matrixElements, process_i.name()); + if( num_i.size() == 0 || num_i == iCalc.normSet ) continue; + for( size_t j = i+1 ; j < head_rules.size(); ++j ){ + auto process_j = head_rules[j]; + auto num_j = processIndex(m_matrixElements, process_j.name()); + if( num_j.size() == 0 || num_j == iCalc.normSet ) continue; + iCalc.emplace_back(process_i.name() + " " + process_j.name() , num_i, num_j); + } } + auto ifractions = iCalc(m_eventType.mother(), prop); + INFO("Interference fractions: "); + for( auto& f : ifractions ) INFO( FitFraction(f) ); } - auto ifractions = iCalc(m_eventType.mother(), prop); - for(auto& p : outputFractions) INFO(p); - INFO("INTERFERENCE FRACTIONS"); - for( auto& f : ifractions ) INFO( FitFraction(f) ); + INFO("Returning: " << outputFractions.size() << " fractions"); return outputFractions; } @@ -491,3 +500,64 @@ real_t PolarisedSum::getValNoCache( const Event& evt ) void PolarisedSum::setWeight( MinuitProxy param ){ m_weight = param; } double PolarisedSum::getWeight() const { return m_weight ; } + +std::function PolarisedSum::evaluator(const EventList* events) const +{ + if( events != nullptr && events != &this->m_integrator.events() ) + ERROR("Evaluator only working on the integration sample, fix me!"); + + std::vector address_mapping( size() ); + for( const auto& me : m_matrixElements ){ + for( unsigned i = 0; i != size() / m_matrixElements.size(); ++i ) + address_mapping[me.addressData+i] = m_integrator.getCacheIndex( me.amp ) + i; + } + std::vector values( m_integrator.events().size() ); + std::vector buffer(address_mapping.size()); + #ifdef _OPENMP + #pragma omp parallel for firstprivate(buffer) + #endif + for( unsigned int i = 0 ; i != m_integrator.events().size(); ++i ) + { + for( unsigned j = 0 ; j != 
address_mapping.size(); ++j ) buffer[j] = this->m_integrator.get(address_mapping[j], i); + values[i] = m_weight * m_probExpression(&buffer[0]) / m_norm; + } + return arrayToFunctor(values, events); +} + +KeyedView PolarisedSum::componentEvaluator(const EventList* events) const +{ + if( events != nullptr && events != &this->m_integrator.events() ) + ERROR("Evaluator only working on the integration sample, fix me!"); + KeyedView rt(*events, m_matrixElements.size() ); + std::vector address_mapping(m_matrixElements.size()); + for( unsigned i = 0; i != m_matrixElements.size(); ++i ) + address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); + + for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) + { + auto& me = m_matrixElements[i]; + rt.setKey(i, programatic_name( me.decayTree.decayDescriptor() ) ); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for( unsigned evt = 0 ; evt != m_integrator.events().size(); ++evt ) + { + complex_t total = 0; + for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ + for( unsigned x = 0; x != m_norms.size(); ++x ) + { + auto s1 = m_dim.first; + auto s2 = m_dim.second; + auto f = x % s2; + auto psiIndex = (x-f) / s2; + auto m2 = psiIndex % s1; + auto m1 = (psiIndex-m2)/s1; + total += m_rho[psiIndex] * this->m_integrator.get( address_mapping[i] + m1 * s2 + f, evt ) * m_matrixElements[i].coefficient + * std::conj( this->m_integrator.get( address_mapping[j] + m2 * s2 + f, evt ) * m_matrixElements[j].coefficient ); + } + } + rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; + } + } + return rt; +} diff --git a/src/Projection.cpp b/src/Projection.cpp index aacf9ea405b..1a16dbbd1b6 100644 --- a/src/Projection.cpp +++ b/src/Projection.cpp @@ -7,8 +7,10 @@ #include "TAxis.h" #include "TH1.h" #include "TH2.h" +#include "THStack.h" using namespace AmpGen; +using namespace AmpGen::PlotOptions; Projection::Projection() = default; @@ -44,10 +46,8 @@ TH1D* Projection::plot(const std::string& prefix) 
const { plot->SetMinimum(0); return plot; } -std::function Projection::binFunctor() const { - return [this](auto& evt){ - - return int ( ( (*this)(evt) - m_min ) / m_width ) ; }; +std::function Projection::binFunctor() const { + return [this](auto& evt){ return int ( ( (*this)(evt) - m_min ) / m_width ) ;}; } TH2D* Projection2D::plot(const std::string& prefix) const { @@ -74,3 +74,30 @@ TH1D* Projection::projInternal( const EventList& events, const ArgumentPack& arg { return events.makeProjection(*this, args); } + +std::tuple, THStack*> Projection::projInternal(const EventList& events, const KeyedView& weightFunction, const ArgumentPack& args) const +{ +// INFO("Making projection: " << m_name << " classes = " << weightFunction.width() << " " << &(events[0]) ); + std::vector hists; + double norm_sum = args.getArg(1).val; + std::string prefix = args.getArg().val; + THStack* stack = args.getArg(new THStack()).val; + if( prefix != "" ) prefix = prefix +"_"; + for( unsigned int i = 0 ; i != weightFunction.width(); ++i ) + hists.push_back( plot(prefix + weightFunction.key(i)==""?"C"+std::to_string(i):weightFunction.key(i)) ); + auto selection = args.getArg().val; + for( const auto& evt : events ){ + if( selection != nullptr && !selection(evt) ) continue; + auto pos = operator()(evt); + auto weights = weightFunction(evt); + for( unsigned j = 0 ; j != weightFunction.width(); ++j ) hists[j]->Fill( pos, evt.weight() * weights[j] / evt.genPdf() ); + } + std::sort( std::begin(hists), std::end(hists), [](auto& h1, auto& h2){ return h1->Integral() < h2->Integral() ; } ); + double total = std::accumulate( std::begin(hists), std::end(hists), 0.0, [](double& t, auto& h){ return t + h->Integral() ; } ); + if( total == 0 ) ERROR("Norm = " << total ); + else for( auto& h : hists ) h->Scale( norm_sum / total ); + stack->SetName( (prefix + name() + "_stack").c_str()); + for( auto& h : hists ) stack->Add(h, "C HIST"); + return {hists, stack}; +} + From 
65952768e1b0f064d6a37be77cd7cb9039879d2c Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Thu, 9 Apr 2020 17:33:41 +0200 Subject: [PATCH 02/67] Begin vectorising the code --- AmpGen/ASTResolver.h | 7 +- AmpGen/AmplitudeRules.h | 32 ++- AmpGen/ArgumentPack.h | 20 +- AmpGen/BinDT.h | 26 ++- AmpGen/CoherentSum.h | 51 +++-- AmpGen/CompiledExpression.h | 395 ++++++++++++++++---------------- AmpGen/CompiledExpressionBase.h | 16 +- AmpGen/CompilerWrapper.h | 1 + AmpGen/DynamicFCN.h | 5 +- AmpGen/Event.h | 39 ++-- AmpGen/EventList.h | 55 ++--- AmpGen/EventListSIMD.h | 173 ++++++++++++++ AmpGen/Expression.h | 5 + AmpGen/Generator.h | 25 +- AmpGen/Integrator.h | 287 ++++++----------------- AmpGen/Integrator2.h | 84 ------- AmpGen/IntegratorSIMD.h | 73 ++++++ AmpGen/LiteSpan.h | 12 +- AmpGen/PhaseSpace.h | 2 +- AmpGen/Plots.h | 104 --------- AmpGen/PolarisedSum.h | 22 +- AmpGen/ProfileClock.h | 24 +- AmpGen/Projection.h | 15 +- AmpGen/RecursivePhaseSpace.h | 2 +- AmpGen/SumPDF.h | 45 +++- AmpGen/ThreeBodyCalculators.h | 6 +- AmpGen/TreePhaseSpace.h | 4 +- AmpGen/Utilities.h | 9 +- Standalone.cmake | 12 +- apps/ConvertToSourceCode.cpp | 10 +- apps/Debugger.cpp | 8 +- apps/Fitter.cpp | 23 +- examples/QcGenerator.cpp | 4 +- examples/SignalOnlyFitter.cpp | 40 ++-- src/ASTResolver.cpp | 1 + src/BinDT.cpp | 18 +- src/Chi2Estimator.cpp | 5 +- src/CoherentSum.cpp | 171 +++++++------- src/CompiledExpressionBase.cpp | 6 +- src/CompilerWrapper.cpp | 15 +- src/Event.cpp | 19 +- src/EventList.cpp | 24 +- src/EventListSIMD.cpp | 303 ++++++++++++++++++++++++ src/Expression.cpp | 24 +- src/Generator.cpp | 2 +- src/IncoherentSum.cpp | 17 +- src/Integrator.cpp | 77 +++++++ src/Integrator2.cpp | 80 ------- src/IntegratorSIMD.cpp | 69 ++++++ src/Lineshapes.cpp | 31 ++- src/Lineshapes/kMatrix.cpp | 4 +- src/Minimiser.cpp | 2 +- src/Particle.cpp | 9 +- src/PhaseSpace.cpp | 4 +- src/Plots.cpp | 107 --------- src/PolarisedSum.cpp | 69 +++--- src/Projection.cpp | 48 +++- src/RecursivePhaseSpace.cpp | 
8 +- src/ThreeBodyCalculators.cpp | 25 +- src/TreePhaseSpace.cpp | 8 +- src/UnaryExpression.cpp | 27 ++- test/test_phaseSpace.cpp | 2 +- 62 files changed, 1584 insertions(+), 1227 deletions(-) create mode 100644 AmpGen/EventListSIMD.h delete mode 100644 AmpGen/Integrator2.h create mode 100644 AmpGen/IntegratorSIMD.h delete mode 100644 AmpGen/Plots.h create mode 100644 src/EventListSIMD.cpp delete mode 100644 src/Integrator2.cpp create mode 100644 src/IntegratorSIMD.cpp delete mode 100644 src/Plots.cpp diff --git a/AmpGen/ASTResolver.h b/AmpGen/ASTResolver.h index fcfe3297fc0..aa978026649 100644 --- a/AmpGen/ASTResolver.h +++ b/AmpGen/ASTResolver.h @@ -43,7 +43,9 @@ namespace AmpGen { } size_t nParams() const { return m_nParameters ; } bool enableCuda() const { return m_enable_cuda ; } + bool enableAVX() const { return m_enable_avx; } bool enableCompileConstants() const { return m_enable_compileTimeConstants ;} + void setEnableAVX(){ m_enable_avx = true ; } std::map> cacheFunctions() const; void addResolvedParameter(const IExpression* param, const std::string& thing); void addResolvedParameter(const IExpression* param, const size_t& address, const size_t& arg=0); @@ -60,8 +62,9 @@ namespace AmpGen { const MinuitParameterSet* m_mps; /// Set of MinuitParameters std::map m_tempTrees; /// temporary store of sub-trees for performing cse reduction unsigned int m_nParameters; /// Number of parameters - bool m_enable_cuda; /// flag to generate CUDA code <> - bool m_enable_compileTimeConstants; /// flag to enable compile time constants <> + bool m_enable_cuda {false}; /// flag to generate CUDA code <> + bool m_enable_avx {false};/// flag to generate vectorised code <> + bool m_enable_compileTimeConstants {false}; /// flag to enable compile time constants <> }; template <> void ASTResolver::resolve( const Parameter& obj ); diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index 2dbb64ed0db..2c57d2e5689 100644 --- a/AmpGen/AmplitudeRules.h +++ 
b/AmpGen/AmplitudeRules.h @@ -14,6 +14,9 @@ #include "AmpGen/Event.h" #include "AmpGen/Particle.h" #include "AmpGen/ExpressionParser.h" +#if ENABLE_AVX2 + #include "AmpGen/EventListSIMD.h" +#endif namespace AmpGen { @@ -84,15 +87,20 @@ namespace AmpGen template struct TransitionMatrix { + #if ENABLE_AVX2 + using float_v = AVX2::float_t; + #else + using float_v = real_t; + #endif TransitionMatrix() = default; TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, - const CompiledExpression & amp) : + const CompiledExpression & amp) : decayTree(dt), coupling(coupling), amp(amp) {} - TransitionMatrix(Particle& dt, + TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, const MinuitParameterSet& mps, const std::map& evtFormat, @@ -100,17 +108,21 @@ namespace AmpGen decayTree(dt), coupling(coupling), amp(decayTree.getExpression(debugThis ? &db : nullptr ), decayTree.decayDescriptor(), evtFormat, db, &mps ) {} - - const RT operator()(const Event& event) const { return amp(event.address() ); } - const RT operator()(const Event& event, const size_t& cacheOffset) const { return amp(event.address() + cacheOffset); } + #if ENABLE_AVX2 + const RT operator()(const Event& event) const { return amp(EventListSIMD::makeEvent(event).data()); } + #else + const RT operator()(const Event& event) const { return amp(event.address()); } + #endif const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } + std::string name() const { return amp.name(); } + unsigned returnTypeSize() const { return amp.returnTypeSize(); } Particle decayTree; TotalCoupling coupling; complex_t coefficient; DebugSymbols db; - CompiledExpression amp; - size_t addressData = {999}; + CompiledExpression amp; + size_t addressData = {999}; }; template std::vector processIndex(const std::vector>& tm, const std::string& label) @@ -146,7 +158,7 @@ namespace AmpGen TransitionMatrix() = default; TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, - const 
CompiledExpression & amp) : + const CompiledExpression & amp) : decayTree(dt), coupling(coupling), amp(amp) {} @@ -176,9 +188,11 @@ namespace AmpGen TotalCoupling coupling; complex_t coefficient; DebugSymbols db; - CompiledExpression amp; + CompiledExpression amp; size_t addressData = {999}; bool workToDo = {false}; + std::string name() const { return amp.name(); } + unsigned returnTypeSize() const { return amp.returnTypeSize(); } }; } // namespace AmpGen diff --git a/AmpGen/ArgumentPack.h b/AmpGen/ArgumentPack.h index c1decb23862..a9e745a7750 100644 --- a/AmpGen/ArgumentPack.h +++ b/AmpGen/ArgumentPack.h @@ -12,7 +12,7 @@ namespace AmpGen #define DECLARE_ARGUMENT(X, Y) \ struct X : public AmpGen::Argument { \ template \ - explicit X(Z val) : AmpGen::Argument(val){} \ + explicit X(Z val = Z()) : AmpGen::Argument(val){} \ X() : AmpGen::Argument(){} \ } /** @class IArgument @@ -76,14 +76,20 @@ namespace AmpGen std::tuple argTuple( args... ); for_each(argTuple, [this](const auto& f){ this->addArgument(f) ; } ); } - template - ARG getArg( const DEFAULT_TYPE& default_argument = DEFAULT_TYPE() ) const + template arg_type* get() const { - for ( auto param : m_parameters ) { - auto ptr = dynamic_cast( param.get() ); - if ( ptr != nullptr ) return *ptr; + for( const auto& param : m_parameters ) + { + auto ptr = dynamic_cast(param.get()); + if( ptr != nullptr ) return ptr; } - return ARG(default_argument); + return nullptr; + } + template + arg_type getArg( const default_arg_type& default_argument = default_arg_type() ) const + { + auto p = get(); + return p == nullptr ? arg_type(default_argument) : *p; } private: std::vector> m_parameters; diff --git a/AmpGen/BinDT.h b/AmpGen/BinDT.h index 5a29f25aefd..2201e8f36d2 100644 --- a/AmpGen/BinDT.h +++ b/AmpGen/BinDT.h @@ -89,9 +89,15 @@ namespace AmpGen { m_top = makeNodes( addr ); } - template BinDT( const EventList& events, const ARGS&... args ) : BinDT(ArgumentPack( args... 
) ) + template BinDT( const EventList& events, const ARGS&... args ) : BinDT(ArgumentPack(args...) ) + { + m_top = makeNodes( events.begin(), events.end() ); + } + template BinDT( const iterator_type& begin, + const iterator_type& end, const ARGS&... args ) : BinDT(ArgumentPack( args... ) ) { - m_top = makeNodes( events ); + m_top = makeNodes(begin, end ); } explicit BinDT( const ArgumentPack& args ); BinDT( const EventList& events, const ArgumentPack& args ); @@ -114,7 +120,21 @@ namespace AmpGen std::function( const Event& )> makeDefaultFunctors(); void refreshQueue(const std::vector&, std::queue&, const unsigned&); - std::shared_ptr makeNodes(const EventList&); + template + std::shared_ptr makeNodes(const iterator_type& begin, const iterator_type& end) + { + std::vector data( m_dim * (end-begin) ); + std::vector addresses( end-begin ); + size_t counter = 0; + for ( auto evt = begin; evt != end; ++evt ) + { + auto val = m_functors( *evt ); + for ( unsigned int i = 0; i < m_dim; ++i ) data[m_dim * counter + i] = val[i]; + addresses[counter] = &( data[m_dim * counter] ); + counter++; + } + return makeNodes( addresses ); + } std::shared_ptr makeNodes(const std::vector&, std::queue, const unsigned&); std::shared_ptr makeNodes(const std::vector&); std::shared_ptr makeNodes(const std::vector&, const std::vector&); diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 514c5d0d46f..41f8cdfa30d 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -13,14 +13,15 @@ #include "AmpGen/AmplitudeRules.h" #include "AmpGen/CompiledExpression.h" #include "AmpGen/EventList.h" +#include "AmpGen/EventListSIMD.h" #include "AmpGen/EventType.h" #include "AmpGen/Integrator.h" -#include "AmpGen/Integrator2.h" +#include "AmpGen/IntegratorSIMD.h" #include "AmpGen/Types.h" #include "AmpGen/Event.h" #include "AmpGen/Projection.h" #include "AmpGen/MinuitParameter.h" -//#include "AmpGen/functional/pdf.h" + namespace AmpGen { @@ -40,9 +41,20 @@ namespace AmpGen where 
@f$\mathcal{P}(\psi)@f$ is the probability, @f$g_i@f$ is the coupling to an isobar channel, and @f$\mathcal{A}_i(\psi)@f$ is the amplitude of the ith channel. */ - class CoherentSum // : public functional::pdf_base + class CoherentSum { public: + #if ENABLE_AVX2 + using EventList_type = EventListSIMD; + using Integrator_type= IntegratorSIMD; + using complex_v = AVX2::complex_t; + using float_v = AVX2::float_t; + #else + using EventList_type = EventList; + using Integrator_type = Integrator; + using complex_v = complex_t; + using float_v = real_t; + #endif CoherentSum(); CoherentSum( const EventType& type, const AmpGen::MinuitParameterSet& mps, const std::string& prefix = "" ); virtual ~CoherentSum() = default; @@ -50,8 +62,8 @@ namespace AmpGen AmplitudeRules protoAmplitudes() { return m_rules; } std::string prefix() const { return m_prefix; } - TransitionMatrix operator[]( const size_t& index ) { return m_matrixElements[index]; } - const TransitionMatrix operator[]( const size_t& index ) const { return m_matrixElements[index]; } + auto operator[]( const size_t& index ) { return m_matrixElements[index]; } + const auto operator[]( const size_t& index ) const { return m_matrixElements[index]; } size_t size() const { return m_matrixElements.size(); } real_t getWeight() const { return m_weight; } @@ -64,10 +76,8 @@ namespace AmpGen complex_t norm( const size_t& x, const size_t& y ) const; complex_t getVal( const Event& evt ) const; - complex_t getVal( const Event& evt, const std::vector& cacheAddresses ) const; complex_t getValNoCache( const Event& evt ) const; - complex_t getValNoCache( const Event& evt, const size_t& offset ) const; - + void transferParameters(); void prepare(); void printVal( const Event& evt ); @@ -75,28 +85,35 @@ namespace AmpGen void setWeight( MinuitProxy param ) { m_weight = param; } void makeTotalExpression(); void reset( bool resetEvents = false ); - void setEvents( EventList& list ); - void setMC( EventList& sim ); + void setEvents( 
EventList_type& list ); + #if ENABLE_AVX2 + void setEvents( EventList& list) { setEvents( *(new EventListSIMD(list)) ) ; } + void setMC( EventList& list) { setMC( *(new EventListSIMD(list)) ) ; } + float_v operator()( const float_v*, const unsigned) const; + #endif + void setMC( EventList_type& sim ); + void debug( const Event& evt, const std::string& nameMustContain=""); void generateSourceCode( const std::string& fname, const double& normalisation = 1, bool add_mt = false ); - std::vector cacheAddresses( const EventList& evts ) const; std::vector fitFractions( const LinearErrorPropagator& linProp ); - std::vector> matrixElements() const { return m_matrixElements; } + auto matrixElements() const { return m_matrixElements; } std::map> getGroupedAmplitudes(); Bilinears norms() const { return m_normalisations ; } - std::function evaluator(const EventList* = nullptr) const; - KeyedView componentEvaluator(const EventList* = nullptr) const; + std::function evaluator(const EventList_type* = nullptr) const; + KeyedView componentEvaluator(const EventList_type* = nullptr) const; protected: - std::vector> m_matrixElements; ///< Vector of (expanded) matrix elements + std::vector> m_matrixElements; ///< Vector of (expanded) matrix elements Bilinears m_normalisations; ///< Normalisation integrals AmplitudeRules m_rules; ///< Ruleset for the selected transition. 
- Integrator2 m_integrator; ///< Integral dispatch tool (with default unroll = 10) + + Integrator_type m_integrator; ///< Integral dispatch tool (with default unroll = 10) TransitionMatrix m_total; ///< Total Matrix Element - EventList* m_events = {nullptr}; ///< Data events to evaluate PDF on + EventList_type* m_events = {nullptr}; ///< Data events to evaluate PDF on + EventType m_evtType; ///< Final state for this amplitude size_t m_prepareCalls = {0}; ///< Number of times prepare has been called size_t m_lastPrint = {0}; ///< Last time verbose PDF info was printed diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index d3289d6c4a6..558f3588b56 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -27,216 +27,219 @@ namespace AmpGen WARNING("Asking for the size_of the return buffer of an RTO expression"); return 0; } }; } + template class CompiledExpression; template - class CompiledExpression : public CompiledExpressionBase - { - - private: - DynamicFCN m_fcn; - DynamicFCN>(ARGS...)> m_fdb; - std::vector m_externals = {}; - bool m_hasExternalsChanged = {false}; - - public: - typedef RETURN_TYPE return_type; - - CompiledExpression( const Expression& expression, - const std::string& name, - const std::map& evtMapping = - std::map(), - const DebugSymbols& db = {}, - const MinuitParameterSet* mps = nullptr ) - : CompiledExpressionBase( expression, name, db, evtMapping ) + class CompiledExpression : public CompiledExpressionBase { - resolve(mps); - } - - CompiledExpression( const std::string& name = "" ) : CompiledExpressionBase( name ) {}; - std::vector externBuffer() const { return m_externals ; } - std::string returnTypename() const override { return typeof(); } - std::string fcnSignature() const override - { - return CompiledExpressionBase::fcnSignature(typelist(), use_rto()); - } - bool use_rto() const override { - return std::is_same::value; - } - std::string args() const override - { - std::string signature; - auto 
argTypes = typelist(); - for( unsigned int i = 0 ; i < argTypes.size(); ++i ) - { - signature += " x"+std::to_string(i) ; - if( i != argTypes.size() - 1 ) signature += ", "; - } - return signature; - } - - void resolve( const MinuitParameterSet* mps=nullptr ) - { - CompiledExpressionBase::resolve(mps); - } - void setExternals( const std::vector& external ) { m_externals = external; } - - unsigned int getNParams() const { return m_externals.size(); } - - void print() const override - { - INFO( "Name = " << name() ); - INFO( "Hash = " << hash() ); - INFO( "IsReady? = " << isReady() << " IsLinked? " << (m_fcn.isLinked() ) ); - INFO( "args = ["<< vectorToString( m_externals, ", ") <<"]"); - for( auto& c : m_cacheTransfers ){ c->print() ; } - } - - void setExternal( const double& value, const unsigned int& address ) override - { - DEBUG( "Setting external " << address << " / " << m_externals.size() << " to value = " << value << " ; current = " << m_externals[address] ); - if ( m_externals[address] == value ) return; - m_externals[address] = value; - m_hasExternalsChanged = true; - } - void resizeExternalCache(const size_t& N ) override { - if( m_externals.size() < N ){ - m_externals.resize(N); - } - } - bool hasExternalsChanged() { return m_hasExternalsChanged; } - void resetExternals() { m_hasExternalsChanged = false; } - Expression& expression() { return m_obj; } - void compileDetails( std::ostream& stream ) const - { - stream << "extern \"C\" int " << progName() << "_pSize () {\n" - << " return " << m_externals.size() << ";\n"; - stream << "}\n"; - - stream << "extern \"C\" double " << progName() << "_pVal (int n) {\n"; - for ( size_t i = 0; i < m_externals.size(); i++ ) - stream << " if(n == " << i << ") return " << m_externals.at( i ) << ";\n"; - stream << " return 0;\n}\n"; - } + private: + DynamicFCN m_fcn; + DynamicFCN m_batchFcn; + DynamicFCN>(ARGS...)> m_fdb; + std::vector m_externals = {}; + bool m_hasExternalsChanged = {false}; + + public: + typedef 
RETURN_TYPE return_type; + + CompiledExpression( const Expression& expression, + const std::string& name, + const std::map& evtMapping = + std::map(), + const DebugSymbols& db = {}, + const MinuitParameterSet* mps = nullptr ) + : CompiledExpressionBase( expression, name, db, evtMapping ) + { + resolve(mps); + } + + CompiledExpression( const std::string& name = "" ) : CompiledExpressionBase( name ) {}; + std::vector externBuffer() const { return m_externals ; } + std::string returnTypename() const override { return typeof(); } + std::string fcnSignature() const override + { + return CompiledExpressionBase::fcnSignature(typelist(), use_rto()); + } + bool use_rto() const override { + return std::is_same::value; + } + std::string args(bool includeTypes = false) const override + { + std::string signature; + auto argTypes = typelist(); + for( unsigned int i = 0 ; i < argTypes.size(); ++i ) + { + signature += (includeTypes ? argTypes[i] : "") + " x"+std::to_string(i) ; + if( i != argTypes.size() - 1 ) signature += ", "; + } + return signature; + } + + void resolve( const MinuitParameterSet* mps=nullptr ) + { + CompiledExpressionBase::resolve(mps); + } + void setExternals( const std::vector& external ) { m_externals = external; } + + unsigned int getNParams() const { return m_externals.size(); } + + void print() const override + { + INFO( "Name = " << name() ); + INFO( "Hash = " << hash() ); + INFO( "IsReady? = " << isReady() << " IsLinked? 
" << (m_fcn.isLinked() ) ); + INFO( "args = ["<< vectorToString( m_externals, ", ") <<"]"); + for( auto& c : m_cacheTransfers ){ c->print() ; } + } + + void setExternal( const double& value, const unsigned int& address ) override + { + DEBUG( "Setting external " << address << " / " << m_externals.size() << " to value = " << value << " ; current = " << m_externals[address] ); + if ( m_externals[address] == value ) return; + m_externals[address] = value; + m_hasExternalsChanged = true; + } + void resizeExternalCache(const size_t& N ) override { + if( m_externals.size() < N ){ + m_externals.resize(N); + } + } + bool hasExternalsChanged() { return m_hasExternalsChanged; } + void resetExternals() { m_hasExternalsChanged = false; } + + Expression& expression() { return m_obj; } + + void compileDetails( std::ostream& stream ) const + { + stream << "extern \"C\" int " << progName() << "_pSize () {\n" + << " return " << m_externals.size() << ";\n"; + stream << "}\n"; + + stream << "extern \"C\" double " << progName() << "_pVal (int n) {\n"; + for ( size_t i = 0; i < m_externals.size(); i++ ) + stream << " if(n == " << i << ") return " << m_externals.at( i ) << ";\n"; + stream << " return 0;\n}\n"; + } + void compileBatch( std::ostream& stream ) const + { + stream << "#include \n"; + stream << "extern \"C\" void " << progName() + << "_batch(" << returnTypename() << "* rt" + << ", const size_t& N, " + << " const size_t& eventSize, " + << " const size_t& cacheSize, " << args(true) << ") {\n"; + stream << "#pragma omp parallel for\n"; + stream << "for( unsigned int i = 0; i != N/8; ++i ){\n"; + stream << " rt[cacheSize*i] = " << progName() + "( x0, x1 + i * eventSize);"; + stream << "}\n}"; + } + + void compileWithParameters( std::ostream& stream ) const + { + DEBUG( "Compiling " << name() << " = " << hash() ); + stream << "extern \"C\" " << returnTypename() << " " << progName() << "_wParams" + << "( const double*__restrict__ E ){" << std::endl; + stream << " double 
externalParameters [] = {" << (m_externals.size() == 0 ? "0" : vectorToString(m_externals,", ") ) <<"};\n" ; + stream << " return " << progName() << "( externalParameters, E ); // E is P \n}\n"; + } + + bool isReady() const override { return m_fcn.isLinked(); } + bool isLinked() const { return m_fcn.isLinked() ; } + + unsigned returnTypeSize() const override { return detail::size_of()(); } + + template < class T > + RETURN_TYPE operator()( const T* event ) const + { + return m_fcn( m_externals.data(), event ); + } + RETURN_TYPE operator()( const ARGS&... args ) const + { + return m_fcn( args... ); + } + template void batch( arg_types... args ) const { + m_batchFcn(args...); + } + + template < class T> + void debug( const T* event ) const + { + if ( !m_fcn.isLinked() ) { + FATAL( "Function " << name() << " not linked" ); + } + if ( !m_fdb.isLinked() ) { + FATAL( "Function" << name() << " debugging symbols not linked" ); + } + std::vector> debug_results; + if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, &( m_externals[0] ), event ); + else debug_results = m_fdb( &(m_externals[0]), event); + for( auto& debug_result : debug_results ){ + auto val = debug_result.second; + auto label = debug_result.first; + if( std::real(val) == -999. 
) std::cout << bold_on << std::setw(50) << std::left << label << bold_off << std::endl; + else if( std::imag(val) == 0 ) std::cout << " " << std::setw(50) << std::left << label << " = " << std::real(val) << std::endl; + else std::cout << " " << std::setw(50) << std::left << label << " = " << val << std::endl; + } + } + + bool link( void* handle ) override + { + const std::string symbol = progName() ; + bool status = true; + status &= m_fcn.set(handle, symbol, true); + status &= m_db.size() == 0 || m_fdb.set(handle, symbol + "_DB"); + status &= !m_enableBatch || m_batchFcn.set(handle, symbol + "_batch"); + return status; + } + bool link( const std::string& handle ) override + { + return link( dlopen( handle.c_str(), RTLD_NOW ) ); + }; + }; - void compileWithParameters( std::ostream& stream ) const + template + CompiledExpression + make_rto_expression( const Expression& expression, const std::string& name , const bool& verbose=false) { - DEBUG( "Compiling " << name() << " = " << hash() ); - stream << "extern \"C\" " << returnTypename() << " " << progName() << "_wParams" - << "( const double*__restrict__ E ){" << std::endl; - stream << " double externalParameters [] = {" << (m_externals.size() == 0 ? "0" : vectorToString(m_externals,", ") ) <<"};\n" ; - stream << " return " << progName() << "( externalParameters, E ); // E is P \n}\n"; + CompiledExpression rt(expression,name); + rt.compile(); + rt.prepare(); + return rt; } - bool isReady() const override { return m_fcn.isLinked(); } - bool isLinked() const { return m_fcn.isLinked() ; } - - unsigned returnTypeSize() const override { return detail::size_of()(); } - - template < class T > - RETURN_TYPE operator()( const T* event ) const - { - return m_fcn( (const double*)( &( m_externals[0] ) ), event ); - } - RETURN_TYPE operator()( const ARGS&... args ) const - { - return m_fcn( args... 
); - } - template < class T> - void debug( const T* event ) const + template + CompiledExpression + make_expression( const Expression& expression, const std::string& name , const bool& verbose=false) { - if ( !m_fcn.isLinked() ) { - FATAL( "Function " << name() << " not linked" ); - } - if ( !m_fdb.isLinked() ) { - FATAL( "Function" << name() << " debugging symbols not linked" ); - } - std::vector> debug_results; - if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, &( m_externals[0] ), event ); - else debug_results = m_fdb( &(m_externals[0]), event); - for( auto& debug_result : debug_results ){ - auto val = debug_result.second; - auto label = debug_result.first; - if( std::real(val) == -999. ) std::cout << bold_on << std::setw(50) << std::left << label << bold_off << std::endl; - else if( std::imag(val) == 0 ) std::cout << " " << std::setw(50) << std::left << label << " = " << std::real(val) << std::endl; - else std::cout << " " << std::setw(50) << std::left << label << " = " << val << std::endl; - } + CompiledExpression rt(expression,name); + rt.compile(); + rt.prepare(); + return rt; } - - bool link( void* handle ) override + template + CompiledExpression + make_expression( const Expression& expression, + const std::string& name, + const MinuitParameterSet& mps ) { - const std::string symbol = progName() ; - if ( m_fcn.set( handle, symbol ) == 0 ) { - ERROR( dlerror() ); - FATAL( name() << " (symbol = " << symbol << ") linking fails" ); - return false; - } - if ( m_db.size() ==0 ) return true; - if ( m_fdb.set( handle, progName() + "_DB" ) == 0 ) { - ERROR( "Linking of " << name() << " symbol = " << symbol << ") for debugging fails" ); - return false; - } - return true; + CompiledExpression rt(expression,name,{},{},&mps); + rt.compile(); + rt.prepare(); + return rt; } - bool link( const std::string& handle ) override + template + CompiledExpression + make_expression( const Expression& expression, + const std::string& name, + const std::map & 
evtMap, + const MinuitParameterSet& mps ) { - DEBUG( "Linking " << name() << ( m_db.size() !=0 ? " (debugging)" : "" ) << " hash = " << hash() ); - const std::string symbol = progName(); - if ( m_fcn.set( handle, symbol ) == 0 ) { - ERROR( "Function not linked: " << name() << " (sym="<< symbol << ")" ); - return false; - } - if ( m_db.size() ==0 ) return true; - const std::string dbsymbol = symbol + "_DB"; - if ( m_fdb.set( handle, dbsymbol ) == 0 ) { - ERROR( "Linking of " << name() << " symbol = " << dbsymbol << ")" ); - return false; - } - return true; + CompiledExpression rt(expression,name,evtMap,{},&mps); + rt.compile(); + rt.prepare(); + return rt; } - }; - template - CompiledExpression - make_rto_expression( const Expression& expression, const std::string& name , const bool& verbose=false) - { - CompiledExpression rt(expression,name); - rt.compile(); - rt.prepare(); - return rt; - } - - template - CompiledExpression - make_expression( const Expression& expression, const std::string& name , const bool& verbose=false) - { - CompiledExpression rt(expression,name); - rt.compile(); - rt.prepare(); - return rt; - } - template - CompiledExpression - make_expression( const Expression& expression, - const std::string& name, - const MinuitParameterSet& mps ) - { - CompiledExpression rt(expression,name,{},{},&mps); - rt.compile(); - rt.prepare(); - return rt; - } - template - CompiledExpression - make_expression( const Expression& expression, - const std::string& name, - const std::map & evtMap, - const MinuitParameterSet& mps ) - { - CompiledExpression rt(expression,name,evtMap,{},&mps); - rt.compile(); - rt.prepare(); - return rt; - } } // namespace AmpGen diff --git a/AmpGen/CompiledExpressionBase.h b/AmpGen/CompiledExpressionBase.h index d1e26ea9a7a..d80f542b090 100644 --- a/AmpGen/CompiledExpressionBase.h +++ b/AmpGen/CompiledExpressionBase.h @@ -41,24 +41,26 @@ namespace AmpGen void resolve(const MinuitParameterSet* mps = nullptr); void prepare(); void 
compile(const std::string& fname=""); + virtual void compileBatch( std::ostream& stream ) const = 0; void to_stream( std::ostream& stream ) const; unsigned int hash() const; std::string name() const; std::string progName() const; - virtual bool link( void* handle ) = 0; - virtual bool link( const std::string& handle ) = 0; - virtual void setExternal( const double& value, const unsigned int& address ) = 0; - virtual void resizeExternalCache( const size_t& N ) = 0; + virtual bool link( void*) = 0; + virtual bool link( const std::string&) = 0; + virtual void setExternal(const double&, const unsigned&) = 0; + virtual void resizeExternalCache(const size_t&) = 0; virtual bool isReady() const = 0; virtual std::string returnTypename() const = 0; virtual std::string fcnSignature() const = 0; - virtual std::string args() const = 0; + virtual std::string args(bool=false) const = 0; virtual void print() const = 0; virtual ~CompiledExpressionBase(); virtual unsigned returnTypeSize() const = 0; - static std::string fcnSignature(const std::vector& argList, bool rto); + static std::string fcnSignature(const std::vector&, bool); virtual bool use_rto() const = 0; Expression expression() const { return m_obj; } + void enableBatch() { m_enableBatch = true ; } protected: Expression m_obj; std::string m_name; @@ -69,6 +71,8 @@ namespace AmpGen std::vector> m_debugSubexpressions; std::vector> m_cacheTransfers; std::shared_ptr m_resolver; + std::vector m_additionalHeaders; + bool m_enableBatch = {false}; private: void addDebug( std::ostream& stream ) const; void addDependentExpressions( std::ostream& stream, size_t& sizeOfStream ) const; diff --git a/AmpGen/CompilerWrapper.h b/AmpGen/CompilerWrapper.h index 0dcbc738147..1e17d9eb03c 100644 --- a/AmpGen/CompilerWrapper.h +++ b/AmpGen/CompilerWrapper.h @@ -21,6 +21,7 @@ namespace AmpGen void compileSource(const std::string& fname, const std::string& oname ); void setVerbose() { m_verbose = true ; } void preamble(std::ostream& os ) const ; + 
void addHeader(const std::string& include ) { m_includes.push_back(include); } private: std::vector m_includes = {"array","complex","math.h","vector"}; bool m_verbose; diff --git a/AmpGen/DynamicFCN.h b/AmpGen/DynamicFCN.h index d8758b29c90..59157e5c7b7 100644 --- a/AmpGen/DynamicFCN.h +++ b/AmpGen/DynamicFCN.h @@ -50,11 +50,12 @@ namespace AmpGen } return set(m_handle,name); } - bool set( void* handle, const std::string& name ) + bool set( void* handle, const std::string& name, bool isFatal = false) { m_fcn = (RETURN_TYPE( * )( IN_TYPES... ))dlsym( handle, name.c_str() ); if ( m_fcn == nullptr ) { - ERROR( dlerror() ); + if( !isFatal ) ERROR( "Failed to link: " << name << " error: " << dlerror() ); + else FATAL("Failed to link: " << name << " error: " << dlerror() ); return false; } return true; diff --git a/AmpGen/Event.h b/AmpGen/Event.h index bf168f02cd3..5c2145301e7 100644 --- a/AmpGen/Event.h +++ b/AmpGen/Event.h @@ -13,26 +13,26 @@ namespace AmpGen { /** @class Event @brief Encapsulates the final state particles of a single event - Encapsulates the final state particles of a single event, or candidate in the language of proton-proton collisions. Typically will store (i) the event kinematics, i.e. four-momenta, (ii). a cache of complex numbers that contain intermediate calculations of the amplitude, (iii). the weight of the given event/candidate, (iv). The probability that the event was generated with, in the case of a simulated event */ + Encapsulates the final state particles of a single event, or candidate in the language of proton-proton collisions. Typically will store (i) the event kinematics, i.e. four-momenta, (ii). the weight of the given event/candidate, (iii). 
The probability that the event was generated with, in the case of a simulated event */ class Event { public: - - Event( const unsigned& N, const unsigned& cacheSize=0 ); - Event( const real_t* data, const unsigned& N, const unsigned& cacheSize=0); + Event () = default; + Event( const unsigned& N ); + Event( const real_t* data, const unsigned& N ); void set( const unsigned& i, const std::vector& p ); void set( const unsigned& i, const real_t* p ); void set( const real_t* evt ); void set( const unsigned& i, const real_t& p ) ; void swap( const unsigned int& i , const unsigned int& j ); - void setCache(const complex_t& value, const unsigned& pos) ; - template void setCache( const std::array& value, const unsigned& pos ) - { - std::memmove( m_cache.data() + pos, value.data(), sizeof(std::array) ); - } - void setCache( const std::vector& value, const unsigned& pos ); - void resizeCache( const unsigned int& new_size ); + // void setCache(const complex_t& value, const unsigned& pos) ; + // template void setCache( const std::array& value, const unsigned& pos ) + // { + // std::memmove( m_cache.data() + pos, value.data(), sizeof(std::array) ); + // } + // void setCache( const std::vector& value, const unsigned& pos ); + // void resizeCache( const unsigned int& new_size ); unsigned size() const { return m_event.size(); } @@ -41,7 +41,7 @@ namespace AmpGen { const real_t* address(const unsigned& ref=0) const { return &(m_event[ref]); } real_t* address(const unsigned& ref=0) { return &(m_event[ref]); } - unsigned cacheSize() const { return m_cache.size(); } + // unsigned cacheSize() const { return m_cache.size(); } real_t weight() const { return m_weight; } real_t genPdf() const { return m_genPdf; } real_t operator[](const unsigned& i) const { return m_event[i]; } @@ -49,27 +49,28 @@ namespace AmpGen { operator const real_t*() const { return &(m_event[0]); } operator real_t*() { return &(m_event[0]); } - const complex_t& getCache(const unsigned& pos) const { return 
m_cache[pos]; } - const complex_t* getCachePtr(const unsigned& pos=0) const { return &(m_cache[0]) + pos; } - complex_t* getCachePtr(const unsigned& pos=0) { return &(m_cache[0]) + pos; } + // const complex_t& getCache(const unsigned& pos) const { return m_cache[pos]; } + // const complex_t* getCachePtr(const unsigned& pos=0) const { return &(m_cache[0]) + pos; } + // complex_t* getCachePtr(const unsigned& pos=0) { return &(m_cache[0]) + pos; } void setWeight( const real_t& weight ){ m_weight = weight ; } void setGenPdf( const real_t& genPdf ){ m_genPdf = genPdf ; } void extendEvent(const real_t& value) { m_event.push_back( value ); } void print() const; - void printCache() const; - + // void printCache() const; + void setIndex(const unsigned& index){ m_index = index; } + unsigned index() const { return m_index; } real_t s( const unsigned& index) const ; real_t s( const unsigned& index1, const unsigned& index2 ) const ; real_t s( const unsigned& index1, const unsigned& index2, const unsigned& index3 ) const; real_t s( const std::vector& indices ) const ; private: std::vector m_event; - std::vector m_cache; + // std::vector m_cache; real_t m_genPdf = {1}; real_t m_weight = {1}; - + unsigned m_index = {0}; inline real_t get(const unsigned& index ) const { return m_event[index]; }; }; } diff --git a/AmpGen/EventList.h b/AmpGen/EventList.h index 131af735ea0..260cd3affbd 100644 --- a/AmpGen/EventList.h +++ b/AmpGen/EventList.h @@ -31,10 +31,10 @@ namespace AmpGen { private: std::vector m_data = {}; + std::vector m_cache = {}; EventType m_eventType = {}; std::map m_pdfIndex = {}; std::map m_extensions = {}; - size_t m_lastCachePosition = {0}; public: typedef Event value_type; EventList() = default; @@ -71,7 +71,7 @@ namespace AmpGen const Event& at( const size_t& pos ) const { return m_data[pos]; } size_t size() const { return m_data.size(); } double integral() const; - + void reserve( const size_t& size ) { m_data.reserve( size ); } void push_back( const Event& evt ) { 
m_data.push_back( evt ); } void setEventType( const EventType& type ) { m_eventType = type; } @@ -86,48 +86,50 @@ namespace AmpGen size_t getCacheIndex( const CompiledExpressionBase& PDF, bool& status ) const; size_t getCacheIndex( const CompiledExpressionBase& PDF ) const; - template - size_t registerExpression(const T& expression, const size_t& size_of=0) + template size_t registerExpression(const T& expression, const size_t& size_of=0) { auto key = FNV1a_hash( expression.name() ); auto pdfIndex = m_pdfIndex.find( key ); - if ( pdfIndex != m_pdfIndex.end() ) { - return pdfIndex->second; - } else { - size_t lcp = m_lastCachePosition; - size_t expression_size = size_of == 0 ? - expression.returnTypeSize() / sizeof(complex_t) : size_of; - if (lcp >= at( 0 ).cacheSize() ) { - WARNING("Cache index " << lcp << " exceeds cache size = " - << at(0).cacheSize() << " resizing to " - << lcp + expression_size ); - resizeCache( lcp + expression_size ); - } - m_pdfIndex[key] = m_lastCachePosition; - m_lastCachePosition += expression_size; - return lcp; + if ( pdfIndex != m_pdfIndex.end() ) return pdfIndex->second; + else { + size_t expression_size = size_of == 0 ? 
expression.returnTypeSize() / sizeof(complex_t) : size_of; + m_pdfIndex[key] = m_cache.size() / m_data.size(); + m_cache.resize( m_cache.size() + m_data.size() * expression_size ); + return m_pdfIndex[key]; } } - + complex_t cache( const unsigned& evtIndex, const unsigned& cacheElement ) + { + unsigned cacheSize = m_cache.size() / m_data.size(); + return m_cache[cacheSize * evtIndex + cacheElement]; + } + void setCache( const complex_t& v, const unsigned& p ) + { + m_cache[p] = v; + } + void setCache( const std::vector& v, const unsigned& p ) + { + std::memmove(m_cache.data() +p, v.data(), sizeof(complex_t) * v.size() ); + } template void updateCache( const FCN& fcn, const size_t& index ) { + unsigned cacheSize = m_cache.size() / m_data.size(); if constexpr( std::is_same< typename FCN::return_type, void >::value ) { #ifdef _OPENMP #pragma omp parallel for #endif - for ( size_t i = 0; i < size(); ++i ) { - fcn( m_data[i].getCachePtr(index), fcn.externBuffer().data(), m_data[i].address() ); + for ( size_t evt = 0; evt < size(); ++evt ) + { + fcn( m_cache.data() + cacheSize*evt +index , fcn.externBuffer().data(), m_data[evt].address() ); } } else { #ifdef _OPENMP #pragma omp parallel for #endif - for ( size_t i = 0; i < size(); ++i ) { - m_data[i].setCache(fcn(m_data[i].address()), index); - } - } + for ( size_t evt = 0; evt < size(); ++evt ) { setCache( fcn(m_data[evt].address() ), cacheSize*evt + index ) ; } + } } void reserveCache(const size_t& index); void resizeCache(const size_t& newCacheSize ); @@ -183,7 +185,6 @@ namespace AmpGen DECLARE_ARGUMENT(Branches, std::vector); DECLARE_ARGUMENT(EntryList, std::vector); DECLARE_ARGUMENT(GetGenPdf, bool); - DECLARE_ARGUMENT(CacheSize, size_t); DECLARE_ARGUMENT(Filter, std::string); DECLARE_ARGUMENT(WeightBranch, std::string); DECLARE_ARGUMENT(ApplySym, bool); diff --git a/AmpGen/EventListSIMD.h b/AmpGen/EventListSIMD.h new file mode 100644 index 00000000000..bad640db26c --- /dev/null +++ b/AmpGen/EventListSIMD.h @@ -0,0 
+1,173 @@ +#ifndef AMPGEN_EVENTLIST2_H +#define AMPGEN_EVENTLIST2_H + +#include "AmpGen/ArgumentPack.h" +#include "AmpGen/EventType.h" +#include "AmpGen/MsgService.h" +#include "AmpGen/Event.h" +#include "AmpGen/Projection.h" +#include "AmpGen/Utilities.h" +#include "AmpGen/MetaUtils.h" +#include "AmpGen/EventList.h" +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef _OPENMP + #include +#endif + +#if ENABLE_AVX2 + + #include "AmpGen/simd/avx2_types.h" + #include "AmpGen/simd/iterator.h" + +namespace AmpGen +{ + using float_v = AVX2::float_t; + using complex_v = AVX2::complex_t; + + class CompiledExpressionBase; + class EventListSIMD + { + private: + std::vector m_data = {}; + std::vector m_weights = {}; + std::vector m_genPDF = {}; + std::vector m_cache = {}; + EventType m_eventType = {}; + std::map m_pdfIndex = {}; + unsigned m_eventSize = {0}; + unsigned m_nEvents = {0}; + unsigned m_nBlocks = {0}; + public: + typedef Event value_type; + EventListSIMD() = default; + EventListSIMD( const EventType& type ); + template < class ... ARGS > EventListSIMD( const std::string& fname, const EventType& evtType, const ARGS&... args ) : EventListSIMD(evtType) + { + loadFromFile( fname, ArgumentPack(args...) ); + } + template < class ... ARGS > EventListSIMD( const std::string& fname, const ARGS&... args ) : EventListSIMD() + { + loadFromFile( fname, ArgumentPack(args...) ); + } + template < class ... ARGS > EventListSIMD( const std::vector& fname, const EventType& evtType, const ARGS&... args ) : EventListSIMD(evtType) + { + for( auto& f : fname ) loadFromFile( f, ArgumentPack(args...) ); + } + template < class ... ARGS > EventListSIMD( TTree* tree, const EventType& evtType, const ARGS&... args ) : EventListSIMD(evtType) + { + loadFromTree( tree, ArgumentPack(args...) 
); + } + const float_v* data() const { return m_data.data(); } + const AVX2::complex_t* cache() const { return m_cache.data() ; } + EventListSIMD( const EventList& other ); + void resetCache(); + const AVX2::complex_t cache( const unsigned& evtIndex, const unsigned& cachePos ) + { + return m_cache[ (unsigned)(evtIndex/float_v::size) * cacheSize() + cachePos ]; + } + const Event at(const unsigned& p) const { return EventListSIMD::operator[](p) ; } + const float_v* block(const unsigned& p) { return m_data.data() + p * m_eventSize ; } + float_v weight(const unsigned& p) const { return m_weights[p]; } + float_v genPDF(const unsigned& p) const { return m_genPDF[p]; } + const Event operator[]( const size_t&) const; + std::array scatter(unsigned) const; + void gather(const std::array&, unsigned); + auto begin() const { return make_scatter_iterator(0,this); } + auto end() const { return make_scatter_iterator(m_nEvents, (const EventListSIMD*)(nullptr) ); } + auto begin() { return make_scatter_iterator(0, this); } + auto end() { return make_scatter_iterator(m_nEvents, (EventListSIMD*)(nullptr) ); } + EventType eventType() const { return m_eventType; } + size_t aligned_size() const { return nBlocks() * float_v::size; } ///aligned number of events + size_t cacheSize() const { return m_cache.size() / m_nBlocks; } /// number of cached elements + double integral() const; + size_t eventSize() const { return m_eventSize; } + size_t size() const { return m_nEvents ; } + size_t nBlocks() const { return m_nBlocks; } + void reserve( const size_t& size ) { m_data.reserve( size * m_eventType.size() ); } + void setEventType( const EventType& type ) { m_eventType = type; m_eventSize = m_eventType.size(); } + void add( const EventListSIMD& evts ); + void loadFromTree( TTree* tree, const ArgumentPack& args ); + void loadFromFile( const std::string& fname, const ArgumentPack& args ); + void printCacheInfo( const unsigned int& nEvt = 0 ); + void clear(); + + TTree* tree( const std::string& 
name, const std::vector& extraBranches = {} ) const; + + size_t getCacheIndex( const CompiledExpressionBase& PDF, bool& status ) const; + size_t getCacheIndex( const CompiledExpressionBase& PDF ) const; + template unsigned registerExpression(const T& expression, const unsigned& size_of=0) + { + auto key = FNV1a_hash( expression.name() ); + auto pdfIndex = m_pdfIndex.find( key ); + if ( pdfIndex != m_pdfIndex.end() ) return pdfIndex->second; + else { + unsigned nEvents = aligned_size(); + unsigned expression_size = size_of == 0 ? expression.returnTypeSize() / sizeof(AmpGen::AVX2::complex_t) : size_of; + m_pdfIndex[key] = m_cache.size() / nBlocks(); + m_cache.resize(m_cache.size() + nBlocks() * expression_size); + return m_pdfIndex[key]; + } + } + template void updateCache( const FCN& fcn, const size_t& index ) + { + fcn.batch(m_cache.data() + index, aligned_size(), m_eventSize, cacheSize(), fcn.externBuffer().data(), m_data.data()); + } + void reserveCache(const unsigned& index); + void resizeCache( const unsigned& index); + TH1D* makeProjection(const Projection& projection , const ArgumentPack& args = ArgumentPack()) const; + TH2D* makeProjection(const Projection2D& projection, const ArgumentPack& args = ArgumentPack()) const; + std::vector makeProjections( const std::vector& projections, const ArgumentPack& args ); + + template std::vector makeDefaultProjections( const ARGS&... args ) + { + auto argPack = ArgumentPack( args... ); + size_t nBins = argPack.getArg(100); + auto proj = eventType().defaultProjections(nBins); + return makeProjections( proj , argPack ); + } + + template std::vector makeProjections( const std::vector& projections, const ARGS&... args ) + { + return makeProjections( projections, ArgumentPack( args... ) ); + } + + template , ArgumentPack>::value > > + TH1D* makeProjection( const Projection& projection, const ARGS&... args ) const + { + return makeProjection( projection, ArgumentPack(args...) 
); + } + + template , ArgumentPack>::value > > + TH2D* makeProjection( const Projection2D& projection, const ARGS&... args ) + { + return makeProjection( projection, ArgumentPack(args...) ); + } + + template EventListSIMD& transform( functor&& fcn ) + { + for ( auto& event : *this ) fcn( event ); + return *this; + } + static std::vector makeEvent( const Event& event ) + { + std::vector rt( event.size() ); + for( unsigned i = 0 ; i != event.size(); ++i ) rt[i] = event[i]; + return rt; + } + }; + +} // namespace AmpGen +#endif + +#endif diff --git a/AmpGen/Expression.h b/AmpGen/Expression.h index 9c44737404b..85c3f90dca3 100644 --- a/AmpGen/Expression.h +++ b/AmpGen/Expression.h @@ -57,6 +57,11 @@ complex_t X::operator()() const { return F( m_expression() ); } \ std::string X::to_string(const ASTResolver* resolver) const { return std::string(#F)+"("+ m_expression.to_string(resolver)+")";} +#define DEFINE_UNARY_OPERATOR_NO_RESOLVER( X, F ) \ + X::X( const AmpGen::Expression& expression) : IUnaryExpression(expression) {} \ + X::operator Expression() const { return Expression( std::make_shared(*this) ) ; } \ + complex_t X::operator()() const { return F( m_expression() ); } + /// @ingroup ExpressionEngine macro DECLARE_UNARY_OPERATOR /// Macro to declare a unary operator, \ref ExpressionEngine "see IUnaryExpression" #define DECLARE_UNARY_OPERATOR( X ) \ diff --git a/AmpGen/Generator.h b/AmpGen/Generator.h index 761e954e4bb..e55372e71b4 100644 --- a/AmpGen/Generator.h +++ b/AmpGen/Generator.h @@ -33,20 +33,29 @@ namespace AmpGen m_rnd = rand; m_gps.setRandom( m_rnd ); } - void fillEventListPhaseSpace( EventList& list, const size_t& N, const size_t& cacheSize = 0 ) + void fillEventListPhaseSpace( EventList& list, const size_t& N) { - fillEventListPhaseSpace( list, N, cacheSize, []( const Event& evt ) { return 1; } ); + list.reserve( N ); + while( list.size() < N ){ + Event newEvent = m_gps.makeEvent(); + newEvent.setWeight( 1 ); + newEvent.setIndex( list.size() ); + 
list.push_back( newEvent ); + } } void setBlockSize( const size_t& blockSize ) { m_generatorBlock = blockSize; } void setNormFlag( const bool& normSetting ) { m_normalise = normSetting; } - template void fillEventListPhaseSpace( EventList& list, const size_t& N, const size_t& cacheSize, HARD_CUT cut ) + template void fillEventListPhaseSpace( EventList& list, const size_t& N, HARD_CUT cut) { list.reserve( N ); while( list.size() < N ){ - Event newEvent = m_gps.makeEvent( cacheSize ); + Event newEvent = m_gps.makeEvent(); newEvent.setWeight( 1 ); - if ( cut( newEvent ) ) list.push_back( newEvent ); + if ( cut( newEvent ) ){ + newEvent.setIndex( list.size() ); + list.push_back( newEvent ); + } } } @@ -74,7 +83,7 @@ namespace AmpGen while ( list.size() - size0 < N ) { EventList mc( m_eventType ); t_phsp.start(); - fillEventListPhaseSpace( mc, m_generatorBlock, pdf.size(), cut ); + fillEventListPhaseSpace(mc, m_generatorBlock, cut); t_phsp.stop(); t_eval.start(); pdf.setEvents( mc ); @@ -136,10 +145,10 @@ namespace AmpGen fillEventList( pdf, evts, nEvents ); return evts; } - EventList generate(const size_t& nEvents, const size_t& cacheSize=0) + EventList generate(const size_t& nEvents) { EventList evts( m_eventType ); - fillEventListPhaseSpace( evts, nEvents, cacheSize ); + fillEventListPhaseSpace( evts, nEvents); return evts; } }; diff --git a/AmpGen/Integrator.h b/AmpGen/Integrator.h index 5312012b13b..a83cce80da2 100644 --- a/AmpGen/Integrator.h +++ b/AmpGen/Integrator.h @@ -51,231 +51,80 @@ namespace AmpGen Integral(const size_t& i, const size_t& j, TransferFCN t) : i(i), j(j), transfer(t) {} }; -/* - template - class Integrator - { - private: - typedef const complex_t& arg; - size_t m_counter = {0}; - std::array, NROLL> m_integrals; - EventList* m_events = {nullptr}; - void calculate() - { - integrateBlock(); - m_counter = 0; - } - void integrateBlock() - { - real_t re[NROLL] = {0}; - real_t im[NROLL] = {0}; - #pragma omp parallel for reduction(+: re, im) - for ( 
size_t i = 0; i < m_events->size(); ++i ) { - auto& evt = ( *m_events )[i]; - real_t w = evt.weight() / evt.genPdf(); - for ( size_t roll = 0; roll < NROLL; ++roll ) { - auto c = evt.getCache(m_integrals[roll].i) * std::conj(evt.getCache(m_integrals[roll].j)); - re[roll] += w * std::real(c); - im[roll] += w * std::imag(c); - } - } - real_t nv = m_events->norm(); - for ( size_t j = 0; j < m_counter; ++j ) - m_integrals[j].transfer( complex_t( re[j], im[j] ) / nv ); - } + class Integrator + { + typedef const complex_t& arg; + typedef std::function TransferFCN; - public: - Integrator( EventList* events = nullptr ) : m_events( events ){} - - double norm() { return m_events->norm(); } - bool isReady() const { return m_events != nullptr; } - EventList& events() { return *m_events; } - const EventList& events() const { return *m_events; } - void reserveCache(const unsigned& size){ m_events->reserveCache(size); } - template - void addIntegral( const T1& f1, const T2& f2, const Integral::TransferFCN& tf ) - { - addIntegralKeyed( m_events->getCacheIndex(f1), m_events->getCacheIndex(f2), tf ); - } - void queueIntegral(const size_t& i, const size_t& j, complex_t* result){ - addIntegralKeyed(i, j, [result](arg& val){ *result = val ; } ); - } - void queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim = true ) - { - if( ! 
out->workToDo(i,j) )return; - if( sim ) - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ - out->set(i,j,val); - if( i != j ) out->set(j,i, std::conj(val) ); } ); - else - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ out->set(i,j,val); } ); - } - void addIntegralKeyed(const size_t& c1, const size_t& c2, const Integral::TransferFCN& tf ) - { - m_integrals[m_counter++] = Integral(c1, c2, tf); - if ( m_counter == NROLL ) calculate(); - } + public: + explicit Integrator( const EventList* events = nullptr ); - void flush() - { - if ( m_counter == 0 ) return; - calculate(); - } - template - void prepareExpression( const EXPRESSION& expression, const size_t& size_of = 0 ) - { - if( m_events == nullptr ) return; - auto index = m_events->registerExpression( expression , size_of ); - m_events->updateCache( expression, index ); - } - size_t getCacheIndex(const CompiledExpressionBase& expression) const { - return m_events->getCacheIndex(expression); + bool isReady() const; + const EventList* events() const; + void queueIntegral(const size_t& c1, + const size_t& c2, + const size_t& i, + const size_t& j, + Bilinears* out, + const bool& sim = true); + void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); + void queueIntegral(const size_t& i, const size_t& j, complex_t* result); + void flush(); + void setBuffer( complex_t* pos, const complex_t& value, const size_t& size ); + void setBuffer( complex_t* pos, const std::vector& value, const size_t& size); + complex_t get(const unsigned& i, const unsigned& evt) const { return m_cache[i * m_events->size() + evt ]; } + template unsigned getCacheIndex( const T& t ) const { return m_index.find( t.name() )->second.first; } + double norm() const { return m_norm; } + template void allocate( const std::vector& expressions, const size_t& size_of = 0) + { + if( m_events == nullptr ) return; + unsigned totalSize = 0; + for( unsigned i = 0; i != expressions.size(); ++i ){ + size_t vsize = size_of == 0 ? 
expressions[i].returnTypeSize() / sizeof(complex_t) : size_of; + m_index[ expressions[i].name() ] = std::make_pair(totalSize, vsize); + totalSize += vsize; } - }; - */ - template - class BinnedIntegrator - { - private: - typedef const std::vector>& arg; - typedef std::function TransferFCN; - - size_t m_counter = {0}; - std::vector m_view = {0}; - std::vector m_slice = {0}; - std::array, NROLL> m_integrals; - EventList* m_events = {nullptr}; - real_t m_norm = {0}; - void calculate() - { - integrateBlock(); - m_counter = 0; - } - void integrateBlock() + m_cache.resize( m_events->size() * totalSize ); + } + + template void prepareExpression(const T& expression) + { + if( m_events == nullptr ) return; + auto f = m_index.find( expression.name() ); + if( f == m_index.end() ) FATAL("Expression: " << expression.name() << " is not registed"); + auto [p0, s] = f->second; + INFO("Preparing: " << expression.name() << " index = " << p0 << " with: " << s << " values" ); + if constexpr( std::is_same< typename T::return_type, void >::value ) { - double re[( NBINS + 1 ) * NROLL] = {0}; - double im[( NBINS + 1 ) * NROLL] = {0}; - auto ij = [&]( const Event& evt, const unsigned int& i, const unsigned int& j ) { - return evt.getCache( i ) * std::conj( evt.getCache( j ) ); - }; - if ( m_slice.size() == 0 ) { - #pragma omp parallel for reduction( + : re, im ) - for ( unsigned int i = 0; i < m_events->size(); ++i ) { - auto& evt = ( *m_events )[i]; - size_t binNo = m_view[i]; - double w = evt.weight() / evt.genPdf(); - for ( unsigned int roll = 0; roll < NROLL; ++roll ) { - auto c = ij( evt, m_integrals[roll].i, m_integrals[roll].j ); - DEBUG( "pos = " << roll * NBINS + binNo << " val = " << w * c ); - re[roll * NBINS + binNo] += w * std::real( c ); - im[roll * NBINS + binNo] += w * std::imag( c ); - } - } - } else { - #pragma omp parallel for reduction( + : re, im ) - for ( unsigned int i = 0; i < m_slice.size(); ++i ) { - auto& evt = ( *m_events )[m_slice[i]]; - size_t binNo = 
m_view[i]; - double w = evt.weight() / evt.genPdf(); - for ( unsigned int roll = 0; roll < NROLL; ++roll ) { - auto c = ij( evt, m_integrals[roll].i, m_integrals[roll].j ); - re[roll * NBINS + binNo] += w * std::real( c ); - im[roll * NBINS + binNo] += w * std::imag( c ); - } - } - } - for ( size_t thisIntegral = 0; thisIntegral < m_counter; ++thisIntegral ) { - std::vector> tmpBins( NBINS ); - size_t offset = thisIntegral * NBINS; - for ( size_t nBin = 0; nBin < NBINS; ++nBin ) - tmpBins[nBin] = std::complex( re[offset + nBin], im[offset + nBin] ) / m_norm; - m_integrals[thisIntegral].transfer( tmpBins ); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t i = 0; i < m_events->size(); ++i ) + { + std::vector buf(s); + expression(&buf[0], expression.externBuffer().data(), m_events->at(i).address() ); + for( unsigned j = 0; j != s; ++j ) m_cache[ (p0+j) * m_events->size() + i] = buf[j]; } } - public: - BinnedIntegrator( EventList* events = nullptr ) : m_events( events ) - { - if( m_events == nullptr ) return; - for ( const auto& event : *m_events ) m_norm += event.weight() / event.genPdf(); - } - void setView( const std::function& binNumber ) - { - if ( m_slice.size() == 0 ) { - if ( m_view.size() != m_events->size() ) m_view.resize( m_events->size() ); - for ( unsigned int i = 0; i < m_events->size(); ++i ) { - - m_view[i] = binNumber( ( *m_events )[i] ); - - if ( m_view[i] >= NBINS ) { - m_view[i] = NBINS; - WARNING( "Event " << m_slice[i] << " bin number = " << m_view[i] << " is out of range!" ); - } - } - } else { - if ( m_view.size() != m_slice.size() ) m_view.resize( m_slice.size() ); - for ( unsigned int i = 0; i < m_slice.size(); ++i ) { - m_view[i] = binNumber( ( *m_events )[m_slice[i]] ); - if ( m_view[i] >= NBINS ) { - m_view[i] = NBINS; - WARNING( "Event " << m_slice[i] << " bin number = " << m_view[i] << " is out of range!" 
); - } - } - } + else { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t i = 0; i < m_events->size(); ++i ) + setBuffer( &(m_cache[p0 * m_events->size() +i] ), expression(m_events->at(i).address()),s ); } - void setSlice( const std::function& sliceFunction ) - { - if ( m_slice.size() != 0 ) m_slice.clear(); - for ( unsigned int i = 0; i < m_events->size(); ++i ) - if ( sliceFunction( ( *m_events )[i] ) ) m_slice.push_back( i ); - } - - template - void addIntegral( const T1& f1, const T2& f2, const TransferFCN& tFunc ) - { - m_integrals[m_counter++] = Integral( m_events->getCacheIndex( f1 ), m_events->getCacheIndex( f2 ), tFunc ); - if ( m_counter == NROLL ) calculate(); - } - void flush() - { - if ( m_counter == 0 ) return; - calculate(); - } - template - void update( FCN& fcn, std::array& normalisations ) - { - auto mE = fcn.matrixElements(); - auto size = mE.size(); - std::vector toUpdate; - std::vector integralHasChanged( size * size ); - for ( size_t x = 0; x < size; ++x ) { - auto& pdf = mE[x].amp; - pdf.prepare(); - if ( !pdf.hasExternalsChanged() ) continue; - m_events->updateCache( pdf, m_events->getCacheIndex( pdf ) ); - toUpdate.push_back( x ); - } - for ( auto& i : toUpdate ) { - DEBUG( "Updating: " << mE[i].decayTree->uniqueString() ); - for ( unsigned int j = 0; j < size; ++j ) { - if ( integralHasChanged[i * size + j] ) continue; - integralHasChanged[i * size + j] = true; - integralHasChanged[j * size + i] = true; - - addIntegral( mE[i].amp, mE[j].amp, - [i, j, &normalisations]( const auto& val ) { - for ( unsigned int bin = 0; bin < NBINS; ++bin ) { - normalisations[bin].set( i, j, val[bin] ); - if ( i != j ) normalisations[bin].set( j, i, std::conj( val[bin] ) ); - } - } ); - } - } - } - }; + } + + private: + static constexpr size_t N = {10}; ///unroll factor + size_t m_counter = {0}; /// + std::array, N> m_integrals; + const EventList* m_events = {nullptr}; + std::vector m_cache; + std::vector m_weight; + std::map> m_index; + 
double m_norm = {0}; + void integrateBlock(); + }; } // namespace AmpGen #endif diff --git a/AmpGen/Integrator2.h b/AmpGen/Integrator2.h deleted file mode 100644 index f13efd1ad6f..00000000000 --- a/AmpGen/Integrator2.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef AMPGEN_INTEGRATOR2_H -#define AMPGEN_INTEGRATOR2_H 1 - -#include "AmpGen/Integrator.h" - -namespace AmpGen { - /// test /// - class Integrator2 - { - typedef const complex_t& arg; - typedef std::function TransferFCN; - - public: - explicit Integrator2( const EventList* events = nullptr ); - - bool isReady() const; - const EventList& events() const; - void queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim = true); - void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); - void queueIntegral(const size_t& i, const size_t& j, complex_t* result); - void flush(); - void setBuffer( complex_t* pos, const complex_t& value, const size_t& size ); - void setBuffer( complex_t* pos, const std::vector& value, const size_t& size); - void reserveCache(const unsigned int& size){}; - complex_t get(const unsigned& i, const unsigned& evt) const { return m_buffer[i][evt]; } - double norm() const { return m_norm; } - template size_t getCacheIndex(const T& expression) const - { - return m_index.find(expression.name())->second; - } - template void prepareExpression( const T& expression, const size_t& size_of = 0 ) - { - if( m_events == nullptr ) return; - size_t vsize = size_of == 0 ? 
expression.returnTypeSize() / sizeof(complex_t) : size_of; - auto it = m_index.find( expression.name() ); - auto index = 0; - if( it == m_index.end() ) - { - index = m_buffer.size(); - m_index[ expression.name() ] = index; - m_buffer.resize(index+vsize); - for(size_t j = 0 ; j != vsize; ++j ) m_buffer[index+j].resize( m_events->size() ); - } - else index = it->second; - if constexpr( std::is_same< typename T::return_type, void >::value ) - { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t i = 0; i < m_events->size(); ++i ) - { - std::vector buf(vsize); - expression(&buf[0], expression.externBuffer().data(), m_events->at(i).address() ); - for( unsigned j = 0; j != vsize; ++j ) m_buffer[index+j][i] = buf[j]; - //expression(&(m_buffer[index][i]), expression.externBuffer().data(), m_events->at(i).address() ); - } - } - else { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t i = 0; i < m_events->size(); ++i ) - setBuffer( &(m_buffer[index][i]), expression(m_events->at(i).address()), vsize ); - } - } - - private: - static constexpr size_t N = {10}; ///unroll factor - size_t m_counter = {0}; /// - std::array, N> m_integrals; - const EventList* m_events = {nullptr}; - std::vector> m_buffer; - std::vector m_weight; - std::map m_index; - double m_norm = {0}; - void integrateBlock(); - }; -} -#endif diff --git a/AmpGen/IntegratorSIMD.h b/AmpGen/IntegratorSIMD.h new file mode 100644 index 00000000000..2e166cd1294 --- /dev/null +++ b/AmpGen/IntegratorSIMD.h @@ -0,0 +1,73 @@ +#ifndef AMPGEN_INTEGRATORSIMD_H +#define AMPGEN_INTEGRATORSIMD_H 1 + +#if ENABLE_AVX2 + +#include "AmpGen/Integrator.h" +#include "AmpGen/simd/avx2_types.h" +#include "AmpGen/EventListSIMD.h" + +namespace AmpGen { + /// test /// + class IntegratorSIMD + { + typedef const complex_t& arg; + typedef std::function TransferFCN; + + public: + explicit IntegratorSIMD( const EventListSIMD* events = nullptr ); + + bool isReady() const; + void queueIntegral(const size_t& c1, + 
const size_t& c2, + const size_t& i, + const size_t& j, + Bilinears* out, + const bool& sim = true); + void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); + void queueIntegral(const size_t& i, const size_t& j, complex_t* result); + void flush(); + complex_t get(const unsigned& i, const unsigned& evt) const { return m_cache[i * m_events->size() + evt/float_v::size ].at( evt % float_v::size ); } + template unsigned getCacheIndex( const T& t ) const { return m_index.find( t.name() )->second.first; } + double norm() const { return m_norm; } + const EventListSIMD* events() const { return m_events; } + template void allocate( const std::vector& expressions, const size_t& size_of = 0) + { + if( m_events == nullptr ) return; + unsigned totalSize = 0; + for( unsigned i = 0; i != expressions.size(); ++i ){ + size_t vsize = size_of == 0 ? expressions[i].returnTypeSize() / sizeof(AVX2::complex_t) : size_of; + m_index[ expressions[i].name() ] = std::make_pair(totalSize, vsize); + totalSize += vsize; + } + m_cache.resize( m_events->size() * totalSize ); + } + + template void prepareExpression(const T& expression) + { + if( m_events == nullptr ) return; + auto f = m_index.find( expression.name() ); + if( f == m_index.end() ) FATAL("Expression: " << expression.name() << " is not registed"); + auto [p0, s] = f->second; + expression.batch(m_cache.data() + p0*m_events->aligned_size(), + m_events->aligned_size(), + m_events->eventSize(), + 1, + expression.externBuffer().data(), + m_events->data() ); + } + + private: + static constexpr size_t N = {10}; ///unroll factor + size_t m_counter = {0}; /// + std::array, N> m_integrals; + const EventListSIMD* m_events = {nullptr}; + std::vector m_cache; + std::vector m_weight; + std::map> m_index; + double m_norm = {0}; + void integrateBlock(); + }; +} +#endif +#endif diff --git a/AmpGen/LiteSpan.h b/AmpGen/LiteSpan.h index ac1a4548d60..ed4692a015b 100644 --- a/AmpGen/LiteSpan.h +++ b/AmpGen/LiteSpan.h @@ 
-27,14 +27,12 @@ namespace AmpGen { typedef typename container_type::value_type value_type; public: KeyedView( const container_type& container, const unsigned width ) : - m_begin( &(container[0]) ), + m_container(&container), m_cache( width * container.size(),0 ), m_width(width), m_size(container.size()), m_keys( width, "") {} - unsigned index(const value_type& it) const { - if( &it - m_begin < 0 || &it -m_begin >= m_size ) ERROR("Invalid address: " << &it - m_begin ); - return &it -m_begin; } + unsigned index(const value_type& it) const { return it.index() ; } const std::string& key(const unsigned int& column ) const { return m_keys[column] ; } const return_type* operator()( const value_type& it ) const { if( m_width *index(it) >= m_cache.size()) ERROR("Out-of-bounds access : " << index(it) ); @@ -48,7 +46,7 @@ namespace AmpGen { unsigned int column, const std::string& key = "") { - for( unsigned i = 0 ; i != m_size; ++i ) m_cache[ i*m_width + column] = functor(m_begin[i]); + for(const auto& element : *m_container) m_cache[ element.index() * m_width + column] = functor(element); if( key != "" ) m_keys[column] = key; } cache_type& operator()(const value_type& it, const unsigned entry ) { @@ -58,11 +56,11 @@ namespace AmpGen { void setKey(const unsigned& column, const std::string& key ) { m_keys[column] = key ; } void print() { - INFO( "width = " << m_width << ", size = " << m_size << " begin = " << m_begin << " keys = " << vectorToString( m_keys , " ") << " cache size = " << m_cache.size() ); + INFO( "width = " << m_width << ", size = " << m_size << " keys = " << vectorToString( m_keys , " ") << " cache size = " << m_cache.size() ); for( unsigned int i = 0 ; i != m_width ; ++i ) std::cout << m_cache[i] << " "; } private: - const value_type* m_begin; + const container_type* m_container; std::vector m_cache; unsigned m_width; unsigned m_size; diff --git a/AmpGen/PhaseSpace.h b/AmpGen/PhaseSpace.h index 9f462bedc66..2d3f37257d6 100644 --- a/AmpGen/PhaseSpace.h +++ 
b/AmpGen/PhaseSpace.h @@ -29,7 +29,7 @@ namespace AmpGen bool setDecay( const double& m0, const std::vector& mass ); ///< Set the parameters of this phase space generator void setRandom( TRandom* rand ) { m_rand = rand; } ///< Set the random number used by this phase space generator size_t size() const { return m_nt; } ///< Return the number of decay products - Event makeEvent( const size_t& cacheSize = 0 ); ///< Make an event in this phase space. + Event makeEvent(); ///< Make an event in this phase space. EventType eventType() const; ///< Returns the EventType that this phase space is generating void provideEfficiencyReport(const std::vector& report){} diff --git a/AmpGen/Plots.h b/AmpGen/Plots.h deleted file mode 100644 index 7f8c81388fa..00000000000 --- a/AmpGen/Plots.h +++ /dev/null @@ -1,104 +0,0 @@ -#ifndef AMPGEN_PLOTS_H -#define AMPGEN_PLOTS_H -#include "AmpGen/ErrorPropagator.h" -#include "AmpGen/EventList.h" -#include "AmpGen/CoherentSum.h" -#include "AmpGen/IncoherentSum.h" -#include "AmpGen/Integrator.h" -#include "AmpGen/MinuitParameterSet.h" -#include "AmpGen/Projection.h" -#include "AmpGen/Utilities.h" -#include "AmpGen/EventList.h" - -#include -#include -#include -#include -#include - -namespace AmpGen -{ - void perAmplitudePlot(const EventList& evts, const Projection& projection, const CoherentSum& pdf); - - template - std::array getNorms( CoherentSum& fcn, BinnedIntegrator& bid ) - { - std::array normalisations; - for ( unsigned int i = 0; i < NBINS; ++i ) normalisations[i] = Bilinears( fcn.size(), fcn.size() ); - for ( unsigned int i = 0; i < fcn.size(); ++i ) { - for ( unsigned int j = i; j < fcn.size(); ++j ) { - bid.addIntegral( fcn[i].amp, fcn[j].amp, [i, j, &normalisations]( const auto& val ) - { - for ( unsigned int bin = 0; bin < NBINS; ++bin ) { - normalisations[bin].set( i, j, val[bin] ); - if ( i != j ) normalisations[bin].set( j, i, std::conj( val[bin] ) ); - } - } ); - } - } - bid.flush(); - return normalisations; - } - - template - 
std::array getNorms( IncoherentSum& fcn, BinnedIntegrator& bid ) - { - std::array normalisations; - for ( unsigned int i = 0; i < NBINS; ++i ) normalisations[i] = Bilinears( fcn.size(), fcn.size() ); - for ( unsigned int i = 0; i < fcn.size(); ++i ) { - bid.addIntegral( fcn[i].amp, fcn[i].amp, [i, &normalisations]( const auto& val ) { - for ( unsigned int bin = 0; bin < NBINS; ++bin ) normalisations[bin].set( i, 0, val[bin] ); - } ); - } - bid.flush(); - return normalisations; - } - - template - TH1D* plotWithError( EventList& events, FCN& fcn, const Projection& projection, const std::string& prefix, - LinearErrorPropagator& linProp, const std::function& selection = nullptr ) - { - BinnedIntegrator bid( &events ); - if ( selection != nullptr ) bid.setSlice( selection ); - bid.setView( projection.binFunctor() ); - TH1D* plot = projection.plot(); - plot->SetName( ( prefix + plot->GetName() ).c_str() ); - auto normalisations = getNorms( fcn, bid ); - auto vectorBinFunctor = [&normalisations, &fcn, &bid] { - fcn.transferParameters(); - bid.update( fcn, normalisations ); - std::vector values(NBINS); - double total = 0; - for ( size_t bin = 0; bin < NBINS; ++bin ) { - values[bin] = fcn.norm( normalisations[bin] ); - total += values[bin]; - } - for ( size_t bin = 0; bin < NBINS; ++bin ) values[bin] /= total; - return values; - }; - auto values = vectorBinFunctor(); - auto errors = linProp.getVectorError( vectorBinFunctor, NBINS ); - for ( size_t bin = 0; bin < NBINS; ++bin ) { - plot->SetBinContent( bin + 1, values[bin] ); - plot->SetBinError( bin + 1, errors[bin] ); - } - return plot; - } - - template - std::vector bandPlot( EventList& events, const std::string& prefix, FCN& fcn, LinearErrorPropagator& linProp ) - { - std::vector plots; - auto axes = events.eventType().defaultProjections( NBINS ); - for ( auto& proj : axes ) { - INFO( "Making plot:" << proj.name() ); - plots.push_back( plotWithError( events, fcn, proj, prefix, linProp ) ); - } - return plots; - } - 
TGraph* boundary(const AmpGen::EventType& type, - const std::function& p1, - const std::function& p2 ); -} // namespace AmpGen - -#endif diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index 0cc662c728d..308f61833e3 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -15,12 +15,14 @@ #include "AmpGen/CompiledExpression.h" #include "AmpGen/EventList.h" #include "AmpGen/EventType.h" -#include "AmpGen/Integrator2.h" +#include "AmpGen/Integrator.h" #include "AmpGen/CoherentSum.h" #include "AmpGen/Expression.h" #include "AmpGen/Tensor.h" #include "AmpGen/MinuitParameter.h" +#include "AmpGen/IntegratorSIMD.h" + #include "TMatrixD.h" namespace AmpGen @@ -35,7 +37,17 @@ namespace AmpGen class PolarisedSum { public: - typedef Integrator2 integrator; + #if ENABLE_AVX2 + using EventList_type = EventListSIMD; + using Integrator_type= IntegratorSIMD; + using complex_v = AVX2::complex_t; + using float_v = AVX2::float_t; + #else + using EventList_type = EventList; + using Integrator_type = Integrator; + using complex_v = complex_t; + using float_v = real_t; + #endif PolarisedSum() = default; PolarisedSum(const EventType&, MinuitParameterSet&, const std::vector& = {}); @@ -53,7 +65,7 @@ namespace AmpGen Expression probExpression(const Tensor&, const std::vector&, DebugSymbols* = nullptr) const; size_t size() const; real_t norm() const; - complex_t norm(const size_t&, const size_t&, integrator* = nullptr); + complex_t norm(const size_t&, const size_t&, Integrator* = nullptr); inline real_t operator()(const Event& evt) const { return m_weight * prob_unnormalised(evt) / m_norm; } real_t prob_unnormalised(const Event&) const; real_t prob(const Event&) const; @@ -74,7 +86,7 @@ namespace AmpGen std::vector m_pVector = {}; bool m_verbosity = {0}; bool m_debug = {0}; - integrator m_integrator; + Integrator m_integrator; std::vector m_norms; EventType m_eventType; std::string m_prefix = ""; @@ -83,7 +95,7 @@ namespace AmpGen AmplitudeRules m_rules; std::pair m_dim; 
std::vector> m_matrixElements; - CompiledExpression m_probExpression; + CompiledExpression m_probExpression; std::vector> indexProduct(const std::vector>&, const std::vector&) const; std::vector polarisations(const std::string&) const ; diff --git a/AmpGen/ProfileClock.h b/AmpGen/ProfileClock.h index b625b72e55e..2b334861f9c 100644 --- a/AmpGen/ProfileClock.h +++ b/AmpGen/ProfileClock.h @@ -21,11 +21,31 @@ namespace AmpGen{ }; template - double Profile( const FCN& fcn ){ + double Profile( const FCN& fcn, const std::string& name ="" ){ ProfileClock t; for( size_t i = 0 ; i < N; ++i ) fcn(); t.stop(); - INFO( typeof() << " " << t/double(N) << "[ms] per iteration" ); + INFO( (name == "" ? typeof() : name ) << " " << t/double(N) << "[ms] per iteration" ); + return t; + } + template + double ProfileWithStat( const FCN& fcn, const std::string& name ="" ){ + double t = 0; + double t2 = 0; + double tmin = 1e9; + double tmax = 0; + for( size_t i = 0 ; i < N; ++i ){ + ProfileClock pi; + fcn(); + pi.stop(); + t += pi; + t2 += pi*pi; + tmin = pi < tmin ? pi : tmin; + tmax = pi > tmax ? pi : tmax; + } + t /= double(N); + t2 = sqrt( t2 / double(N) - t*t); + INFO( (name == "" ? typeof() : name ) << " " << t << " ± " << t2 << "[ms] per iteration << [" << tmin << ", " << tmax << "]" ); return t; } diff --git a/AmpGen/Projection.h b/AmpGen/Projection.h index 53730e4de3e..2a2481693fa 100644 --- a/AmpGen/Projection.h +++ b/AmpGen/Projection.h @@ -31,13 +31,13 @@ namespace AmpGen const std::string& xAxisTitle, const size_t& nBins, const double& min, const double& max, const std::string& units = "" ); const std::string name() const; - template TH1D* operator()(const EventList& evt, const ARGS... args) const + template TH1D* operator()(const eventlist_type& evts, const ARGS... args) const { - return projInternal(evt, ArgumentPack(args...) ); + return projInternal(evts, ArgumentPack(args...) 
); } - template std::tuple, THStack*> operator()(const EventList& evt, const KeyedView& weightFunction, const ARGS... args ) const + template std::tuple, THStack*> operator()(const eventlist_type& evts, const KeyedView& weightFunction, const ARGS... args ) const { - return projInternal(evt, weightFunction, ArgumentPack(args...) ); + return projInternal(evts, weightFunction, ArgumentPack(args...) ); } double operator()( const Event& evt ) const; @@ -49,8 +49,10 @@ namespace AmpGen friend class Projection2D; private: - TH1D* projInternal(const EventList&, const ArgumentPack&) const; - std::tuple, THStack*> projInternal(const EventList&, const KeyedView&, const ArgumentPack&) const; + template + TH1D* projInternal(const eventlist_type&, const ArgumentPack&) const; + template + std::tuple, THStack*> projInternal(const eventlist_type&, const KeyedView&, const ArgumentPack&) const; std::function m_func; std::string m_name = {""}; std::string m_xAxisTitle = {""}; @@ -81,6 +83,7 @@ namespace AmpGen DECLARE_ARGUMENT(Prefix , std::string); DECLARE_ARGUMENT(Norm , double); DECLARE_ARGUMENT(AddTo , THStack*); + DECLARE_ARGUMENT(AutoWrite , bool); } } // namespace AmpGen diff --git a/AmpGen/RecursivePhaseSpace.h b/AmpGen/RecursivePhaseSpace.h index 73cb08f8bbe..bf71534a906 100644 --- a/AmpGen/RecursivePhaseSpace.h +++ b/AmpGen/RecursivePhaseSpace.h @@ -47,7 +47,7 @@ namespace AmpGen void print( const size_t& offset = 0 ) const; void setRandom( TRandom* rand ); - Event makeEvent( const size_t& cacheSize = 0 ); + Event makeEvent(); size_t size() const; EventType eventType() const ; diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index 3388560266d..0bf34fa827e 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -8,6 +8,11 @@ #include "AmpGen/LiteSpan.h" #include +#if ENABLE_AVX2 + #include "AmpGen/simd/avx2_types.h" + #include "AmpGen/simd/utils.h" +#endif + namespace AmpGen { class EventList; @@ -49,18 +54,42 @@ namespace AmpGen SumPDF( const pdfTypes&... 
pdfs ) : m_pdfs( std::tuple( pdfs... ) ) {} /// Returns negative twice the log-likelihood for this PDF and the given dataset. + double getVal() { - double LL = 0; - for_each( m_pdfs, []( auto& f ) { f.prepare(); } ); - #pragma omp parallel for reduction( +: LL ) - for ( unsigned int i = 0; i < m_events->size(); ++i ) { - auto prob = ((*this))(( *m_events)[i] ); - LL += log(prob); + if constexpr( std::is_same::value ) + { + double LL = 0; + for_each( m_pdfs, []( auto& f ) { f.prepare(); } ); + #pragma omp parallel for reduction( +: LL ) + for ( unsigned int i = 0; i < m_events->size(); ++i ) { + auto prob = ((*this))(( *m_events)[i] ); + LL += log(prob); + } + return -2 * LL; + } + #if ENABLE_AVX2 + if constexpr( std::is_same::value ) + { + float_v LL = 0.f; + for_each( m_pdfs, []( auto& f ) { f.prepare(); } ); + #pragma omp parallel for reduction( +: LL ) + for ( unsigned int block = 0; block < m_events->nBlocks(); ++block ) { + LL += log(this->operator()(m_events->block(block), block)); + } + return -2 * utils::sum_elements(LL); } - return -2 * LL; + #endif + } + /// Returns the probability for the given event. + #if ENABLE_AVX2 + float_v operator()( const float_v* evt , const unsigned block) + { + float_v prob = 0.f; + for_each( this->m_pdfs, [&prob, &evt,block]( const auto& f ) { prob += f(evt, block); } ); + return prob; } - + #endif /// Returns the probability for the given event. 
double operator()( const eventValueType& evt ) { diff --git a/AmpGen/ThreeBodyCalculators.h b/AmpGen/ThreeBodyCalculators.h index 887baf2b958..d7b24724dd4 100644 --- a/AmpGen/ThreeBodyCalculators.h +++ b/AmpGen/ThreeBodyCalculators.h @@ -18,12 +18,12 @@ namespace AmpGen struct PartialWidth { CoherentSum fcs; DalitzIntegrator integrator; - CompiledExpression< std::complex, const real_t*, const real_t* > totalWidth; + CompiledExpression totalWidth; EventType type; - std::vector, const real_t*, const real_t*>> partialWidths; + std::vector> partialWidths; double getWidth( const double& m ); PartialWidth( const EventType& type, MinuitParameterSet& mps ); - Expression spinAverageMatrixElement( const std::vector>>& elements, + Expression spinAverageMatrixElement( const std::vector >& elements, DebugSymbols* msym ); }; Expression calculateSAME( const std::string& particle ); diff --git a/AmpGen/TreePhaseSpace.h b/AmpGen/TreePhaseSpace.h index f9c3f33922a..bca2cd1da5c 100644 --- a/AmpGen/TreePhaseSpace.h +++ b/AmpGen/TreePhaseSpace.h @@ -51,7 +51,7 @@ namespace AmpGen void generate(); void print(const unsigned& offset = 0) const; void place(Event& event); - Event event(const unsigned& eventSize, const unsigned& cacheSize=0); + Event event(const unsigned& eventSize); void generateFullEvent(); void setRhoMax(); void setRandom(TRandom3* rnd); @@ -82,7 +82,7 @@ namespace AmpGen TreePhaseSpace(const std::vector& decayChains, const EventType& type, TRandom* rndm = nullptr); void setRandom( TRandom* rand ); - Event makeEvent( const unsigned& cacheSize = 0 ); + Event makeEvent(); size_t size() const; EventType eventType() const ; double genPdf( const Event& event) const ; diff --git a/AmpGen/Utilities.h b/AmpGen/Utilities.h index 929c55c558f..791ae437a37 100644 --- a/AmpGen/Utilities.h +++ b/AmpGen/Utilities.h @@ -161,13 +161,10 @@ namespace AmpGen { } return total; } - template std::function - arrayToFunctor( const std::vector& values, const container_type* container ) + template 
std::function + arrayToFunctor( const std::vector& values) { - return [container, values](const typename container_type::value_type& event) -> return_type { - int addr = &event - &container->at(0); - return *(values.data() + addr); }; + return [values](const contained_type& event) -> return_type {return *(values.data() + event.index()); }; } template diff --git a/Standalone.cmake b/Standalone.cmake index f96ab83bb78..a0047390f44 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -13,6 +13,7 @@ if( NOT "${CMAKE_CXX_STANDARD}" ) endif() SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") +SET(ENABLE_AVX2 TRUE CACHE BOOL "ENABLE_AVX2") set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -137,18 +138,25 @@ endif() target_compile_definitions(AmpGen PRIVATE "AMPGENROOT_CMAKE=\"${CMAKE_BINARY_DIR}/bin\"" + "AMPGENROOT=\"${PROJECT_SOURCE_DIR}\"" "AMPGEN_CXX=\"${AMPGEN_CXX}\"" "USE_OPENMP=\"${USE_OPENMP}\"" $<$:DEBUGLEVEL=1> $<$:TRACELEVEL=1>) + target_compile_options(AmpGen INTERFACE -Wall -Wextra -Wpedantic -g3 -Wno-unused-parameter -Wno-unknown-pragmas - -march=native - $<$:-Ofast>) + $<$:-O3>) + +if( ENABLE_AVX2 ) + message(STATUS "Enabling AVX2...") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1") + target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -DHAVE_AVX2_INSTRUCTIONS) +endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" ) target_link_libraries(AmpGen PUBLIC stdc++) diff --git a/apps/ConvertToSourceCode.cpp b/apps/ConvertToSourceCode.cpp index 7e476408153..c753204bb7c 100644 --- a/apps/ConvertToSourceCode.cpp +++ b/apps/ConvertToSourceCode.cpp @@ -16,11 +16,12 @@ #include "AmpGen/Particle.h" #include "AmpGen/ParticlePropertiesList.h" #include "AmpGen/AddCPConjugate.h" - +#include "AmpGen/simd/utils.h" #include "TRandom3.h" using namespace AmpGen; +/* template void create_integration_tests(T& pdf, const EventType& type, @@ -62,12 +63,13 @@ void create_integration_tests(T& pdf, unit_tests << "};" << 
std::endl; unit_tests << " auto expr = make_expression(p.getExpression(), p.decayDescriptor(), type.getEventFormat(), mps);" << std::endl; unit_tests << " auto eval = expr(event);" << std::endl; - unit_tests << " BOOST_TEST( std::real(eval) == " << std::real(value)<< ", boost::test_tools::tolerance(1e-6)) ;" << std::endl; - unit_tests << " BOOST_TEST( std::imag(eval) == " << std::imag(value)<< ", boost::test_tools::tolerance(1e-6)) ;" << std::endl; + unit_tests << " BOOST_TEST( std::real(eval) == " << std::real(utils::get<0>(value))<< ", boost::test_tools::tolerance(1e-6)) ;" << std::endl; + unit_tests << " BOOST_TEST( std::imag(eval) == " << std::imag(utils::get<0>(value))<< ", boost::test_tools::tolerance(1e-6)) ;" << std::endl; unit_tests << "}\n\n"; } unit_tests.close(); } +*/ template void generate_source(T& pdf, EventList& normEvents, const std::string& sourceFile, MinuitParameterSet& mps, const double& sf) { @@ -130,7 +132,7 @@ int main( int argc, char** argv ) if( type == "CoherentSum" ){ CoherentSum sig( eventType, MPS, "" ); generate_source( sig, phspEvents, sourceFile, MPS, safetyFactor ); - create_integration_tests(sig, eventType, MPS, {phspEvents[15]}, sourceFile ); + //create_integration_tests(sig, eventType, MPS, {phspEvents[15]}, sourceFile ); } if( type == "PolarisedSum" ){ PolarisedSum sig( eventType, MPS ); diff --git a/apps/Debugger.cpp b/apps/Debugger.cpp index a440aab9247..29a1c2346a5 100644 --- a/apps/Debugger.cpp +++ b/apps/Debugger.cpp @@ -89,10 +89,12 @@ int main( int argc, char** argv ) EventList accepted = infile == "" ? 
EventList( eventType ) : EventList( infile, eventType ); std::string input_units = NamedParameter("Units","GeV"); - if( input_units == "MeV" && infile != "") accepted.transform([](auto& event){ for( int i = 0;i<16;++i) event[i]/=1000; } ); + if( input_units == "MeV" && infile != "") accepted.transform([](auto& event){ for( unsigned i = 0;i< event.size();++i) event[i]/=1000; } ); if( infile == "" ){ - Event evt = PhaseSpace( eventType, rndm ).makeEvent(); - accepted.push_back(evt); + for( unsigned i = 0 ; i != 16; ++i ){ + Event evt = PhaseSpace( eventType, rndm ).makeEvent(); + accepted.push_back(evt); + } } accepted[0].print(); diff --git a/apps/Fitter.cpp b/apps/Fitter.cpp index a1dedbb40b7..5159a789cd7 100644 --- a/apps/Fitter.cpp +++ b/apps/Fitter.cpp @@ -28,13 +28,20 @@ #include "AmpGen/ThreeBodyCalculators.h" #include "AmpGen/Utilities.h" #include "AmpGen/Generator.h" -#include "AmpGen/Plots.h" #ifdef _OPENMP #include #include #endif +#if ENABLE_AVX2 + #include "AmpGen/EventListSIMD.h" + using EventList_type = AmpGen::EventListSIMD; +#else + #include "AmpGen/EventList.h" + using EventList_type = AmpGen::EventList; +#endif + #include "TFile.h" #include "TRandom3.h" @@ -60,15 +67,6 @@ void randomizeStartingPoint( MinuitParameterSet& MPS, TRandom3& rand, bool Splin } } -unsigned int count_amplitudes( const AmpGen::MinuitParameterSet& mps ) -{ - unsigned int counter = 0; - for ( auto param = mps.cbegin(); param != mps.cend(); ++param ) { - if ( ( *param )->name().find( "_Re" ) != std::string::npos ) counter++; - } - return counter; -} - template void addExtendedTerms( Minimiser& mini, SIGPDF& pdf, MinuitParameterSet& mps ) { @@ -219,10 +217,9 @@ int main( int argc, char* argv[] ) const std::string cut = NamedParameter( "Cut", "1" ); const std::string simCut = NamedParameter( "SimCut", "1" ); bool BAR = NamedParameter("Bar",false); - size_t defaultCacheSize = count_amplitudes( MPS ); - EventList events( dataFile, !BAR ? 
evtType : evtType.conj() , CacheSize(defaultCacheSize), Filter(cut) ); - EventList eventsMC = mcFile == "" ? EventList( evtType) : EventList( mcFile, !BAR ? evtType : evtType.conj() , CacheSize(defaultCacheSize), Filter(simCut) ) ; + EventList events( dataFile, !BAR ? evtType : evtType.conj() , Filter(cut) ); + EventList eventsMC = mcFile == "" ? EventList( evtType) : EventList( mcFile, !BAR ? evtType : evtType.conj(), Filter(simCut) ) ; auto scale_transform = [](auto& event){ for( size_t x = 0 ; x < event.size(); ++x ) event[x] /= 1000.; }; if( NamedParameter("Units", "GeV").getVal() == "MeV") { diff --git a/examples/QcGenerator.cpp b/examples/QcGenerator.cpp index 8fbcf63695e..48878c4a5a3 100644 --- a/examples/QcGenerator.cpp +++ b/examples/QcGenerator.cpp @@ -50,7 +50,7 @@ struct DTEvent AmpGen::Event signal; AmpGen::Event tag; double prob; - DTEvent() : signal(0,0,0), tag(0,0,0) {}; + DTEvent() : signal(0,0), tag(0,0) {}; DTEvent( const AmpGen::Event& signal, const AmpGen::Event& tag ) : signal(signal), tag(tag) {}; void set( const AmpGen::Event& s1, const AmpGen::Event& s2 ) { signal.set(s1); tag.set(s2); }; void invertParity(){ @@ -143,7 +143,7 @@ template class Psi3770 { { double n1(0), n2(0), zR(0), zI(0); auto normEvents = Generator(type).generate(m_blockSize); -#pragma omp parallel for reduction(+:zR,zI,n1,n2) + #pragma omp parallel for reduction(+:zR,zI,n1,n2) for(size_t i = 0; i < m_blockSize; ++i){ auto p1 = t1(normEvents[i]); auto p2 = t2(normEvents[i]); diff --git a/examples/SignalOnlyFitter.cpp b/examples/SignalOnlyFitter.cpp index ce4bc7d5ded..3d3932e573e 100644 --- a/examples/SignalOnlyFitter.cpp +++ b/examples/SignalOnlyFitter.cpp @@ -9,7 +9,6 @@ #include #include "AmpGen/Chi2Estimator.h" -#include "AmpGen/EventList.h" #include "AmpGen/EventType.h" #include "AmpGen/CoherentSum.h" #include "AmpGen/IncoherentSum.h" @@ -27,14 +26,21 @@ #include #endif +#if ENABLE_AVX2 + #include "AmpGen/EventListSIMD.h" + using EventList_type = 
AmpGen::EventListSIMD; +#else + #include "AmpGen/EventList.h" + using EventList_type = AmpGen::EventList; +#endif + #include #include #include using namespace AmpGen; -template -FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& MPS ); +template FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitParameterSet& MPS ); int main( int argc, char* argv[] ) { @@ -98,12 +104,12 @@ int main( int argc, char* argv[] ) /* Events are read in from ROOT files. If only the filename and the event type are specified, the file is assumed to be in the specific format that is defined by the event type, unless the branches to load are specified in the user options */ - EventList events(dataFile, evtType, Branches(bNames), GetGenPdf(false) ); + EventList_type events(dataFile, evtType, Branches(bNames), GetGenPdf(false) ); /* Generate events to normalise the PDF with. This can also be loaded from a file, which will be the case when efficiency variations are included. Default number of normalisation events is 5 million. */ - EventList eventsMC = intFile == "" ? Generator<>(evtType, &rndm).generate(2e6) : EventList(intFile, evtType, GetGenPdf(true)); + EventList_type eventsMC = intFile == "" ? Generator<>(evtType, &rndm).generate(2.5e6) : EventList_type(intFile, evtType, GetGenPdf(true)); sig.setMC( eventsMC ); @@ -119,19 +125,12 @@ int main( int argc, char* argv[] ) fr->addFractions( fitFractions ); fr->writeToFile( logFile ); - output->cd(); - /* Write out the data plots. 
This also shows the first example of the named arguments - to functions, emulating python's behaviour in this area */ - - auto plots = events.makeDefaultProjections(PlotOptions::Prefix("Data"), PlotOptions::Bins(100)); - for ( auto& plot : plots ) plot->Write(); - output->Close(); } template -FitResult* doFit( likelihoodType&& likelihood, EventList& data, EventList& mc, MinuitParameterSet& MPS ) +FitResult* doFit( likelihoodType&& likelihood, EventList_type& data, EventList_type& mc, MinuitParameterSet& MPS ) { auto time_wall = std::chrono::high_resolution_clock::now(); auto time = std::clock(); @@ -151,9 +150,9 @@ FitResult* doFit( likelihoodType&& likelihood, EventList& data, EventList& mc, M /* Estimate the chi2 using an adaptive / decision tree based binning, down to a minimum bin population of 15, and add it to the output. */ - Chi2Estimator chi2( data, mc, likelihood.evaluator(&mc), MinEvents(15), Dim(data.eventType().dof()) ); - chi2.writeBinningToFile("chi2_binning.txt"); - fr->addChi2( chi2.chi2(), chi2.nBins() ); + //Chi2Estimator chi2( data, mc, likelihood.evaluator(&mc), MinEvents(15), Dim(data.eventType().dof()) ); + //chi2.writeBinningToFile("chi2_binning.txt"); + //fr->addChi2( chi2.chi2(), chi2.nBins() ); fr->print(); /* Make the plots for the different components in the PDF, i.e. the signal and backgrounds. 
@@ -163,12 +162,9 @@ FitResult* doFit( likelihoodType&& likelihood, EventList& data, EventList& mc, M auto projections = data.eventType().defaultProjections(100); for( const auto& proj : projections ) { - auto [components, total] = proj(mc, evaluator, PlotOptions::Norm(data.size() ) ); - for( const auto& component : components ) component->Write(); - total->Write(); - auto [signal_components, total_signal_component] = proj(mc, evaluator_per_component, PlotOptions::Norm(data.size()) ); - for( const auto& component : signal_components ) component->Write(); - total_signal_component->Write(); + proj(mc, evaluator, PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); + proj(mc, evaluator_per_component, PlotOptions::Prefix("amp"), PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); + proj(data, PlotOptions::Prefix("Data") )->Write(); } return fr; } diff --git a/src/ASTResolver.cpp b/src/ASTResolver.cpp index 8732bed9b14..2b6785aba3f 100644 --- a/src/ASTResolver.cpp +++ b/src/ASTResolver.cpp @@ -19,6 +19,7 @@ ASTResolver::ASTResolver(const std::map& evtMap, m_nParameters(0) { m_enable_cuda = NamedParameter("UseCUDA",false); + m_enable_avx = NamedParameter("UseAVX",false); m_enable_compileTimeConstants = NamedParameter("ASTResolver::CompileTimeConstants", false); } diff --git a/src/BinDT.cpp b/src/BinDT.cpp index e6c15149b0d..24a62280198 100644 --- a/src/BinDT.cpp +++ b/src/BinDT.cpp @@ -97,10 +97,11 @@ BinDT::BinDT( const ArgumentPack& args ) } } -BinDT::BinDT( const EventList& events, const ArgumentPack& args ) : BinDT(args) +BinDT::BinDT( const EventList& events, const ArgumentPack& args ) : BinDT(args) { - m_top = makeNodes(events); + makeNodes( events.begin(), events.end() ); } + void BinDT::readFromStream( std::istream& stream ) { std::map>> nodes; @@ -368,16 +369,3 @@ void BinDT::Decision::visit( const std::function& visit_fun m_right->visit( visit_function ); } -std::shared_ptr BinDT::makeNodes( const EventList& events ) -{ - std::vector data( m_dim 
* events.size() ); - std::vector addresses( events.size() ); - size_t counter = 0; - for ( auto& evt : events ) { - auto val = m_functors( evt ); - for ( unsigned int i = 0; i < m_dim; ++i ) data[m_dim * counter + i] = val[i]; - addresses[counter] = &( data[m_dim * counter] ); - counter++; - } - return makeNodes( addresses ); -} diff --git a/src/Chi2Estimator.cpp b/src/Chi2Estimator.cpp index 500210f0489..a94b04fdc67 100644 --- a/src/Chi2Estimator.cpp +++ b/src/Chi2Estimator.cpp @@ -56,10 +56,11 @@ void Chi2Estimator::doChi2( const EventList& dataEvents, const EventList& mcEv j++; } j = 0; - for ( auto& evt : mcEvents ) { + for ( auto& evt : mcEvents ) + { if ( j % 1000000 == 0 && j != 0 ) INFO( "Binned " << j << " sim. events" ); double w = fcn( evt ) * evt.weight() / evt.genPdf(); - mc[m_binning.getBinNumber( evt )].add( w ); + mc[m_binning.getBinNumber(evt)].add( w ); total_int_weight += w; j++; } diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 3bc4b612121..9ad5a6629c8 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -25,6 +25,7 @@ #include "AmpGen/CompilerWrapper.h" #include "AmpGen/ThreadPool.h" #include "AmpGen/ProfileClock.h" +#include "AmpGen/simd/utils.h" #ifdef _OPENMP #include @@ -34,13 +35,13 @@ using namespace AmpGen; CoherentSum::CoherentSum() = default; CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, const std::string& prefix ) - : m_rules(mps) - , m_evtType (type) - , m_printFreq (NamedParameter( "CoherentSum::PrintFrequency", 100) ) - , m_dbThis (NamedParameter( "CoherentSum::Debug" , false)) - , m_verbosity (NamedParameter( "CoherentSum::Verbosity" , 0) ) - , m_objCache (NamedParameter("CoherentSum::ObjectCache" ,"") ) - , m_prefix (prefix) + : m_rules (mps) + , m_evtType (type) + , m_printFreq(NamedParameter( "CoherentSum::PrintFrequency", 100) ) + , m_dbThis (NamedParameter( "CoherentSum::Debug" , false)) + , m_verbosity(NamedParameter( "CoherentSum::Verbosity" , 0) ) + , m_objCache 
(NamedParameter("CoherentSum::ObjectCache" ,"") ) + , m_prefix (prefix) { auto amplitudes = m_rules.getMatchingRules( m_evtType, prefix); if( amplitudes.size() == 0 ){ @@ -53,21 +54,12 @@ CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, ThreadPool tp(nThreads); for(size_t i = 0; i < m_matrixElements.size(); ++i){ tp.enqueue( [i,this,&mps,&litudes]{ - m_matrixElements[i] = TransitionMatrix( amplitudes[i].first, amplitudes[i].second, mps, this->m_evtType.getEventFormat(), this->m_dbThis); + m_matrixElements[i] = TransitionMatrix( amplitudes[i].first, amplitudes[i].second, mps, this->m_evtType.getEventFormat(), this->m_dbThis); CompilerWrapper().compile( m_matrixElements[i].amp, this->m_objCache); } ); } m_isConstant = false; } -void updateCache(EventList* events, TransitionMatrix& me, const size_t& sizeMax) -{ - if ( me.addressData == 999 ) - { - if( events->at(0).cacheSize() <= sizeMax) events->resizeCache(sizeMax); - me.addressData = events->registerExpression( me.amp ); - } - events->updateCache(me.amp, me.addressData); -} void CoherentSum::prepare() { @@ -75,22 +67,38 @@ void CoherentSum::prepare() transferParameters(); std::vector changedPdfIndices; ProfileClock clockEval; - bool print = false; + if( m_prepareCalls == 0 && m_events != nullptr ){ + m_events->reserveCache(m_matrixElements.size()); + for( auto& me : m_matrixElements ) me.addressData = m_events->registerExpression( me.amp ); + } + if( m_prepareCalls == 0 ) m_integrator.allocate( m_matrixElements ); for ( size_t i = 0; i < m_matrixElements.size(); ++i ) { m_matrixElements[i].amp.prepare(); if ( m_prepareCalls != 0 && !m_matrixElements[i].amp.hasExternalsChanged() ) continue; - if ( m_events != nullptr ) updateCache( m_events, m_matrixElements[i], m_matrixElements.size() ); - m_integrator.prepareExpression( m_matrixElements[i].amp ); + if ( m_events != nullptr ) m_events->updateCache( m_matrixElements[i].amp, m_matrixElements[i].addressData ); + 
m_integrator.prepareExpression(m_matrixElements[i].amp ); changedPdfIndices.push_back(i); m_matrixElements[i].amp.resetExternals(); - print = true; } clockEval.stop(); ProfileClock clockIntegral; if ( m_integrator.isReady()) updateNorms( changedPdfIndices ); else if ( m_verbosity ) WARNING( "No simulated sample specified for " << this ); m_norm = norm(); - if ( m_verbosity && print ) { + if ( m_prepareCalls == 0 ){ + INFO( "Norm: " << m_norm ); + for(unsigned i = 0 ; i != m_matrixElements.size() ; ++i ){ + for(unsigned j = 0 ; j != m_matrixElements.size() ; ++j ){ + if( std::isnan( std::real(m_normalisations(i,j) )) || std::isnan( std::imag(m_normalisations(i,j))) ) + ERROR("Norm: " << m_matrixElements[i].name() << " " << m_matrixElements[j].name() << " is ill-posed!"); + } + } + // INFO( m_normalisations.get(0,0) << " " + // << m_normalisations.get(1,0) << " " + // << m_normalisations.get(0,1) << " " + // << m_normalisations.get(2,2) ); + } + if ( m_verbosity && changedPdfIndices.size() !=0 ) { clockIntegral.stop(); INFO( "Time Performance: " << "Eval = " << clockEval << " ms" @@ -121,17 +129,18 @@ void CoherentSum::debug( const Event& evt, const std::string& nameMustContain ) for ( auto& me : m_matrixElements ) { auto A = me(evt); INFO( std::setw(70) << me.decayTree.uniqueString() - << " A = [ " << std::real(A) << " " << std::imag(A) - << " ] g = [ "<< std::real(me.coupling()) << " " << std::imag(me.coupling()) << " ] " + << " A = [ " << utils::get(A.real()) << " " << utils::get(A.imag()) + << " ] g = [ "<< me.coupling().real() << " " << me.coupling().imag() << " ] " + << m_events->cache( evt.index(), me.addressData ) << me.decayTree.CP() ); - if( m_dbThis ) me.amp.debug( evt.address() ); + // if( m_dbThis ) me.amp.debug( evt.address() ); } - else - for ( auto& me : m_matrixElements ) - if ( me.amp.name().find( nameMustContain ) != std::string::npos ) me.amp.debug( evt.address() ); - if( evt.cacheSize() != 0 ) INFO( "Pdf = " << prob_unnormalised( evt ) ); - INFO( 
"A(x) = " << getVal(evt) ); + //else + // for ( auto& me : m_matrixElements ) + // if ( me.amp.name().find( nameMustContain ) != std::string::npos ) me.amp.debug( evt.address() ); + // if( evt.cacheSize() != 0 ) INFO( "Pdf = " << prob_unnormalised( evt ) ); + INFO( "A(x) = " << getVal(evt) << " without cache: " << getValNoCache(evt) ); } std::vector CoherentSum::fitFractions(const LinearErrorPropagator& linProp) @@ -194,11 +203,11 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor Expression this_amplitude = p.coupling() * Function( programatic_name( p.amp.name() ) + "_wParams", {event} ); amplitude = amplitude + ( p.decayTree.finalStateParity() == 1 ? 1 : pa ) * this_amplitude; } - stream << CompiledExpression< std::complex, const double*, const int&>( amplitude , "AMP" ) << std::endl; - stream << CompiledExpression< double, const double*, const int&>(fcn::norm(amplitude) / normalisation, "FCN" ) << std::endl; + stream << CompiledExpression(const double*, const int&)>( amplitude , "AMP" ) << std::endl; + stream << CompiledExpression(fcn::norm(amplitude) / normalisation, "FCN" ) << std::endl; if( includePythonBindings ){ - stream << CompiledExpression< unsigned int >( m_matrixElements.size(), "matrix_elements_n" ) << std::endl; - stream << CompiledExpression< double > ( normalisation, "normalization") << std::endl; + stream << CompiledExpression( m_matrixElements.size(), "matrix_elements_n" ) << std::endl; + stream << CompiledExpression( normalisation, "normalization") << std::endl; stream << "extern \"C\" const char* matrix_elements(int n) {\n"; for ( size_t i = 0; i < m_matrixElements.size(); i++ ) { @@ -228,21 +237,6 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor stream << ( i == size() - 1 ? 
";" : " +" ) << "\n"; } stream << " out[i] = std::norm(amplitude) / " << normalisation << ";\n }\n}\n"; - stream << "extern \"C\" void FCN_mt(double* out, double* events, unsigned int size, int parity, double* amps){\n"; - stream << " unsigned int n = std::thread::hardware_concurrency();\n"; - stream << " unsigned int batch_size = size / n;\n"; - stream << " std::vector threads;\n"; - stream << " for(size_t i=0; im_events )[0].size() - << ", len, parity, amps);\n"; - stream << " }\n"; - stream << " for(auto &thread : threads)\n"; - stream << " thread.join();\n"; - stream << "}\n\n"; stream << "extern \"C\" double coefficients( int n, int which, int parity){\n"; for ( size_t i = 0; i < size(); i++ ) { @@ -260,18 +254,10 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor complex_t CoherentSum::getValNoCache( const Event& evt ) const { - return std::accumulate( m_matrixElements.begin(), - m_matrixElements.end(), - complex_t(0,0), - [&evt]( auto& a, auto& b ){ return a + b.coefficient * b(evt);} ); -} - -complex_t CoherentSum::getValNoCache(const Event& evt, const size_t& offset) const -{ - return std::accumulate( m_matrixElements.begin(), + return utils::get<0>( complex_v(std::accumulate( m_matrixElements.begin(), m_matrixElements.end(), - complex_t(0,0), - [&evt,&offset]( auto& a, auto& b ){ return a + b.coefficient * b(evt, offset);} ); + complex_v(0,0), + [&evt]( const auto& a, const auto& b ){ return a + b.coefficient * b(evt);} )) ); } void CoherentSum::reset( bool resetEvents ) @@ -281,22 +267,23 @@ void CoherentSum::reset( bool resetEvents ) for ( auto& mE : m_matrixElements ) mE.addressData = 999; if ( resetEvents ){ m_events = nullptr; - m_integrator = Integrator2(); + m_integrator = Integrator_type(); } } -void CoherentSum::setEvents( EventList& list ) +void CoherentSum::setEvents( EventList_type& list ) { if ( m_verbosity ) INFO( "Setting event list with:" << list.size() << " events for " << this ); reset(); m_events = 
&list; } -void CoherentSum::setMC( EventList& sim ) + +void CoherentSum::setMC( EventList_type& sim ) { if ( m_verbosity ) INFO( "Setting norm. event list with:" << sim.size() << " events for " << this ); reset(); - m_integrator = Integrator2(&sim); + m_integrator = Integrator_type( &sim ); } real_t CoherentSum::norm() const @@ -333,7 +320,7 @@ void CoherentSum::printVal(const Event& evt) { for ( auto& mE : m_matrixElements ) { unsigned int address = mE.addressData; - std::cout << mE.decayTree.decayDescriptor() << " = " << mE.coefficient << " x " << evt.getCache( address ) + std::cout << mE.decayTree.decayDescriptor() << " = " << mE.coefficient << " x " << m_events->cache( evt.index(), address ) << " address = " << address << " " << mE( evt ) << std::endl; if( mE.coupling.size() != 1 ){ std::cout << "CouplingConstants: " << std::endl; @@ -343,56 +330,57 @@ void CoherentSum::printVal(const Event& evt) } } -std::vector CoherentSum::cacheAddresses( const EventList& evts ) const -{ - std::vector addresses; - std::transform( m_matrixElements.begin(), m_matrixElements.end(), std::back_inserter(addresses), - [&evts](auto& it ){ return evts.getCacheIndex( it.amp ) ; } ); - return addresses; -} - complex_t CoherentSum::getVal( const Event& evt ) const { - complex_t value( 0., 0. ); - for ( auto& mE : m_matrixElements ) { - value += mE.coefficient * evt.getCache( mE.addressData ); + complex_v value( 0., 0. ); + for ( const auto& mE : m_matrixElements ) { + value = value + mE.coefficient * m_events->cache( evt.index(), mE.addressData ); } + #if ENABLE_AVX2 + return value.at(evt.index() % float_v::size); + #else return value; + #endif } -complex_t CoherentSum::getVal( const Event& evt, const std::vector& cacheAddresses ) const +#if ENABLE_AVX2 +float_v CoherentSum::operator()( const float_v* /*evt*/, const unsigned block ) const { - complex_t value( 0., 0. 
); - for ( size_t i = 0; i < m_matrixElements.size(); ++i ) - value += m_matrixElements[i].coefficient * evt.getCache( cacheAddresses[i] ); - return value; + complex_v value( 0., 0. ); + for ( const auto& mE : m_matrixElements ) { + value = value + mE.coefficient * m_events->cache()[ block * m_events->cacheSize() + mE.addressData ]; + } + return (m_weight/m_norm ) * AVX2::norm( value ); } -std::function CoherentSum::evaluator(const EventList* events) const +#endif + + +std::function CoherentSum::evaluator(const EventList_type* events) const { - if( events != nullptr && events != &this->m_integrator.events() ) + if( events != nullptr && events != m_integrator.events() ) ERROR("Evaluator only working on the integration sample, fix me!"); std::vector address_mapping( size() ); for( const auto& me : m_matrixElements ) address_mapping[me.addressData] = m_integrator.getCacheIndex( me.amp ); - std::vector values( m_integrator.events().size() ); + std::vector values( events->size() ); #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned int i = 0 ; i != m_integrator.events().size(); ++i ) + for( unsigned int i = 0 ; i != events->size(); ++i ) { complex_t amp = 0; for( unsigned j = 0 ; j != address_mapping.size(); ++j ) amp += m_matrixElements[j].coefficient * this->m_integrator.get(address_mapping[j], i); values[i] = m_weight * std::norm(amp) / m_norm; } - return arrayToFunctor(values, events); + return arrayToFunctor(values); } -KeyedView CoherentSum::componentEvaluator(const EventList* events) const +KeyedView CoherentSum::componentEvaluator(const EventList_type* events) const { - if( events != nullptr && events != &this->m_integrator.events() ) + if( events != nullptr && events != m_integrator.events() ) ERROR("Evaluator only working on the integration sample, fix me!"); - KeyedView rt(*events, m_matrixElements.size() ); + KeyedView rt(*events, m_matrixElements.size() ); std::vector address_mapping(m_matrixElements.size()); for( unsigned i = 0; i != 
m_matrixElements.size(); ++i ) address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); @@ -403,14 +391,13 @@ KeyedView CoherentSum::componentEvaluator(const EventList* ev #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned evt = 0 ; evt != m_integrator.events().size(); ++evt ) + for( unsigned evt = 0 ; evt != events->size(); ++evt ) { complex_t total = 0; for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ total += this->m_integrator.get( address_mapping[i], evt ) * m_matrixElements[i].coefficient * std::conj( this->m_integrator.get( address_mapping[j], evt ) * m_matrixElements[j].coefficient ); - } - + } rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; } } diff --git a/src/CompiledExpressionBase.cpp b/src/CompiledExpressionBase.cpp index e5a1de4071e..fea41d84a69 100644 --- a/src/CompiledExpressionBase.cpp +++ b/src/CompiledExpressionBase.cpp @@ -48,6 +48,10 @@ std::string AmpGen::programatic_name( std::string s ) void CompiledExpressionBase::resolve(const MinuitParameterSet* mps) { if( m_resolver == nullptr ) m_resolver = std::make_shared( m_evtMap, mps ); + if( fcnSignature().find("AVX") != std::string::npos ) { + m_resolver->setEnableAVX(); + enableBatch(); + } m_dependentSubexpressions = m_resolver->getOrderedSubExpressions( m_obj ); for ( auto& sym : m_db ){ auto expressions_for_this = m_resolver->getOrderedSubExpressions( sym.second); @@ -103,7 +107,6 @@ void CompiledExpressionBase::to_stream( std::ostream& stream ) const } } else if( !enable_cuda ){ -// stream << "#pragma clang diagnostic push\n#pragma clang diagnostic ignored \"-Wreturn-type-c-linkage\"\n"; stream << "extern \"C\" " << returnTypename() << " " << progName() << "(" << fcnSignature() << "){\n"; addDependentExpressions( stream , sizeOfStream ); stream << "return " << m_obj.to_string(m_resolver.get()) << ";\n}\n"; @@ -124,6 +127,7 @@ void CompiledExpressionBase::to_stream( std::ostream& stream ) const stream << "}\n"; } if ( m_db.size() != 0 
) addDebug( stream ); + if( m_enableBatch ) compileBatch(stream); } std::ostream& AmpGen::operator<<( std::ostream& os, const CompiledExpressionBase& expression ) diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index 2a2699c7aa1..edfb02d59e6 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -23,6 +23,8 @@ using namespace AmpGen; #ifdef AMPGEN_CXX #pragma message "Using c++ compiler: " AMPGEN_CXX " for JIT" +#pragma message "Using AMPGENROOT: " AMPGENROOT +#pragma message "Using AMPGENROOT_CMAKE: " AMPGENROOT_CMAKE #else #pragma warning "No AMPGEN_CXX for JIT set" #endif @@ -51,6 +53,7 @@ void CompilerWrapper::generateSource( const CompiledExpressionBase& expression, { std::ofstream output( filename ); for ( auto& include : m_includes ) output << "#include <" << include << ">\n"; + if( expression.fcnSignature().find("AVX2") != std::string::npos ) output << "#include \"AmpGen/simd/avx2_types.h\"\n" ; output << expression << std::endl; output.close(); } @@ -130,7 +133,17 @@ void CompilerWrapper::compileSource( const std::string& fname, const std::string { using namespace std::chrono_literals; std::vector compile_flags = NamedParameter("CompilerWrapper::Flags", - {"-Ofast", "--std="+get_cpp_version(),"-march=native"} ); + {"-Ofast", "--std="+get_cpp_version()}); + + #if ENABLE_AVX2 + compile_flags.push_back("-march=native"); + compile_flags.push_back("-mavx2"); + compile_flags.push_back("-DHAVE_AVX2_INSTRUCTIONS"); + compile_flags.push_back( std::string("-I") + AMPGENROOT) ; + #endif + #ifdef _OPENMP + compile_flags.push_back("-fopenmp"); + #endif std::vector argp = { m_cxx.c_str(), "-shared", diff --git a/src/Event.cpp b/src/Event.cpp index 6ffa95df0b5..3f1bf940bd9 100644 --- a/src/Event.cpp +++ b/src/Event.cpp @@ -10,15 +10,11 @@ using namespace AmpGen; -Event::Event( const unsigned& N, const unsigned& cacheSize) : - m_event(N), - m_cache(cacheSize) { - } +Event::Event( const unsigned& N) : + m_event(N) {} -Event::Event( const 
real_t* data, const unsigned& N, const unsigned& cacheSize) : - m_event(data, data+N), - m_cache(cacheSize) { - } +Event::Event( const real_t* data, const unsigned& N) : + m_event(data, data+N) {} void Event::print() const { unsigned nParticles = m_event.size()/4; @@ -73,11 +69,6 @@ real_t Event::s( const std::vector& indices ) const { } return E*E -px*px - py*py - pz*pz; } -void Event::printCache() const { - for( unsigned int i = 0 ; i < m_cache.size(); ++i){ - INFO("Cache adddress [" << i << "] = " << m_cache[i] ); - } -} void Event::set( const unsigned& i, const std::vector& p ){ for( unsigned j = 0 ; j < 4; ++j) m_event[4*i + j ] = p[j]; @@ -98,6 +89,7 @@ void Event::swap( const unsigned& i , const unsigned& j ) std::memmove( &m_event[4*i], &tmp,sizeof(tmp)); } +/* void Event::setCache(const complex_t& value, const unsigned& pos){ m_cache[pos] = value; } void Event::setCache( const std::vector& value, const unsigned& pos ) { @@ -105,3 +97,4 @@ void Event::setCache( const std::vector& value, const unsigned& pos ) } void Event::resizeCache( const unsigned int& new_size ){ m_cache.resize(new_size); } +*/ diff --git a/src/EventList.cpp b/src/EventList.cpp index c7b90bdd5c5..1ec87d4d377 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -62,7 +62,6 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) if( tokens.size() != 1 ) m_eventType = EventType( tokens ); INFO("Attempted automatic deduction of eventType: " << m_eventType ); } - auto pdfSize = args.getArg(0).val; auto filter = args.getArg(std::string("")).val; auto getGenPdf = args.getArg(false).val; auto weightBranch = args.getArg(std::string("")).val; @@ -71,7 +70,8 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) auto entryList = args.getArg().val; auto eventFormat = m_eventType.getEventFormat( true ); - Event temp( branches.size() == 0 ? eventFormat.size() : branches.size() , pdfSize ); + Event temp( branches.size() == 0 ? 
eventFormat.size() : branches.size()); + temp.setWeight( 1 ); temp.setGenPdf( 1 ); tree->SetBranchStatus( "*", 0 ); @@ -112,6 +112,7 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) for ( unsigned int evt = 0; evt < nEvents; ++evt ) { tr.getEntry( hasEventList ? entryList[evt] : evt ); if( applySym ) symmetriser( temp ); + temp.setIndex( m_data.size() ); m_data.push_back( temp ); } read_time.stop(); @@ -187,7 +188,7 @@ TH2D* EventList::makeProjection( const Projection2D& projection, const ArgumentP void EventList::printCacheInfo( const unsigned int& nEvt ) { for ( auto& ind : m_pdfIndex ) { - INFO( "Cache[" << ind.second << "] = " << ind.first << " = " << at( nEvt ).getCache( ind.second ) ); + INFO( "Cache[" << ind.second << "] = " << ind.first << " = " << cache(nEvt, ind.second ) ); } } @@ -215,8 +216,7 @@ size_t EventList::getCacheIndex( const CompiledExpressionBase& PDF, bool& isRegi void EventList::resetCache() { m_pdfIndex.clear(); - for ( auto evt = begin(); evt != end(); ++evt ) evt->resizeCache( 0 ); - m_lastCachePosition = 0; + m_cache.clear(); } double EventList::integral() const @@ -228,10 +228,7 @@ void EventList::add( const EventList& evts ) { resetCache(); WARNING( "Adding event lists invalidates cache state" ); - for ( auto& evt : evts ) { - m_data.push_back( evt ); - rbegin()->resizeCache( 0 ); - } + for ( auto& evt : evts ) m_data.push_back( evt ); } void EventList::clear() @@ -247,12 +244,7 @@ void EventList::erase(const std::vector::iterator& begin, void EventList::reserveCache(const size_t& size) { - if ( size >= at(0).cacheSize() ) - for (auto& evt : *this) evt.resizeCache(evt.cacheSize() + size); -} - -void EventList::resizeCache(const size_t& newCacheSize ) -{ - for (auto& evt : *this) evt.resizeCache( newCacheSize ); + if ( size * m_data.size() >= m_cache.size() ) + m_cache.reserve( m_data.size() * m_cache.size() ); } diff --git a/src/EventListSIMD.cpp b/src/EventListSIMD.cpp new file mode 100644 index 
00000000000..bf7b0ff4864 --- /dev/null +++ b/src/EventListSIMD.cpp @@ -0,0 +1,303 @@ +#if ENABLE_AVX2 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "AmpGen/ArgumentPack.h" +#include "AmpGen/CompiledExpressionBase.h" +#include "AmpGen/EventListSIMD.h" +#include "AmpGen/EventType.h" +#include "AmpGen/MsgService.h" +#include "AmpGen/Projection.h" +#include "AmpGen/TreeReader.h" +#include "AmpGen/Utilities.h" +#include "AmpGen/Event.h" +#include "AmpGen/Types.h" +#include "AmpGen/ProfileClock.h" +#include "AmpGen/simd/utils.h" +using namespace AmpGen; + +EventListSIMD::EventListSIMD( const EventType& type ) : m_eventType( type ), m_eventSize( m_eventType.eventSize() ) {} + +void EventListSIMD::loadFromFile( const std::string& fname, const ArgumentPack& args ) +{ + auto current_file = gFile; + auto tokens = split( fname, ':'); + TTree* tree = nullptr; + if( fname == "" ) FATAL("Filename must be specified to load data"); + if( tokens.size() == 2 ){ + gFile = TFile::Open( tokens[0].c_str(), "READ"); + if( gFile == nullptr ) FATAL("Failed to load file: " << tokens[0] ); + tree = (TTree*)gFile->Get( tokens[1].c_str() ); + } + else { + gFile = TFile::Open( fname.c_str(), "READ"); + if( gFile == nullptr ) FATAL("Failed to load file: " << tokens[0] ); + if( tree == nullptr ) tree = (TTree*)gFile->Get("EventList"); + tree = (TTree*)gFile->Get("DalitzEventList"); + } + if( tree == nullptr ) FATAL( "Failed to load tree from file: " << fname ); + loadFromTree( tree, args ); + gFile->Close(); + gFile = current_file; +} + +void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) +{ + ProfileClock read_time; + if( m_eventType.size() == 0 ){ + auto tokens = split( tree->GetTitle(), ' '); + if( tokens.size() != 1 ) setEventType( EventType( tokens ) ); + INFO("Attempted automatic deduction of eventType: " << m_eventType ); + } + 
auto filter = args.getArg(std::string("")).val; + auto getGenPdf = args.getArg(false).val; + auto weightBranch = args.getArg(std::string("")).val; + auto branches = args.getArg().val; + auto applySym = args.getArg(false).val; + auto entryList = args.getArg().val; + auto eventFormat = m_eventType.getEventFormat( true ); + + Event temp( branches.size() == 0 ? eventFormat.size() : branches.size()); + temp.setWeight( 1 ); + temp.setGenPdf( 1 ); + tree->SetBranchStatus( "*", 0 ); + + TreeReader tr( tree ); + if( branches.size() != 0 ){ + INFO("Branches = [" << vectorToString(branches, ", ") << "]" ); + for ( auto branch = branches.begin(); branch != branches.end(); ++branch ) { + unsigned int pos = std::distance( branches.begin(), branch ); + tr.setBranch( *branch, &(temp[pos]) ); + } + } + else { + for ( auto& branch : eventFormat ){ + tr.setBranch( branch.first, &(temp[branch.second]) ); + } + } + if( getGenPdf ) tr.setBranch( "genPdf", temp.pGenPdf() ); + if( weightBranch != "" ) tr.setBranch( weightBranch, temp.pWeight() ); + if( filter != "" ){ + if( entryList.size() != 0 ){ + WARNING("Specified entry list and filter, will overwrite list with specified selection"); + } + tr.prepare(); + tree->Draw(">>evtList", filter.c_str() ); + TEventList* evtList = (TEventList*)gDirectory->Get("evtList"); + for( int i = 0 ; i < evtList->GetN(); ++i ) entryList.push_back( evtList->GetEntry(i) ); + } + bool hasEventList = entryList.size() != 0; + m_nEvents = hasEventList ? 
entryList.size() : tree->GetEntries(); + auto aligned_size = utils::aligned_size(m_nEvents); + std::array buffer; + + m_nBlocks = aligned_size / float_v::size; + m_data.resize( m_nBlocks * m_eventSize ); + m_weights.resize( m_nBlocks ); + m_genPDF.resize( m_nBlocks ); + auto symmetriser = m_eventType.symmetriser(); + for ( unsigned int block = 0; block < m_nBlocks; ++block ) + { + for( unsigned k = 0 ; k != float_v::size; ++k ) + { + auto evt = k + block * float_v::size; + if(evt >= m_nEvents ) break; + tr.getEntry( hasEventList ? entryList[evt] : evt ); + if( applySym ) symmetriser( temp ); + buffer[k] = temp; + } + gather( buffer, block ); + } + read_time.stop(); + INFO("Time to read tree = " << read_time << "[ms]; nEntries = " << size() ); +} + + +EventListSIMD::EventListSIMD( const EventList& other ) : EventListSIMD( other.eventType() ) +{ + unsigned aligned_size = utils::aligned_size(other.size()); + m_nBlocks = aligned_size / float_v::size; + m_nEvents = other.size(); + m_data.resize( m_nBlocks * m_eventSize ) ; + m_weights.resize( m_nBlocks ); + m_genPDF.resize ( m_nBlocks ); + for( unsigned evt = 0 ; evt != m_nBlocks; evt ++ ) + { + for( unsigned j = 0 ; j != m_eventSize; ++j ) + m_data[m_eventSize * evt + j] = utils::gather(other, [j](auto& event){ return event[j]; } , evt * float_v::size ); + m_weights[evt] = utils::gather(other, [](auto& event){ return event.weight(); }, evt * float_v::size, 0); + m_genPDF [evt] = utils::gather(other, [](auto& event){ return event.genPdf(); }, evt * float_v::size, 1 ); + } +} + + + +TTree* EventListSIMD::tree( const std::string& name, const std::vector& extraBranches ) const +{ + std::string title = m_eventType.mother(); + for( unsigned i = 0 ; i != m_eventType.size(); ++i ) title += " " + m_eventType[i]; + TTree* outputTree = new TTree( name.c_str(), title.c_str() ); + if ( size() == 0 ) { + ERROR( "Trying to output empty tree" ); + return nullptr; + } + Event tmp = *( begin() ); + double genPdf = 1; + double weight = 
1; + auto format = m_eventType.getEventFormat( true ); + + for ( const auto& f : format ) outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); + // for ( const auto& f : m_extensions ) outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); + + outputTree->Branch( "genPdf", &genPdf ); + outputTree->Branch( "weight", &weight ); + for ( const auto& evt : *this ) { + tmp = evt; + genPdf = evt.genPdf(); + weight = evt.weight(); + outputTree->Fill(); + } + return outputTree; +} + +std::vector EventListSIMD::makeProjections( const std::vector& projections, const ArgumentPack& args ) +{ + std::vector plots; + for ( const auto& proj : projections ) plots.push_back( makeProjection(proj, args) ); + return plots; +} + +TH1D* EventListSIMD::makeProjection( const Projection& projection, const ArgumentPack& args ) const +{ + auto selection = args.getArg().val; + auto weightFunction = args.getArg().val; + std::string prefix = args.getArg(std::string("")); + auto plot = projection.plot(prefix); + plot->SetLineColor(args.getArg(kBlack).val); + plot->SetMarkerSize(0); + for( const auto evt : *this ) + { + if( selection != nullptr && !selection(evt) ) continue; + auto pos = projection(evt); + plot->Fill( pos, evt.weight() * ( weightFunction == nullptr ? 1 : weightFunction(evt) / evt.genPdf() ) ); + } + if( selection != nullptr ) INFO("Filter efficiency = " << plot->GetEntries() << " / " << size() ); + return plot; +} +TH2D* EventListSIMD::makeProjection( const Projection2D& projection, const ArgumentPack& args ) const +{ + auto selection = args.getArg().val; + auto weightFunction = args.getArg().val; + std::string prefix = args.getArg().val; + auto plot = projection.plot(prefix); + for ( const auto evt : *this ){ + if ( selection != nullptr && !selection(evt) ) continue; + auto pos = projection(evt); + plot->Fill( pos.first, pos.second, evt.weight() * ( weightFunction == nullptr ? 
1 : weightFunction(evt) / evt.genPdf() ) ); + } + return plot; +} + +size_t EventListSIMD::getCacheIndex( const CompiledExpressionBase& PDF ) const +{ + auto pdfIndex = m_pdfIndex.find( FNV1a_hash( PDF.name() ) ); + if ( pdfIndex != m_pdfIndex.end() ) + return pdfIndex->second; + else + ERROR( "FATAL: PDF Index for " << PDF.name() << " not found" ); + return 999; +} + +size_t EventListSIMD::getCacheIndex( const CompiledExpressionBase& PDF, bool& isRegistered ) const +{ + auto pdfIndex = m_pdfIndex.find( FNV1a_hash( PDF.name() ) ); + if ( pdfIndex != m_pdfIndex.end() ) { + isRegistered = true; + return pdfIndex->second; + } + isRegistered = false; + return 999; +} + +void EventListSIMD::resetCache() +{ + m_pdfIndex.clear(); + m_cache.clear(); +} + +void EventListSIMD::clear() +{ + m_data.clear(); + m_cache.clear(); +} + +void EventListSIMD::reserveCache(const unsigned& newSize) +{ + m_cache.reserve( newSize * nBlocks() ); +} + +void EventListSIMD::resizeCache(const unsigned& newSize ) +{ + WARNING("Will only reserve, because i don't want to keep track anymore ... 
"); + reserveCache( newSize ); +} + +const Event EventListSIMD::operator[]( const size_t& pos ) const +{ + unsigned nEvents = size(); + unsigned p = pos / float_v::size; + unsigned q = pos % float_v::size; + Event tempEvent( m_eventSize ); + for( unsigned i = 0 ; i != m_eventSize; ++i ) + tempEvent[i] = m_data[p * m_eventSize + i ].at(q); + tempEvent.setWeight( m_weights[p].at(q) ); + tempEvent.setGenPdf( m_genPDF[p].at(q) ); + tempEvent.setIndex( pos ); + return tempEvent; +} + +std::array EventListSIMD::scatter( unsigned pos ) const +{ + unsigned p = pos / float_v::size; + std::array rt; + auto vw = m_weights[p].to_array(); + auto vg = m_genPDF[p].to_array(); + for( unsigned evt = 0 ; evt != float_v::size; ++evt ){ + rt[evt] = Event( m_eventSize ); + rt[evt].setWeight(vw[evt]); + rt[evt].setGenPdf(vg[evt]); + rt[evt].setIndex(evt + pos); + } + for( unsigned field = 0 ; field != m_eventSize; ++field){ + auto v = m_data[p * m_eventSize +field].to_array(); + for( unsigned evt = 0; evt != float_v::size; ++evt ) rt[evt][field] = v[evt]; + } + return rt; +} + +void EventListSIMD::gather( const std::array& data, unsigned pos ) +{ + for( unsigned field = 0 ; field != m_eventSize; ++field ) + m_data[pos*m_eventSize +field] = utils::gather(data, [field](auto& event){ return event[field]; } ); + m_weights[pos] = utils::gather(data, [](auto& event){ return event.weight() ; } ); + m_genPDF[pos] = utils::gather(data, [](auto& event){ return event.genPdf(); } ); +} + +#endif diff --git a/src/Expression.cpp b/src/Expression.cpp index aaf947bf391..88551e23f02 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -17,6 +17,8 @@ #include "AmpGen/Types.h" using namespace AmpGen; +using namespace AmpGen::fcn; +using namespace std::complex_literals; DEFINE_CAST(Constant ) DEFINE_CAST(Parameter ) @@ -53,10 +55,20 @@ std::string Constant::to_string(const ASTResolver* resolver) const { str.erase ( str.find_last_not_of('0') + 1, std::string::npos ); return str; }; - std::string 
complex_type_string = resolver != nullptr && resolver->enableCuda() ? "ampgen_cuda::complex_t" : typeof() ; - std::string literalSuffix = resolver != nullptr && resolver->enableCuda() ? "f" : ""; + std::string complex_type = typeof(); + std::string literalSuffix = ""; + if( resolver != nullptr && resolver->enableCuda() ) + { + complex_type = "ampgen_cuda::complex_t"; + literalSuffix = "f"; + } + if( resolver != nullptr && resolver->enableAVX() ) + { + complex_type = "AmpGen::AVX2::complex_t"; + literalSuffix = "f"; + } return std::imag(m_value) == 0 ? "(" + rounded_string(std::real(m_value)) +literalSuffix + ")" : - complex_type_string +"("+rounded_string(std::real(m_value))+literalSuffix+","+rounded_string(std::imag(m_value))+literalSuffix+")"; + complex_type +"("+rounded_string(std::real(m_value))+literalSuffix+","+rounded_string(std::imag(m_value))+literalSuffix+")"; } Expression simplify_constant_addition( const Constant& constant, const Expression& expression ) @@ -203,7 +215,9 @@ Ternary::Ternary( const Expression& cond, const Expression& v1, const Expression } std::string Ternary::to_string(const ASTResolver* resolver) const { - return "(" + m_cond.to_string(resolver) + "?" + m_v1.to_string(resolver) + ":" + m_v2.to_string(resolver) + ")"; + return resolver != nullptr && resolver->enableAVX() ? "AmpGen::AVX2::select(" + m_cond.to_string(resolver) + ", " + + m_v1.to_string(resolver) + ", " + m_v2.to_string(resolver) +")" + : "(" + m_cond.to_string(resolver) + "?" 
+ m_v1.to_string(resolver) + ":" + m_v2.to_string(resolver) + ")"; } void Ternary::resolve( ASTResolver& resolver ) const @@ -295,7 +309,7 @@ Expression AmpGen::fcn::complex_sqrt( const Expression& expression ) { if( is(expression ) ) return sqrt( expression() ); auto st = make_cse(expression); - return Ternary( st > 0, Sqrt(st), Constant(0,1)*Sqrt(-st) ); + return Ternary( st > 0, Sqrt(st), 1i*Sqrt(-st) ); } Expression AmpGen::fcn::isqrt( const Expression& expression ) diff --git a/src/Generator.cpp b/src/Generator.cpp index 8383d4d5cac..30d30eb4f47 100644 --- a/src/Generator.cpp +++ b/src/Generator.cpp @@ -8,7 +8,7 @@ extern "C" void AmpGen::PyGenerate(const char* eventType, double* out, const uns EventType type( split( std::string(eventType),' ') ); INFO( type << " generating: " ); auto phsp = Generator(type, new TRandom3() ); - auto events = phsp.generate( size,0 ); + auto events = phsp.generate( size ); for( size_t i = 0 ; i < events.size(); ++i ){ for( size_t j = 0 ; j < events[i].size(); ++j) out[events[i].size() * i + j] = events[i][j]; diff --git a/src/IncoherentSum.cpp b/src/IncoherentSum.cpp index bce1e238814..2a75ae3ca1f 100644 --- a/src/IncoherentSum.cpp +++ b/src/IncoherentSum.cpp @@ -39,6 +39,7 @@ double IncoherentSum::norm( const Bilinears& norms ) const void IncoherentSum::prepare() { + /* if ( m_isConstant && m_prepareCalls != 0 ) return; transferParameters(); for ( auto& mE : m_matrixElements ) { @@ -47,11 +48,10 @@ void IncoherentSum::prepare() if ( m_prepareCalls != 0 && !amp.hasExternalsChanged() ) continue; if ( m_prepareCalls == 0 && m_events != nullptr ) mE.addressData = m_events->registerExpression( amp ); - if ( m_events != nullptr ) m_events->updateCache( amp, mE.addressData ); - if ( m_prepareCalls == 0 && m_integrator.isReady() ){ - m_integrator.prepareExpression( amp ); - } - INFO( mE.addressData << " " << m_events->at(0).getCache(mE.addressData) ); +// if ( m_events != nullptr ) m_events->updateCache( amp, mE.addressData ); + // if ( 
m_prepareCalls == 0 && m_integrator.isReady() ){ + // m_integrator.prepareExpression( amp ); + // } amp.resetExternals(); } if( m_prepareCalls == 0 ){ @@ -64,6 +64,7 @@ void IncoherentSum::prepare() m_prepareCalls++; m_norm = norm(); INFO( "norm = " << m_norm << " weight = " << m_weight ); + */ } std::vector IncoherentSum::fitFractions( const LinearErrorPropagator& linProp ) @@ -86,8 +87,8 @@ double IncoherentSum::prob( const Event& evt ) const double IncoherentSum::prob_unnormalised( const Event& evt ) const { double value( 0. ); - for ( auto& mE : m_matrixElements ) { - value += std::norm( mE.coefficient * evt.getCache( mE.addressData ) ); - } + //for ( auto& mE : m_matrixElements ) { + // value += std::norm( mE.coefficient * m_events->cache(evt.index(), mE.addressData ) ); + //} return value; } diff --git a/src/Integrator.cpp b/src/Integrator.cpp index e51f3d6639f..c1e45cf2d21 100644 --- a/src/Integrator.cpp +++ b/src/Integrator.cpp @@ -33,3 +33,80 @@ void Bilinears::resize( const size_t& r, const size_t& c) calculate[i] = true; } } + +void Integrator::integrateBlock() +{ + real_t re[N] = {0}; + real_t im[N] = {0}; + size_t addr_i[N] = {0}; + size_t addr_j[N] = {0}; + for( size_t roll = 0 ; roll < N; ++roll ) + { + addr_i[roll] = m_integrals[roll].i; + addr_j[roll] = m_integrals[roll].j; + } + for ( size_t roll = 0; roll < N; ++roll ) { + complex_t* b1 = m_cache.data() + m_integrals[roll].i * m_events->size(); + complex_t* b2 = m_cache.data() + m_integrals[roll].j * m_events->size(); + #pragma omp parallel for reduction(+: re, im) + for ( size_t i = 0; i < m_events->size(); ++i ) { + auto c = b1[i] * std::conj(b2[i]); + re[roll] += m_weight[i] * std::real(c); + im[roll] += m_weight[i] * std::imag(c); + } + } + for ( size_t j = 0; j < m_counter; ++j ) m_integrals[j].transfer( complex_t( re[j], im[j] ) / m_norm ); + m_counter = 0; +} + +Integrator::Integrator( const EventList* events ) : m_events( events ) +{ + if( m_events == nullptr ) return; + m_weight.resize( 
m_events->size() ); + for( size_t i = 0 ; i < m_events->size(); ++i ) + { + m_weight[i] = m_events->at(i).weight() / m_events->at(i).genPdf(); + m_norm += m_weight[i]; + } +} + +bool Integrator::isReady() const { return m_events != nullptr; } +const EventList* Integrator::events() const { return m_events; } +void Integrator::queueIntegral(const size_t& c1, + const size_t& c2, + const size_t& i, + const size_t& j, + Bilinears* out, + const bool& sim) +{ + if( !out->workToDo(i,j) ) return; + if( sim ) + addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ + out->set(i,j,val); + if( i != j ) out->set(j,i, std::conj(val) ); } ); + else + addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ out->set(i,j,val); } ); +} +void Integrator::addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ) +{ + m_integrals[m_counter++] = Integral(c1,c2,tFunc); + if ( m_counter == N ) integrateBlock(); +} +void Integrator::queueIntegral(const size_t& i, const size_t& j, complex_t* result) +{ + addIntegralKeyed(i, j, [result](arg& val){ *result = val ; } ); +} +void Integrator::flush() +{ + if ( m_counter == 0 ) return; + integrateBlock(); +} +void Integrator::setBuffer( complex_t* pos, const complex_t& value, const size_t& size ) +{ + *pos = value; +} + +void Integrator::setBuffer( complex_t* pos, const std::vector& value, const size_t& size) +{ + memcpy( pos, &(value[0]), size * sizeof(complex_t) ); +} diff --git a/src/Integrator2.cpp b/src/Integrator2.cpp deleted file mode 100644 index c36df07a789..00000000000 --- a/src/Integrator2.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "AmpGen/Integrator2.h" - -using namespace AmpGen; - -void Integrator2::integrateBlock() -{ - real_t re[N] = {0}; - real_t im[N] = {0}; - size_t addr_i[N] = {0}; - size_t addr_j[N] = {0}; - for( size_t roll = 0 ; roll < N; ++roll ) - { - addr_i[roll] = m_integrals[roll].i; - addr_j[roll] = m_integrals[roll].j; - } - for ( size_t roll = 0; roll < N; ++roll ) { - auto& b1 = 
m_buffer[m_integrals[roll].i]; - auto& b2 = m_buffer[m_integrals[roll].j]; - #pragma omp parallel for reduction(+: re, im) - for ( size_t i = 0; i < m_events->size(); ++i ) { - auto c = b1[i] * std::conj(b2[i]); - re[roll] += m_weight[i] * std::real(c); - im[roll] += m_weight[i] * std::imag(c); - } - } - for ( size_t j = 0; j < m_counter; ++j ) m_integrals[j].transfer( complex_t( re[j], im[j] ) / m_norm ); - m_counter = 0; -} - -Integrator2::Integrator2( const EventList* events ) : m_events( events ) -{ - if( m_events == nullptr ) return; - m_weight.resize( m_events->size() ); - for( size_t i = 0 ; i < m_events->size(); ++i ) - { - m_weight[i] = m_events->at(i).weight() / m_events->at(i).genPdf(); - m_norm += m_weight[i]; - } -} - -bool Integrator2::isReady() const { return m_events != nullptr; } -const EventList& Integrator2::events() const { return *m_events; } -void Integrator2::queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim) -{ - if( !out->workToDo(i,j) ) return; - if( sim ) - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ - out->set(i,j,val); - if( i != j ) out->set(j,i, std::conj(val) ); } ); - else - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ out->set(i,j,val); } ); -} -void Integrator2::addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ) -{ - m_integrals[m_counter++] = Integral(c1,c2,tFunc); - if ( m_counter == N ) integrateBlock(); -} -void Integrator2::queueIntegral(const size_t& i, const size_t& j, complex_t* result) -{ - addIntegralKeyed(i, j, [result](arg& val){ *result = val ; } ); -} -void Integrator2::flush() -{ - if ( m_counter == 0 ) return; - integrateBlock(); -} -void Integrator2::setBuffer( complex_t* pos, const complex_t& value, const size_t& size ) -{ - *pos = value; -} - -void Integrator2::setBuffer( complex_t* pos, const std::vector& value, const size_t& size) -{ - memcpy( pos, &(value[0]), size * sizeof(complex_t) ); -} diff 
--git a/src/IntegratorSIMD.cpp b/src/IntegratorSIMD.cpp new file mode 100644 index 00000000000..375d84a3181 --- /dev/null +++ b/src/IntegratorSIMD.cpp @@ -0,0 +1,69 @@ +#include "AmpGen/IntegratorSIMD.h" +#include "AmpGen/simd/utils.h" + +using namespace AmpGen; +using namespace AmpGen::AVX2; + +void IntegratorSIMD::integrateBlock() +{ + #pragma omp parallel for + for ( size_t roll = 0; roll < m_counter; ++roll ) { // only the first m_counter queued integrals are valid; remaining slots may be stale or empty + float_v re( _mm256_set1_ps(0.f) ); + float_v im( _mm256_set1_ps(0.f) ); + auto b1 = m_cache.data() + m_integrals[roll].i * m_events->size(); + auto b2 = m_cache.data() + m_integrals[roll].j * m_events->size(); + for ( size_t i = 0; i < m_events->nBlocks(); ++i ) { + auto c = b1[i] * conj(b2[i]); + re = _mm256_fmadd_ps(m_weight[i], real(c), re ); // fmadd(a,b,c) = a*b + c, i.e. re += weight * Re(c) + im = _mm256_fmadd_ps(m_weight[i], imag(c), im ); // likewise im += weight * Im(c) + } + m_integrals[roll].transfer( complex_t( utils::sum_elements(float_v(re)), + utils::sum_elements(float_v(im)) ) / m_norm ); + } + m_counter = 0; +} + +IntegratorSIMD::IntegratorSIMD( const EventListSIMD* events ) : m_events( events ) +{ + if( m_events == nullptr ) return; + m_weight.resize( m_events->nBlocks() ); + float_v norm_acc = 0.; + for( size_t i = 0 ; i < m_events->nBlocks(); ++i ) + { + m_weight[i] = m_events->weight(i) / m_events->genPDF(i); + norm_acc = norm_acc + m_weight[i]; + } + m_norm = utils::sum_elements(norm_acc); +} + +bool IntegratorSIMD::isReady() const { return m_events != nullptr; } + +void IntegratorSIMD::queueIntegral(const size_t& c1, + const size_t& c2, + const size_t& i, + const size_t& j, + Bilinears* out, + const bool& sim) +{ + if( !out->workToDo(i,j) ) return; + if( sim ) + addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ + out->set(i,j,val); + if( i != j ) out->set(j,i, std::conj(val) ); } ); + else + addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ out->set(i,j,val); } ); +} +void IntegratorSIMD::addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ) +{ + m_integrals[m_counter++] = Integral(c1,c2,tFunc); 
+ if ( m_counter == N ) integrateBlock(); +} +void IntegratorSIMD::queueIntegral(const size_t& i, const size_t& j, complex_t* result) +{ + addIntegralKeyed(i, j, [result](arg& val){ *result = val ; } ); +} +void IntegratorSIMD::flush() +{ + if ( m_counter == 0 ) return; + integrateBlock(); +} diff --git a/src/Lineshapes.cpp b/src/Lineshapes.cpp index b101b615c37..7406c7d21db 100644 --- a/src/Lineshapes.cpp +++ b/src/Lineshapes.cpp @@ -23,8 +23,10 @@ Expression AmpGen::kFactor( const Expression& mass, const Expression& width, Deb return sqrt(k); } -Expression AmpGen::BlattWeisskopf_Norm( const Expression& z, const Expression& z0, unsigned int L ) +Expression AmpGen::BlattWeisskopf_Norm( const Expression& ze, const Expression& z0e, unsigned int L ) { + auto z = make_cse(ze); + auto z0 = make_cse(z0e); switch( L ) { case 0: return 1; case 1: return (1+z0) / (1+z); @@ -37,8 +39,9 @@ Expression AmpGen::BlattWeisskopf_Norm( const Expression& z, const Expression& z } } -Expression AmpGen::BlattWeisskopf( const Expression& z, unsigned int L ) +Expression AmpGen::BlattWeisskopf( const Expression& ze, unsigned int L ) { + auto z = make_cse(ze); switch( L ) { case 0: return 1; case 1: return 2 *fpow(z,1) / (1+z); @@ -75,27 +78,19 @@ Expression AmpGen::width( const Expression& s, const Expression& s1, const Expre const Expression& width, const Expression& radius, unsigned int L, DebugSymbols* dbexpressions ) { - auto q2v = make_cse( Q2(s,s1,s2) ); + auto q2v = make_cse( Q2(s,s1,s2) ); const Expression q2 = Ternary( q2v > 0, q2v, 0 ); - const Expression q20 = Abs( Q2( mass * mass, s1, s2 ) ); + const Expression q20 = abs( Q2( mass * mass, s1, s2 ) ); Expression BF = BlattWeisskopf_Norm( q2 * radius * radius, q20 * radius * radius, L ); - Expression qr = sqrt( q2 / q20 ) * fpow( q2/q20, L ); - - const Expression mreco = isqrt( s ); - const Expression mr = mass * mreco; + auto q2r = make_cse(q2 / q20); ADD_DEBUG(q2 , dbexpressions); ADD_DEBUG(q20 , dbexpressions); 
ADD_DEBUG(sqrt(q2/q20) , dbexpressions); ADD_DEBUG(BF , dbexpressions); - ADD_DEBUG(qr , dbexpressions); - ADD_DEBUG(mr , dbexpressions); - ADD_DEBUG(qr*mr , dbexpressions); - ADD_DEBUG(sqrt(q2)/mreco, dbexpressions); - ADD_DEBUG(sqrt(q20)/mass, dbexpressions); - ADD_DEBUG(width*BF*qr*mr, dbexpressions); - - return width * BF * qr * mr; + const auto rt = make_cse( width * BF * mass * sqrt( q2r / s ) * fpow( q2r, L ) ); + ADD_DEBUG(rt, dbexpressions); + return rt; } bool Lineshape::Factory::isLineshape( const std::string& lineshape ) @@ -142,8 +137,8 @@ Expression AmpGen::pol( const Expression& X, const std::vector& p ) Expression F = 0; Expression L = 1; for ( auto& ip : p ) { - F = F + ip * L; - L = L * X; + F += ip * L; + L *= X; } return F; } diff --git a/src/Lineshapes/kMatrix.cpp b/src/Lineshapes/kMatrix.cpp index 85b4937840e..5ba105bd98d 100644 --- a/src/Lineshapes/kMatrix.cpp +++ b/src/Lineshapes/kMatrix.cpp @@ -102,11 +102,11 @@ DEFINE_LINESHAPE( kMatrix ) std::vector fScatt = paramVector( "f_scatt", nChannels ); std::vector poleConfigs; - + bool addImaginaryMass = NamedParameter("kMatrix::fp", true ); for ( unsigned int pole = 1; pole <= nPoles; ++pole ) { std::string stub = "IS_p" + std::to_string( pole ) + "_"; Expression mass = Parameter( stub + "mass" ); - poleConfig p( mass * mass ); + poleConfig p( mass * mass + addImaginaryMass * (1i)*(1.e-6) ); /// add a tiny imaginary part to the mass to avoid floating point errors // for ( unsigned int ch = 0; ch < nChannels; ++ch ) p.add( Parameter( stub + channels[ch] ) ); poleConfigs.push_back( p ); } diff --git a/src/Minimiser.cpp b/src/Minimiser.cpp index 5589c1fb2d0..29cd3312e7b 100644 --- a/src/Minimiser.cpp +++ b/src/Minimiser.cpp @@ -59,7 +59,7 @@ void Minimiser::prepare() { std::string algorithm = NamedParameter( "Minimiser::Algorithm", "Hesse"); size_t maxCalls = NamedParameter( "Minimiser::MaxCalls" , 100000); - double tolerance = NamedParameter( "Minimiser::Tolerance" , 1); + double tolerance = 
NamedParameter( "Minimiser::Tolerance" , 2.0); m_printLevel = NamedParameter( "Minimiser::PrintLevel", 4); m_normalise = NamedParameter( "Minimiser::Normalise",false); if ( m_minimiser != nullptr ) delete m_minimiser; diff --git a/src/Particle.cpp b/src/Particle.cpp index e6b12032b3d..fdd239cc255 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -431,7 +431,14 @@ Expression Particle::getExpression( DebugSymbols* db, const unsigned int& index } ADD_DEBUG( total, db ); double nPermutations = doSymmetrisation ? orderings.size() : 1; - if ( sumAmplitudes ) return total / fcn::sqrt( nPermutations ); + if ( sumAmplitudes ) + { + if ( is(total) ){ + WARNING("Amplitude is just a constant: " << total << " may cause problems for compiler, making a little bit complex" ); + total += 1i * 0.00001; + } + return total / fcn::sqrt( nPermutations ); + } else { Expression sqrted = fcn::sqrt( total / nPermutations ); ADD_DEBUG( sqrted, db ); diff --git a/src/PhaseSpace.cpp b/src/PhaseSpace.cpp index 7eedb1b3f2c..c63be198602 100644 --- a/src/PhaseSpace.cpp +++ b/src/PhaseSpace.cpp @@ -35,12 +35,12 @@ double PhaseSpace::q( double m, double m1, double m2 ) const return 0.5 * sqrt( m*m - 2*m1*m1 - 2*m2*m2 + (m1*m1-m2*m2)*(m1*m1-m2*m2)/(m*m) ); } -Event PhaseSpace::makeEvent(const size_t& cacheSize) +Event PhaseSpace::makeEvent() { std::array rno; std::array pd; std::array invMas; - Event rt(4*m_nt + m_type.isTimeDependent(), cacheSize); + Event rt(4*m_nt + m_type.isTimeDependent()); rno[0] = 0; size_t n; diff --git a/src/Plots.cpp b/src/Plots.cpp deleted file mode 100644 index 800f12af2bc..00000000000 --- a/src/Plots.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#if __cplusplus >= 201402L -#include "AmpGen/ErrorPropagator.h" -#include "AmpGen/EventList.h" -#include "AmpGen/CoherentSum.h" -#include "AmpGen/IncoherentSum.h" -#include "AmpGen/Integrator.h" -#include "AmpGen/MinuitParameterSet.h" -#include "AmpGen/Projection.h" -#include "AmpGen/Utilities.h" -#include "AmpGen/EventList.h" 
-#include "AmpGen/Plots.h" -#include "AmpGen/DalitzIntegrator.h" - -#include "TFile.h" -#include "TH1D.h" -#include "TH2D.h" -#include -using namespace AmpGen; -void AmpGen::perAmplitudePlot( const EventList& evts, - const Projection& projection, - const CoherentSum& pdf ) -{ - struct PlotIJ { - unsigned int i; - unsigned int j; - TH1D* hist; - std::complex amp; - }; - - TDirectory* dir = (TDirectory*)gFile->Get( ("perAmp_"+projection.name()).c_str() ); - if( dir == nullptr ) - { - gFile->mkdir( ("perAmp_"+ projection.name() ).c_str() ); - dir = (TDirectory*)gFile->Get( ("perAmp_"+projection.name()).c_str() ); - } - dir->cd(); - - std::vector tmpPlots( pdf.size() * ( pdf.size() + 1 ) / 2 ); - - unsigned int s = 0; - for ( unsigned int i = 0; i < pdf.size(); ++i ) { - - for ( unsigned int j = i; j < pdf.size(); ++j ) { - auto pdf_i = pdf[i].amp; - auto pdf_j = pdf[j].amp; - unsigned int index_i = evts.getCacheIndex(pdf_i); - unsigned int index_j = evts.getCacheIndex(pdf_j); - const std::string name = pdf_i.name() + "_" + pdf_j.name(); - tmpPlots[s].hist = projection.plot(name); - tmpPlots[s].i = index_i; - tmpPlots[s].j = index_j; - tmpPlots[s].amp = pdf[i].coupling() * std::conj( pdf[j].coupling() ); - if ( index_i != index_j ) tmpPlots[s].amp = 2.0 * tmpPlots[s].amp; - s++; - } - } - for ( auto& evt : evts ) { - double f = projection( evt ); - for ( auto& h : tmpPlots ) { - std::complex pdfValue = evt.getCache( h.i ) * std::conj( evt.getCache( h.j ) ); - double weight = std::real( h.amp * pdfValue ) * evt.weight() / evt.genPdf(); - h.hist->Fill( f, weight ); - } - } - for ( auto& h : tmpPlots ) { - h.hist->Write(); - delete h.hist; - } - dir->Write(); - gFile->cd(); -} - -TGraph* AmpGen::boundary(const AmpGen::EventType& eventType, - const std::function& p1, - const std::function& p2 ) -{ - auto s0 = pow(eventType.motherMass(),2); - auto s1 = pow(eventType.mass(0),2); - auto s2 = pow(eventType.mass(1),2); - auto s3 = pow(eventType.mass(2),2); - - DalitzIntegrator 
di( s0, s1, s2, s3 ); - - TGraph* gboundary = new TGraph(); - - Event tmp(12); - - for( double x = 0 ; x <= 1; x+=0.001){ - di.setEvent( {x,0}, tmp.address() ); - gboundary->SetPoint( gboundary->GetN(), p1(tmp), p2(tmp) ); - } - for( double y = 0 ; y <= 1; y+=0.01){ - di.setEvent( {1,y}, tmp.address() ); - gboundary->SetPoint( gboundary->GetN(), p1(tmp), p2(tmp) ); - } - for( double x = 0 ; x <= 1; x+=0.001){ - di.setEvent( {1-x,1}, tmp.address() ); - gboundary->SetPoint( gboundary->GetN(), p1(tmp), p2(tmp) ); - } - for( double y = 0 ; y <= 1; y+=0.01){ - di.setEvent( {0,1-y}, tmp.address() ); - gboundary->SetPoint( gboundary->GetN(), p1(tmp), p2(tmp) ); - } - return gboundary; -} - -#endif diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index e63ab654231..34083588200 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -74,7 +74,7 @@ PolarisedSum::PolarisedSum(const EventType& type, m_matrixElements[i] = TransitionMatrix( p, coupling, - CompiledExpression( + CompiledExpression( TensorExpression(thisExpression), p.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); @@ -100,7 +100,7 @@ PolarisedSum::PolarisedSum(const EventType& type, m_matrixElements[i] = TransitionMatrix( tm.first, tm.second, - CompiledExpression( + CompiledExpression( TensorExpression(thisExpression), tm.first.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? 
syms : DebugSymbols() ,this->m_mps ) ); @@ -192,11 +192,11 @@ void PolarisedSum::prepare() ProfileClock tEval; size_t size_of = size() / m_matrixElements.size(); if( m_events != nullptr ) m_events->reserveCache( size() ); - if( m_integrator.isReady() ) m_integrator.reserveCache( size() ); + if( m_integrator.isReady() ) m_integrator.allocate(m_matrixElements, size() ); for(auto& t : m_matrixElements){ if( m_nCalls != 0 && !t.amp.hasExternalsChanged() ) continue; m_events->updateCache(t.amp, t.addressData); - m_integrator.prepareExpression(t.amp, size_of); + m_integrator.prepareExpression(t.amp); t.amp.resetExternals(); t.workToDo = true; nChanges++; @@ -230,9 +230,9 @@ void PolarisedSum::prepare() void PolarisedSum::debug_norm() { double norm_slow = 0; - for( auto& evt : m_integrator.events() ) + for( auto& evt : *m_integrator.events() ) norm_slow += evt.weight() * getValNoCache(evt) / evt.genPdf(); - auto evt = m_integrator.events()[0]; + auto evt = (*m_integrator.events())[0]; INFO("Event[0]: " << prob_unnormalised(evt) << " " << getValNoCache(evt) ); INFO("Norm : " << std::setprecision(10) << "bilinears=" << m_norm @@ -252,7 +252,7 @@ void PolarisedSum::setEvents( EventList& events ) void PolarisedSum::setMC( EventList& events ) { m_nCalls = 0; - m_integrator = integrator(&events); + m_integrator = Integrator(&events); } @@ -267,7 +267,7 @@ void PolarisedSum::build_probunnormalised() { DebugSymbols db; auto prob = probExpression(transitionMatrix(), convertProxies(m_pVector,[](auto& p){ return Parameter(p->name());} ), m_debug ? 
&db : nullptr); - m_probExpression = CompiledExpression(prob, "prob_unnormalised", {}, db, m_mps); + m_probExpression = CompiledExpression(prob, "prob_unnormalised", {}, db, m_mps); CompilerWrapper().compile(m_probExpression); m_probExpression.prepare(); } @@ -290,7 +290,8 @@ Tensor PolarisedSum::transitionMatrix() real_t PolarisedSum::prob_unnormalised( const Event& evt ) const { - return m_probExpression( evt.getCachePtr(0) ); + return 0; + //return m_probExpression( &m_events->cache(evt.index(), 0) ); } double PolarisedSum::norm() const @@ -298,7 +299,7 @@ double PolarisedSum::norm() const return m_norm; } -complex_t PolarisedSum::norm(const size_t& i, const size_t& j, PolarisedSum::integrator* integ) +complex_t PolarisedSum::norm(const size_t& i, const size_t& j, Integrator* integ) { auto ai = m_integIndex[i]; auto aj = m_integIndex[j]; @@ -336,7 +337,7 @@ void PolarisedSum::debug(const Event& evt) for(const auto& me : m_matrixElements) { std::vector this_cache(0,tsize); - for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( evt.getCache(me.addressData+i) ); + for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( m_events->cache(evt.index(), me.addressData+i) ); INFO( me.decayDescriptor() << " " << vectorToString( this_cache, " ") ); } INFO("P(x) = " << getValNoCache(evt) ); @@ -344,15 +345,12 @@ void PolarisedSum::debug(const Event& evt) if( m_debug ) { transferParameters(); - Event copy(evt); - copy.resizeCache( size() ); - for(auto& me : m_matrixElements){ - auto values = me(copy); - copy.setCache( values , me.addressData ); - me.amp.debug( copy.address() ); + std::vector cache( tsize * m_matrixElements.size() ); + for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ){ + std::memmove( cache.data() + tsize *i, m_matrixElements[i](evt).data(), tsize * sizeof(complex_t) ); + m_matrixElements[i].amp.debug( evt.address() ); } - m_probExpression.debug( copy.getCachePtr() ); - + m_probExpression.debug( cache.data() ); } } @@ -365,7 +363,7 
@@ void PolarisedSum::generateSourceCode(const std::string& fname, const double& no Expression event = Parameter("x0",0,true); std::vector expressions(size); for( auto& p : m_matrixElements ){ - auto expr = CompiledExpression, const real_t*, const real_t*>( + auto expr = CompiledExpression(const real_t*, const real_t*)>( p.amp.expression(), p.decayDescriptor(), m_eventType.getEventFormat(), DebugSymbols() ,m_mps ) ; @@ -380,16 +378,16 @@ void PolarisedSum::generateSourceCode(const std::string& fname, const double& no T_matrix.st(); auto amp = probExpression(T_matrix, convertProxies(m_pVector, [](auto& proxy) -> Expression{ return double(proxy);} )); auto amp_extPol = probExpression(T_matrix, {Parameter("x2",0,true), Parameter("x3",0,true), Parameter("x4",0,true)}); - stream << CompiledExpression( amp / normalisation, "FCN",{},{}, m_mps ) << std::endl ; + const int&)>( amp / normalisation, "FCN",{},{}, m_mps ) << std::endl ; - stream << CompiledExpression( amp_extPol / normalisation, "FCN_extPol",{},{},m_mps ) << std::endl; + const double&)>( amp_extPol / normalisation, "FCN_extPol",{},{},m_mps ) << std::endl; stream.close(); } @@ -489,13 +487,12 @@ void PolarisedSum::transferParameters() real_t PolarisedSum::getValNoCache( const Event& evt ) { transferParameters(); - Event copy(evt); - copy.resizeCache( size() ); - for(auto& me : m_matrixElements){ - auto values = me(copy); - copy.setCache( values , me.addressData ); + auto tsize = m_dim.first * m_dim.second; + std::vector cache( tsize * m_matrixElements.size() ); + for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ){ + std::memmove( cache.data() + tsize *i, m_matrixElements[i](evt).data(), tsize * sizeof(complex_t) ); } - return m_probExpression( copy.getCachePtr() ); + return m_probExpression( cache.data() ); } void PolarisedSum::setWeight( MinuitProxy param ){ m_weight = param; } @@ -503,7 +500,7 @@ double PolarisedSum::getWeight() const { return m_weight ; } std::function PolarisedSum::evaluator(const 
EventList* events) const { - if( events != nullptr && events != &this->m_integrator.events() ) + if( events != nullptr && events != m_integrator.events() ) ERROR("Evaluator only working on the integration sample, fix me!"); std::vector address_mapping( size() ); @@ -511,22 +508,22 @@ std::function PolarisedSum::evaluator(const EventList* eve for( unsigned i = 0; i != size() / m_matrixElements.size(); ++i ) address_mapping[me.addressData+i] = m_integrator.getCacheIndex( me.amp ) + i; } - std::vector values( m_integrator.events().size() ); + std::vector values( m_integrator.events()->size() ); std::vector buffer(address_mapping.size()); #ifdef _OPENMP #pragma omp parallel for firstprivate(buffer) #endif - for( unsigned int i = 0 ; i != m_integrator.events().size(); ++i ) + for( unsigned int i = 0 ; i != m_integrator.events()->size(); ++i ) { for( unsigned j = 0 ; j != address_mapping.size(); ++j ) buffer[j] = this->m_integrator.get(address_mapping[j], i); values[i] = m_weight * m_probExpression(&buffer[0]) / m_norm; } - return arrayToFunctor(values, events); + return arrayToFunctor(values); } KeyedView PolarisedSum::componentEvaluator(const EventList* events) const { - if( events != nullptr && events != &this->m_integrator.events() ) + if( events != nullptr && events != m_integrator.events() ) ERROR("Evaluator only working on the integration sample, fix me!"); KeyedView rt(*events, m_matrixElements.size() ); std::vector address_mapping(m_matrixElements.size()); @@ -540,7 +537,7 @@ KeyedView PolarisedSum::componentEvaluator(const EventList* e #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned evt = 0 ; evt != m_integrator.events().size(); ++evt ) + for( unsigned evt = 0 ; evt != m_integrator.events()->size(); ++evt ) { complex_t total = 0; for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ diff --git a/src/Projection.cpp b/src/Projection.cpp index 1a16dbbd1b6..aeeb6901ebc 100644 --- a/src/Projection.cpp +++ b/src/Projection.cpp @@ -2,6 +2,7 @@ 
#include "AmpGen/Utilities.h" #include "AmpGen/Event.h" #include "AmpGen/EventList.h" +#include "AmpGen/EventListSIMD.h" #include #include "TAxis.h" @@ -70,17 +71,22 @@ std::pair Projection2D::operator()( const Event& evt ) const return {xAxis.m_func( evt ), yAxis.m_func( evt )}; } -TH1D* Projection::projInternal( const EventList& events, const ArgumentPack& args) const +template <> TH1D* Projection::projInternal( const EventList& events, const ArgumentPack& args) const { return events.makeProjection(*this, args); } -std::tuple, THStack*> Projection::projInternal(const EventList& events, const KeyedView& weightFunction, const ArgumentPack& args) const +template <> TH1D* Projection::projInternal( const EventListSIMD& events, const ArgumentPack& args) const +{ + return events.makeProjection(*this, args); +} + +template <> std::tuple, THStack*> Projection::projInternal(const EventList& events, const KeyedView& weightFunction, const ArgumentPack& args) const { -// INFO("Making projection: " << m_name << " classes = " << weightFunction.width() << " " << &(events[0]) ); std::vector hists; double norm_sum = args.getArg(1).val; std::string prefix = args.getArg().val; + bool autowrite = args.get() != nullptr; THStack* stack = args.getArg(new THStack()).val; if( prefix != "" ) prefix = prefix +"_"; for( unsigned int i = 0 ; i != weightFunction.width(); ++i ) @@ -97,7 +103,41 @@ std::tuple, THStack*> Projection::projInternal(const EventLis if( total == 0 ) ERROR("Norm = " << total ); else for( auto& h : hists ) h->Scale( norm_sum / total ); stack->SetName( (prefix + name() + "_stack").c_str()); - for( auto& h : hists ) stack->Add(h, "C HIST"); + for( auto& h : hists ){ + stack->Add(h, "C HIST"); + if( autowrite ) h->Write(); + } + if( autowrite ) stack->Write(); + return {hists, stack}; +} + +template <> std::tuple, THStack*> Projection::projInternal(const EventListSIMD& events, const KeyedView& weightFunction, const ArgumentPack& args) const +{ + std::vector hists; + double 
norm_sum = args.getArg(1).val; + std::string prefix = args.getArg().val; + bool autowrite = args.get() != nullptr; + THStack* stack = args.getArg(new THStack()).val; + if( prefix != "" ) prefix = prefix +"_"; + for( unsigned int i = 0 ; i != weightFunction.width(); ++i ) + hists.push_back( plot(prefix + weightFunction.key(i)==""?"C"+std::to_string(i):weightFunction.key(i)) ); + auto selection = args.getArg().val; + for( const auto& evt : events ){ + if( selection != nullptr && !selection(evt) ) continue; + auto pos = operator()(evt); + auto weights = weightFunction(evt); + for( unsigned j = 0 ; j != weightFunction.width(); ++j ) hists[j]->Fill( pos, evt.weight() * weights[j] / evt.genPdf() ); + } + std::sort( std::begin(hists), std::end(hists), [](auto& h1, auto& h2){ return h1->Integral() < h2->Integral() ; } ); + double total = std::accumulate( std::begin(hists), std::end(hists), 0.0, [](double& t, auto& h){ return t + h->Integral() ; } ); + if( total == 0 ) ERROR("Norm = " << total ); + else for( auto& h : hists ) h->Scale( norm_sum / total ); + stack->SetName( (prefix + name() + "_stack").c_str()); + for( auto& h : hists ){ + stack->Add(h, "C HIST"); + if( autowrite ) h->Write(); + } + if( autowrite ) stack->Write(); return {hists, stack}; } diff --git a/src/RecursivePhaseSpace.cpp b/src/RecursivePhaseSpace.cpp index ce812ab3e3a..c8f67908071 100644 --- a/src/RecursivePhaseSpace.cpp +++ b/src/RecursivePhaseSpace.cpp @@ -40,10 +40,10 @@ RecursivePhaseSpace::RecursivePhaseSpace(const Particle& decayChain, const Event setRandom( rndm ); } -AmpGen::Event RecursivePhaseSpace::makeEvent( const size_t& cacheSize ) +AmpGen::Event RecursivePhaseSpace::makeEvent() { - AmpGen::Event evt = m_phsp.makeEvent( cacheSize ); - AmpGen::Event rt( 4 * m_eventType.size(), cacheSize ); + AmpGen::Event evt = m_phsp.makeEvent(); + AmpGen::Event rt( 4 * m_eventType.size()); for (size_t i = 0; i < m_nodes.size(); ++i ) { auto& segment = m_nodes[i]; double px = evt[4*i + 0]; @@ -58,7 
+58,7 @@ AmpGen::Event RecursivePhaseSpace::makeEvent( const size_t& cacheSize ) rt[4*segment.sink + 3] = pE; } } else { - auto evtTmp = segment.decayProds->makeEvent(cacheSize); + auto evtTmp = segment.decayProds->makeEvent(); double v = sqrt( px * px + py * py + pz * pz ) / pE; boost( evtTmp, std::tuple(px, py, pz), v ); for(size_t j = 0; j < rt.size(); ++j) rt[j] += evtTmp[j]; diff --git a/src/ThreeBodyCalculators.cpp b/src/ThreeBodyCalculators.cpp index fc7d1b9aa29..d6351163d76 100644 --- a/src/ThreeBodyCalculators.cpp +++ b/src/ThreeBodyCalculators.cpp @@ -136,18 +136,18 @@ double ThreeBodyCalculator::PartialWidth::getWidth( const double& s ) } Expression ThreeBodyCalculator::PartialWidth::spinAverageMatrixElement( - const std::vector>& elements, DebugSymbols* msym ) + const std::vector>& elements, DebugSymbols* msym ) { std::vector currents; - for ( auto& element : elements ) { - Particle particle(element.decayDescriptor(), type.finalStates() ); + for ( auto& [s, c] : elements ) { + Particle particle(s.decayDescriptor(), type.finalStates() ); auto perm = particle.identicalDaughterOrderings(); for ( auto& p : perm ) { particle.setOrdering(p); particle.setLineshape( "FormFactor" ); - Expression prop = make_cse( element.coupling.to_expression() ) * make_cse( particle.propagator( msym ) ); - if ( msym != nullptr ) msym->emplace_back( element.decayTree.name() + "_g", element.coupling.to_expression() ); - if ( msym != nullptr ) msym->emplace_back( element.decayTree.name() + "_p", particle.propagator() ); + Expression prop = make_cse( c.to_expression() ) * make_cse( particle.propagator( msym ) ); + if ( msym != nullptr ) msym->emplace_back( s.name() + "_g", c.to_expression() ); + if ( msym != nullptr ) msym->emplace_back( s.name() + "_p", particle.propagator() ); Tensor zt = particle.spinTensor(msym); zt.st() ; currents.push_back( zt * prop ); @@ -232,14 +232,17 @@ ThreeBodyCalculator::PartialWidth::PartialWidth( const EventType& evt, MinuitPar , type(evt) { 
DebugSymbols msym; - Expression matrixElementTotal = spinAverageMatrixElement( fcs.matrixElements(), &msym ); + std::vector> unpacked; + for( auto& p : fcs.matrixElements() ) unpacked.emplace_back( p.decayTree, p.coupling ); + + Expression matrixElementTotal = spinAverageMatrixElement(unpacked, &msym ); std::string name = ""; auto evtFormat = evt.getEventFormat(); - for ( auto& p : fcs.matrixElements() ) { - name += p.decayDescriptor(); - partialWidths.emplace_back( spinAverageMatrixElement( {p}, &msym ), p.decayDescriptor(), evtFormat, DebugSymbols(), &mps ); + for ( auto& p : unpacked ) { + name += p.first.decayDescriptor(); + partialWidths.emplace_back( spinAverageMatrixElement( {p}, &msym ), p.first.decayDescriptor(), evtFormat, DebugSymbols(), &mps ); } - totalWidth = CompiledExpression< std::complex, const real_t*, const real_t* > ( matrixElementTotal, "width", evtFormat, {} , &mps ); + totalWidth = CompiledExpression< complex_t(const real_t*, const real_t*) > ( matrixElementTotal, "width", evtFormat, {} , &mps ); CompilerWrapper(true).compile( totalWidth, ""); } diff --git a/src/TreePhaseSpace.cpp b/src/TreePhaseSpace.cpp index b2b5de5e232..18ea846740a 100644 --- a/src/TreePhaseSpace.cpp +++ b/src/TreePhaseSpace.cpp @@ -47,11 +47,11 @@ TreePhaseSpace::TreePhaseSpace(const std::vector& decayChains, const E m_dice = std::discrete_distribution<>(m_weights.begin(), m_weights.end()); } -Event TreePhaseSpace::makeEvent( const unsigned& cacheSize ) +Event TreePhaseSpace::makeEvent() { unsigned j = m_dice(m_gen); m_top[j].generate(); - auto event = m_top[j].event(m_type.size(), cacheSize); + auto event = m_top[j].event(m_type.size()); event.setGenPdf(genPdf(event) / m_top[j].weight()); m_generatorRecord.push_back(j); return event; @@ -195,9 +195,9 @@ void TreePhaseSpace::Vertex::place(Event& event) if( right != nullptr ) right->place(event); } -Event TreePhaseSpace::Vertex::event(const unsigned& eventSize, const unsigned& cacheSize) +Event 
TreePhaseSpace::Vertex::event(const unsigned& eventSize) { - Event output(4 * eventSize, cacheSize); + Event output(4 * eventSize); mom.SetXYZT(0,0,0,sqrt(s)); generateFullEvent(); place(output); diff --git a/src/UnaryExpression.cpp b/src/UnaryExpression.cpp index a00563c6833..0fb9b4c4994 100644 --- a/src/UnaryExpression.cpp +++ b/src/UnaryExpression.cpp @@ -17,7 +17,7 @@ T rsqrt( const T& arg ){ return 1. / sqrt(arg) ; } DEFINE_UNARY_OPERATOR( Log , log ) DEFINE_UNARY_OPERATOR( Sqrt, sqrt ) DEFINE_UNARY_OPERATOR( Exp , exp ) -DEFINE_UNARY_OPERATOR( Abs , std::abs ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Abs , std::fabs ) DEFINE_UNARY_OPERATOR( Sin , sin ) DEFINE_UNARY_OPERATOR( Cos , cos ) DEFINE_UNARY_OPERATOR( Tan , tan ) @@ -28,17 +28,7 @@ DEFINE_UNARY_OPERATOR( Norm, std::norm ) DEFINE_UNARY_OPERATOR( Conj, std::conj ) DEFINE_UNARY_OPERATOR( Real, std::real ) DEFINE_UNARY_OPERATOR( Imag, std::imag ) -//DEFINE_UNARY_OPERATOR( LGamma, std::lgamma ); - //DEFINE_UNARY_OPERATOR( ISqrt, rsqrt ) - -ISqrt::ISqrt( const Expression& expression) : IUnaryExpression(expression) {} -ISqrt::operator Expression() const { return Expression( std::make_shared(*this) ) ; } -complex_t ISqrt::operator()() const { return 1./sqrt( m_expression() ); } -std::string ISqrt::to_string(const ASTResolver* resolver) const { - return resolver != nullptr && resolver->enableCuda() ? - "rsqrt("+m_expression.to_string(resolver)+")" : - "1./sqrt("+m_expression.to_string(resolver)+")" ; -} +DEFINE_UNARY_OPERATOR_NO_RESOLVER( ISqrt, rsqrt ) LGamma::LGamma( const Expression& expression) : IUnaryExpression(expression) {} LGamma::operator Expression() const { return Expression( std::make_shared(*this) ) ; } @@ -47,6 +37,19 @@ std::string LGamma::to_string(const ASTResolver* resolver) const { return "std::lgamma(" + m_expression.to_string(resolver) + ")"; } +std::string ISqrt::to_string(const ASTResolver* resolver) const { + return resolver != nullptr && resolver->enableCuda() ? 
+ "rsqrt("+m_expression.to_string(resolver)+")" : + "1./sqrt("+m_expression.to_string(resolver)+")" ; +} + +std::string Abs::to_string( const ASTResolver* resolver ) const +{ + return resolver != nullptr && resolver->enableAVX() ? + "AmpGen::AVX2::abs(" + m_expression.to_string(resolver) +")" : + "std::fabs("+m_expression.to_string(resolver) +")"; +} + Expression Log::d() const { return 1. / arg(); } Expression Sqrt::d() const { return 1. / ( 2 * fcn::sqrt( arg() ) ); } Expression Exp::d() const { return fcn::exp(arg()) ; } diff --git a/test/test_phaseSpace.cpp b/test/test_phaseSpace.cpp index 4dc139124af..6bea1fd1380 100644 --- a/test/test_phaseSpace.cpp +++ b/test/test_phaseSpace.cpp @@ -27,7 +27,7 @@ BOOST_AUTO_TEST_CASE( phaseSpace_threeBody ) std::vector test_event = {-0.235918, -0.242689, 0.278177, 1.19862,-0.300608, -0.0584944, -0.0117436, 0.584418, 0.536526, 0.301183, -0.266434, 0.684864} ; - auto new_event = phsp.makeEvent(0); + auto new_event = phsp.makeEvent(); // auto new_event = phsp.generate(); for( int i = 0 ; i < 12 ; ++i ) From 7d2285853de0f9e233bfe9a041f53f31a53fb319 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Thu, 9 Apr 2020 17:35:09 +0200 Subject: [PATCH 03/67] add simd headers --- AmpGen/simd/avx2_types.h | 122 +++++++ AmpGen/simd/avx_mathfun.h | 727 ++++++++++++++++++++++++++++++++++++++ AmpGen/simd/iterator.h | 43 +++ AmpGen/simd/utils.h | 42 +++ 4 files changed, 934 insertions(+) create mode 100644 AmpGen/simd/avx2_types.h create mode 100644 AmpGen/simd/avx_mathfun.h create mode 100644 AmpGen/simd/iterator.h create mode 100644 AmpGen/simd/utils.h diff --git a/AmpGen/simd/avx2_types.h b/AmpGen/simd/avx2_types.h new file mode 100644 index 00000000000..a9ec3e082ee --- /dev/null +++ b/AmpGen/simd/avx2_types.h @@ -0,0 +1,122 @@ +#ifndef AMPGEN_AVX_TYPES +#define AMPGEN_AVX_TYPES 1 + +#include +#include +#include +#include +#include "AmpGen/simd/avx_mathfun.h" +#include + +namespace AmpGen { + namespace AVX2 { + struct float_t { + __m256 data; 
+ static constexpr unsigned size = 8 ; + typedef float scalar_type; + float_t() = default; + float_t(__m256 data ) : data(data) {} + float_t(const float& f ) : data( _mm256_set1_ps(f) ) {} + float_t(const double& f ) : data( _mm256_set1_ps( float(f) )) {} + float_t(const float* f ) : data( _mm256_loadu_ps( f ) ) {} + void store( float* ptr ) const { _mm256_storeu_ps( ptr, data ); } + std::array to_array() const { std::array b; store( &b[0] ); return b; } + float at(const unsigned i) const { return to_array()[i] ; } + operator __m256() const { return data ; } + }; + + struct complex_t { + float_t re; + float_t im; + typedef std::complex scalar_type; + static constexpr unsigned size = 8 ; + + float_t real() const { return re; } + float_t imag() const { return im; } + complex_t() = default; + complex_t( const float_t& re, const float_t& im) : re(re), im(im) {} + complex_t( const float& re, const float& im) : re(re), im(im) {} + complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } + void store( float* sre, float* sim ){ re.store(sre); im.store(sim); } + void store( std::complex* r ){ + auto re_arr = re.to_array(); + auto im_arr = im.to_array(); + for( unsigned i = 0 ; i != float_t::size; ++i ) r[i] = std::complex( re_arr[i], im_arr[i] ); + } + }; + + inline std::ostream& operator<<( std::ostream& os, const float_t& obj ) { + auto buffer = obj.to_array(); + for( unsigned i = 0 ; i != float_t::size; ++i ) os << buffer[i] << " "; + return os; + } + inline float_t operator+( const float_t& lhs, const float_t& rhs ) { return _mm256_add_ps(lhs, rhs); } + inline float_t operator-( const float_t& lhs, const float_t& rhs ) { return _mm256_sub_ps(lhs, rhs); } + inline float_t operator*( const float_t& lhs, const float_t& rhs ) { return _mm256_mul_ps(lhs, rhs); } + inline float_t 
operator/( const float_t& lhs, const float_t& rhs ) { return _mm256_div_ps(lhs, rhs); } + inline float_t operator-( const float_t& x ) { return -1.f * x; } + inline float_t operator&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_ps( lhs, rhs ); } + inline float_t operator|( const float_t& lhs, const float_t& rhs ) { return _mm256_or_ps( lhs, rhs ); } + inline float_t operator^( const float_t& lhs, const float_t& rhs ) { return _mm256_xor_ps( lhs, rhs ); } + inline float_t operator+=(float_t& lhs, const float_t& rhs ){ lhs = lhs + rhs; return lhs; } + inline float_t operator-=(float_t& lhs, const float_t& rhs ){ lhs = lhs - rhs; return lhs; } + inline float_t operator*=(float_t& lhs, const float_t& rhs ){ lhs = lhs * rhs; return lhs; } + inline float_t operator/=(float_t& lhs, const float_t& rhs ){ lhs = lhs / rhs; return lhs; } + inline float_t operator&&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_ps( lhs, rhs ); } + inline float_t operator||( const float_t& lhs, const float_t& rhs ) { return _mm256_or_ps( lhs, rhs ); } + inline float_t operator!( const float_t& x ) { return x ^ _mm256_castsi256_ps( _mm256_set1_epi32( -1 ) ); } + inline float_t operator<( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_LT_OS ); } + inline float_t operator>( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OS ); } + inline float_t operator==( const float_t& lhs, const float_t& rhs ){ return _mm256_cmp_ps( lhs, rhs, _CMP_EQ_OS ); } + inline float_t sqrt( const float_t& v ) { return _mm256_sqrt_ps(v); } + inline float_t sin( const float_t& v ) { return sin256_ps(v) ; } + inline float_t cos( const float_t& v ) { return cos256_ps(v) ; } + inline float_t tan( const float_t& v ) { float_t s; float_t c; sincos256_ps(v, (__m256*)&s, (__m256*)&c) ; return s/c; } + inline float_t log( const float_t& v ) { return log256_ps(v) ; } + inline float_t exp( const float_t& v ) { return exp256_ps(v) ; 
} + inline float_t abs ( const float_t& v ) { return v & _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); } + inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_ps( b, a, mask ); } + inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? a : b; } + inline float_t atan2( const float_t& y, const float_t& x ){ + std::array bx{x.to_array()}, by{y.to_array()}, rt; + for( unsigned i = 0 ; i != float_t::size ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); + return float_t (rt.data() ); + } + inline float_t real(const complex_t& arg ){ return arg.re ; } + inline float_t imag(const complex_t& arg ){ return arg.im ; } + inline complex_t conj(const complex_t& arg ){ return complex_t(arg.re, -arg.im) ; } + inline complex_t operator+( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re + rhs, lhs.im); } + inline complex_t operator-( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re - rhs, lhs.im); } + inline complex_t operator*( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re*rhs, lhs.im*rhs); } + inline complex_t operator/( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re/rhs, lhs.im/rhs); } + inline complex_t operator+( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs + rhs.re, rhs.im); } + inline complex_t operator-( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs - rhs.re, - rhs.im); } + inline complex_t operator*( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs*rhs.re, lhs*rhs.im); } + inline complex_t operator/( const float_t& lhs, const complex_t& rhs ) { return complex_t( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_t operator+( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re + rhs.re, lhs.im + rhs.im); } + inline complex_t operator-( const complex_t& lhs, const complex_t& rhs ) { 
return complex_t(lhs.re - rhs.re, lhs.im - rhs.im); } + inline complex_t operator*( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } + inline complex_t operator/( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_t operator-( const complex_t& x ) { return -1.f * x; } + inline float_t abs( const complex_t& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } + inline float_t norm( const complex_t& v ) { return ( v.re * v.re + v.im * v.im ) ; } + inline complex_t select(const float_t& mask, const complex_t& a, const complex_t& b ) { return complex_t( _mm256_blendv_ps( b.re, a.re, mask ), _mm256_blendv_ps( b.im, a.im, mask ) ); } + inline complex_t select(const float_t& mask, const float_t& a, const complex_t& b ) { return complex_t( _mm256_blendv_ps( b.re, a , mask ), _mm256_blendv_ps( b.im, float_t(0.f), mask ) ); } + inline complex_t select(const float_t& mask, const complex_t& a, const float_t& b ) { return complex_t( _mm256_blendv_ps( b, a.re, mask ), _mm256_blendv_ps( float_t(0.f), a.im, mask ) ); } + inline complex_t select(const bool& mask , const complex_t& a, const complex_t& b ) { return mask ? 
a : b; } + inline complex_t exp( const complex_t& v ){ + float_t s; float_t c; sincos256_ps(v.im, (__m256*)&s, (__m256*)&c) ; + return exp( v.re ) * complex_t(c, s); + } + inline std::ostream& operator<<( std::ostream& os, const complex_t& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } + #pragma omp declare reduction(+: float_t: \ + omp_out = omp_out + omp_in) + #pragma omp declare reduction(+: complex_t: \ + omp_out = omp_out + omp_in) + + } +} + +#endif diff --git a/AmpGen/simd/avx_mathfun.h b/AmpGen/simd/avx_mathfun.h new file mode 100644 index 00000000000..918a7a35f42 --- /dev/null +++ b/AmpGen/simd/avx_mathfun.h @@ -0,0 +1,727 @@ +#ifndef AMPGEN_AVX_MATHFUN_H +#define AMPGEN_AVX_MATHFUN_H 1 + +/* + AVX implementation of sin, cos, sincos, exp and log + + Based on "sse_mathfun.h", by Julien Pommier + http://gruntthepeon.free.fr/ssemath/ + + Copyright (C) 2012 Giovanni Garberoglio + Interdisciplinary Laboratory for Computational Science (LISC) + Fondazione Bruno Kessler and University of Trento + via Sommarive, 18 + I-38123 Trento (Italy) + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + (this is the zlib license) + + modified 8/4/2020: inline all functions, as they end up wrapped anyway +*/ + +#include + +/* yes I know, the top of this file is quite ugly */ +# define ALIGN32_BEG +# define ALIGN32_END __attribute__((aligned(32))) + +/* __m128 is ugly to write */ +typedef __m256 v8sf; // vector of 8 float (avx) +typedef __m256i v8si; // vector of 8 int (avx) +typedef __m128i v4si; // vector of 8 int (avx) + +#define _PI32AVX_CONST(Name, Val) \ + static const ALIGN32_BEG int _pi32avx_##Name[4] ALIGN32_END = { Val, Val, Val, Val } + +_PI32AVX_CONST(1, 1); +_PI32AVX_CONST(inv1, ~1); +_PI32AVX_CONST(2, 2); +_PI32AVX_CONST(4, 4); + + +/* declare some AVX constants -- why can't I figure a better way to do that? */ +#define _PS256_CONST(Name, Val) \ + static const ALIGN32_BEG float _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } +#define _PI32_CONST256(Name, Val) \ + static const ALIGN32_BEG int _pi32_256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } +#define _PS256_CONST_TYPE(Name, Type, Val) \ + static const ALIGN32_BEG Type _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } + +_PS256_CONST(1 , 1.0f); +_PS256_CONST(0p5, 0.5f); +/* the smallest non denormalized float number */ +_PS256_CONST_TYPE(min_norm_pos, int, 0x00800000); +_PS256_CONST_TYPE(mant_mask, int, 0x7f800000); +_PS256_CONST_TYPE(inv_mant_mask, int, ~0x7f800000); + +_PS256_CONST_TYPE(sign_mask, int, (int)0x80000000); +_PS256_CONST_TYPE(inv_sign_mask, int, ~0x80000000); + +_PI32_CONST256(0, 0); +_PI32_CONST256(1, 1); +_PI32_CONST256(inv1, ~1); +_PI32_CONST256(2, 2); +_PI32_CONST256(4, 4); +_PI32_CONST256(0x7f, 0x7f); + +_PS256_CONST(cephes_SQRTHF, 0.707106781186547524); +_PS256_CONST(cephes_log_p0, 7.0376836292E-2); +_PS256_CONST(cephes_log_p1, - 1.1514610310E-1); +_PS256_CONST(cephes_log_p2, 1.1676998740E-1); +_PS256_CONST(cephes_log_p3, - 1.2420140846E-1); +_PS256_CONST(cephes_log_p4, + 1.4249322787E-1); 
+_PS256_CONST(cephes_log_p5, - 1.6668057665E-1); +_PS256_CONST(cephes_log_p6, + 2.0000714765E-1); +_PS256_CONST(cephes_log_p7, - 2.4999993993E-1); +_PS256_CONST(cephes_log_p8, + 3.3333331174E-1); +_PS256_CONST(cephes_log_q1, -2.12194440e-4); +_PS256_CONST(cephes_log_q2, 0.693359375); + +#ifndef __AVX2__ + +typedef union imm_xmm_union { + v8si imm; + v4si xmm[2]; +} imm_xmm_union; + +#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \ + imm_xmm_union u __attribute__((aligned(32))); \ + u.imm = imm_; \ + xmm0_ = u.xmm[0]; \ + xmm1_ = u.xmm[1]; \ +} + +#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \ + imm_xmm_union u __attribute__((aligned(32))); \ + u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \ + } + + +#define AVX2_BITOP_USING_SSE2(fn) \ +static inline v8si avx2_mm256_##fn(v8si x, int a) \ +{ \ + /* use SSE2 instruction to perform the bitop AVX2 */ \ + v4si x1, x2; \ + v8si ret; \ + COPY_IMM_TO_XMM(x, x1, x2); \ + x1 = _mm_##fn(x1,a); \ + x2 = _mm_##fn(x2,a); \ + COPY_XMM_TO_IMM(x1, x2, ret); \ + return(ret); \ +} + +//#warning "Using SSE2 to perform AVX2 bitshift ops" +AVX2_BITOP_USING_SSE2(slli_epi32) +AVX2_BITOP_USING_SSE2(srli_epi32) + +#define AVX2_INTOP_USING_SSE2(fn) \ +static inline v8si avx2_mm256_##fn(v8si x, v8si y) \ +{ \ + /* use SSE2 instructions to perform the AVX2 integer operation */ \ + v4si x1, x2; \ + v4si y1, y2; \ + v8si ret; \ + COPY_IMM_TO_XMM(x, x1, x2); \ + COPY_IMM_TO_XMM(y, y1, y2); \ + x1 = _mm_##fn(x1,y1); \ + x2 = _mm_##fn(x2,y2); \ + COPY_XMM_TO_IMM(x1, x2, ret); \ + return(ret); \ +} + +//#warning "Using SSE2 to perform AVX2 integer ops" +AVX2_INTOP_USING_SSE2(and_si128) +AVX2_INTOP_USING_SSE2(andnot_si128) +AVX2_INTOP_USING_SSE2(cmpeq_epi32) +AVX2_INTOP_USING_SSE2(sub_epi32) +AVX2_INTOP_USING_SSE2(add_epi32) +#define avx2_mm256_and_si256 avx2_mm256_and_si128 +#define avx2_mm256_andnot_si256 avx2_mm256_andnot_si128 +#else +#define avx2_mm256_slli_epi32 _mm256_slli_epi32 +#define avx2_mm256_srli_epi32 _mm256_srli_epi32 +#define 
avx2_mm256_and_si256 _mm256_and_si256 +#define avx2_mm256_andnot_si256 _mm256_andnot_si256 +#define avx2_mm256_cmpeq_epi32 _mm256_cmpeq_epi32 +#define avx2_mm256_sub_epi32 _mm256_sub_epi32 +#define avx2_mm256_add_epi32 _mm256_add_epi32 +#endif /* __AVX2__ */ + + +/* natural logarithm computed for 8 simultaneous float + return NaN for x <= 0 +*/ +inline v8sf log256_ps(v8sf x) { + v8si imm0; + v8sf one = *(v8sf*)_ps256_1; + + //v8sf invalid_mask = _mm256_cmple_ps(x, _mm256_setzero_ps()); + v8sf invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_LE_OS); + + x = _mm256_max_ps(x, *(v8sf*)_ps256_min_norm_pos); /* cut off denormalized stuff */ + + // can be done with AVX2 + imm0 = avx2_mm256_srli_epi32(_mm256_castps_si256(x), 23); + + /* keep only the fractional part */ + x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_mant_mask); + x = _mm256_or_ps(x, *(v8sf*)_ps256_0p5); + + // this is again another AVX2 instruction + imm0 = avx2_mm256_sub_epi32(imm0, *(v8si*)_pi32_256_0x7f); + v8sf e = _mm256_cvtepi32_ps(imm0); + + e = _mm256_add_ps(e, one); + + /* part2: + if( x < SQRTHF ) { + e -= 1; + x = x + x - 1.0; + } else { x = x - 1.0; } + */ + //v8sf mask = _mm256_cmplt_ps(x, *(v8sf*)_ps256_cephes_SQRTHF); + v8sf mask = _mm256_cmp_ps(x, *(v8sf*)_ps256_cephes_SQRTHF, _CMP_LT_OS); + v8sf tmp = _mm256_and_ps(x, mask); + x = _mm256_sub_ps(x, one); + e = _mm256_sub_ps(e, _mm256_and_ps(one, mask)); + x = _mm256_add_ps(x, tmp); + + v8sf z = _mm256_mul_ps(x,x); + + v8sf y = *(v8sf*)_ps256_cephes_log_p0; + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p1); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p2); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p3); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p4); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p5); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p6); + y 
= _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p7); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p8); + y = _mm256_mul_ps(y, x); + + y = _mm256_mul_ps(y, z); + + tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q1); + y = _mm256_add_ps(y, tmp); + + + tmp = _mm256_mul_ps(z, *(v8sf*)_ps256_0p5); + y = _mm256_sub_ps(y, tmp); + + tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q2); + x = _mm256_add_ps(x, y); + x = _mm256_add_ps(x, tmp); + x = _mm256_or_ps(x, invalid_mask); // negative arg will be NAN + return x; +} + +_PS256_CONST(exp_hi, 88.3762626647949f); +_PS256_CONST(exp_lo, -88.3762626647949f); + +_PS256_CONST(cephes_LOG2EF, 1.44269504088896341); +_PS256_CONST(cephes_exp_C1, 0.693359375); +_PS256_CONST(cephes_exp_C2, -2.12194440e-4); + +_PS256_CONST(cephes_exp_p0, 1.9875691500E-4); +_PS256_CONST(cephes_exp_p1, 1.3981999507E-3); +_PS256_CONST(cephes_exp_p2, 8.3334519073E-3); +_PS256_CONST(cephes_exp_p3, 4.1665795894E-2); +_PS256_CONST(cephes_exp_p4, 1.6666665459E-1); +_PS256_CONST(cephes_exp_p5, 5.0000001201E-1); + +inline v8sf exp256_ps(v8sf x) { + v8sf tmp = _mm256_setzero_ps(), fx; + v8si imm0; + v8sf one = *(v8sf*)_ps256_1; + + x = _mm256_min_ps(x, *(v8sf*)_ps256_exp_hi); + x = _mm256_max_ps(x, *(v8sf*)_ps256_exp_lo); + + /* express exp(x) as exp(g + n*log(2)) */ + fx = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_LOG2EF); + fx = _mm256_add_ps(fx, *(v8sf*)_ps256_0p5); + + /* how to perform a floorf with SSE: just below */ + //imm0 = _mm256_cvttps_epi32(fx); + //tmp = _mm256_cvtepi32_ps(imm0); + + tmp = _mm256_floor_ps(fx); + + /* if greater, substract 1 */ + //v8sf mask = _mm256_cmpgt_ps(tmp, fx); + v8sf mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS); + mask = _mm256_and_ps(mask, one); + fx = _mm256_sub_ps(tmp, mask); + + tmp = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C1); + v8sf z = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C2); + x = _mm256_sub_ps(x, tmp); + x = _mm256_sub_ps(x, z); + + z = 
_mm256_mul_ps(x,x); + + v8sf y = *(v8sf*)_ps256_cephes_exp_p0; + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p1); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p2); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p3); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p4); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p5); + y = _mm256_mul_ps(y, z); + y = _mm256_add_ps(y, x); + y = _mm256_add_ps(y, one); + + /* build 2^n */ + imm0 = _mm256_cvttps_epi32(fx); + // another two AVX2 instructions + imm0 = avx2_mm256_add_epi32(imm0, *(v8si*)_pi32_256_0x7f); + imm0 = avx2_mm256_slli_epi32(imm0, 23); + v8sf pow2n = _mm256_castsi256_ps(imm0); + y = _mm256_mul_ps(y, pow2n); + return y; +} + +_PS256_CONST(minus_cephes_DP1, -0.78515625); +_PS256_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); +_PS256_CONST(minus_cephes_DP3, -3.77489497744594108e-8); +_PS256_CONST(sincof_p0, -1.9515295891E-4); +_PS256_CONST(sincof_p1, 8.3321608736E-3); +_PS256_CONST(sincof_p2, -1.6666654611E-1); +_PS256_CONST(coscof_p0, 2.443315711809948E-005); +_PS256_CONST(coscof_p1, -1.388731625493765E-003); +_PS256_CONST(coscof_p2, 4.166664568298827E-002); +_PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI + + +/* evaluation of 8 sines at onces using AVX intrisics + + The code is the exact rewriting of the cephes sinf function. + Precision is excellent as long as x < 8192 (I did not bother to + take into account the special handling they have for greater values + -- it does not return garbage for arguments over 8192, though, but + the extra precision is missing). + + Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the + surprising but correct result. 
+ +*/ +inline v8sf sin256_ps(v8sf x) { // any x + v8sf xmm1, xmm2 = _mm256_setzero_ps(), xmm3, sign_bit, y; + v8si imm0, imm2; + +#ifndef __AVX2__ + v4si imm0_1, imm0_2; + v4si imm2_1, imm2_2; +#endif + + sign_bit = x; + /* take the absolute value */ + x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_sign_mask); + /* extract the sign bit (upper one) */ + sign_bit = _mm256_and_ps(sign_bit, *(v8sf*)_ps256_sign_mask); + + /* scale by 4/Pi */ + y = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_FOPI); + + /* + Here we start a series of integer operations, which are in the + realm of AVX2. + If we don't have AVX, let's perform them using SSE2 directives + */ + +#ifdef __AVX2__ + /* store the integer part of y in mm0 */ + imm2 = _mm256_cvttps_epi32(y); + /* j=(j+1) & (~1) (see the cephes sources) */ + // another two AVX2 instruction + imm2 = avx2_mm256_add_epi32(imm2, *(v8si*)_pi32_256_1); + imm2 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_inv1); + y = _mm256_cvtepi32_ps(imm2); + + /* get the swap sign flag */ + imm0 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_4); + imm0 = avx2_mm256_slli_epi32(imm0, 29); + /* get the polynom selection mask + there is one polynom for 0 <= x <= Pi/4 + and another one for Pi/4 + +namespace AmpGen { + template class scatter_iterator + { + store_type* m_store; + std::array m_buffer; + size_t m_pos{0}; + public: + scatter_iterator( const size_t& pos, store_type* store ) : + m_store(store), + m_pos(pos) { + if( m_store != nullptr && pos < m_store->aligned_size()) m_buffer = m_store->scatter(pos / simd_size ); + } + stored_type* operator->() const { return &( m_buffer )[m_pos % simd_size]; } + stored_type operator*() const { return m_buffer [m_pos % simd_size]; } + stored_type& operator*() { return m_buffer [m_pos % simd_size]; } + scatter_iterator& operator++() + { + m_pos++; + if ( m_pos % simd_size == 0 ) + { + if constexpr(modifiable == true ) m_store->gather(m_buffer, (m_pos-1) / simd_size); + m_buffer = m_store->scatter( m_pos ); + } + return *this; 
+ } + ~scatter_iterator() + { + if constexpr(modifiable == true) + { + if(m_store != nullptr && m_pos % simd_size != 0 ){ + m_store->gather(m_buffer, m_pos/simd_size); + } + } + } + bool operator==( const scatter_iterator& rhs ) const { return m_pos == rhs.m_pos ; } + bool operator!=( const scatter_iterator& rhs ) const { return m_pos != rhs.m_pos ; } + }; + template + auto make_scatter_iterator( const unsigned& pos, store_type* store) { + return scatter_iterator(pos, store) ; } +} diff --git a/AmpGen/simd/utils.h b/AmpGen/simd/utils.h new file mode 100644 index 00000000000..b8b5032a5e2 --- /dev/null +++ b/AmpGen/simd/utils.h @@ -0,0 +1,42 @@ +#include +#include "AmpGen/simd/avx2_types.h" + +namespace AmpGen { + namespace utils { + + template struct is_vector_type { static constexpr bool value = false; }; + template <> struct is_vector_type { static constexpr bool value = true ; }; + template <> struct is_vector_type { static constexpr bool value = true ; }; + + template simd_type gather( + const container_type& container, const functor_type& functor, unsigned offset=0, float df =0.) + { + std::array rv; + if( df == 0. ) + for( unsigned k = 0 ; k != simd_type::size; ++k ) rv[k] = offset + k < container.size() ? functor(container[offset+k]) : functor(container[container.size()-1]); + else + for( unsigned k = 0 ; k != simd_type::size; ++k ) rv[k] = offset + k < container.size() ? functor(container[offset+k]) : df; + return simd_type( rv.data() ); + } + + template size_t aligned_size( const size_t& unaligned_size ) { + return simd_type::size * unsigned ( 1 + (unaligned_size -1 ) / simd_type::size ); + } + template auto sum_elements( const simd_type& obj ) + { + if constexpr ( is_vector_type::value ) + { + auto arr = obj.to_array(); + auto rt = arr[0]; + for( unsigned i = 1 ; i != simd_type::size; ++i ) rt = rt + arr[i]; + return rt; + } + else return obj; + } + template auto get( vtype v ){ + if constexpr ( is_vector_type::value ) return v.at(p); + if constexpr ( ! 
is_vector_type::value ) return v; + } + + } +} From a46c0ff31bd8a48390a6d0ab643cc051ecc3f9c0 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 14 Apr 2020 18:41:56 +0200 Subject: [PATCH 04/67] Consolidate SIMD integration / caching code with scalar version, removed caching from eventlist into PDF --- AmpGen/AmplitudeRules.h | 72 ++++---- AmpGen/CoherentSum.h | 41 ++--- AmpGen/CompiledExpression.h | 21 ++- AmpGen/CompiledExpressionBase.h | 4 +- AmpGen/EventList.h | 63 +------ AmpGen/EventListSIMD.h | 84 +++------ AmpGen/EventType.h | 5 +- AmpGen/Generator.h | 6 +- AmpGen/Integrator.h | 122 ++++++------- AmpGen/IntegratorSIMD.h | 73 -------- AmpGen/MsgService.h | 113 ++++++------ AmpGen/PolarisedSum.h | 41 ++--- AmpGen/Store.h | 144 +++++++++++++++ AmpGen/SumPDF.h | 4 +- AmpGen/simd/avx2_types.h | 67 +++---- AmpGen/simd/avx2d_types.h | 152 ++++++++++++++++ AmpGen/simd/iterator.h | 2 + AmpGen/simd/utils.h | 84 +++++++-- Standalone.cmake | 15 +- apps/ConvertToSourceCode.cpp | 2 +- apps/Debugger.cpp | 14 +- apps/Generator.cpp | 2 +- examples/FitterWithPolarisation.cpp | 41 +++-- examples/QcGenerator.cpp | 2 +- src/CoherentSum.cpp | 189 +++++++++----------- src/CompiledExpressionBase.cpp | 7 +- src/CompilerWrapper.cpp | 3 +- src/ErrorPropagator.cpp | 2 +- src/EventList.cpp | 50 ++---- src/EventListSIMD.cpp | 89 +++------- src/Expression.cpp | 6 +- src/Integrator.cpp | 69 ++++---- src/IntegratorSIMD.cpp | 69 -------- src/Lineshapes/CoupledChannel.cpp | 4 +- src/Minimiser.cpp | 8 +- src/PolarisedSum.cpp | 264 +++++++++++++--------------- src/Projection.cpp | 13 +- src/Tensor.cpp | 6 +- src/UnaryExpression.cpp | 41 ++++- src/Utilities.cpp | 14 +- 40 files changed, 1024 insertions(+), 984 deletions(-) delete mode 100644 AmpGen/IntegratorSIMD.h create mode 100644 AmpGen/Store.h create mode 100644 AmpGen/simd/avx2d_types.h delete mode 100644 src/IntegratorSIMD.cpp diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index 2c57d2e5689..38823a0dc11 100644 --- 
a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -85,44 +85,38 @@ namespace AmpGen std::map> m_rules; }; - template struct TransitionMatrix + template struct TransitionMatrix : public CompiledExpression { - #if ENABLE_AVX2 - using float_v = AVX2::float_t; - #else - using float_v = real_t; - #endif + using amp_type = CompiledExpression; TransitionMatrix() = default; - TransitionMatrix(const Particle& dt, + TransitionMatrix(Particle dt, const TotalCoupling& coupling, - const CompiledExpression & amp) : + const amp_type& amp) : + amp_type(amp), decayTree(dt), - coupling(coupling), - amp(amp) {} + coupling(coupling) {} - TransitionMatrix(const Particle& dt, + TransitionMatrix(Particle dt, const TotalCoupling& coupling, const MinuitParameterSet& mps, const std::map& evtFormat, const bool& debugThis=false) : + amp_type(dt.getExpression(debugThis ? &db : nullptr ), dt.decayDescriptor(), evtFormat, db, &mps ), decayTree(dt), - coupling(coupling), - amp(decayTree.getExpression(debugThis ? 
&db : nullptr ), decayTree.decayDescriptor(), evtFormat, db, &mps ) {} + coupling(coupling) {} + #if ENABLE_AVX2 - const RT operator()(const Event& event) const { return amp(EventListSIMD::makeEvent(event).data()); } + const RT operator()(const Event& event) const { return amp_type::operator()(EventListSIMD::makeEvent(event).data()); } #else - const RT operator()(const Event& event) const { return amp(event.address()); } + const RT operator()(const Event& event) const { return amp_type::operator()(event.address()); } #endif const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } - std::string name() const { return amp.name(); } - unsigned returnTypeSize() const { return amp.returnTypeSize(); } Particle decayTree; TotalCoupling coupling; complex_t coefficient; DebugSymbols db; - CompiledExpression amp; - size_t addressData = {999}; + bool workToDo = {false}; }; template std::vector processIndex(const std::vector>& tm, const std::string& label) @@ -153,46 +147,46 @@ namespace AmpGen return rt; } - template <> struct TransitionMatrix + template <> struct TransitionMatrix : public CompiledExpression { + using amp_type = CompiledExpression; TransitionMatrix() = default; - TransitionMatrix(const Particle& dt, + TransitionMatrix(Particle dt, const TotalCoupling& coupling, - const CompiledExpression & amp) : + const amp_type& amp) : + amp_type(amp), decayTree(dt), - coupling(coupling), - amp(amp) {} + coupling(coupling) {} - TransitionMatrix(const Particle& dt, + TransitionMatrix(Particle dt, const TotalCoupling& coupling, const MinuitParameterSet& mps, const std::map& evtFormat, const bool& debugThis=false) : + amp_type(dt.getExpression(debugThis ? &db : nullptr ), dt.decayDescriptor(), evtFormat, db, &mps ), decayTree(dt), - coupling(coupling), - amp(decayTree.getExpression(debugThis ? 
&db : nullptr ), decayTree.decayDescriptor(), evtFormat, db, &mps ) { amp.use_rto();} + coupling(coupling) + { use_rto();} - const std::vector operator()(const Event& event) const { - std::vector rt(4); - amp(rt.data(), amp.externBuffer().data(), event.address() ); - return rt; - } - const std::vector operator()(const Event& event, const size_t& cacheOffset) const { - std::vector rt(4); - amp(rt.data(), amp.externBuffer().data(), event.address() + cacheOffset); + const std::vector operator()(const Event& event) const + { + std::vector rt(4); + #if ENABLE_AVX2 + amp_type::operator()(rt.data(), 1, externBuffer().data(), EventListSIMD::makeEvent(event).data()); + #else + amp_type::operator()(rt.data(), 1, externBuffer().data(), event.address()); + #endif return rt; } + const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } Particle decayTree; TotalCoupling coupling; complex_t coefficient; DebugSymbols db; - CompiledExpression amp; - size_t addressData = {999}; bool workToDo = {false}; - std::string name() const { return amp.name(); } - unsigned returnTypeSize() const { return amp.returnTypeSize(); } + unsigned size = {0}; }; } // namespace AmpGen diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 41f8cdfa30d..89bd84cb3f6 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -16,12 +16,11 @@ #include "AmpGen/EventListSIMD.h" #include "AmpGen/EventType.h" #include "AmpGen/Integrator.h" -#include "AmpGen/IntegratorSIMD.h" #include "AmpGen/Types.h" #include "AmpGen/Event.h" #include "AmpGen/Projection.h" #include "AmpGen/MinuitParameter.h" - +#include "AmpGen/Store.h" namespace AmpGen { @@ -46,14 +45,8 @@ namespace AmpGen public: #if ENABLE_AVX2 using EventList_type = EventListSIMD; - using Integrator_type= IntegratorSIMD; - using complex_v = AVX2::complex_t; - using float_v = AVX2::float_t; #else using EventList_type = EventList; - using Integrator_type = Integrator; - using complex_v = complex_t; - using float_v = 
real_t; #endif CoherentSum(); CoherentSum( const EventType& type, const AmpGen::MinuitParameterSet& mps, const std::string& prefix = "" ); @@ -64,33 +57,30 @@ namespace AmpGen auto operator[]( const size_t& index ) { return m_matrixElements[index]; } const auto operator[]( const size_t& index ) const { return m_matrixElements[index]; } - size_t size() const { return m_matrixElements.size(); } - - real_t getWeight() const { return m_weight; } - real_t operator()( const Event& evt ) const { return m_weight*std::norm(getVal(evt))/m_norm; } - real_t prob( const Event& evt ) const { return m_weight*std::norm(getVal(evt))/m_norm; } - real_t prob_unnormalised( const Event& evt ) const { return std::norm(getVal(evt)); } - real_t norm( const Bilinears& norms ) const; + size_t size() const { return m_matrixElements.size(); } + real_t getWeight() const { return m_weight; } + real_t norm( const Bilinears& norms ) const; real_t norm() const; real_t getNorm( const Bilinears& normalisations ); complex_t norm( const size_t& x, const size_t& y ) const; complex_t getVal( const Event& evt ) const; - complex_t getValNoCache( const Event& evt ) const; - + complex_t getValNoCache( const Event& evt ) const; + void transferParameters(); void prepare(); void printVal( const Event& evt ); - void updateNorms( const std::vector& changedPdfIndices ); + void updateNorms(); void setWeight( MinuitProxy param ) { m_weight = param; } void makeTotalExpression(); void reset( bool resetEvents = false ); void setEvents( EventList_type& list ); #if ENABLE_AVX2 - void setEvents( EventList& list) { setEvents( *(new EventListSIMD(list)) ) ; } + void setEvents( EventList& list) { m_ownEvents = true; setEvents( *(new EventListSIMD(list)) ) ; } void setMC( EventList& list) { setMC( *(new EventListSIMD(list)) ) ; } - float_v operator()( const float_v*, const unsigned) const; #endif + float_v operator()(const float_v*, const unsigned) const; + real_t operator()(const Event& evt ) const { return 
m_weight*std::norm(getVal(evt))/m_norm; } void setMC( EventList_type& sim ); void debug( const Event& evt, const std::string& nameMustContain=""); @@ -106,20 +96,21 @@ namespace AmpGen KeyedView componentEvaluator(const EventList_type* = nullptr) const; protected: - std::vector> m_matrixElements; ///< Vector of (expanded) matrix elements + std::vector> m_matrixElements; ///< Vector of matrix elements Bilinears m_normalisations; ///< Normalisation integrals AmplitudeRules m_rules; ///< Ruleset for the selected transition. - Integrator_type m_integrator; ///< Integral dispatch tool (with default unroll = 10) - TransitionMatrix m_total; ///< Total Matrix Element + Integrator m_integrator; ///< Tool to calculate integrals EventList_type* m_events = {nullptr}; ///< Data events to evaluate PDF on - + Store m_cache; ///< Store of intermediate values for the PDF calculation + + bool m_ownEvents = {false}; ///< Flag as to whether events are owned by this PDF or not EventType m_evtType; ///< Final state for this amplitude size_t m_prepareCalls = {0}; ///< Number of times prepare has been called size_t m_lastPrint = {0}; ///< Last time verbose PDF info was printed size_t m_printFreq = {0}; ///< Frequency to print verbose PDF info MinuitProxy m_weight = {nullptr, 1}; ///< Weight (i.e. 
the normalised yield) - double m_norm = {0}; ///< Normalisation integral + double m_norm = {1}; ///< Normalisation integral bool m_isConstant = {false}; ///< Flag for a constant PDF bool m_dbThis = {false}; ///< Flag to generate amplitude level debugging bool m_verbosity = {false}; ///< Flag for verbose printing diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index 558f3588b56..be6c4eecfaf 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -9,7 +9,7 @@ #include "AmpGen/MsgService.h" #include "AmpGen/Utilities.h" #include "AmpGen/Types.h" - +#include "AmpGen/simd/utils.h" #include #include #include @@ -35,7 +35,7 @@ namespace AmpGen private: DynamicFCN m_fcn; - DynamicFCN m_batchFcn; + DynamicFCN m_batchFcn; DynamicFCN>(ARGS...)> m_fdb; std::vector m_externals = {}; bool m_hasExternalsChanged = {false}; @@ -64,13 +64,13 @@ namespace AmpGen bool use_rto() const override { return std::is_same::value; } - std::string args(bool includeTypes = false) const override + std::string args() const override { std::string signature; auto argTypes = typelist(); for( unsigned int i = 0 ; i < argTypes.size(); ++i ) { - signature += (includeTypes ? 
argTypes[i] : "") + " x"+std::to_string(i) ; + signature += " x"+std::to_string(i) ; if( i != argTypes.size() - 1 ) signature += ", "; } return signature; @@ -125,13 +125,16 @@ namespace AmpGen { stream << "#include \n"; stream << "extern \"C\" void " << progName() - << "_batch(" << returnTypename() << "* rt" - << ", const size_t& N, " + << "_batch("; + stream << " const size_t& N, " << " const size_t& eventSize, " - << " const size_t& cacheSize, " << args(true) << ") {\n"; + << " const size_t& cacheSize, "; + stream << typeof() << " * rt, "; + stream << CompiledExpressionBase::fcnSignature(typelist(), use_rto(), false) << ") {\n"; stream << "#pragma omp parallel for\n"; - stream << "for( unsigned int i = 0; i != N/8; ++i ){\n"; - stream << " rt[cacheSize*i] = " << progName() + "( x0, x1 + i * eventSize);"; + stream << "for( size_t i = 0; i != N/" << utils::size::value << "; ++i ){\n"; + if( use_rto() ) stream << progName() + "( r + cacheSize * i, s, x0, x1 + i * eventSize);"; + else stream << " rt[cacheSize*i] = " << progName() + "( x0, x1 + i * eventSize);"; stream << "}\n}"; } diff --git a/AmpGen/CompiledExpressionBase.h b/AmpGen/CompiledExpressionBase.h index d80f542b090..691f1f94685 100644 --- a/AmpGen/CompiledExpressionBase.h +++ b/AmpGen/CompiledExpressionBase.h @@ -53,11 +53,11 @@ namespace AmpGen virtual bool isReady() const = 0; virtual std::string returnTypename() const = 0; virtual std::string fcnSignature() const = 0; - virtual std::string args(bool=false) const = 0; + virtual std::string args() const = 0; virtual void print() const = 0; virtual ~CompiledExpressionBase(); virtual unsigned returnTypeSize() const = 0; - static std::string fcnSignature(const std::vector&, bool); + static std::string fcnSignature(const std::vector&, bool=false, bool=true); virtual bool use_rto() const = 0; Expression expression() const { return m_obj; } void enableBatch() { m_enableBatch = true ; } diff --git a/AmpGen/EventList.h b/AmpGen/EventList.h index 
260cd3affbd..d96f5326c91 100644 --- a/AmpGen/EventList.h +++ b/AmpGen/EventList.h @@ -31,10 +31,9 @@ namespace AmpGen { private: std::vector m_data = {}; - std::vector m_cache = {}; EventType m_eventType = {}; - std::map m_pdfIndex = {}; std::map m_extensions = {}; + public: typedef Event value_type; EventList() = default; @@ -55,8 +54,7 @@ namespace AmpGen { loadFromTree( tree, ArgumentPack(args...) ); } - - void resetCache(); + const EventList& store() const { return *this;} std::vector::reverse_iterator rbegin() { return m_data.rbegin(); } std::vector::reverse_iterator rend() { return m_data.rend(); } std::vector::iterator begin() { return m_data.begin(); } @@ -70,69 +68,22 @@ namespace AmpGen EventType eventType() const { return m_eventType; } const Event& at( const size_t& pos ) const { return m_data[pos]; } size_t size() const { return m_data.size(); } + size_t aligned_size() const { return m_data.size() ; } + size_t nBlocks() const { return m_data.size() ; } double integral() const; - + real_t weight( const size_t& pos) const { return m_data[pos].weight(); } + real_t genPDF( const size_t& pos) const { return m_data[pos].genPdf(); } void reserve( const size_t& size ) { m_data.reserve( size ); } void push_back( const Event& evt ) { m_data.push_back( evt ); } void setEventType( const EventType& type ) { m_eventType = type; } void add( const EventList& evts ); void loadFromTree( TTree* tree, const ArgumentPack& args ); void loadFromFile( const std::string& fname, const ArgumentPack& args ); - void printCacheInfo( const unsigned int& nEvt = 0 ); void clear(); void erase( const std::vector::iterator& begin, const std::vector::iterator& end ); TTree* tree( const std::string& name, const std::vector& extraBranches = {} ) const; - - size_t getCacheIndex( const CompiledExpressionBase& PDF, bool& status ) const; - size_t getCacheIndex( const CompiledExpressionBase& PDF ) const; - template size_t registerExpression(const T& expression, const size_t& size_of=0) - { - 
auto key = FNV1a_hash( expression.name() ); - auto pdfIndex = m_pdfIndex.find( key ); - if ( pdfIndex != m_pdfIndex.end() ) return pdfIndex->second; - else { - size_t expression_size = size_of == 0 ? expression.returnTypeSize() / sizeof(complex_t) : size_of; - m_pdfIndex[key] = m_cache.size() / m_data.size(); - m_cache.resize( m_cache.size() + m_data.size() * expression_size ); - return m_pdfIndex[key]; - } - } - complex_t cache( const unsigned& evtIndex, const unsigned& cacheElement ) - { - unsigned cacheSize = m_cache.size() / m_data.size(); - return m_cache[cacheSize * evtIndex + cacheElement]; - } - void setCache( const complex_t& v, const unsigned& p ) - { - m_cache[p] = v; - } - void setCache( const std::vector& v, const unsigned& p ) - { - std::memmove(m_cache.data() +p, v.data(), sizeof(complex_t) * v.size() ); - } - template void updateCache( const FCN& fcn, const size_t& index ) - { - unsigned cacheSize = m_cache.size() / m_data.size(); - if constexpr( std::is_same< typename FCN::return_type, void >::value ) - { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t evt = 0; evt < size(); ++evt ) - { - fcn( m_cache.data() + cacheSize*evt +index , fcn.externBuffer().data(), m_data[evt].address() ); - } - } - else { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t evt = 0; evt < size(); ++evt ) { setCache( fcn(m_data[evt].address() ), cacheSize*evt + index ) ; } - } - } - void reserveCache(const size_t& index); - void resizeCache(const size_t& newCacheSize ); + TH1D* makeProjection(const Projection& projection , const ArgumentPack& args = ArgumentPack()) const; TH2D* makeProjection(const Projection2D& projection, const ArgumentPack& args = ArgumentPack()) const; std::vector makeProjections( const std::vector& projections, const ArgumentPack& args ); diff --git a/AmpGen/EventListSIMD.h b/AmpGen/EventListSIMD.h index bad640db26c..61873b890c3 100644 --- a/AmpGen/EventListSIMD.h +++ b/AmpGen/EventListSIMD.h @@ -23,29 +23,20 @@ 
#include #endif -#if ENABLE_AVX2 - - #include "AmpGen/simd/avx2_types.h" - #include "AmpGen/simd/iterator.h" +#include "AmpGen/simd/iterator.h" +#include "AmpGen/simd/utils.h" +#include "AmpGen/Store.h" namespace AmpGen { - using float_v = AVX2::float_t; - using complex_v = AVX2::complex_t; - class CompiledExpressionBase; class EventListSIMD { private: - std::vector m_data = {}; - std::vector m_weights = {}; - std::vector m_genPDF = {}; - std::vector m_cache = {}; - EventType m_eventType = {}; - std::map m_pdfIndex = {}; - unsigned m_eventSize = {0}; - unsigned m_nEvents = {0}; - unsigned m_nBlocks = {0}; + Store m_data {}; + std::vector m_weights {}; + std::vector m_genPDF {}; + EventType m_eventType {}; public: typedef Event value_type; EventListSIMD() = default; @@ -66,63 +57,36 @@ namespace AmpGen { loadFromTree( tree, ArgumentPack(args...) ); } - const float_v* data() const { return m_data.data(); } - const AVX2::complex_t* cache() const { return m_cache.data() ; } EventListSIMD( const EventList& other ); - void resetCache(); - const AVX2::complex_t cache( const unsigned& evtIndex, const unsigned& cachePos ) - { - return m_cache[ (unsigned)(evtIndex/float_v::size) * cacheSize() + cachePos ]; - } + const float_v* data() const { return m_data.data(); } + operator Store () const { return m_data ; } + const auto& store() const { return m_data; } const Event at(const unsigned& p) const { return EventListSIMD::operator[](p) ; } - const float_v* block(const unsigned& p) { return m_data.data() + p * m_eventSize ; } + const float_v* block(const unsigned& p) { return m_data.data() + p * m_data.nFields(); } float_v weight(const unsigned& p) const { return m_weights[p]; } float_v genPDF(const unsigned& p) const { return m_genPDF[p]; } const Event operator[]( const size_t&) const; - std::array scatter(unsigned) const; - void gather(const std::array&, unsigned); - auto begin() const { return make_scatter_iterator(0,this); } - auto end() const { return 
make_scatter_iterator(m_nEvents, (const EventListSIMD*)(nullptr) ); } - auto begin() { return make_scatter_iterator(0, this); } - auto end() { return make_scatter_iterator(m_nEvents, (EventListSIMD*)(nullptr) ); } + std::array::value> scatter(unsigned) const; + void gather(const std::array::value>&, unsigned); + auto begin() const { return make_scatter_iterator::value>(0,this); } + auto end() const { return make_scatter_iterator::value>(size(), (const EventListSIMD*)(nullptr) ); } + auto begin() { return make_scatter_iterator::value, true>(0, this); } + auto end() { return make_scatter_iterator::value, true>(size(), (EventListSIMD*)(nullptr) ); } EventType eventType() const { return m_eventType; } - size_t aligned_size() const { return nBlocks() * float_v::size; } ///aligned number of events - size_t cacheSize() const { return m_cache.size() / m_nBlocks; } /// number of cached elements + size_t aligned_size() const { return m_data.aligned_size(); } double integral() const; - size_t eventSize() const { return m_eventSize; } - size_t size() const { return m_nEvents ; } - size_t nBlocks() const { return m_nBlocks; } - void reserve( const size_t& size ) { m_data.reserve( size * m_eventType.size() ); } - void setEventType( const EventType& type ) { m_eventType = type; m_eventSize = m_eventType.size(); } + size_t eventSize() const { return m_data.nFields(); } + size_t size() const { return m_data.size(); } + size_t nBlocks() const { return m_data.nBlocks(); } + void setEventType( const EventType& type ) { m_eventType = type; } void add( const EventListSIMD& evts ); void loadFromTree( TTree* tree, const ArgumentPack& args ); void loadFromFile( const std::string& fname, const ArgumentPack& args ); - void printCacheInfo( const unsigned int& nEvt = 0 ); void clear(); TTree* tree( const std::string& name, const std::vector& extraBranches = {} ) const; - size_t getCacheIndex( const CompiledExpressionBase& PDF, bool& status ) const; - size_t getCacheIndex( const 
CompiledExpressionBase& PDF ) const; - template unsigned registerExpression(const T& expression, const unsigned& size_of=0) - { - auto key = FNV1a_hash( expression.name() ); - auto pdfIndex = m_pdfIndex.find( key ); - if ( pdfIndex != m_pdfIndex.end() ) return pdfIndex->second; - else { - unsigned nEvents = aligned_size(); - unsigned expression_size = size_of == 0 ? expression.returnTypeSize() / sizeof(AmpGen::AVX2::complex_t) : size_of; - m_pdfIndex[key] = m_cache.size() / nBlocks(); - m_cache.resize(m_cache.size() + nBlocks() * expression_size); - return m_pdfIndex[key]; - } - } - template void updateCache( const FCN& fcn, const size_t& index ) - { - fcn.batch(m_cache.data() + index, aligned_size(), m_eventSize, cacheSize(), fcn.externBuffer().data(), m_data.data()); - } - void reserveCache(const unsigned& index); - void resizeCache( const unsigned& index); + TH1D* makeProjection(const Projection& projection , const ArgumentPack& args = ArgumentPack()) const; TH2D* makeProjection(const Projection2D& projection, const ArgumentPack& args = ArgumentPack()) const; std::vector makeProjections( const std::vector& projections, const ArgumentPack& args ); @@ -169,5 +133,3 @@ namespace AmpGen } // namespace AmpGen #endif - -#endif diff --git a/AmpGen/EventType.h b/AmpGen/EventType.h index d83dd26f815..93fa1467f03 100644 --- a/AmpGen/EventType.h +++ b/AmpGen/EventType.h @@ -15,6 +15,8 @@ namespace AmpGen Deals with final state configuration of events, specifically dealing with the ordering of particles in trees. */ + class EventType; + std::ostream& operator<<( std::ostream& os, const EventType& type ); class EventType { @@ -63,6 +65,8 @@ namespace AmpGen /// Calculates the number of spin indices associated with the initial and final state, i.e. the rank of the relevant transition matrix. 
std::pair dim() const; + + friend std::ostream& AmpGen::operator<<( std::ostream& os, const EventType& type ); private: std::string m_mother; ///< name of decaying particle @@ -76,7 +80,6 @@ namespace AmpGen std::pair m_dim; ///< Rank of the relevant transition matrix bool m_alt_part_names; ///< alternative naming in ouput tree (e.g. Xi- pi+ pi+ becomes Xim pip0 pip1 rather than _1_Xi# _2_pi~ _3_pi~) }; - std::ostream& operator<<( std::ostream& os, const EventType& type ); } // namespace AmpGen #endif diff --git a/AmpGen/Generator.h b/AmpGen/Generator.h index e55372e71b4..4a97856dadd 100644 --- a/AmpGen/Generator.h +++ b/AmpGen/Generator.h @@ -76,7 +76,7 @@ namespace AmpGen auto size0 = list.size(); auto tStartTotal = std::chrono::high_resolution_clock::now(); pdf.reset( true ); - ProgressBar pb(60, trimmedString(__PRETTY_FUNCTION__) ); + ProgressBar pb(60, detail::trimmedString(__PRETTY_FUNCTION__) ); ProfileClock t_phsp, t_eval, t_acceptReject; std::vector efficiencyReport(m_generatorBlock,false); @@ -92,7 +92,7 @@ namespace AmpGen if ( maxProb == 0 ) { double max = 0; for ( auto& evt : mc ) { - double value = pdf.prob_unnormalised(evt) / evt.genPdf(); + double value = pdf(evt) / evt.genPdf(); if ( value > max ) max = value; } maxProb = max * 1.5; @@ -104,7 +104,7 @@ namespace AmpGen #pragma omp parallel for #endif for ( size_t i=0; i < mc.size(); ++i ) - mc[i].setGenPdf(pdf.prob_unnormalised(mc[i]) / mc[i].genPdf()); + mc[i].setGenPdf(pdf(mc[i]) / mc[i].genPdf()); for( size_t i=0; i != mc.size(); ++i ) { diff --git a/AmpGen/Integrator.h b/AmpGen/Integrator.h index a83cce80da2..cc98c208031 100644 --- a/AmpGen/Integrator.h +++ b/AmpGen/Integrator.h @@ -3,9 +3,12 @@ #include "AmpGen/Types.h" #include "AmpGen/EventList.h" -#include "AmpGen/CompiledExpressionBase.h" #include #include +#include "AmpGen/simd/utils.h" +#include "AmpGen/Store.h" +#include "AmpGen/EventListSIMD.h" +#include "AmpGen/EventList.h" /* * Calculates Bilinears A_i A_j^* integrated over the 
phase-space. @@ -43,7 +46,7 @@ namespace AmpGen template struct Integral { - typedef std::function TransferFCN; + typedef std::function TransferFCN; size_t i = {0}; size_t j = {0}; TransferFCN transfer; @@ -51,80 +54,67 @@ namespace AmpGen Integral(const size_t& i, const size_t& j, TransferFCN t) : i(i), j(j), transfer(t) {} }; + class Integrator { - typedef const complex_t& arg; - typedef std::function TransferFCN; + typedef std::function TransferFCN; public: - explicit Integrator( const EventList* events = nullptr ); + Integrator() = default; - bool isReady() const; - const EventList* events() const; - void queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim = true); - void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); - void queueIntegral(const size_t& i, const size_t& j, complex_t* result); - void flush(); - void setBuffer( complex_t* pos, const complex_t& value, const size_t& size ); - void setBuffer( complex_t* pos, const std::vector& value, const size_t& size); - complex_t get(const unsigned& i, const unsigned& evt) const { return m_cache[i * m_events->size() + evt ]; } - template unsigned getCacheIndex( const T& t ) const { return m_index.find( t.name() )->second.first; } - double norm() const { return m_norm; } - template void allocate( const std::vector& expressions, const size_t& size_of = 0) - { - if( m_events == nullptr ) return; - unsigned totalSize = 0; - for( unsigned i = 0; i != expressions.size(); ++i ){ - size_t vsize = size_of == 0 ? 
expressions[i].returnTypeSize() / sizeof(complex_t) : size_of; - m_index[ expressions[i].name() ] = std::make_pair(totalSize, vsize); - totalSize += vsize; - } - m_cache.resize( m_events->size() * totalSize ); + template Integrator( const EventList_type* events, const std::vector& expressions ={}, const size_t& size_of =0) : m_events(events) + { + if( events == nullptr ) { + WARNING("No events specified, returning"); + return; } - - template void prepareExpression(const T& expression) + m_cache = Store(events->size(), expressions, size_of ); + m_weight.resize( events->nBlocks() ); + float_v norm_acc = 0.; + for( size_t i = 0 ; i < events->nBlocks(); ++i ) { - if( m_events == nullptr ) return; - auto f = m_index.find( expression.name() ); - if( f == m_index.end() ) FATAL("Expression: " << expression.name() << " is not registed"); - auto [p0, s] = f->second; - INFO("Preparing: " << expression.name() << " index = " << p0 << " with: " << s << " values" ); - if constexpr( std::is_same< typename T::return_type, void >::value ) - { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t i = 0; i < m_events->size(); ++i ) - { - std::vector buf(s); - expression(&buf[0], expression.externBuffer().data(), m_events->at(i).address() ); - for( unsigned j = 0; j != s; ++j ) m_cache[ (p0+j) * m_events->size() + i] = buf[j]; - } - } - else { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t i = 0; i < m_events->size(); ++i ) - setBuffer( &(m_cache[p0 * m_events->size() +i] ), expression(m_events->at(i).address()),s ); - } + m_weight[i] = events->weight(i) / events->genPDF(i); + norm_acc = norm_acc + m_weight[i]; } + m_norm = utils::sum_elements(norm_acc); + } + + bool isReady() const; + void queueIntegral(const size_t& c1, + const size_t& c2, + const size_t& i, + const size_t& j, + Bilinears* out, + const bool& sim = true); + void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); + void queueIntegral(const size_t& i, 
const size_t& j, complex_t* result); + void flush(); + template return_type get( const unsigned& index, const unsigned& evt ) const ; + template unsigned getCacheIndex( const T& t ) const { return m_cache.find(t) ; } + double norm() const { return m_norm; } + + template void updateCache(const T& expression) + { + #if ENABLE_AVX2 + if( m_events != nullptr ) m_cache.update( static_cast(m_events)->store(), expression ); + #else + if( m_events != nullptr ) m_cache.update( static_cast(m_events)->store(), expression ); + #endif + } + template + const T* events() const { return static_cast(m_events) ; } + private: - static constexpr size_t N = {10}; ///unroll factor - size_t m_counter = {0}; /// - std::array, N> m_integrals; - const EventList* m_events = {nullptr}; - std::vector m_cache; - std::vector m_weight; - std::map> m_index; - double m_norm = {0}; - void integrateBlock(); + static constexpr size_t N = {8}; ///unroll factor + size_t m_counter = {0}; /// + std::array, N> m_integrals; + const void* m_events = {nullptr}; + std::vector m_weight; + Store m_cache; + double m_norm = {0}; + void integrateBlock(); }; + } // namespace AmpGen #endif diff --git a/AmpGen/IntegratorSIMD.h b/AmpGen/IntegratorSIMD.h deleted file mode 100644 index 2e166cd1294..00000000000 --- a/AmpGen/IntegratorSIMD.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef AMPGEN_INTEGRATORSIMD_H -#define AMPGEN_INTEGRATORSIMD_H 1 - -#if ENABLE_AVX2 - -#include "AmpGen/Integrator.h" -#include "AmpGen/simd/avx2_types.h" -#include "AmpGen/EventListSIMD.h" - -namespace AmpGen { - /// test /// - class IntegratorSIMD - { - typedef const complex_t& arg; - typedef std::function TransferFCN; - - public: - explicit IntegratorSIMD( const EventListSIMD* events = nullptr ); - - bool isReady() const; - void queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim = true); - void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); 
- void queueIntegral(const size_t& i, const size_t& j, complex_t* result); - void flush(); - complex_t get(const unsigned& i, const unsigned& evt) const { return m_cache[i * m_events->size() + evt/float_v::size ].at( evt % float_v::size ); } - template unsigned getCacheIndex( const T& t ) const { return m_index.find( t.name() )->second.first; } - double norm() const { return m_norm; } - const EventListSIMD* events() const { return m_events; } - template void allocate( const std::vector& expressions, const size_t& size_of = 0) - { - if( m_events == nullptr ) return; - unsigned totalSize = 0; - for( unsigned i = 0; i != expressions.size(); ++i ){ - size_t vsize = size_of == 0 ? expressions[i].returnTypeSize() / sizeof(AVX2::complex_t) : size_of; - m_index[ expressions[i].name() ] = std::make_pair(totalSize, vsize); - totalSize += vsize; - } - m_cache.resize( m_events->size() * totalSize ); - } - - template void prepareExpression(const T& expression) - { - if( m_events == nullptr ) return; - auto f = m_index.find( expression.name() ); - if( f == m_index.end() ) FATAL("Expression: " << expression.name() << " is not registed"); - auto [p0, s] = f->second; - expression.batch(m_cache.data() + p0*m_events->aligned_size(), - m_events->aligned_size(), - m_events->eventSize(), - 1, - expression.externBuffer().data(), - m_events->data() ); - } - - private: - static constexpr size_t N = {10}; ///unroll factor - size_t m_counter = {0}; /// - std::array, N> m_integrals; - const EventListSIMD* m_events = {nullptr}; - std::vector m_cache; - std::vector m_weight; - std::map> m_index; - double m_norm = {0}; - void integrateBlock(); - }; -} -#endif -#endif diff --git a/AmpGen/MsgService.h b/AmpGen/MsgService.h index a8d8c873585..3067529e460 100644 --- a/AmpGen/MsgService.h +++ b/AmpGen/MsgService.h @@ -2,98 +2,93 @@ #define AMPGEN_MSGSERVICE_H /** @defgroup msgService Messaging and logging - MsgService Header - Defines coloured and organised output macro streams using 
__PRETTY_FUNCTION__ - INFO() - info level messages, always displayed - ERROR() - error level messages, always displayed - FATAL() - error message that throws the process, always displayed - WARNING() - warning level messages, can be switched with the WARNINGLEVEL flag - DEBUG() - debug level messages, can be switched with the DEBUGLEVEL flag - */ + MsgService Header + Defines coloured and organised output macro streams using __PRETTY_FUNCTION__ + INFO() - info level messages, always displayed + ERROR() - error level messages, always displayed + FATAL() - error message that throws the process, always displayed + WARNING() - warning level messages, can be switched with the WARNINGLEVEL flag + DEBUG() - debug level messages, can be switched with the DEBUGLEVEL flag + */ #include #include #include +#include #define WARNINGLEVEL 1 -//#define DEBUGLEVEL 0 -//#define TRACELEVEL 0 #define FCNNAMELENGTH 45 -inline std::string trimmedString( std::string thing, const unsigned int& length = FCNNAMELENGTH ) -{ - size_t pos2=0; - do { - pos2 = thing.find( "AmpGen::" ); - if ( pos2 != std::string::npos ) thing = thing.replace( pos2, 8, "" ); - } while( pos2 != std::string::npos ); - - pos2 = thing.find( "std::" ); - if ( pos2 != std::string::npos ) thing.replace( pos2, 5, "" ); - - pos2 = thing.find( "virtual " ); - if ( pos2 != std::string::npos ) thing = thing.replace( pos2, 8, "" ); - - size_t pos = thing.find( "(" ); - - if ( pos != std::string::npos ) { - return pos < length ? 
thing.substr( 0, pos ) : thing.substr( 0, length ); +namespace AmpGen { + namespace detail { + inline std::string trimmedString( std::string thing, const unsigned int& length = FCNNAMELENGTH ) + { + size_t pos2=0; + do { + pos2 = thing.find( "AmpGen::" ); + if ( pos2 != std::string::npos ) thing = thing.replace( pos2, 8, "" ); + } while( pos2 != std::string::npos ); + + pos2 = thing.find( "std::" ); + if ( pos2 != std::string::npos ) thing.replace( pos2, 5, "" ); + + pos2 = thing.find( "virtual " ); + if ( pos2 != std::string::npos ) thing = thing.replace( pos2, 8, "" ); + + size_t pos = thing.find( "(" ); + + if ( pos != std::string::npos ) { + return pos < length ? thing.substr( 0, pos ) : thing.substr( 0, length ); + } + return thing.size() < length ? thing : thing.substr( 0, length ) + "..."; + } + template struct debug_type : std::false_type {}; } - return thing.size() < length ? thing : thing.substr( 0, length ) + "..."; } +#define ENABLE_DEBUG(X) \ + namespace AmpGen { namespace detail { template <> struct debug_type : std::true_type {}; } } + /// @ingroup msgService macro DEBUG /// Used for printing verbose debugging messages, only if DEBUGLEVEL is defined. -#ifdef DEBUGLEVEL -#define DEBUG( X ) \ - std::cout << "\033[2;32m" << std::left << std::setw( FCNNAMELENGTH ) << trimmedString( __PRETTY_FUNCTION__ ) \ -<< " DEBUG " \ -<< "\033[0m" << X << std::endl -#else -#define DEBUG( X ) -#endif +#define DEBUG( X ) { \ + if constexpr( AmpGen::detail::debug_type::type>::value ) { \ + std::cout << "\033[2;32m" << std::left << std::setw( FCNNAMELENGTH ) << AmpGen::detail::trimmedString(__PRETTY_FUNCTION__) \ + << " DEBUG " \ + << "\033[0m" << X << " " << std::endl; } } /// @ingroup msgService macro INFO /// Used for printing information messages, and will always be printed. 
#define INFO( X ) \ - std::cout << "\033[2;34m" << std::left << std::setw( FCNNAMELENGTH ) << trimmedString( __PRETTY_FUNCTION__ ) \ -<< " INFO " \ -<< "\033[0m" << X << std::endl + std::cout << "\033[2;34m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ + << " INFO " \ + << "\033[0m" << X << std::endl /// @ingroup msgService macro ERROR /// Used for printing errors messages, and will always be printed. #define ERROR( X ) \ - std::cout << "\033[1;31m" << std::left << std::setw( FCNNAMELENGTH ) << trimmedString( __PRETTY_FUNCTION__ ) \ -<< " ERROR " \ -<< "\033[0m" << X << std::endl + std::cout << "\033[1;31m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ + << " ERROR " \ + << "\033[0m" << X << std::endl /// @ingroup msgService macro FATAL /// Used for printing fatal errors messages, and will always be printed and will terminate the process afterwards. #define FATAL( X ) \ - { std::cout << "\033[1;31m" << std::left << std::setw( FCNNAMELENGTH ) << trimmedString( __PRETTY_FUNCTION__ ) \ -<< " FATAL " \ -<< "\033[0m" << X << std::endl; \ -throw std::runtime_error( trimmedString( __PRETTY_FUNCTION__)+ " FATAL" ) ;} +{ std::cout << "\033[1;31m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ + << " FATAL " \ + << "\033[0m" << X << std::endl; \ + throw std::runtime_error( detail::trimmedString( __PRETTY_FUNCTION__)+ " FATAL" ) ;} /// @ingroup msgService macro FATAL /// Used for printing warning messages, can be switched off using WARNINGLEVEL. These messages are often harmless, but sometimes not! 
#ifdef WARNINGLEVEL #define WARNING( X ) \ - std::cout << "\033[1;35m" << std::left << std::setw( FCNNAMELENGTH ) << trimmedString( __PRETTY_FUNCTION__ ) \ -<< " WARNING " \ -<< "\033[0m" << X << std::endl + std::cout << "\033[1;35m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ + << " WARNING " \ + << "\033[0m" << X << std::endl #else #define WARNING( X ) #endif -#ifdef TRACELEVEL -#define TRACE( X ) \ - std::cout << "\033[1;36m" << std::left << std::setw( FCNNAMELENGTH ) << trimmedString( __PRETTY_FUNCTION__ ) \ -<< " TRACE " \ -<< "\033[0m" << X << std::endl -#else -#define TRACE( X ) -#endif - #endif diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index 308f61833e3..3bba2f76609 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -21,7 +21,6 @@ #include "AmpGen/Tensor.h" #include "AmpGen/MinuitParameter.h" -#include "AmpGen/IntegratorSIMD.h" #include "TMatrixD.h" @@ -38,49 +37,47 @@ namespace AmpGen { public: #if ENABLE_AVX2 - using EventList_type = EventListSIMD; - using Integrator_type= IntegratorSIMD; - using complex_v = AVX2::complex_t; - using float_v = AVX2::float_t; + using EventList_type = EventListSIMD; #else using EventList_type = EventList; - using Integrator_type = Integrator; - using complex_v = complex_t; - using float_v = real_t; #endif PolarisedSum() = default; PolarisedSum(const EventType&, MinuitParameterSet&, const std::vector& = {}); void prepare(); - void setEvents(EventList&); - void setMC(EventList&); + void setEvents(EventList_type&); + void setMC(EventList_type&); + #if ENABLE_AVX2 + void setEvents(EventList& evts){ m_ownEvents = true; setEvents( *new EventList_type(evts)) ; }; + void setMC(EventList& evts){ setMC( *new EventList_type(evts)) ; }; + #endif + float_v operator()( const float_v*, const unsigned) const; + real_t operator()(const Event& evt) const; void reset(const bool& = false); void debug(const Event&); void debug_norm(); void setWeight(MinuitProxy); 
double getWeight() const; - void calculateNorms(); + void updateNorms(); void generateSourceCode(const std::string&, const double& = 1, bool = false); - void build_probunnormalised(); Expression probExpression(const Tensor&, const std::vector&, DebugSymbols* = nullptr) const; size_t size() const; real_t norm() const; complex_t norm(const size_t&, const size_t&, Integrator* = nullptr); - inline real_t operator()(const Event& evt) const { return m_weight * prob_unnormalised(evt) / m_norm; } - real_t prob_unnormalised(const Event&) const; - real_t prob(const Event&) const; - real_t getValNoCache(const Event&) ; + real_t getValNoCache(const Event&) const; std::vector fitFractions(const LinearErrorPropagator&); std::vector> matrixElements() const; void transferParameters(); Tensor transitionMatrix(); const TransitionMatrix& operator[](const size_t& i) const { return m_matrixElements[i] ; } - std::function evaluator(const EventList* = nullptr) const; - KeyedView componentEvaluator(const EventList* = nullptr) const; + std::function evaluator(const EventList_type* = nullptr) const; + KeyedView componentEvaluator(const EventList_type* = nullptr) const; private: size_t m_nCalls = {0}; real_t m_norm = {1}; - EventList* m_events = {nullptr}; + EventList_type* m_events = {nullptr}; + Store m_cache = {}; + bool m_ownEvents = {false}; MinuitParameterSet* m_mps = {nullptr}; MinuitProxy m_weight = {nullptr,1}; std::vector m_pVector = {}; @@ -94,9 +91,9 @@ namespace AmpGen std::vector m_integIndex; AmplitudeRules m_rules; std::pair m_dim; - std::vector> m_matrixElements; - CompiledExpression m_probExpression; - + std::vector> m_matrixElements; + CompiledExpression m_probExpression; + std::vector m_pdfCache; std::vector> indexProduct(const std::vector>&, const std::vector&) const; std::vector polarisations(const std::string&) const ; }; diff --git a/AmpGen/Store.h b/AmpGen/Store.h new file mode 100644 index 00000000000..594efb83ff9 --- /dev/null +++ b/AmpGen/Store.h @@ -0,0 +1,144 @@ 
+#ifndef AMPGEN_STORE_H +#define AMPGEN_STORE_H + +#include "AmpGen/simd/utils.h" +#include "AmpGen/EventList.h" + +namespace AmpGen { + + enum Alignment { + SoA, AoS + }; + + template class Store + { + public: + Store( const size_t& nEntries=0, const size_t& nFields=0) : + m_nEntries(nEntries), + m_nBlocks(utils::aligned_size( nEntries ) / utils::size::value ), + m_nFields(nFields), + m_store(m_nBlocks * m_nFields) {} + + template Store( const Store& store, const std::vector& functors, const size_t& fieldsPerFunctor=0) + : Store(store.size(), functors, fieldsPerFunctor){ + for( auto& f : functors ) update(store,f); + } + + template Store( const size_t& nEntries, const std::vector& functors, const size_t& fieldsPerFunctor = 0) + { + for(const auto& functor : functors) + { + auto vsize = fieldsPerFunctor == 0 ? functor.returnTypeSize() / sizeof(stored_type) : fieldsPerFunctor; + DEBUG("Registering: " << functor.name() << " I = " << m_nFields << " / " << functors.size() * vsize ); + m_index[ functor.name() ] = std::make_pair(m_nFields, vsize); + m_nFields += vsize; + } + m_nEntries = nEntries; + m_nBlocks = utils::aligned_size(nEntries)/utils::size::value; + m_store.resize(m_nBlocks * m_nFields); + } + + __always_inline stored_type operator[]( const size_t& index ) const { return m_store[index]; } + __always_inline stored_type& operator[]( const size_t& index ) { return m_store[index]; } + template unsigned find( const T& t ) const { return m_index.find( t.name() )->second.first; } + + __always_inline size_t size() const { return m_nEntries; } + __always_inline size_t nBlocks() const { return m_nBlocks; } + __always_inline size_t nFields() const { return m_nFields; } + __always_inline size_t aligned_size() const { return m_nBlocks * utils::size::value ; } + __always_inline const stored_type& operator()(const size_t& index, const size_t& field) const + { + if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; + else return 
m_store[index*m_nFields+field]; + } + __always_inline const stored_type* data() const { return m_store.data(); } + __always_inline stored_type& operator()(const size_t& index, const size_t& field) + { + if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; + else return m_store[index*m_nFields+field]; + } + + void resize(const size_t& nEntries, const size_t& nFields ) + { + m_nEntries = nEntries; + m_nBlocks = utils::aligned_size(nEntries)/utils::size::value; + m_nFields = nFields; + m_store.resize(m_nBlocks * m_nFields); + m_index.clear(); + } + void clear() { m_store.clear(); m_index.clear() ; } + void store( const size_t& event0, const size_t& index0, const stored_type* item, const unsigned N = 1 ) + { + if constexpr( align == Alignment::AoS ) + std::memcpy( &(*this)(event0, index0) , item, N * sizeof( stored_type ) ); + else + { + for( unsigned i = 0 ; i != N ; ++i ) (*this)(event0, index0 +i ) = item[i]; + } + } + + template void update(const Store& is, const functor_type& fcn) + { + auto f = m_index.find( fcn.name() ); + if( f == m_index.end() ) FATAL("Expression: " << fcn.name() << " is not registed"); + auto [p0, s] = f->second; + DEBUG("Updating: " << fcn.name() << " index = " << p0 << " size_of = " << s << " on store: " << is.size() << " blocks = " << is.nBlocks() << " fields = " << is.nFields () ); + if constexpr( align == Alignment::AoS ) + { + if constexpr( std::is_same< typename functor_type::return_type, void >::value ) + fcn.batch(aligned_size(), is.nFields(), m_nFields, nullptr, m_store.data() + p0, 1, fcn.externBuffer().data(), is.data()); + if constexpr( ! 
std::is_same< typename functor_type::return_type, void >::value ) + fcn.batch(aligned_size(), is.nFields(), m_nFields , m_store.data() + p0 , fcn.externBuffer().data(), is.data()); + } + else + { + if constexpr( std::is_same< typename functor_type::return_type, void >::value) + fcn.batch(aligned_size(), is.nFields(), 1, nullptr, m_store.data() + p0*m_nBlocks, m_nBlocks, fcn.externBuffer().data(), is.data() ); + else + fcn.batch(aligned_size(), is.nFields(), 1 , m_store.data() + p0*m_nBlocks , fcn.externBuffer().data(), is.data() ); + } + } + template void update( const EventList& events, const functor_type& fcn ) + { + auto f = m_index.find( fcn.name() ); + if( f == m_index.end() ) FATAL("Expression: " << fcn.name() << " is not registed"); + auto [p0, s] = f->second; + if constexpr( std::is_same< typename functor_type::return_type, void >::value ) + { + + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t evt = 0; evt < events.size(); ++evt ) + { + std::vector buffer(s); + fcn(buffer.data(), 1, fcn.externBuffer().data(), events[evt].address() ); + store(evt, p0, buffer.data(), s ); + } + } + else { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t evt = 0; evt < events.size(); ++evt ){ + auto tmp = fcn( events[evt].address( ) ); + store( evt, p0, &tmp, s); + } + } + } + + private: + size_t m_nEntries{0}; /// Number of entries, i.e. number of events + size_t m_nBlocks {0}; /// Number of blocks, i.e. number of entries aligned to the size, divided by block size. 
+ size_t m_nFields {0}; /// Number of fields per entry + std::vector m_store; + std::map> m_index; + }; +} +//using aos_store = AmpGen::Store; +//using soa_store = AmpGen::Store; +// +//ENABLE_DEBUG(aos_store) +//ENABLE_DEBUG(soa_store) + +#endif diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index 0bf34fa827e..cc0d466bca2 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -75,21 +75,19 @@ namespace AmpGen for_each( m_pdfs, []( auto& f ) { f.prepare(); } ); #pragma omp parallel for reduction( +: LL ) for ( unsigned int block = 0; block < m_events->nBlocks(); ++block ) { - LL += log(this->operator()(m_events->block(block), block)); + LL += m_events->weight(block) * log(this->operator()(m_events->block(block), block)); } return -2 * utils::sum_elements(LL); } #endif } /// Returns the probability for the given event. - #if ENABLE_AVX2 float_v operator()( const float_v* evt , const unsigned block) { float_v prob = 0.f; for_each( this->m_pdfs, [&prob, &evt,block]( const auto& f ) { prob += f(evt, block); } ); return prob; } - #endif /// Returns the probability for the given event. 
double operator()( const eventValueType& evt ) { diff --git a/AmpGen/simd/avx2_types.h b/AmpGen/simd/avx2_types.h index a9ec3e082ee..04579d4ad73 100644 --- a/AmpGen/simd/avx2_types.h +++ b/AmpGen/simd/avx2_types.h @@ -20,38 +20,11 @@ namespace AmpGen { float_t(const double& f ) : data( _mm256_set1_ps( float(f) )) {} float_t(const float* f ) : data( _mm256_loadu_ps( f ) ) {} void store( float* ptr ) const { _mm256_storeu_ps( ptr, data ); } - std::array to_array() const { std::array b; store( &b[0] ); return b; } + std::array to_array() const { std::array b; store( &b[0] ); return b; } float at(const unsigned i) const { return to_array()[i] ; } operator __m256() const { return data ; } }; - - struct complex_t { - float_t re; - float_t im; - typedef std::complex scalar_type; - static constexpr unsigned size = 8 ; - - float_t real() const { return re; } - float_t imag() const { return im; } - complex_t() = default; - complex_t( const float_t& re, const float_t& im) : re(re), im(im) {} - complex_t( const float& re, const float& im) : re(re), im(im) {} - complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} - complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} - const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } - void store( float* sre, float* sim ){ re.store(sre); im.store(sim); } - void store( std::complex* r ){ - auto re_arr = re.to_array(); - auto im_arr = im.to_array(); - for( unsigned i = 0 ; i != float_t::size; ++i ) r[i] = std::complex( re_arr[i], im_arr[i] ); - } - }; - - inline std::ostream& operator<<( std::ostream& os, const float_t& obj ) { - auto buffer = obj.to_array(); - for( unsigned i = 0 ; i != float_t::size; ++i ) os << buffer[i] << " "; - return os; - } + inline float_t operator+( const float_t& lhs, const float_t& rhs ) { return _mm256_add_ps(lhs, rhs); } inline float_t operator-( const float_t& lhs, const float_t& rhs ) { return _mm256_sub_ps(lhs, rhs); } 
inline float_t operator*( const float_t& lhs, const float_t& rhs ) { return _mm256_mul_ps(lhs, rhs); } @@ -80,13 +53,45 @@ namespace AmpGen { inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_ps( b, a, mask ); } inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? a : b; } inline float_t atan2( const float_t& y, const float_t& x ){ - std::array bx{x.to_array()}, by{y.to_array()}, rt; - for( unsigned i = 0 ; i != float_t::size ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); + std::array bx{x.to_array()}, by{y.to_array()}, rt; + for( unsigned i = 0 ; i != 8 ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); return float_t (rt.data() ); } + inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) + { + return _mm256_fmadd_ps(a, b, c ); + } + struct complex_t { + float_t re; + float_t im; + typedef std::complex scalar_type; + + float_t real() const { return re; } + float_t imag() const { return im; } + float_t norm() const { return re*re + im *im ; } + complex_t() = default; + complex_t( const float_t& re, const float_t& im) : re(re), im(im) {} + complex_t( const float& re, const float& im) : re(re), im(im) {} + complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } + void store( float* sre, float* sim ){ re.store(sre); im.store(sim); } + void store( std::complex* r ){ + auto re_arr = re.to_array(); + auto im_arr = im.to_array(); + for( unsigned i = 0 ; i != re_arr.size(); ++i ) r[i] = std::complex( re_arr[i], im_arr[i] ); + } + }; + + inline std::ostream& operator<<( std::ostream& os, const float_t& obj ) { + auto buffer = obj.to_array(); + for( unsigned i = 0 ; i != 8; ++i ) os << buffer[i] << " "; + return os; + } inline float_t real(const complex_t& arg ){ return 
arg.re ; } inline float_t imag(const complex_t& arg ){ return arg.im ; } inline complex_t conj(const complex_t& arg ){ return complex_t(arg.re, -arg.im) ; } + inline float_t conj(const float_t& arg ){ return arg ; } inline complex_t operator+( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re + rhs, lhs.im); } inline complex_t operator-( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re - rhs, lhs.im); } inline complex_t operator*( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re*rhs, lhs.im*rhs); } diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h new file mode 100644 index 00000000000..594b95dcedc --- /dev/null +++ b/AmpGen/simd/avx2d_types.h @@ -0,0 +1,152 @@ +#ifndef AMPGEN_AVXd_TYPES +#define AMPGEN_AVXd_TYPES 1 + +#include +#include +#include +#include +#include "AmpGen/simd/avx_mathfun.h" +#include + +namespace AmpGen { + namespace AVX2d { + struct float_t { + __m256d data; + static constexpr unsigned size = 4; + typedef double scalar_type; + float_t() = default; + float_t(__m256d data ) : data(data) {} + float_t(const double& f ) : data( _mm256_set1_pd( f )) {} + float_t(const double* f ) : data( _mm256_loadu_pd( f ) ) {} + void store( double* ptr ) const { _mm256_storeu_pd( ptr, data ); } + std::array to_array() const { std::array b; store( &b[0] ); return b; } + double at(const unsigned i) const { return to_array()[i] ; } + operator __m256d() const { return data ; } + }; + + inline float_t operator+( const float_t& lhs, const float_t& rhs ) { return _mm256_add_pd(lhs, rhs); } + inline float_t operator-( const float_t& lhs, const float_t& rhs ) { return _mm256_sub_pd(lhs, rhs); } + inline float_t operator*( const float_t& lhs, const float_t& rhs ) { return _mm256_mul_pd(lhs, rhs); } + inline float_t operator/( const float_t& lhs, const float_t& rhs ) { return _mm256_div_pd(lhs, rhs); } + inline float_t operator-( const float_t& x ) { return -1.f * x; } + inline float_t operator&( 
const float_t& lhs, const float_t& rhs ) { return _mm256_and_pd( lhs, rhs ); } + inline float_t operator|( const float_t& lhs, const float_t& rhs ) { return _mm256_or_pd( lhs, rhs ); } + inline float_t operator^( const float_t& lhs, const float_t& rhs ) { return _mm256_xor_pd( lhs, rhs ); } + inline float_t operator+=(float_t& lhs, const float_t& rhs ){ lhs = lhs + rhs; return lhs; } + inline float_t operator-=(float_t& lhs, const float_t& rhs ){ lhs = lhs - rhs; return lhs; } + inline float_t operator*=(float_t& lhs, const float_t& rhs ){ lhs = lhs * rhs; return lhs; } + inline float_t operator/=(float_t& lhs, const float_t& rhs ){ lhs = lhs / rhs; return lhs; } + inline float_t operator&&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_pd( lhs, rhs ); } + inline float_t operator||( const float_t& lhs, const float_t& rhs ) { return _mm256_or_pd( lhs, rhs ); } + inline float_t operator!( const float_t& x ) { return x ^ _mm256_castsi256_pd( _mm256_set1_epi32( -1 ) ); } + inline float_t operator<( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_LT_OS ); } + inline float_t operator>( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OS ); } + inline float_t operator==( const float_t& lhs, const float_t& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); } + inline float_t sqrt( const float_t& v ) { return _mm256_sqrt_pd(v); } + // inline float_t sin( const float_t& v ) { return sin256_pd(v) ; } + // inline float_t cos( const float_t& v ) { return cos256_pd(v) ; } + // inline float_t tan( const float_t& v ) { float_t s; float_t c; sincos256_pd(v, (__m256*)&s, (__m256*)&c) ; return s/c; } + // inline float_t log( const float_t& v ) { return log256_ps(v) ; } + // inline float_t exp( const float_t& v ) { return exp256_ps(v) ; } + inline float_t abs ( const float_t& v ) { + static const __m256d sign_mask = _mm256_set1_pd(-0.); // -0. 
= 1 << 63 + return _mm256_andnot_pd(sign_mask, v); + } + inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_pd( b, a, mask ); } + inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? a : b; } + inline float_t atan2( const float_t& y, const float_t& x ){ + std::array bx{x.to_array()}, by{y.to_array()}, rt; + for( unsigned i = 0 ; i != 4 ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); + return float_t (rt.data() ); + } + inline float_t log( const float_t& x ) + { + std::array bx{x.to_array()}, rt; + for( unsigned i = 0 ; i != 4 ; ++i ) rt[i] = std::log(bx[i]); + return float_t (rt.data()); + } + + struct complex_t { + float_t re; + float_t im; + typedef std::complex scalar_type; + + float_t real() const { return re; } + float_t imag() const { return im; } + float_t norm() const { return re*re + im *im ; } + complex_t() = default; + complex_t( const float_t& re, const float_t& im) : re(re), im(im) {} + complex_t( const float& re, const float& im) : re(re), im(im) {} + complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } + void store( double* sre, double* sim ){ re.store(sre); im.store(sim); } + void store( scalar_type* r ) const { + auto re_arr = re.to_array(); + auto im_arr = im.to_array(); + for( unsigned i = 0 ; i != re_arr.size(); ++i ) r[i] = scalar_type( re_arr[i], im_arr[i] ); + } + auto to_array() const + { + std::array rt; + store( rt.data() ); + return rt; + } + }; + + inline std::ostream& operator<<( std::ostream& os, const float_t& obj ) { + auto buffer = obj.to_array(); + for( unsigned i = 0 ; i != 4; ++i ) os << buffer[i] << " "; + return os; + } + inline float_t real(const complex_t& arg ){ return arg.re ; } + inline float_t imag(const complex_t& arg ){ return arg.im 
; } + inline complex_t conj(const complex_t& arg ){ return complex_t(arg.re, -arg.im) ; } + inline float_t conj(const float_t& arg ){ return arg ; } + inline complex_t operator+( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re + rhs, lhs.im); } + inline complex_t operator-( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re - rhs, lhs.im); } + inline complex_t operator*( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re*rhs, lhs.im*rhs); } + inline complex_t operator/( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re/rhs, lhs.im/rhs); } + inline complex_t operator+( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs + rhs.re, rhs.im); } + inline complex_t operator-( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs - rhs.re, - rhs.im); } + inline complex_t operator*( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs*rhs.re, lhs*rhs.im); } + inline complex_t operator/( const float_t& lhs, const complex_t& rhs ) { return complex_t( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_t operator+( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re + rhs.re, lhs.im + rhs.im); } + inline complex_t operator-( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re - rhs.re, lhs.im - rhs.im); } + inline complex_t operator*( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } + inline complex_t operator/( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_t operator-( const complex_t& x ) { return -1.f * x; } + inline float_t abs( const complex_t& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } + inline float_t norm( const complex_t& v ) { return ( v.re * v.re + v.im * 
v.im ) ; } + inline complex_t select(const float_t& mask, const complex_t& a, const complex_t& b ) { return complex_t( select(mask, a.re, b.re), select(mask, a.im, b.im ) ) ; } + inline complex_t select(const float_t& mask, const float_t& a, const complex_t& b ) { return complex_t( select(mask, a , b.re), select(mask, 0.f, b.im) ); } + inline complex_t select(const float_t& mask, const complex_t& a, const float_t& b ) { return complex_t( select(mask, a.re, b ) , select(mask, a.im, 0.f) ); } + inline complex_t select(const bool& mask , const complex_t& a, const complex_t& b ) { return mask ? a : b; } + inline complex_t exp( const complex_t& v ){ + // float_t s; float_t c; sincos256_ps(v.im, (__m256*)&s, (__m256*)&c); + auto vr = v.re.to_array(); + auto vi = v.im.to_array(); + std::array rt_re; + std::array rt_im; + for( unsigned i = 0 ; i != 4; ++i ){ + rt_re[i] = std::exp( vr[i] ) * cos( vi[i] ); + rt_im[i] = std::exp( vr[i] ) * sin( vi[i] ); + // rt[i] = complex_t( exp(vr[i]) * cos(vi[i]), exp(vr[i]) * sin( vi[i] ) ); + } + return complex_t( rt_re.data(), rt_im.data() ); + } + inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) + { + return _mm256_fmadd_pd(a, b, c ); + } + + inline std::ostream& operator<<( std::ostream& os, const complex_t& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } + #pragma omp declare reduction(+: float_t: \ + omp_out = omp_out + omp_in) + #pragma omp declare reduction(+: complex_t: \ + omp_out = omp_out + omp_in) + + } +} + +#endif diff --git a/AmpGen/simd/iterator.h b/AmpGen/simd/iterator.h index 44e115f9b7d..3949a0a44ea 100644 --- a/AmpGen/simd/iterator.h +++ b/AmpGen/simd/iterator.h @@ -7,6 +7,7 @@ namespace AmpGen { std::array m_buffer; size_t m_pos{0}; public: + int pos() const { return m_pos ; } scatter_iterator( const size_t& pos, store_type* store ) : m_store(store), m_pos(pos) { @@ -36,6 +37,7 @@ namespace AmpGen { } bool operator==( const scatter_iterator& rhs ) const { return m_pos == 
rhs.m_pos ; } bool operator!=( const scatter_iterator& rhs ) const { return m_pos != rhs.m_pos ; } + friend int operator-( const scatter_iterator& lhs, const scatter_iterator& rhs) { return lhs.pos() - rhs.pos() ; } }; template auto make_scatter_iterator( const unsigned& pos, store_type* store) { diff --git a/AmpGen/simd/utils.h b/AmpGen/simd/utils.h index b8b5032a5e2..094877931b2 100644 --- a/AmpGen/simd/utils.h +++ b/AmpGen/simd/utils.h @@ -1,13 +1,49 @@ +#ifndef AMPGEN_SIMD_UTILS_H +#define AMPGEN_SIMD_UTILS_H + #include -#include "AmpGen/simd/avx2_types.h" +#if ENABLE_AVX2 +#if DOUBLE_PRECISION + #include "AmpGen/simd/avx2d_types.h" +#else + #include "AmpGen/simd/avx2_types.h" +#endif +#endif namespace AmpGen { - namespace utils { +#if ENABLE_AVX2 +#if DOUBLE_PRECISION + using float_v = AVX2d::float_t; + using complex_v = AVX2d::complex_t; +#else + using float_v = AVX2::float_t; + using complex_v = AVX2::complex_t; +#endif +#else + using float_v = double; + using complex_v = std::complex; +#endif - template struct is_vector_type { static constexpr bool value = false; }; - template <> struct is_vector_type { static constexpr bool value = true ; }; - template <> struct is_vector_type { static constexpr bool value = true ; }; + namespace utils { + template struct is_vector_type : std::false_type {}; + template struct size { static constexpr unsigned value = 1; } ; +#if ENABLE_AVX2 +#if DOUBLE_PRECISION + template <> struct is_vector_type : std::true_type {}; + template <> struct is_vector_type : std::true_type {}; + template <> struct size { static constexpr unsigned value = 4; }; + template <> struct size { static constexpr unsigned value = 4; }; +#else + template <> struct is_vector_type : std::true_type {}; + template <> struct is_vector_type : std::true_type {}; + template <> struct size { static constexpr unsigned value = 8; }; + template <> struct size { static constexpr unsigned value = 8; }; +#endif +#else + template <> struct is_vector_type : 
std::false_type {}; + template <> struct is_vector_type : std::false_type {}; +#endif template simd_type gather( const container_type& container, const functor_type& functor, unsigned offset=0, float df =0.) { @@ -20,7 +56,7 @@ namespace AmpGen { } template size_t aligned_size( const size_t& unaligned_size ) { - return simd_type::size * unsigned ( 1 + (unaligned_size -1 ) / simd_type::size ); + return size::value * unsigned ( 1 + (unaligned_size -1 ) / size::value ); } template auto sum_elements( const simd_type& obj ) { @@ -28,15 +64,43 @@ namespace AmpGen { { auto arr = obj.to_array(); auto rt = arr[0]; - for( unsigned i = 1 ; i != simd_type::size; ++i ) rt = rt + arr[i]; + for( unsigned i = 1 ; i != size::value; ++i ) rt = rt + arr[i]; return rt; } else return obj; } - template auto get( vtype v ){ + template auto get( vtype v ) + { if constexpr ( is_vector_type::value ) return v.at(p); if constexpr ( ! is_vector_type::value ) return v; - } - + } + template < class vtype> auto at( vtype v, const unsigned p=0 ) + { + if constexpr ( is_vector_type::value ) return v.at(p); + if constexpr ( ! is_vector_type::value ) return v; + } + template auto norm( const ctype& v ) + { + #if ENABLE_AVX2 && DOUBLE_PRECISION + if constexpr( is_vector_type::value ) return AVX2d::norm(v); + #endif + #if ENABLE_AVX2 && ! DOUBLE_PRECISION + if constexpr( is_vector_type::value ) return AVX2::norm(v); + #endif + if constexpr( ! 
is_vector_type::value ) return std::norm(v); + } + template void store( store_type* container, const type& v) + { + if constexpr( is_vector_type::value ) + { + auto arr = v.to_array(); + for( unsigned k = 0 ; k != utils::size::value; ++k ) container[k] = arr[k]; + } + else { + *container = v; + } + } } } + +#endif diff --git a/Standalone.cmake b/Standalone.cmake index a0047390f44..f0248d9c909 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -13,7 +13,8 @@ if( NOT "${CMAKE_CXX_STANDARD}" ) endif() SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") -SET(ENABLE_AVX2 TRUE CACHE BOOL "ENABLE_AVX2") +SET(ENABLE_AVX2 FALSE CACHE BOOL "ENABLE_AVX2") +SET(PRECISION "DOUBLE" CACHE STRING "PRECISION") set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -152,9 +153,15 @@ target_compile_options(AmpGen -Wno-unknown-pragmas $<$:-O3>) -if( ENABLE_AVX2 ) - message(STATUS "Enabling AVX2...") - target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1") +if( ENABLE_AVX2 ) + if( "${PRECISION}" MATCHES "DOUBLE" ) + message(STATUS "Enabling AVX2 [double precision]") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=1") + elseif( "${PRECISION}" MATCHES "SINGLE" ) + message(STATUS "Enabling AVX2 [single precision]") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=0") + + endif() target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -DHAVE_AVX2_INSTRUCTIONS) endif() diff --git a/apps/ConvertToSourceCode.cpp b/apps/ConvertToSourceCode.cpp index c753204bb7c..c6864aca788 100644 --- a/apps/ConvertToSourceCode.cpp +++ b/apps/ConvertToSourceCode.cpp @@ -91,7 +91,7 @@ template void generate_source(T& pdf, EventList& normEvents, const std mps["Pz"]->setCurrentFitVal(pz); pdf.transferParameters(); } - double n = pdf.prob_unnormalised( evt ); + double n = pdf(evt); if ( n > pMax ) pMax = n; } norm = pMax * sf ; diff --git a/apps/Debugger.cpp b/apps/Debugger.cpp index 29a1c2346a5..a19f3f83563 100644 --- 
a/apps/Debugger.cpp +++ b/apps/Debugger.cpp @@ -49,16 +49,17 @@ void invertParity( Event& event, const size_t& nParticles) template < class FCN > void debug( FCN& sig, EventList& accepted, bool verbose, TRandom3* rndm, MinuitParameterSet& mps ){ INFO("Debugging: "); + unsigned eventToDebug = 0; sig.setEvents( accepted ); sig.prepare(); - sig.debug( accepted[0] ); - accepted[0].print(); + sig.debug( accepted[eventToDebug] ); + accepted[eventToDebug].print(); // if( verbose ) print( accepted[0], sig.matrixElements(), verbose ); - invertParity(accepted[0], accepted.eventType().size() ); - accepted[0].print(); + invertParity(accepted[eventToDebug], accepted.eventType().size() ); + accepted[eventToDebug].print(); sig.reset(); sig.prepare(); - sig.debug( accepted[0] ); + sig.debug( accepted[eventToDebug] ); } int main( int argc, char** argv ) @@ -93,11 +94,10 @@ int main( int argc, char** argv ) if( infile == "" ){ for( unsigned i = 0 ; i != 16; ++i ){ Event evt = PhaseSpace( eventType, rndm ).makeEvent(); + evt.setIndex(i); accepted.push_back(evt); } } - accepted[0].print(); - std::string type = NamedParameter("Type","CoherentSum"); if( type == "PolarisedSum") diff --git a/apps/Generator.cpp b/apps/Generator.cpp index 89bbb6d2cba..3abf40b7bb6 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -41,7 +41,7 @@ struct FixedLibPDF { void prepare(){}; void setEvents( AmpGen::EventList& evts ){}; - double prob_unnormalised( const AmpGen::Event& evt ) const { return PDF( evt, 1 ); } + double operator()( const AmpGen::Event& evt ) const { return PDF( evt, 1 ); } FixedLibPDF( const std::string& lib ) { void* handle = dlopen( lib.c_str(), RTLD_NOW ); diff --git a/examples/FitterWithPolarisation.cpp b/examples/FitterWithPolarisation.cpp index 8ee80173425..69b39fd171f 100644 --- a/examples/FitterWithPolarisation.cpp +++ b/examples/FitterWithPolarisation.cpp @@ -32,10 +32,18 @@ #include #include +#if ENABLE_AVX2 + #include "AmpGen/EventListSIMD.h" + using EventList_type = 
AmpGen::EventListSIMD; +#else + #include "AmpGen/EventList.h" + using EventList_type = AmpGen::EventList; +#endif + using namespace AmpGen; template -FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& MPS ); +FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitParameterSet& MPS ); int main( int argc, char* argv[] ) { @@ -79,6 +87,7 @@ int main( int argc, char* argv[] ) the parsed options. For historical reasons, this is referred to as loading it from a "Stream" */ MinuitParameterSet MPS; MPS.loadFromStream(); + for( auto& p : MPS ) if( p->flag() == Flag::Free ) p->setResult( gRandom->Gaus( p->mean(), p->err() ), p->err(), 0,0 ); /* An EventType specifies the initial and final state particles as a vector that will be described by the fit. It is typically loaded from the interface parameter EventType. */ @@ -94,12 +103,12 @@ int main( int argc, char* argv[] ) /* Events are read in from ROOT files. If only the filename and the event type are specified, the file is assumed to be in the specific format that is defined by the event type, unless the branches to load are specified in the user options */ - EventList events(dataFile, evtType, Branches(bNames), GetGenPdf(false) ); + EventList_type events(dataFile, evtType, Branches(bNames), GetGenPdf(false) ); /* Generate events to normalise the PDF with. This can also be loaded from a file, which will be the case when efficiency variations are included. Default number of normalisation events is 2 million. 
*/ - EventList eventsMC = Generator<>(evtType, &rndm).generate(int(2e6)); + EventList_type eventsMC = Generator<>(evtType, &rndm).generate(int(2e6)); sig.setMC( eventsMC ); @@ -107,7 +116,7 @@ int main( int argc, char* argv[] ) /* Do the fit and return the fit results, which can be written to the log and contains the covariance matrix, fit parameters, and other observables such as fit fractions */ - FitResult* fr = doFit(make_pdf(sig), events, eventsMC, MPS ); + FitResult* fr = doFit(make_pdf(sig), events, eventsMC, MPS ); /* Calculate the `fit fractions` using the signal model and the error propagator (i.e. fit results + covariance matrix) of the fit result, and write them to a file. */ @@ -116,20 +125,12 @@ int main( int argc, char* argv[] ) INFO("Adding fraction to file..."); fr->addFractions( fitFractions ); INFO("Writing file ... "); - fr->writeToFile( logFile ); - output->cd(); - - /* Write out the data plots. This also shows the first example of the named arguments - to functions, emulating python's behaviour in this area */ - - auto plots = events.makeDefaultProjections(PlotOptions::Prefix("Data"), PlotOptions::Bins(100)); - for ( auto& plot : plots ) plot->Write(); - + fr->writeToFile( logFile ); output->Close(); } template -FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& MPS ) +FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitParameterSet& MPS ) { auto time_wall = std::chrono::high_resolution_clock::now(); auto time = std::clock(); @@ -166,6 +167,18 @@ FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& double tWall = std::chrono::duration( twall_end - time_wall ).count(); INFO( "Wall time = " << tWall / 1000. ); INFO( "CPU time = " << time_cpu ); + auto evaluator = pdf.componentEvaluator(&mc); + auto projections = data.eventType().defaultProjections(100); + + /* Write out the data plots. 
This also shows the first example of the named arguments + to functions, emulating python's behaviour in this area */ + + for( const auto& proj : projections ) + { + proj(mc, evaluator, PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); + //proj(mc, evaluator_per_component, PlotOptions::Prefix("amp"), PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); + proj(data, PlotOptions::Prefix("Data") )->Write(); + } fr->print(); return fr; } diff --git a/examples/QcGenerator.cpp b/examples/QcGenerator.cpp index 48878c4a5a3..f40a90331e7 100644 --- a/examples/QcGenerator.cpp +++ b/examples/QcGenerator.cpp @@ -184,7 +184,7 @@ template class Psi3770 { DTEventList generate( const size_t& N ) { DTEventList output( m_signalType, m_tagType ); - ProgressBar pb(60, trimmedString(__PRETTY_FUNCTION__)); + ProgressBar pb(60, detail::trimmedString(__PRETTY_FUNCTION__)); auto tStartTotal = std::chrono::high_resolution_clock::now(); int currentSize = 0; double norm = -1; diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 9ad5a6629c8..e74de1ba47a 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -28,7 +28,7 @@ #include "AmpGen/simd/utils.h" #ifdef _OPENMP - #include +#include #endif using namespace AmpGen; @@ -36,12 +36,12 @@ CoherentSum::CoherentSum() = default; CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, const std::string& prefix ) : m_rules (mps) - , m_evtType (type) - , m_printFreq(NamedParameter( "CoherentSum::PrintFrequency", 100) ) - , m_dbThis (NamedParameter( "CoherentSum::Debug" , false)) - , m_verbosity(NamedParameter( "CoherentSum::Verbosity" , 0) ) + , m_evtType (type) + , m_printFreq(NamedParameter( "CoherentSum::PrintFrequency", 100) ) + , m_dbThis (NamedParameter( "CoherentSum::Debug" , false)) + , m_verbosity(NamedParameter( "CoherentSum::Verbosity" , 0) ) , m_objCache (NamedParameter("CoherentSum::ObjectCache" ,"") ) - , m_prefix (prefix) + , m_prefix (prefix) { auto amplitudes = 
m_rules.getMatchingRules( m_evtType, prefix); if( amplitudes.size() == 0 ){ @@ -52,90 +52,74 @@ CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, m_normalisations.resize( m_matrixElements.size(), m_matrixElements.size() ); size_t nThreads = NamedParameter ("nCores" , std::thread::hardware_concurrency(), "Number of threads to use" ); ThreadPool tp(nThreads); + //#pragma omp parallel for for(size_t i = 0; i < m_matrixElements.size(); ++i){ tp.enqueue( [i,this,&mps,&litudes]{ - m_matrixElements[i] = TransitionMatrix( amplitudes[i].first, amplitudes[i].second, mps, this->m_evtType.getEventFormat(), this->m_dbThis); - CompilerWrapper().compile( m_matrixElements[i].amp, this->m_objCache); } ); + m_matrixElements[i] = TransitionMatrix( amplitudes[i].first, amplitudes[i].second, mps, this->m_evtType.getEventFormat(), this->m_dbThis); + CompilerWrapper().compile( m_matrixElements[i], this->m_objCache); } ); } - m_isConstant = false; } void CoherentSum::prepare() { - if ( m_isConstant && m_prepareCalls != 0 ) return; transferParameters(); - std::vector changedPdfIndices; ProfileClock clockEval; - if( m_prepareCalls == 0 && m_events != nullptr ){ - m_events->reserveCache(m_matrixElements.size()); - for( auto& me : m_matrixElements ) me.addressData = m_events->registerExpression( me.amp ); - } - if( m_prepareCalls == 0 ) m_integrator.allocate( m_matrixElements ); - for ( size_t i = 0; i < m_matrixElements.size(); ++i ) { - m_matrixElements[i].amp.prepare(); - if ( m_prepareCalls != 0 && !m_matrixElements[i].amp.hasExternalsChanged() ) continue; - if ( m_events != nullptr ) m_events->updateCache( m_matrixElements[i].amp, m_matrixElements[i].addressData ); - m_integrator.prepareExpression(m_matrixElements[i].amp ); - changedPdfIndices.push_back(i); - m_matrixElements[i].amp.resetExternals(); + for (auto& t : m_matrixElements ) { + t.prepare(); + if ( m_prepareCalls != 0 && !t.hasExternalsChanged() ) continue; + if ( m_events != nullptr ) 
m_cache.update(m_events->store(), t ); + m_integrator.updateCache(t); + t.resetExternals(); + t.workToDo = true; } clockEval.stop(); ProfileClock clockIntegral; - if ( m_integrator.isReady()) updateNorms( changedPdfIndices ); + if ( m_integrator.isReady()) updateNorms(); else if ( m_verbosity ) WARNING( "No simulated sample specified for " << this ); - m_norm = norm(); - if ( m_prepareCalls == 0 ){ - INFO( "Norm: " << m_norm ); - for(unsigned i = 0 ; i != m_matrixElements.size() ; ++i ){ - for(unsigned j = 0 ; j != m_matrixElements.size() ; ++j ){ - if( std::isnan( std::real(m_normalisations(i,j) )) || std::isnan( std::imag(m_normalisations(i,j))) ) - ERROR("Norm: " << m_matrixElements[i].name() << " " << m_matrixElements[j].name() << " is ill-posed!"); - } - } - // INFO( m_normalisations.get(0,0) << " " - // << m_normalisations.get(1,0) << " " - // << m_normalisations.get(0,1) << " " - // << m_normalisations.get(2,2) ); - } - if ( m_verbosity && changedPdfIndices.size() !=0 ) { - clockIntegral.stop(); + clockIntegral.stop(); + if ( m_verbosity && m_prepareCalls % 100 == 0 ) { INFO( "Time Performance: " << "Eval = " << clockEval << " ms" << ", Integral = " << clockIntegral << " ms" << ", Total = " << clockEval + clockIntegral << " ms; normalisation = " << m_norm ); m_lastPrint = m_prepareCalls; } + for( auto& t : m_matrixElements ) t.workToDo = false; m_prepareCalls++; } -void CoherentSum::updateNorms( const std::vector& changedPdfIndices ) +void CoherentSum::updateNorms() { std::vector cacheIndex; std::transform( m_matrixElements.begin(), m_matrixElements.end(), std::back_inserter(cacheIndex), - [this](auto& m){ return this->m_integrator.getCacheIndex( m.amp ) ; } ); - for ( auto& i : changedPdfIndices ) - for ( size_t j = 0; j < size(); ++j ) - m_integrator.queueIntegral( cacheIndex[i], cacheIndex[j] ,i, j, &m_normalisations ); + [this](auto& m){ return this->m_integrator.getCacheIndex( m ) ; } ); + if(std::any_of(m_matrixElements.begin(),m_matrixElements.end(), 
[](auto& me){ return me.workToDo; } )) + { + for ( unsigned i = 0; i != m_matrixElements.size(); ++i ) + for ( size_t j = i; j < size(); ++j ){ + if( m_matrixElements[i].workToDo || m_matrixElements[j].workToDo ) + m_integrator.queueIntegral( cacheIndex[i], cacheIndex[j] ,i, j, &m_normalisations ); + } + } m_integrator.flush(); m_normalisations.resetCalculateFlags(); + m_norm = norm(); } void CoherentSum::debug( const Event& evt, const std::string& nameMustContain ) { prepare(); - for ( auto& me : m_matrixElements ) me.amp.resetExternals(); - if ( nameMustContain == "" ) - for ( auto& me : m_matrixElements ) { - auto A = me(evt); - INFO( std::setw(70) << me.decayTree.uniqueString() - << " A = [ " << utils::get(A.real()) << " " << utils::get(A.imag()) - << " ] g = [ "<< me.coupling().real() << " " << me.coupling().imag() << " ] " - << m_events->cache( evt.index(), me.addressData ) - << me.decayTree.CP() ); - - // if( m_dbThis ) me.amp.debug( evt.address() ); - } + for ( auto& me : m_matrixElements ) { + auto A = me(evt); + INFO( std::setw(70) << me.decayTree.uniqueString() + << " A = [ " << utils::get<0>(A.real()) << " " << utils::get<0>(A.imag()) + << " ] g = [ "<< me.coupling().real() << " " << me.coupling().imag() << " ] " + << m_cache( evt.index(), std::distance(&m_matrixElements[0], &me ) ) + << me.decayTree.CP() ); + + // if( m_dbThis ) me.amp.debug( evt.address() ); + } //else // for ( auto& me : m_matrixElements ) // if ( me.amp.name().find( nameMustContain ) != std::string::npos ) me.amp.debug( evt.address() ); @@ -169,8 +153,8 @@ std::vector CoherentSum::fitFractions(const LinearErrorPropagator& for ( size_t j = i + 1; j < ffForHead.size(); ++j ) { iCalc.emplace_back(ffForHead[i].name() + "x" + ffForHead[j].name(), - processIndex(m_matrixElements, ffForHead[i].name()), - processIndex(m_matrixElements, ffForHead[j].name()) ); + processIndex(m_matrixElements, ffForHead[i].name()), + processIndex(m_matrixElements, ffForHead[j].name()) ); } } std::vector 
interferenceFractions = iCalc(m_evtType.mother()+"_interference",linProp); @@ -191,16 +175,16 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor bool includePythonBindings = NamedParameter("CoherentSum::IncludePythonBindings",false); for ( auto& p : m_matrixElements ){ - stream << p.amp << std::endl; - p.amp.compileWithParameters( stream ); - if( includePythonBindings ) p.amp.compileDetails( stream ); + stream << p << std::endl; + p.compileWithParameters( stream ); + if( includePythonBindings ) p.compileDetails( stream ); } Expression event = Parameter("x0",0,true); Expression pa = Parameter("double(x1)",0,true); Expression amplitude; for( unsigned int i = 0 ; i < size(); ++i ){ auto& p = m_matrixElements[i]; - Expression this_amplitude = p.coupling() * Function( programatic_name( p.amp.name() ) + "_wParams", {event} ); + Expression this_amplitude = p.coupling() * Function( programatic_name( p.name() ) + "_wParams", {event} ); amplitude = amplitude + ( p.decayTree.finalStateParity() == 1 ? 
1 : pa ) * this_amplitude; } stream << CompiledExpression(const double*, const int&)>( amplitude , "AMP" ) << std::endl; @@ -211,7 +195,7 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor stream << "extern \"C\" const char* matrix_elements(int n) {\n"; for ( size_t i = 0; i < m_matrixElements.size(); i++ ) { - stream << " if(n ==" << i << ") return \"" << m_matrixElements.at(i).amp.progName() << "\" ;\n"; + stream << " if(n ==" << i << ") return \"" << m_matrixElements.at(i).progName() << "\" ;\n"; } stream << " return 0;\n}\n"; stream << "extern \"C\" void FCN_all(double* out, double* events, unsigned int size, int parity, double* amps){\n"; @@ -233,7 +217,7 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor int parity = p.decayTree.finalStateParity(); if ( parity == -1 ) stream << "double(parity) * "; stream << "std::complex(amps[" << i * 2 << "],amps[" << i * 2 + 1 << "]) * "; - stream << programatic_name( p.amp.name() )<< "_wParams( E )"; + stream << programatic_name( p.name() )<< "_wParams( E )"; stream << ( i == size() - 1 ? 
";" : " +" ) << "\n"; } stream << " out[i] = std::norm(amplitude) / " << normalisation << ";\n }\n}\n"; @@ -255,19 +239,18 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor complex_t CoherentSum::getValNoCache( const Event& evt ) const { return utils::get<0>( complex_v(std::accumulate( m_matrixElements.begin(), - m_matrixElements.end(), - complex_v(0,0), - [&evt]( const auto& a, const auto& b ){ return a + b.coefficient * b(evt);} )) ); + m_matrixElements.end(), + complex_v(0,0), + [&evt]( const auto& a, const auto& b ){ return a + b.coefficient * b(evt);} )) ); } void CoherentSum::reset( bool resetEvents ) { m_prepareCalls = 0; m_lastPrint = 0; - for ( auto& mE : m_matrixElements ) mE.addressData = 999; if ( resetEvents ){ m_events = nullptr; - m_integrator = Integrator_type(); + m_integrator = Integrator(); } } @@ -275,7 +258,9 @@ void CoherentSum::setEvents( EventList_type& list ) { if ( m_verbosity ) INFO( "Setting event list with:" << list.size() << " events for " << this ); reset(); + if( m_ownEvents && m_events != nullptr ) delete m_events; m_events = &list; + m_cache = Store( m_events->size(), m_matrixElements ); } @@ -283,7 +268,7 @@ void CoherentSum::setMC( EventList_type& sim ) { if ( m_verbosity ) INFO( "Setting norm. 
event list with:" << sim.size() << " events for " << this ); reset(); - m_integrator = Integrator_type( &sim ); + m_integrator = Integrator( &sim, m_matrixElements ); } real_t CoherentSum::norm() const @@ -319,8 +304,8 @@ void CoherentSum::transferParameters() void CoherentSum::printVal(const Event& evt) { for ( auto& mE : m_matrixElements ) { - unsigned int address = mE.addressData; - std::cout << mE.decayTree.decayDescriptor() << " = " << mE.coefficient << " x " << m_events->cache( evt.index(), address ) + unsigned int address = std::distance( &mE , &m_matrixElements[0] ); + std::cout << mE.decayTree.decayDescriptor() << " = " << mE.coefficient << " x " << m_cache( evt.index() / utils::size::value, address ) << " address = " << address << " " << mE( evt ) << std::endl; if( mE.coupling.size() != 1 ){ std::cout << "CouplingConstants: " << std::endl; @@ -333,57 +318,50 @@ void CoherentSum::printVal(const Event& evt) complex_t CoherentSum::getVal( const Event& evt ) const { complex_v value( 0., 0. ); - for ( const auto& mE : m_matrixElements ) { - value = value + mE.coefficient * m_events->cache( evt.index(), mE.addressData ); + for (unsigned int i = 0 ; i != m_matrixElements.size(); ++i ) { + value = value + m_matrixElements[i].coefficient * m_cache(evt.index() / utils::size::value, i ); } - #if ENABLE_AVX2 - return value.at(evt.index() % float_v::size); - #else +#if ENABLE_AVX2 + return value.at(evt.index() % utils::size::value ); +#else return value; - #endif +#endif } -#if ENABLE_AVX2 float_v CoherentSum::operator()( const float_v* /*evt*/, const unsigned block ) const { complex_v value( 0., 0. 
); - for ( const auto& mE : m_matrixElements ) { - value = value + mE.coefficient * m_events->cache()[ block * m_events->cacheSize() + mE.addressData ]; + for ( const auto& mE : m_matrixElements ) + { + unsigned address = &mE - &m_matrixElements[0]; + value = value + mE.coefficient * m_cache(block, address); } - return (m_weight/m_norm ) * AVX2::norm( value ); + return (m_weight/m_norm ) * utils::norm(value); } -#endif - - -std::function CoherentSum::evaluator(const EventList_type* events) const +std::function CoherentSum::evaluator(const EventList_type* ievents) const { - if( events != nullptr && events != m_integrator.events() ) - ERROR("Evaluator only working on the integration sample, fix me!"); - std::vector address_mapping( size() ); - for( const auto& me : m_matrixElements ) address_mapping[me.addressData] = m_integrator.getCacheIndex( me.amp ); - std::vector values( events->size() ); + auto events = ievents == nullptr ? m_integrator.events() : ievents; + Store store( events->store(), m_matrixElements); + std::vector values( events->aligned_size() ); #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned int i = 0 ; i != events->size(); ++i ) + for( unsigned int block = 0 ; block != events->nBlocks(); ++block ) { - complex_t amp = 0; - for( unsigned j = 0 ; j != address_mapping.size(); ++j ) amp += m_matrixElements[j].coefficient * this->m_integrator.get(address_mapping[j], i); - values[i] = m_weight * std::norm(amp) / m_norm; + complex_v amp(0.,0.); + for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ) + amp = amp + m_matrixElements[j].coefficient * store(block, j); + utils::store( values.data() + block * utils::size::value, (m_weight/m_norm) * utils::norm(amp) ); } return arrayToFunctor(values); } -KeyedView CoherentSum::componentEvaluator(const EventList_type* events) const +KeyedView CoherentSum::componentEvaluator(const EventList_type* ievents) const { - if( events != nullptr && events != m_integrator.events() ) - ERROR("Evaluator only 
working on the integration sample, fix me!"); - - KeyedView rt(*events, m_matrixElements.size() ); - std::vector address_mapping(m_matrixElements.size()); - for( unsigned i = 0; i != m_matrixElements.size(); ++i ) address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); + auto events = ievents == nullptr ? m_integrator.events() : ievents; + KeyedView rt(*events, m_matrixElements.size() ); for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) { auto& me = m_matrixElements[i]; @@ -395,12 +373,11 @@ KeyedView CoherentSum::componentEvaluator(c { complex_t total = 0; for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ - total += this->m_integrator.get( address_mapping[i], evt ) * m_matrixElements[i].coefficient - * std::conj( this->m_integrator.get( address_mapping[j], evt ) * m_matrixElements[j].coefficient ); + total += this->m_integrator.get(i, evt) * m_matrixElements[i].coefficient + * std::conj( this->m_integrator.get(j, evt) * m_matrixElements[j].coefficient ); } rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; } } return rt; } - diff --git a/src/CompiledExpressionBase.cpp b/src/CompiledExpressionBase.cpp index fea41d84a69..acf12e97540 100644 --- a/src/CompiledExpressionBase.cpp +++ b/src/CompiledExpressionBase.cpp @@ -102,7 +102,7 @@ void CompiledExpressionBase::to_stream( std::ostream& stream ) const else { auto as_tensor = cast(m_obj).tensor(); for(unsigned j=0; j != as_tensor.size(); ++j ) - stream << "r["<("IncludePythonBindings", false) == true && returnTypename().find("complex") != std::string::npos ){ -// stream << "#pragma clang diagnostic pop\n\n"; stream << "extern \"C\" void " << progName() << "_c" << "(double *real, double *imag, " << fcnSignature() << "){\n"; stream << " auto val = " << progName() << "(" << args() << ") ;\n"; stream << " *real = val.real();\n"; @@ -161,10 +160,10 @@ void CompiledExpressionBase::addDebug( std::ostream& stream ) const } } -std::string 
CompiledExpressionBase::fcnSignature(const std::vector& argList, bool rto=false) +std::string CompiledExpressionBase::fcnSignature(const std::vector& argList, bool rto, bool includeStagger) { unsigned counter=0; auto fcn = [counter](const auto& str) mutable {return str + " x"+std::to_string(counter++); }; - if( rto ) return argList[0] + " r, " + vectorToString( argList.begin()+1, argList.end(), ", ", fcn ); + if( rto ) return argList[0] + " r, " + argList[1] + " s, " + vectorToString( argList.begin()+2, argList.end(), ", ", fcn ); return vectorToString( argList.begin(), argList.end(), ", ", fcn); } diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index edfb02d59e6..b018506382e 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -53,7 +53,8 @@ void CompilerWrapper::generateSource( const CompiledExpressionBase& expression, { std::ofstream output( filename ); for ( auto& include : m_includes ) output << "#include <" << include << ">\n"; - if( expression.fcnSignature().find("AVX2") != std::string::npos ) output << "#include \"AmpGen/simd/avx2_types.h\"\n" ; + if( expression.fcnSignature().find("AVX2d") != std::string::npos ) output << "#include \"AmpGen/simd/avx2d_types.h\"\n; using namespace AmpGen::AVX2d;\n" ; + else if( expression.fcnSignature().find("AVX2") != std::string::npos ) output << "#include \"AmpGen/simd/avx2_types.h\"\n; using namespace AmpGen::AVX2;\n;" ; output << expression << std::endl; output.close(); } diff --git a/src/ErrorPropagator.cpp b/src/ErrorPropagator.cpp index 44e1b72ae07..815b50e577a 100644 --- a/src/ErrorPropagator.cpp +++ b/src/ErrorPropagator.cpp @@ -106,7 +106,7 @@ double LinearErrorPropagator::getError( const std::function& fcn ) unsigned int N = m_cov.GetNrows(); TVectorD errorVec( N ); for ( unsigned int i = 0; i < N; ++i ) { - DEBUG( "Perturbing parameter: [" << m_parameters[i]->name() << "] " << startingValue << " by " + DEBUG( "Perturbing parameter: [" << m_parameters[i]->name() << "] " << 
m_parameters[i]->mean() << " by " << sqrt( m_cov( i, i ) ) << " " << m_parameters[i] ); errorVec(i) = derivative(fcn,i); fcn(); diff --git a/src/EventList.cpp b/src/EventList.cpp index 1ec87d4d377..4ab59d90009 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -185,39 +185,11 @@ TH2D* EventList::makeProjection( const Projection2D& projection, const ArgumentP return plot; } -void EventList::printCacheInfo( const unsigned int& nEvt ) -{ - for ( auto& ind : m_pdfIndex ) { - INFO( "Cache[" << ind.second << "] = " << ind.first << " = " << cache(nEvt, ind.second ) ); - } -} - -size_t EventList::getCacheIndex( const CompiledExpressionBase& PDF ) const -{ - auto pdfIndex = m_pdfIndex.find( FNV1a_hash( PDF.name() ) ); - if ( pdfIndex != m_pdfIndex.end() ) - return pdfIndex->second; - else - ERROR( "FATAL: PDF Index for " << PDF.name() << " not found" ); - return 999; -} - -size_t EventList::getCacheIndex( const CompiledExpressionBase& PDF, bool& isRegistered ) const -{ - auto pdfIndex = m_pdfIndex.find( FNV1a_hash( PDF.name() ) ); - if ( pdfIndex != m_pdfIndex.end() ) { - isRegistered = true; - return pdfIndex->second; - } - isRegistered = false; - return 999; -} - -void EventList::resetCache() -{ - m_pdfIndex.clear(); - m_cache.clear(); -} +// void EventList::resetCache() +// { +// m_index.clear(); +// //m_cache.clear(); +// } double EventList::integral() const { @@ -226,8 +198,6 @@ double EventList::integral() const void EventList::add( const EventList& evts ) { - resetCache(); - WARNING( "Adding event lists invalidates cache state" ); for ( auto& evt : evts ) m_data.push_back( evt ); } @@ -242,9 +212,9 @@ void EventList::erase(const std::vector::iterator& begin, m_data.erase( begin, end ); } -void EventList::reserveCache(const size_t& size) -{ - if ( size * m_data.size() >= m_cache.size() ) - m_cache.reserve( m_data.size() * m_cache.size() ); -} +//void EventList::reserveCache(const size_t& size) +//{ +// if ( size * m_data.size() >= m_cache.size() ) +// 
m_cache.reserve( m_data.size() * m_cache.size() ); +//} diff --git a/src/EventListSIMD.cpp b/src/EventListSIMD.cpp index bf7b0ff4864..0492917bf82 100644 --- a/src/EventListSIMD.cpp +++ b/src/EventListSIMD.cpp @@ -33,7 +33,7 @@ #include "AmpGen/simd/utils.h" using namespace AmpGen; -EventListSIMD::EventListSIMD( const EventType& type ) : m_eventType( type ), m_eventSize( m_eventType.eventSize() ) {} +EventListSIMD::EventListSIMD( const EventType& type ) : m_eventType( type ) {} void EventListSIMD::loadFromFile( const std::string& fname, const ArgumentPack& args ) { @@ -104,21 +104,19 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) for( int i = 0 ; i < evtList->GetN(); ++i ) entryList.push_back( evtList->GetEntry(i) ); } bool hasEventList = entryList.size() != 0; - m_nEvents = hasEventList ? entryList.size() : tree->GetEntries(); - auto aligned_size = utils::aligned_size(m_nEvents); + size_t nEvents = hasEventList ? entryList.size() : tree->GetEntries(); std::array buffer; - m_nBlocks = aligned_size / float_v::size; - m_data.resize( m_nBlocks * m_eventSize ); - m_weights.resize( m_nBlocks ); - m_genPDF.resize( m_nBlocks ); + m_data = Store(nEvents, m_eventType.eventSize() ); + m_weights.resize( m_data.nBlocks() ); + m_genPDF.resize( m_data.nBlocks() ); auto symmetriser = m_eventType.symmetriser(); - for ( unsigned int block = 0; block < m_nBlocks; ++block ) + for ( unsigned int block = 0; block < m_data.nBlocks(); ++block ) { for( unsigned k = 0 ; k != float_v::size; ++k ) { auto evt = k + block * float_v::size; - if(evt >= m_nEvents ) break; + if(evt >= m_data.size() ) break; tr.getEntry( hasEventList ? 
entryList[evt] : evt ); if( applySym ) symmetriser( temp ); buffer[k] = temp; @@ -132,18 +130,15 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) EventListSIMD::EventListSIMD( const EventList& other ) : EventListSIMD( other.eventType() ) { - unsigned aligned_size = utils::aligned_size(other.size()); - m_nBlocks = aligned_size / float_v::size; - m_nEvents = other.size(); - m_data.resize( m_nBlocks * m_eventSize ) ; - m_weights.resize( m_nBlocks ); - m_genPDF.resize ( m_nBlocks ); - for( unsigned evt = 0 ; evt != m_nBlocks; evt ++ ) + m_data = Store(other.size(), m_eventType.eventSize() ); + m_weights.resize( m_data.nBlocks() ); + m_genPDF.resize ( m_data.nBlocks() ); + for( unsigned block = 0 ; block != m_data.nBlocks(); block++ ) { - for( unsigned j = 0 ; j != m_eventSize; ++j ) - m_data[m_eventSize * evt + j] = utils::gather(other, [j](auto& event){ return event[j]; } , evt * float_v::size ); - m_weights[evt] = utils::gather(other, [](auto& event){ return event.weight(); }, evt * float_v::size, 0); - m_genPDF [evt] = utils::gather(other, [](auto& event){ return event.genPdf(); }, evt * float_v::size, 1 ); + for( unsigned j = 0 ; j != m_data.nFields(); ++j ) + m_data(block, j ) = utils::gather(other, [j](auto& event){ return event[j]; } , block * float_v::size ); + m_weights[block] = utils::gather(other, [](auto& event){ return event.weight(); }, block * float_v::size, 0); + m_genPDF [block] = utils::gather(other, [](auto& event){ return event.genPdf(); }, block * float_v::size, 1 ); } } @@ -215,48 +210,10 @@ TH2D* EventListSIMD::makeProjection( const Projection2D& projection, const Argum return plot; } -size_t EventListSIMD::getCacheIndex( const CompiledExpressionBase& PDF ) const -{ - auto pdfIndex = m_pdfIndex.find( FNV1a_hash( PDF.name() ) ); - if ( pdfIndex != m_pdfIndex.end() ) - return pdfIndex->second; - else - ERROR( "FATAL: PDF Index for " << PDF.name() << " not found" ); - return 999; -} - -size_t EventListSIMD::getCacheIndex( 
const CompiledExpressionBase& PDF, bool& isRegistered ) const -{ - auto pdfIndex = m_pdfIndex.find( FNV1a_hash( PDF.name() ) ); - if ( pdfIndex != m_pdfIndex.end() ) { - isRegistered = true; - return pdfIndex->second; - } - isRegistered = false; - return 999; -} - -void EventListSIMD::resetCache() -{ - m_pdfIndex.clear(); - m_cache.clear(); -} void EventListSIMD::clear() { m_data.clear(); - m_cache.clear(); -} - -void EventListSIMD::reserveCache(const unsigned& newSize) -{ - m_cache.reserve( newSize * nBlocks() ); -} - -void EventListSIMD::resizeCache(const unsigned& newSize ) -{ - WARNING("Will only reserve, because i don't want to keep track anymore ... "); - reserveCache( newSize ); } const Event EventListSIMD::operator[]( const size_t& pos ) const @@ -264,9 +221,9 @@ const Event EventListSIMD::operator[]( const size_t& pos ) const unsigned nEvents = size(); unsigned p = pos / float_v::size; unsigned q = pos % float_v::size; - Event tempEvent( m_eventSize ); - for( unsigned i = 0 ; i != m_eventSize; ++i ) - tempEvent[i] = m_data[p * m_eventSize + i ].at(q); + Event tempEvent( eventSize() ); + for( unsigned i = 0 ; i != tempEvent.size(); ++i ) + tempEvent[i] = m_data(p, i).at(q); tempEvent.setWeight( m_weights[p].at(q) ); tempEvent.setGenPdf( m_genPDF[p].at(q) ); tempEvent.setIndex( pos ); @@ -280,13 +237,13 @@ std::array EventListSIMD::scatter( unsigned pos ) auto vw = m_weights[p].to_array(); auto vg = m_genPDF[p].to_array(); for( unsigned evt = 0 ; evt != float_v::size; ++evt ){ - rt[evt] = Event( m_eventSize ); + rt[evt] = Event( m_data.nFields() ); rt[evt].setWeight(vw[evt]); rt[evt].setGenPdf(vg[evt]); rt[evt].setIndex(evt + pos); } - for( unsigned field = 0 ; field != m_eventSize; ++field){ - auto v = m_data[p * m_eventSize +field].to_array(); + for( unsigned field = 0 ; field != m_data.nFields(); ++field){ + auto v = m_data(p, field).to_array(); for( unsigned evt = 0; evt != float_v::size; ++evt ) rt[evt][field] = v[evt]; } return rt; @@ -294,8 +251,8 @@ 
std::array EventListSIMD::scatter( unsigned pos ) void EventListSIMD::gather( const std::array& data, unsigned pos ) { - for( unsigned field = 0 ; field != m_eventSize; ++field ) - m_data[pos*m_eventSize +field] = utils::gather(data, [field](auto& event){ return event[field]; } ); + for( unsigned field = 0; field != m_data.nFields(); ++field ) + m_data(pos, field) = utils::gather(data, [field](auto& event){ return event[field]; } ); m_weights[pos] = utils::gather(data, [](auto& event){ return event.weight() ; } ); m_genPDF[pos] = utils::gather(data, [](auto& event){ return event.genPdf(); } ); } diff --git a/src/Expression.cpp b/src/Expression.cpp index 88551e23f02..058ee0d342a 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -59,12 +59,12 @@ std::string Constant::to_string(const ASTResolver* resolver) const { std::string literalSuffix = ""; if( resolver != nullptr && resolver->enableCuda() ) { - complex_type = "ampgen_cuda::complex_t"; + complex_type = "AmpGen::CUDA::complex_t"; literalSuffix = "f"; } if( resolver != nullptr && resolver->enableAVX() ) { - complex_type = "AmpGen::AVX2::complex_t"; + complex_type = "AmpGen::AVX2d::complex_t"; literalSuffix = "f"; } return std::imag(m_value) == 0 ? "(" + rounded_string(std::real(m_value)) +literalSuffix + ")" : @@ -215,7 +215,7 @@ Ternary::Ternary( const Expression& cond, const Expression& v1, const Expression } std::string Ternary::to_string(const ASTResolver* resolver) const { - return resolver != nullptr && resolver->enableAVX() ? "AmpGen::AVX2::select(" + m_cond.to_string(resolver) + ", " + + return resolver != nullptr && resolver->enableAVX() ? "AmpGen::AVX2d::select(" + m_cond.to_string(resolver) + ", " + m_v1.to_string(resolver) + ", " + m_v2.to_string(resolver) +")" : "(" + m_cond.to_string(resolver) + "?" 
+ m_v1.to_string(resolver) + ":" + m_v2.to_string(resolver) + ")"; } diff --git a/src/Integrator.cpp b/src/Integrator.cpp index c1e45cf2d21..531248a016b 100644 --- a/src/Integrator.cpp +++ b/src/Integrator.cpp @@ -36,42 +36,29 @@ void Bilinears::resize( const size_t& r, const size_t& c) void Integrator::integrateBlock() { - real_t re[N] = {0}; - real_t im[N] = {0}; - size_t addr_i[N] = {0}; - size_t addr_j[N] = {0}; - for( size_t roll = 0 ; roll < N; ++roll ) - { - addr_i[roll] = m_integrals[roll].i; - addr_j[roll] = m_integrals[roll].j; - } + #pragma omp parallel for for ( size_t roll = 0; roll < N; ++roll ) { - complex_t* b1 = m_cache.data() + m_integrals[roll].i * m_events->size(); - complex_t* b2 = m_cache.data() + m_integrals[roll].j * m_events->size(); - #pragma omp parallel for reduction(+: re, im) - for ( size_t i = 0; i < m_events->size(); ++i ) { - auto c = b1[i] * std::conj(b2[i]); - re[roll] += m_weight[i] * std::real(c); - im[roll] += m_weight[i] * std::imag(c); + float_v re( 0.f ); + float_v im( 0.f ); + auto b1 = m_cache.data() + m_integrals[roll].i * m_cache.nBlocks(); + auto b2 = m_cache.data() + m_integrals[roll].j * m_cache.nBlocks(); + for ( size_t i = 0; i < m_cache.nBlocks(); ++i ) { + auto c = b1[i] * conj(b2[i]); + #if ENABLE_AVX2 + re = fmadd(re, m_weight[i], real(c) ); + im = fmadd(im, m_weight[i], imag(c) ); + #else + re = re + m_weight[i] * real(c); + im = im + m_weight[i] * imag(c); + #endif } + m_integrals[roll].transfer( utils::sum_elements( complex_v(re, im) ) / m_norm ); } - for ( size_t j = 0; j < m_counter; ++j ) m_integrals[j].transfer( complex_t( re[j], im[j] ) / m_norm ); m_counter = 0; } -Integrator::Integrator( const EventList* events ) : m_events( events ) -{ - if( m_events == nullptr ) return; - m_weight.resize( m_events->size() ); - for( size_t i = 0 ; i < m_events->size(); ++i ) - { - m_weight[i] = m_events->at(i).weight() / m_events->at(i).genPdf(); - m_norm += m_weight[i]; - } -} - bool Integrator::isReady() const { 
return m_events != nullptr; } -const EventList* Integrator::events() const { return m_events; } + void Integrator::queueIntegral(const size_t& c1, const size_t& c2, const size_t& i, @@ -81,32 +68,36 @@ void Integrator::queueIntegral(const size_t& c1, { if( !out->workToDo(i,j) ) return; if( sim ) - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ - out->set(i,j,val); - if( i != j ) out->set(j,i, std::conj(val) ); } ); + addIntegralKeyed( c1, c2, [out,i,j]( const complex_t& val ){ out->set(i,j,val); if( i != j ) out->set(j,i, std::conj(val) ); } ); else - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ out->set(i,j,val); } ); + addIntegralKeyed( c1, c2, [out,i,j]( const complex_t& val ){ out->set(i,j,val); } ); } + void Integrator::addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ) { - m_integrals[m_counter++] = Integral(c1,c2,tFunc); + m_integrals[m_counter++] = Integral(c1,c2,tFunc); if ( m_counter == N ) integrateBlock(); } + void Integrator::queueIntegral(const size_t& i, const size_t& j, complex_t* result) { - addIntegralKeyed(i, j, [result](arg& val){ *result = val ; } ); + addIntegralKeyed(i, j, [result](const complex_t& val){ *result = val ; } ); } + void Integrator::flush() { if ( m_counter == 0 ) return; integrateBlock(); } -void Integrator::setBuffer( complex_t* pos, const complex_t& value, const size_t& size ) + +#if ENABLE_AVX2 +template <> complex_t Integrator::get( const unsigned& index, const unsigned& evt ) const { - *pos = value; + return utils::at( m_cache( evt/utils::size::value, index), evt % utils::size::value ); } +#endif -void Integrator::setBuffer( complex_t* pos, const std::vector& value, const size_t& size) +template <> complex_v Integrator::get( const unsigned& index, const unsigned& evt ) const { - memcpy( pos, &(value[0]), size * sizeof(complex_t) ); + return m_cache(evt, index); } diff --git a/src/IntegratorSIMD.cpp b/src/IntegratorSIMD.cpp deleted file mode 100644 index 375d84a3181..00000000000 --- 
a/src/IntegratorSIMD.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "AmpGen/IntegratorSIMD.h" -#include "AmpGen/simd/utils.h" - -using namespace AmpGen; -using namespace AmpGen::AVX2; - -void IntegratorSIMD::integrateBlock() -{ - #pragma omp parallel for - for ( size_t roll = 0; roll < N; ++roll ) { - float_v re( _mm256_set1_ps(0.f) ); - float_v im( _mm256_set1_ps(0.f) ); - auto b1 = m_cache.data() + m_integrals[roll].i * m_events->size(); - auto b2 = m_cache.data() + m_integrals[roll].j * m_events->size(); - for ( size_t i = 0; i < m_events->nBlocks(); ++i ) { - auto c = b1[i] * conj(b2[i]); - re = _mm256_fmadd_ps(re, m_weight[i], real(c) ); - im = _mm256_fmadd_ps(im, m_weight[i], imag(c) ); - } - m_integrals[roll].transfer( complex_t( utils::sum_elements(float_v(re)), - utils::sum_elements(float_v(im)) ) / m_norm ); - } - m_counter = 0; -} - -IntegratorSIMD::IntegratorSIMD( const EventListSIMD* events ) : m_events( events ) -{ - if( m_events == nullptr ) return; - m_weight.resize( m_events->nBlocks() ); - float_v norm_acc = 0.; - for( size_t i = 0 ; i < m_events->nBlocks(); ++i ) - { - m_weight[i] = m_events->weight(i) / m_events->genPDF(i); - norm_acc = norm_acc + m_weight[i]; - } - m_norm = utils::sum_elements(norm_acc); -} - -bool IntegratorSIMD::isReady() const { return m_events != nullptr; } - -void IntegratorSIMD::queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim) -{ - if( !out->workToDo(i,j) ) return; - if( sim ) - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ - out->set(i,j,val); - if( i != j ) out->set(j,i, std::conj(val) ); } ); - else - addIntegralKeyed( c1, c2, [out,i,j]( arg& val ){ out->set(i,j,val); } ); -} -void IntegratorSIMD::addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ) -{ - m_integrals[m_counter++] = Integral(c1,c2,tFunc); - if ( m_counter == N ) integrateBlock(); -} -void IntegratorSIMD::queueIntegral(const size_t& i, const size_t& 
j, complex_t* result) -{ - addIntegralKeyed(i, j, [result](arg& val){ *result = val ; } ); -} -void IntegratorSIMD::flush() -{ - if ( m_counter == 0 ) return; - integrateBlock(); -} diff --git a/src/Lineshapes/CoupledChannel.cpp b/src/Lineshapes/CoupledChannel.cpp index 8f8d149e159..a6715dd9212 100644 --- a/src/Lineshapes/CoupledChannel.cpp +++ b/src/Lineshapes/CoupledChannel.cpp @@ -13,6 +13,8 @@ using namespace AmpGen; using namespace AmpGen::fcn; using namespace std::complex_literals; +// ENABLE_DEBUG( Lineshape::CoupledChannel ); + Expression H(const Expression& x, const Expression& y, const Expression& z ) @@ -117,7 +119,7 @@ DEFINE_LINESHAPE( CoupledChannel ) ADD_DEBUG( s , dbexpressions ); for( size_t i = 0 ; i < channels.size(); i+=2 ){ Particle p( channels[i] ); - INFO( "Adding channel ... " << p.uniqueString() << " coupling = " << NamedParameter( channels[i+1] ) ); + DEBUG( "Adding channel ... " << p.uniqueString() << " coupling = " << NamedParameter( channels[i+1] ) ); Expression coupling = Parameter(channels[i+1], 0); totalWidth = totalWidth + coupling * phaseSpace(s , p, p.L()); totalWidthAtPole = totalWidthAtPole + coupling * phaseSpace(mass*mass, p, p.L()); diff --git a/src/Minimiser.cpp b/src/Minimiser.cpp index 29cd3312e7b..3c7f326fad8 100644 --- a/src/Minimiser.cpp +++ b/src/Minimiser.cpp @@ -29,9 +29,7 @@ double Minimiser::operator()( const double* xx ) m_parSet->at( m_mapping[i] )->setCurrentFitVal( xx[i] ); } double LL = m_theFunction() ; - for ( auto& extendTerm : m_extendedTerms ) { - LL -= 2 * extendTerm->getVal(); - } + for ( auto& extendTerm : m_extendedTerms ) LL -= 2 * extendTerm->getVal(); return LL - m_ll_zero; } @@ -59,7 +57,7 @@ void Minimiser::prepare() { std::string algorithm = NamedParameter( "Minimiser::Algorithm", "Hesse"); size_t maxCalls = NamedParameter( "Minimiser::MaxCalls" , 100000); - double tolerance = NamedParameter( "Minimiser::Tolerance" , 2.0); + double tolerance = NamedParameter( "Minimiser::Tolerance" , 1.0); 
m_printLevel = NamedParameter( "Minimiser::PrintLevel", 4); m_normalise = NamedParameter( "Minimiser::Normalise",false); if ( m_minimiser != nullptr ) delete m_minimiser; @@ -68,6 +66,8 @@ void Minimiser::prepare() m_minimiser->SetMaxFunctionCalls( maxCalls ); m_minimiser->SetMaxIterations( 100000 ); m_minimiser->SetTolerance( tolerance ); +// m_minimiser->SetStrategy( 3 ); + // m_minimiser->SetPrecision(std::numeric_limits::epsilon()); m_minimiser->SetPrintLevel( m_printLevel ); m_mapping.clear(); m_covMatrix.clear(); diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 34083588200..64ff4b5ebec 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -29,12 +29,20 @@ #include "AmpGen/DiracMatrices.h" #include "AmpGen/Simplify.h" #include "AmpGen/enum.h" +#include "AmpGen/simd/utils.h" using namespace AmpGen; using namespace std::complex_literals; namespace AmpGen { make_enum(spaceType, spin, flavour) } +std::vector convertProxies(const std::vector& proxyVector, const std::function& transform) +{ + std::vector rt; + std::transform(proxyVector.begin(), proxyVector.end(), std::back_inserter(rt), transform ); + return rt; +} + PolarisedSum::PolarisedSum(const EventType& type, MinuitParameterSet& mps, const std::vector& pVector) @@ -63,9 +71,7 @@ PolarisedSum::PolarisedSum(const EventType& type, { tp.enqueue( [i, &protoAmps, &polStates, this]{ Tensor thisExpression( Tensor::dim(polStates.size()) ); - auto& p = protoAmps[i].first; - auto& coupling = protoAmps[i].second; - //auto& [p,coupling] = protoAmps.at(i); + auto& [p, coupling] = protoAmps[i]; DebugSymbols syms; for(unsigned j = 0; j != polStates.size(); ++j){ p.setPolarisationState( polStates[j] ); @@ -74,11 +80,11 @@ PolarisedSum::PolarisedSum(const EventType& type, m_matrixElements[i] = TransitionMatrix( p, coupling, - CompiledExpression( + CompiledExpression( TensorExpression(thisExpression), p.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? 
syms : DebugSymbols() ,this->m_mps ) ); - CompilerWrapper().compile( m_matrixElements[i].amp ); + CompilerWrapper().compile( m_matrixElements[i] ); }); } } @@ -100,11 +106,11 @@ PolarisedSum::PolarisedSum(const EventType& type, m_matrixElements[i] = TransitionMatrix( tm.first, tm.second, - CompiledExpression( + CompiledExpression( TensorExpression(thisExpression), tm.first.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); - CompilerWrapper().compile( m_matrixElements[i].amp ); + CompilerWrapper().compile( m_matrixElements[i] ); }); } } @@ -116,6 +122,12 @@ PolarisedSum::PolarisedSum(const EventType& type, else if( m_dim.first == 3 ) m_pVector = {p("Px"), p("Py"), p("Pz"), p("Tyy"), p("Tzz"), p("Txy"), p("Txz"), p("Tyz")}; } for(size_t i=0; i < m_dim.second * m_dim.first * m_dim.first; ++i) m_norms.emplace_back( m_matrixElements.size(), m_matrixElements.size() ); + + DebugSymbols db; + auto prob = probExpression(transitionMatrix(), convertProxies(m_pVector,[](auto& p){ return Parameter(p->name());} ), m_debug ? 
&db : nullptr); + m_probExpression = CompiledExpression(prob, "prob_unnormalised", {}, db, m_mps); + CompilerWrapper().compile(m_probExpression); + m_probExpression.prepare(); } std::vector PolarisedSum::polarisations( const std::string& name ) const @@ -172,12 +184,6 @@ std::vector densityMatrix(const unsigned& dim, const std::vector convertProxies(const std::vector& proxyVector, const std::function& transform) -{ - std::vector rt; - std::transform(proxyVector.begin(), proxyVector.end(), std::back_inserter(rt), transform ); - return rt; -} std::vector> PolarisedSum::matrixElements() const { @@ -188,52 +194,44 @@ void PolarisedSum::prepare() { DEBUG( "Preparing: " << m_prefix << " " << m_events << " ready = " << m_integrator.isReady() ); transferParameters(); - size_t nChanges = 0; ProfileClock tEval; size_t size_of = size() / m_matrixElements.size(); - if( m_events != nullptr ) m_events->reserveCache( size() ); - if( m_integrator.isReady() ) m_integrator.allocate(m_matrixElements, size() ); for(auto& t : m_matrixElements){ - if( m_nCalls != 0 && !t.amp.hasExternalsChanged() ) continue; - m_events->updateCache(t.amp, t.addressData); - m_integrator.prepareExpression(t.amp); - t.amp.resetExternals(); + if( m_nCalls != 0 && !t.hasExternalsChanged() ) continue; + if( m_events != nullptr ) m_cache.update(m_events->store(), t); + m_integrator.updateCache(t); + t.resetExternals(); t.workToDo = true; - nChanges++; - if( m_nCalls == 0 && m_integrator.isReady() ) m_integIndex.push_back( m_integrator.getCacheIndex( t.amp ) ); } - if( !m_probExpression.isLinked() ) build_probunnormalised(); tEval.stop(); ProfileClock tIntegral; m_rho = densityMatrix(m_dim.first, m_pVector); - if( m_integrator.isReady() ) - { - if(nChanges != 0) calculateNorms(); - complex_t z = 0; - for(size_t i = 0; i < m_matrixElements.size(); ++i){ - for(size_t j = 0; j < m_matrixElements.size(); ++j){ - // z += ((i==j) ? 1. : 2. 
) * m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())*norm(i,j); - z += m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())* ( i > j ? std::conj(norm(j,i)) : norm(i,j) ); - } - } - m_norm = std::real(z); - if(m_nCalls % 10000 == 0 && m_prefix == "") debug_norm(); - } - + if( m_integrator.isReady() ) updateNorms(); tIntegral.stop(); - if(m_verbosity && nChanges != 0) - INFO("Time to evaluate = " << tEval << " ms; " << "norm = " << tIntegral << " ms; " << "pdfs = " << nChanges); + if( m_verbosity && m_nCalls % 100 == 0 ){ + INFO("Time to evaluate = " << tEval << " ms; " << "norm = " << tIntegral << " ms; "); + } +// debug_norm(); m_nCalls++; for( auto& me : m_matrixElements ) me.workToDo = false; + auto pw = m_weight / m_norm; + #pragma omp parallel for + for( unsigned block = 0 ; block != m_events->nBlocks(); ++block ) + m_pdfCache[block] = pw * m_probExpression(&m_cache(block, 0)); + + DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << pw * getValNoCache(m_events->at(0)) ); +} + +float_v PolarisedSum::operator()( const float_v*, const unsigned index ) const +{ + return m_pdfCache[index]; } void PolarisedSum::debug_norm() { double norm_slow = 0; - for( auto& evt : *m_integrator.events() ) + for( auto& evt : *m_integrator.events() ) norm_slow += evt.weight() * getValNoCache(evt) / evt.genPdf(); - auto evt = (*m_integrator.events())[0]; - INFO("Event[0]: " << prob_unnormalised(evt) << " " << getValNoCache(evt) ); INFO("Norm : " << std::setprecision(10) << "bilinears=" << m_norm << "; exact=" << norm_slow / m_integrator.norm() @@ -241,19 +239,22 @@ void PolarisedSum::debug_norm() << "; sample=" << m_integrator.norm() ); } -void PolarisedSum::setEvents( EventList& events ) +void PolarisedSum::setEvents( EventList_type& events ) { reset(); + if( m_events != nullptr && m_ownEvents ) delete m_events; m_events = &events; - for( unsigned i = 0; i != m_matrixElements.size(); ++i ) - m_matrixElements[i].addressData = 
m_events->registerExpression(m_matrixElements[i].amp, m_dim.first * m_dim.second ); + m_cache = Store(m_events->size(), m_matrixElements, m_dim.first * m_dim.second ); + m_pdfCache.resize( m_events->nBlocks() ); } -void PolarisedSum::setMC( EventList& events ) +void PolarisedSum::setMC( EventList_type& events ) { m_nCalls = 0; - m_integrator = Integrator(&events); - + m_integrator = Integrator(&events, m_matrixElements, m_dim.first * m_dim.second ); + m_integIndex.clear(); + for( auto& i : m_matrixElements ) + m_integIndex.push_back( m_integrator.getCacheIndex(i) ); } size_t PolarisedSum::size() const @@ -263,35 +264,27 @@ size_t PolarisedSum::size() const void PolarisedSum::reset( const bool& flag ){ m_nCalls = 0 ; } -void PolarisedSum::build_probunnormalised() -{ - DebugSymbols db; - auto prob = probExpression(transitionMatrix(), convertProxies(m_pVector,[](auto& p){ return Parameter(p->name());} ), m_debug ? &db : nullptr); - m_probExpression = CompiledExpression(prob, "prob_unnormalised", {}, db, m_mps); - CompilerWrapper().compile(m_probExpression); - m_probExpression.prepare(); -} - Tensor PolarisedSum::transitionMatrix() { auto size = m_dim.first * m_dim.second; std::vector expressions(size, 0); + unsigned totalSize = 0 ; for( auto& me : m_matrixElements ){ auto coupling = me.coupling.to_expression() ; - auto cacheIndex = m_events->getCacheIndex(me.amp); + auto cacheIndex = totalSize; for( size_t i = 0 ; i < size ; ++i ){ expressions[i] = expressions[i] + coupling * Parameter( "x1["+std::to_string(cacheIndex+i)+"]",0,true); } + totalSize += size; } Tensor T_matrix(expressions, {m_dim.first, m_dim.second}); T_matrix.st(); return T_matrix; } -real_t PolarisedSum::prob_unnormalised( const Event& evt ) const -{ - return 0; - //return m_probExpression( &m_events->cache(evt.index(), 0) ); +real_t PolarisedSum::operator()(const Event& evt) const +{ + return utils::at( m_pdfCache[ evt.index() / utils::size::value ], evt.index() % utils::size::value ); } double 
PolarisedSum::norm() const @@ -316,42 +309,36 @@ complex_t PolarisedSum::norm(const size_t& i, const size_t& j, Integrator* integ return total; } -void PolarisedSum::calculateNorms() +void PolarisedSum::updateNorms() { + if(std::any_of(m_matrixElements.begin(),m_matrixElements.end(), [](auto& me){ return me.workToDo; } )){ for( unsigned i = 0 ; i < m_matrixElements.size(); ++i ){ for( unsigned j = i; j < m_matrixElements.size(); ++j ){ if( m_matrixElements[i].workToDo || m_matrixElements[j].workToDo ) norm(i, j, &m_integrator); } } m_integrator.flush(); -} - -double PolarisedSum::prob(const Event& evt) const -{ - return m_weight * prob_unnormalised(evt) / m_norm; + } + complex_t z = 0; + for(size_t i = 0; i < m_matrixElements.size(); ++i){ + for(size_t j = 0; j < m_matrixElements.size(); ++j){ + z += m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())* ( i > j ? std::conj(norm(j,i)) : norm(i,j) ); + } + } + m_norm = std::real(z); } void PolarisedSum::debug(const Event& evt) { auto tsize = m_dim.first * m_dim.second; - for(const auto& me : m_matrixElements) + for(unsigned j = 0; j != m_matrixElements.size(); ++j) { - std::vector this_cache(0,tsize); - for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( m_events->cache(evt.index(), me.addressData+i) ); - INFO( me.decayDescriptor() << " " << vectorToString( this_cache, " ") ); + std::vector this_cache; + for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( m_cache(evt.index() / utils::size::value, j*tsize + i) ); + INFO( m_matrixElements[j].decayDescriptor() << " " << vectorToString( this_cache, " ") ); } - INFO("P(x) = " << getValNoCache(evt) ); + INFO("P(x) = " << getValNoCache(evt) << " " << operator()(nullptr, evt.index() / utils::size::value ) ); INFO("Prod = [" << vectorToString(m_pVector , ", ") <<"]"); - if( m_debug ) - { - transferParameters(); - std::vector cache( tsize * m_matrixElements.size() ); - for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ){ - 
std::memmove( cache.data() + tsize *i, m_matrixElements[i](evt).data(), tsize * sizeof(complex_t) ); - m_matrixElements[i].amp.debug( evt.address() ); - } - m_probExpression.debug( cache.data() ); - } } void PolarisedSum::generateSourceCode(const std::string& fname, const double& normalisation, bool add_mt) @@ -364,13 +351,13 @@ void PolarisedSum::generateSourceCode(const std::string& fname, const double& no std::vector expressions(size); for( auto& p : m_matrixElements ){ auto expr = CompiledExpression(const real_t*, const real_t*)>( - p.amp.expression(), + p.expression(), p.decayDescriptor(), m_eventType.getEventFormat(), DebugSymbols() ,m_mps ) ; expr.prepare(); expr.to_stream( stream ); expr.compileWithParameters( stream ); - Array z( make_cse( Function( programatic_name( p.amp.name()) + "_wParams", {event} ) ), size ); + Array z( make_cse( Function( programatic_name( p.name()) + "_wParams", {event} ) ), size ); INFO( p.decayDescriptor() << " coupling = " << p.coupling() ); for( unsigned int j = 0 ; j < size; ++j ) expressions[j] = expressions[j] + p.coupling() * z[j]; } @@ -449,8 +436,7 @@ std::vector PolarisedSum::fitFractions(const LinearErrorPropagator& for(const auto& p : outputFractions) INFO(p); if( interferenceFractions ) - { - + { auto head_rules = m_rules.rulesForDecay(m_eventType.mother(), m_prefix); FitFractionCalculator iCalc(this, findIndices(m_matrixElements, m_eventType.mother()), recomputeIntegrals); for(size_t i = 0 ; i < head_rules.size(); ++i) @@ -478,83 +464,77 @@ void PolarisedSum::transferParameters() if( m_probExpression.isLinked() ) m_probExpression.prepare(); for(auto& me : m_matrixElements){ me.coefficient = me.coupling(); - me.amp.prepare(); + me.prepare(); } for(auto& p : m_pVector ) p.update(); m_weight.update(); } -real_t PolarisedSum::getValNoCache( const Event& evt ) +real_t PolarisedSum::getValNoCache( const Event& evt ) const { - transferParameters(); auto tsize = m_dim.first * m_dim.second; - std::vector cache( tsize * 
m_matrixElements.size() ); + std::vector cache( tsize * m_matrixElements.size() ); for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ){ - std::memmove( cache.data() + tsize *i, m_matrixElements[i](evt).data(), tsize * sizeof(complex_t) ); + std::memmove( cache.data() + tsize * i , m_matrixElements[i](evt).data(), tsize * sizeof(complex_v) ); } - return m_probExpression( cache.data() ); + return utils::get<0>(m_probExpression( cache.data() )); } void PolarisedSum::setWeight( MinuitProxy param ){ m_weight = param; } double PolarisedSum::getWeight() const { return m_weight ; } -std::function PolarisedSum::evaluator(const EventList* events) const + +std::function PolarisedSum::evaluator(const EventList_type* ievents) const { - if( events != nullptr && events != m_integrator.events() ) - ERROR("Evaluator only working on the integration sample, fix me!"); - - std::vector address_mapping( size() ); - for( const auto& me : m_matrixElements ){ - for( unsigned i = 0; i != size() / m_matrixElements.size(); ++i ) - address_mapping[me.addressData+i] = m_integrator.getCacheIndex( me.amp ) + i; - } - std::vector values( m_integrator.events()->size() ); - std::vector buffer(address_mapping.size()); + auto events = ievents == nullptr ? 
m_integrator.events() : ievents; + Store store( events->size(), m_matrixElements); + for( auto& me : m_matrixElements ) store.update(m_events->store(), me ); + + std::vector values( events->aligned_size() ); #ifdef _OPENMP - #pragma omp parallel for firstprivate(buffer) + #pragma omp parallel for #endif - for( unsigned int i = 0 ; i != m_integrator.events()->size(); ++i ) + for( unsigned int block = 0 ; block != events->nBlocks(); ++block ) { - for( unsigned j = 0 ; j != address_mapping.size(); ++j ) buffer[j] = this->m_integrator.get(address_mapping[j], i); - values[i] = m_weight * m_probExpression(&buffer[0]) / m_norm; + utils::store(values.data() + utils::size::value * block, (m_weight/m_norm) * m_probExpression(&store(block,0)) ); } + for( unsigned int i = 0 ; i != 10; ++i ) + DEBUG(values[i] << " " << getValNoCache( events->at(i) ) * ( m_weight / m_norm ) ); return arrayToFunctor(values); } -KeyedView PolarisedSum::componentEvaluator(const EventList* events) const +KeyedView PolarisedSum::componentEvaluator(const EventList_type* events) const { - if( events != nullptr && events != m_integrator.events() ) - ERROR("Evaluator only working on the integration sample, fix me!"); - KeyedView rt(*events, m_matrixElements.size() ); - std::vector address_mapping(m_matrixElements.size()); - for( unsigned i = 0; i != m_matrixElements.size(); ++i ) - address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); - - for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) - { - auto& me = m_matrixElements[i]; - rt.setKey(i, programatic_name( me.decayTree.decayDescriptor() ) ); - #ifdef _OPENMP - #pragma omp parallel for - #endif - for( unsigned evt = 0 ; evt != m_integrator.events()->size(); ++evt ) - { - complex_t total = 0; - for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ - for( unsigned x = 0; x != m_norms.size(); ++x ) - { - auto s1 = m_dim.first; - auto s2 = m_dim.second; - auto f = x % s2; - auto psiIndex = (x-f) / s2; - auto m2 = psiIndex % 
s1; - auto m1 = (psiIndex-m2)/s1; - total += m_rho[psiIndex] * this->m_integrator.get( address_mapping[i] + m1 * s2 + f, evt ) * m_matrixElements[i].coefficient - * std::conj( this->m_integrator.get( address_mapping[j] + m2 * s2 + f, evt ) * m_matrixElements[j].coefficient ); - } - } - rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; - } - } + KeyedView rt(*events, m_matrixElements.size() ); +// std::vector address_mapping(m_matrixElements.size()); +// for( unsigned i = 0; i != m_matrixElements.size(); ++i ) +// address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); +// +// for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) +// { +// auto& me = m_matrixElements[i]; +// rt.setKey(i, programatic_name( me.decayTree.decayDescriptor() ) ); +// #ifdef _OPENMP +// #pragma omp parallel for +// #endif +// for( unsigned evt = 0 ; evt != m_integrator.events()->size(); ++evt ) +// { +// complex_t total = 0; +// for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ +// for( unsigned x = 0; x != m_norms.size(); ++x ) +// { +// auto s1 = m_dim.first; +// auto s2 = m_dim.second; +// auto f = x % s2; +// auto psiIndex = (x-f) / s2; +// auto m2 = psiIndex % s1; +// auto m1 = (psiIndex-m2)/s1; +// total += m_rho[psiIndex] * this->m_integrator.get( address_mapping[i] + m1 * s2 + f, evt ) * m_matrixElements[i].coefficient +// * std::conj( this->m_integrator.get( address_mapping[j] + m2 * s2 + f, evt ) * m_matrixElements[j].coefficient ); +// } +// } +// rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; +// } +// } return rt; } diff --git a/src/Projection.cpp b/src/Projection.cpp index aeeb6901ebc..2afa383fc4f 100644 --- a/src/Projection.cpp +++ b/src/Projection.cpp @@ -76,11 +76,6 @@ template <> TH1D* Projection::projInternal( const EventList& events, const Argum return events.makeProjection(*this, args); } -template <> TH1D* Projection::projInternal( const EventListSIMD& events, const ArgumentPack& args) const -{ - return 
events.makeProjection(*this, args); -} - template <> std::tuple, THStack*> Projection::projInternal(const EventList& events, const KeyedView& weightFunction, const ArgumentPack& args) const { std::vector hists; @@ -111,6 +106,12 @@ template <> std::tuple, THStack*> Projection::projInternal(co return {hists, stack}; } +#if ENABLE_AVX2 +template <> TH1D* Projection::projInternal( const EventListSIMD& events, const ArgumentPack& args) const +{ + return events.makeProjection(*this, args); +} + template <> std::tuple, THStack*> Projection::projInternal(const EventListSIMD& events, const KeyedView& weightFunction, const ArgumentPack& args) const { std::vector hists; @@ -140,4 +141,4 @@ template <> std::tuple, THStack*> Projection::projInternal(co if( autowrite ) stack->Write(); return {hists, stack}; } - +#endif diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 10c63b83ad9..5195f2299bf 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -413,9 +413,9 @@ TensorProxy AmpGen::operator*( const TensorProxy& t1, const TensorProxy& t2 ) Tensor value( finalTensorRank ); unsigned nElem = value.nElements(); - DEBUG("Got " << t1_tensor.dims().size() << " x " << t2_tensor.dims().size() << " with " << contractions.size() << " contractions " << nElementsInSum); - DEBUG(t1_tensor.dimString() << " x " << t2_tensor.dimString() << " -> " << value.dimString()); - DEBUG("Contraction matrix = " << "[" << vectorToString(contractionMatrix, ", ") << "]"); + //DEBUG("Got " << t1_tensor.dims().size() << " x " << t2_tensor.dims().size() << " with " << contractions.size() << " contractions " << nElementsInSum); + //DEBUG(t1_tensor.dimString() << " x " << t2_tensor.dimString() << " -> " << value.dimString()); + //DEBUG("Contraction matrix = " << "[" << vectorToString(contractionMatrix, ", ") << "]"); for( unsigned elem = 0; elem < nElem; ++elem ) { auto coords = Tensor::index_to_coordinates( elem, finalTensorRank ); diff --git a/src/UnaryExpression.cpp b/src/UnaryExpression.cpp index 
0fb9b4c4994..17fb1ae8882 100644 --- a/src/UnaryExpression.cpp +++ b/src/UnaryExpression.cpp @@ -17,18 +17,18 @@ T rsqrt( const T& arg ){ return 1. / sqrt(arg) ; } DEFINE_UNARY_OPERATOR( Log , log ) DEFINE_UNARY_OPERATOR( Sqrt, sqrt ) DEFINE_UNARY_OPERATOR( Exp , exp ) -DEFINE_UNARY_OPERATOR_NO_RESOLVER( Abs , std::fabs ) DEFINE_UNARY_OPERATOR( Sin , sin ) DEFINE_UNARY_OPERATOR( Cos , cos ) DEFINE_UNARY_OPERATOR( Tan , tan ) DEFINE_UNARY_OPERATOR( ASin, asin ) DEFINE_UNARY_OPERATOR( ACos, acos ) DEFINE_UNARY_OPERATOR( ATan, atan ) -DEFINE_UNARY_OPERATOR( Norm, std::norm ) -DEFINE_UNARY_OPERATOR( Conj, std::conj ) -DEFINE_UNARY_OPERATOR( Real, std::real ) -DEFINE_UNARY_OPERATOR( Imag, std::imag ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Norm, std::norm ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Real, std::real ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Imag, std::imag ) DEFINE_UNARY_OPERATOR_NO_RESOLVER( ISqrt, rsqrt ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Conj, std::conj ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Abs , std::fabs ) LGamma::LGamma( const Expression& expression) : IUnaryExpression(expression) {} LGamma::operator Expression() const { return Expression( std::make_shared(*this) ) ; } @@ -46,10 +46,39 @@ std::string ISqrt::to_string(const ASTResolver* resolver) const { std::string Abs::to_string( const ASTResolver* resolver ) const { return resolver != nullptr && resolver->enableAVX() ? - "AmpGen::AVX2::abs(" + m_expression.to_string(resolver) +")" : + "abs(" + m_expression.to_string(resolver) +")" : "std::fabs("+m_expression.to_string(resolver) +")"; } +std::string Conj::to_string( const ASTResolver* resolver ) const +{ + return resolver != nullptr && resolver->enableAVX() ? + "conj(" + m_expression.to_string(resolver) +")" : + "std::conj("+m_expression.to_string(resolver) +")"; +} + +std::string Norm::to_string( const ASTResolver* resolver ) const +{ + return resolver != nullptr && resolver->enableAVX() ? 
+ "norm(" + m_expression.to_string(resolver) +")" : + "std::norm("+m_expression.to_string(resolver) +")"; +} + +std::string Real::to_string( const ASTResolver* resolver ) const +{ + return resolver != nullptr && resolver->enableAVX() ? + "real(" + m_expression.to_string(resolver) +")" : + "std::real("+m_expression.to_string(resolver) +")"; +} + +std::string Imag::to_string( const ASTResolver* resolver ) const +{ + return resolver != nullptr && resolver->enableAVX() ? + "imag(" + m_expression.to_string(resolver) +")" : + "std::imag("+m_expression.to_string(resolver) +")"; +} + + Expression Log::d() const { return 1. / arg(); } Expression Sqrt::d() const { return 1. / ( 2 * fcn::sqrt( arg() ) ); } Expression Exp::d() const { return fcn::exp(arg()) ; } diff --git a/src/Utilities.cpp b/src/Utilities.cpp index e0c04a9ce94..e5f135bfe39 100644 --- a/src/Utilities.cpp +++ b/src/Utilities.cpp @@ -203,20 +203,16 @@ bool AmpGen::stringMatchesWildcard( const std::string& input, const std::string& { auto pos = wildcard_string.find( wildcard_character ); /// TEST_foobar -> *_foobar if ( wildcard_string.size() == 1 && wildcard_string[0] == wildcard_character ) { - DEBUG( "Returning true" ); return true; } if ( pos == std::string::npos ) { - DEBUG( "Returning " << input << " = " << wildcard_string << " ?" 
); return input == wildcard_string; } if ( pos == wildcard_string.size() - 1 ) { - DEBUG( "Returning " << input << " contains " << wildcard_string ); return input.find( wildcard_string.substr( 0, wildcard_string.size() - 1 ) ) == 0; } else { const std::string pattern1 = wildcard_string.substr( 0, pos + 1 ); const std::string pattern2 = wildcard_string.substr( pos + 1 ); - DEBUG( "Matching " << pattern1 << " to " << input ); bool match1 = stringMatchesWildcard( input, pattern1, wildcard_character ); if ( !match1 ) return false; auto pos2 = pattern2.find( wildcard_character ); @@ -285,6 +281,15 @@ void AmpGen::printSplash() #elif defined(__GNUC__) || defined(__GNUG__) std::cout << "gcc " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__; #endif + #if ENABLE_AVX2 + std::cout << " (avx2;"; + #if DOUBLE_PRECISION + std::cout << " double)"; + #else + std::cout << " single)"; + #endif + #endif + std::cout << " " << __DATE__ << " " << __TIME__ << bold_off << "\n\n"; char* AmpGenRoot = getenv("AMPGENROOT"); @@ -336,7 +341,6 @@ std::string AmpGen::expandGlobals( std::string path ) std::string old_path = path; size_t len = end_pos == std::string::npos ? 
path.length() - pos + 1 : end_pos - pos + 1; path = path.replace( pos, len, global_var ); - DEBUG( old_path << " -> " << path ); } while ( pos != std::string::npos ); return path; From e074da94abe0d188e732912a640349d6505ad77a Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 14 Apr 2020 19:34:40 +0200 Subject: [PATCH 05/67] fixes for travis build --- AmpGen/AmplitudeRules.h | 6 +++++- AmpGen/Event.h | 13 ------------- AmpGen/Store.h | 2 +- src/CoherentSum.cpp | 8 +++++--- src/PolarisedSum.cpp | 4 ++-- 5 files changed, 13 insertions(+), 20 deletions(-) diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index 38823a0dc11..d65df6bf7d0 100644 --- a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -108,8 +108,11 @@ namespace AmpGen #if ENABLE_AVX2 const RT operator()(const Event& event) const { return amp_type::operator()(EventListSIMD::makeEvent(event).data()); } #else - const RT operator()(const Event& event) const { return amp_type::operator()(event.address()); } + const RT operator()(const Event& event) const { return amp_type::operator()(event.address()) ; } #endif + template auto operator()(arg_types... args ) const { return amp_type::operator()(args...) ; } + + const RT operator()(const float_v* t) const { return amp_type::operator()(t) ; } const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } Particle decayTree; @@ -178,6 +181,7 @@ namespace AmpGen #endif return rt; } + template auto operator()(arg_types... args ) const { return amp_type::operator()(args...) 
; } const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } diff --git a/AmpGen/Event.h b/AmpGen/Event.h index 5c2145301e7..01bc5139f16 100644 --- a/AmpGen/Event.h +++ b/AmpGen/Event.h @@ -26,13 +26,6 @@ namespace AmpGen { void set( const real_t* evt ); void set( const unsigned& i, const real_t& p ) ; void swap( const unsigned int& i , const unsigned int& j ); - // void setCache(const complex_t& value, const unsigned& pos) ; - // template void setCache( const std::array& value, const unsigned& pos ) - // { - // std::memmove( m_cache.data() + pos, value.data(), sizeof(std::array) ); - // } - // void setCache( const std::vector& value, const unsigned& pos ); - // void resizeCache( const unsigned int& new_size ); unsigned size() const { return m_event.size(); } @@ -41,7 +34,6 @@ namespace AmpGen { const real_t* address(const unsigned& ref=0) const { return &(m_event[ref]); } real_t* address(const unsigned& ref=0) { return &(m_event[ref]); } - // unsigned cacheSize() const { return m_cache.size(); } real_t weight() const { return m_weight; } real_t genPdf() const { return m_genPdf; } real_t operator[](const unsigned& i) const { return m_event[i]; } @@ -49,10 +41,6 @@ namespace AmpGen { operator const real_t*() const { return &(m_event[0]); } operator real_t*() { return &(m_event[0]); } - // const complex_t& getCache(const unsigned& pos) const { return m_cache[pos]; } - // const complex_t* getCachePtr(const unsigned& pos=0) const { return &(m_cache[0]) + pos; } - // complex_t* getCachePtr(const unsigned& pos=0) { return &(m_cache[0]) + pos; } - void setWeight( const real_t& weight ){ m_weight = weight ; } void setGenPdf( const real_t& genPdf ){ m_genPdf = genPdf ; } void extendEvent(const real_t& value) { m_event.push_back( value ); } @@ -67,7 +55,6 @@ namespace AmpGen { real_t s( const std::vector& indices ) const ; private: std::vector m_event; - // std::vector m_cache; real_t m_genPdf = {1}; real_t m_weight = {1}; unsigned m_index = {0}; 
diff --git a/AmpGen/Store.h b/AmpGen/Store.h index 594efb83ff9..ddfb9b0d5da 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -121,7 +121,7 @@ namespace AmpGen { #pragma omp parallel for #endif for ( size_t evt = 0; evt < events.size(); ++evt ){ - auto tmp = fcn( events[evt].address( ) ); + auto tmp = fcn( events[evt].address() ); store( evt, p0, &tmp, s); } } diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index e74de1ba47a..ad957579520 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -342,12 +342,14 @@ float_v CoherentSum::operator()( const float_v* /*evt*/, const unsigned block ) std::function CoherentSum::evaluator(const EventList_type* ievents) const { auto events = ievents == nullptr ? m_integrator.events() : ievents; - Store store( events->store(), m_matrixElements); + Store store( events->size(), m_matrixElements); + for( auto& me : m_matrixElements ) store.update(m_events->store(), me ); + std::vector values( events->aligned_size() ); #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned int block = 0 ; block != events->nBlocks(); ++block ) + for( unsigned int block = 0 ; block < events->nBlocks(); ++block ) { complex_v amp(0.,0.); for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ) @@ -369,7 +371,7 @@ KeyedView CoherentSum::componentEvaluator(c #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned evt = 0 ; evt != events->size(); ++evt ) + for( unsigned evt = 0 ; evt < events->size(); ++evt ) { complex_t total = 0; for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 64ff4b5ebec..19bb114c96a 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -216,7 +216,7 @@ void PolarisedSum::prepare() for( auto& me : m_matrixElements ) me.workToDo = false; auto pw = m_weight / m_norm; #pragma omp parallel for - for( unsigned block = 0 ; block != m_events->nBlocks(); ++block ) + for( unsigned block = 0 ; block < m_events->nBlocks(); ++block ) 
m_pdfCache[block] = pw * m_probExpression(&m_cache(block, 0)); DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << pw * getValNoCache(m_events->at(0)) ); @@ -494,7 +494,7 @@ std::function PolarisedSum::evaluator(const EventList_type #ifdef _OPENMP #pragma omp parallel for #endif - for( unsigned int block = 0 ; block != events->nBlocks(); ++block ) + for( unsigned int block = 0 ; block < events->nBlocks(); ++block ) { utils::store(values.data() + utils::size::value * block, (m_weight/m_norm) * m_probExpression(&store(block,0)) ); } From a04a7b26fb73999e48f417c5d317f35d3630f741 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 14 Apr 2020 20:58:18 +0200 Subject: [PATCH 06/67] Fix if number of events % simd_size != 0 and Chi2 calculation --- AmpGen/BinDT.h | 2 +- AmpGen/Chi2Estimator.h | 21 ++++++++++++++++----- AmpGen/Store.h | 10 +++++----- AmpGen/SumPDF.h | 2 +- examples/SignalOnlyFitter.cpp | 6 +++--- src/Chi2Estimator.cpp | 6 +++--- src/CoherentSum.cpp | 4 ++-- src/EventListSIMD.cpp | 11 +++++++---- src/PolarisedSum.cpp | 2 +- 9 files changed, 39 insertions(+), 25 deletions(-) diff --git a/AmpGen/BinDT.h b/AmpGen/BinDT.h index 2201e8f36d2..386c55a6cad 100644 --- a/AmpGen/BinDT.h +++ b/AmpGen/BinDT.h @@ -15,9 +15,9 @@ #include #include "AmpGen/ArgumentPack.h" -#include "AmpGen/EventList.h" #include "AmpGen/MsgService.h" #include "AmpGen/Types.h" +#include "AmpGen/EventList.h" namespace AmpGen { diff --git a/AmpGen/Chi2Estimator.h b/AmpGen/Chi2Estimator.h index 4f59291ca85..841cfa76cc7 100644 --- a/AmpGen/Chi2Estimator.h +++ b/AmpGen/Chi2Estimator.h @@ -7,19 +7,30 @@ #include "AmpGen/BinDT.h" +#if ENABLE_AVX2 +#include "AmpGen/EventListSIMD.h" +#else +#include "AmpGen/EventList.h" +#endif + + namespace AmpGen { - class EventList; class EventType; class Event; class Chi2Estimator { - public: + #if ENABLE_AVX2 + typedef EventListSIMD EventList_type; + #else + typedef EventList EventList_type; + #endif + public: template - Chi2Estimator( const 
EventList& dataEvents, const EventList& mcEvents, + Chi2Estimator( const EventList_type& dataEvents, const EventList_type& mcEvents, const std::function& fcn, - const argument_types&... args ) : m_binning(dataEvents, ArgumentPack(args...) ) + const argument_types&... args ) : m_binning(dataEvents.begin(), dataEvents.end(), ArgumentPack(args...) ) { doChi2(dataEvents, mcEvents, fcn); } @@ -27,7 +38,7 @@ namespace AmpGen double chi2() const; double nBins() const; void writeBinningToFile( const std::string& filename ); - void doChi2( const EventList& dataEvents, const EventList& mcEvents, + void doChi2( const EventList_type& dataEvents, const EventList_type& mcEvents, const std::function& fcn ); private: double m_chi2; diff --git a/AmpGen/Store.h b/AmpGen/Store.h index ddfb9b0d5da..0a0f8f0e448 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -135,10 +135,10 @@ namespace AmpGen { std::map> m_index; }; } -//using aos_store = AmpGen::Store; -//using soa_store = AmpGen::Store; -// -//ENABLE_DEBUG(aos_store) -//ENABLE_DEBUG(soa_store) +using aos_store = AmpGen::Store; +using soa_store = AmpGen::Store; + +ENABLE_DEBUG(aos_store) +ENABLE_DEBUG(soa_store) #endif diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index cc0d466bca2..b5d4e70eee3 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -116,7 +116,7 @@ namespace AmpGen auto eval = pdf.evaluator(events); for( unsigned i = 0; i != events->size(); ++i ) values[i] += eval( events->at(i) ); } ); - return arrayToFunctor(values, events); + return arrayToFunctor(values); } KeyedView componentEvaluator(const eventListType* events) const { diff --git a/examples/SignalOnlyFitter.cpp b/examples/SignalOnlyFitter.cpp index 3d3932e573e..ef256081c11 100644 --- a/examples/SignalOnlyFitter.cpp +++ b/examples/SignalOnlyFitter.cpp @@ -150,9 +150,9 @@ FitResult* doFit( likelihoodType&& likelihood, EventList_type& data, EventList_t /* Estimate the chi2 using an adaptive / decision tree based binning, down to a minimum bin population of 
15, and add it to the output. */ - //Chi2Estimator chi2( data, mc, likelihood.evaluator(&mc), MinEvents(15), Dim(data.eventType().dof()) ); - //chi2.writeBinningToFile("chi2_binning.txt"); - //fr->addChi2( chi2.chi2(), chi2.nBins() ); + Chi2Estimator chi2( data, mc, likelihood.evaluator(&mc), MinEvents(15), Dim(data.eventType().dof()) ); + chi2.writeBinningToFile("chi2_binning.txt"); + fr->addChi2( chi2.chi2(), chi2.nBins() ); fr->print(); /* Make the plots for the different components in the PDF, i.e. the signal and backgrounds. diff --git a/src/Chi2Estimator.cpp b/src/Chi2Estimator.cpp index a94b04fdc67..6c5ee020aef 100644 --- a/src/Chi2Estimator.cpp +++ b/src/Chi2Estimator.cpp @@ -6,9 +6,9 @@ #include "AmpGen/ArgumentPack.h" #include "AmpGen/EventType.h" #include "AmpGen/MsgService.h" -#include "AmpGen/EventList.h" #include "AmpGen/Event.h" + using namespace AmpGen; struct Moment { @@ -37,7 +37,7 @@ double Chi2Estimator::chi2() const { return m_chi2; } double Chi2Estimator::nBins() const { return m_nBins; } void Chi2Estimator::writeBinningToFile( const std::string& filename ) { m_binning.serialize( filename ); } -void Chi2Estimator::doChi2( const EventList& dataEvents, const EventList& mcEvents, +void Chi2Estimator::doChi2( const EventList_type& dataEvents, const EventList_type& mcEvents, const std::function& fcn ) { std::vector data( m_binning.size() ); @@ -48,7 +48,7 @@ void Chi2Estimator::doChi2( const EventList& dataEvents, const EventList& mcEv unsigned int j = 0; double total_data_weight = 0; double total_int_weight = 0; - for ( auto& d : dataEvents ) { + for ( const auto& d : dataEvents ) { if ( j % 1000000 == 0 && j != 0 ) INFO( "Binned " << j << " data events" ); double w = d.weight(); data[m_binning.getBinNumber( d )].add( d.weight() ); diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index ad957579520..75fd7a19571 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -343,7 +343,7 @@ std::function CoherentSum::evaluator(const 
EventList_type* { auto events = ievents == nullptr ? m_integrator.events() : ievents; Store store( events->size(), m_matrixElements); - for( auto& me : m_matrixElements ) store.update(m_events->store(), me ); + for( auto& me : m_matrixElements ) store.update(events->store(), me ); std::vector values( events->aligned_size() ); #ifdef _OPENMP @@ -356,7 +356,7 @@ std::function CoherentSum::evaluator(const EventList_type* amp = amp + m_matrixElements[j].coefficient * store(block, j); utils::store( values.data() + block * utils::size::value, (m_weight/m_norm) * utils::norm(amp) ); } - return arrayToFunctor(values); + return arrayToFunctor(values); } KeyedView CoherentSum::componentEvaluator(const EventList_type* ievents) const diff --git a/src/EventListSIMD.cpp b/src/EventListSIMD.cpp index 0492917bf82..4f791f5edf4 100644 --- a/src/EventListSIMD.cpp +++ b/src/EventListSIMD.cpp @@ -116,10 +116,13 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) for( unsigned k = 0 ; k != float_v::size; ++k ) { auto evt = k + block * float_v::size; - if(evt >= m_data.size() ) break; - tr.getEntry( hasEventList ? entryList[evt] : evt ); - if( applySym ) symmetriser( temp ); - buffer[k] = temp; + if(evt < m_data.size() ) + { + tr.getEntry( hasEventList ? entryList[evt] : evt ); + if( applySym ) symmetriser( temp ); + buffer[k] = temp; + } + else buffer[k].setWeight(0); } gather( buffer, block ); } diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 19bb114c96a..ba4c1cf10e0 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -488,7 +488,7 @@ std::function PolarisedSum::evaluator(const EventList_type { auto events = ievents == nullptr ? 
m_integrator.events() : ievents; Store store( events->size(), m_matrixElements); - for( auto& me : m_matrixElements ) store.update(m_events->store(), me ); + for( auto& me : m_matrixElements ) store.update(events->store(), me ); std::vector values( events->aligned_size() ); #ifdef _OPENMP From 53a3917b273cede07a21275725c24b9beb5e988e Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 15 Apr 2020 19:11:17 +0200 Subject: [PATCH 07/67] fixes for array / splines for avx2d --- AmpGen/AmplitudeRules.h | 12 +++---- AmpGen/CoherentSum.h | 13 ++++--- AmpGen/MetaUtils.h | 1 + AmpGen/MsgService.h | 23 +++++++------ AmpGen/ProfileClock.h | 3 +- AmpGen/Store.h | 11 +++--- AmpGen/SumPDF.h | 1 + AmpGen/simd/avx2d_types.h | 67 +++++++++++++++++++++++------------- AmpGen/simd/utils.h | 2 +- apps/ConvertToSourceCode.cpp | 4 +-- src/Array.cpp | 5 +++ src/CoherentSum.cpp | 39 ++++++++++++++------- src/EventListSIMD.cpp | 7 ++-- src/ProgressBar.cpp | 4 ++- test/test_avx2d.cpp | 64 ++++++++++++++++++++++++++++++++++ 15 files changed, 186 insertions(+), 70 deletions(-) create mode 100644 test/test_avx2d.cpp diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index d65df6bf7d0..27a3be5991c 100644 --- a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -89,19 +89,19 @@ namespace AmpGen { using amp_type = CompiledExpression; TransitionMatrix() = default; - TransitionMatrix(Particle dt, + TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, const amp_type& amp) : amp_type(amp), decayTree(dt), coupling(coupling) {} - TransitionMatrix(Particle dt, + TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, const MinuitParameterSet& mps, const std::map& evtFormat, const bool& debugThis=false) : - amp_type(dt.getExpression(debugThis ? &db : nullptr ), dt.decayDescriptor(), evtFormat, db, &mps ), + amp_type(Particle(dt).getExpression(debugThis ? 
&db : nullptr ), dt.decayDescriptor(), evtFormat, db, &mps ), decayTree(dt), coupling(coupling) {} @@ -154,19 +154,19 @@ namespace AmpGen { using amp_type = CompiledExpression; TransitionMatrix() = default; - TransitionMatrix(Particle dt, + TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, const amp_type& amp) : amp_type(amp), decayTree(dt), coupling(coupling) {} - TransitionMatrix(Particle dt, + TransitionMatrix(const Particle& dt, const TotalCoupling& coupling, const MinuitParameterSet& mps, const std::map& evtFormat, const bool& debugThis=false) : - amp_type(dt.getExpression(debugThis ? &db : nullptr ), dt.decayDescriptor(), evtFormat, db, &mps ), + amp_type(Particle(dt).getExpression(debugThis ? &db : nullptr ), dt.decayDescriptor(), evtFormat, db, &mps ), decayTree(dt), coupling(coupling) { use_rto();} diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 89bd84cb3f6..1db5690704d 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -74,14 +74,14 @@ namespace AmpGen void setWeight( MinuitProxy param ) { m_weight = param; } void makeTotalExpression(); void reset( bool resetEvents = false ); - void setEvents( EventList_type& list ); + void setEvents( const EventList_type& list ); + void setMC( const EventList_type& sim ); #if ENABLE_AVX2 - void setEvents( EventList& list) { m_ownEvents = true; setEvents( *(new EventListSIMD(list)) ) ; } - void setMC( EventList& list) { setMC( *(new EventListSIMD(list)) ) ; } + void setEvents( const EventList& list) { m_ownEvents = true; setEvents( *(new EventListSIMD(list)) ) ; } + void setMC( const EventList& list) { setMC( *(new EventListSIMD(list)) ) ; } #endif float_v operator()(const float_v*, const unsigned) const; real_t operator()(const Event& evt ) const { return m_weight*std::norm(getVal(evt))/m_norm; } - void setMC( EventList_type& sim ); void debug( const Event& evt, const std::string& nameMustContain=""); void generateSourceCode( const std::string& fname, const double& 
normalisation = 1, bool add_mt = false ); @@ -101,7 +101,7 @@ namespace AmpGen AmplitudeRules m_rules; ///< Ruleset for the selected transition. Integrator m_integrator; ///< Tool to calculate integrals - EventList_type* m_events = {nullptr}; ///< Data events to evaluate PDF on + const EventList_type* m_events = {nullptr}; ///< Data events to evaluate PDF on Store m_cache; ///< Store of intermediate values for the PDF calculation bool m_ownEvents = {false}; ///< Flag as to whether events are owned by this PDF or not @@ -116,7 +116,10 @@ namespace AmpGen bool m_verbosity = {false}; ///< Flag for verbose printing std::string m_objCache = {""}; ///< Directory that contains (cached) amplitude objects std::string m_prefix = {""}; ///< Prefix for matrix elements + const MinuitParameterSet* m_mps = {nullptr}; + void addMatrixElement( std::pair& particleWithCoupling, const MinuitParameterSet& mps ); + }; } // namespace AmpGen diff --git a/AmpGen/MetaUtils.h b/AmpGen/MetaUtils.h index 20b6ac06253..1450998d3e1 100644 --- a/AmpGen/MetaUtils.h +++ b/AmpGen/MetaUtils.h @@ -83,6 +83,7 @@ namespace AmpGen { return std::is_constructible::value && (false == std::is_same::value); } + template std::vector typelist() { diff --git a/AmpGen/MsgService.h b/AmpGen/MsgService.h index 3067529e460..3d91b75d63f 100644 --- a/AmpGen/MsgService.h +++ b/AmpGen/MsgService.h @@ -17,10 +17,11 @@ #include #define WARNINGLEVEL 1 -#define FCNNAMELENGTH 45 namespace AmpGen { namespace detail { + constexpr static int FCNNAMELENGTH = 45; + inline std::string trimmedString( std::string thing, const unsigned int& length = FCNNAMELENGTH ) { size_t pos2=0; @@ -42,6 +43,10 @@ namespace AmpGen { } return thing.size() < length ? 
thing : thing.substr( 0, length ) + "..."; } + inline std::ostream& labelled_stream(const std::string& function_name) + { + return std::cout << "\033[2;34m" << std::left << std::setw(FCNNAMELENGTH) << trimmedString(function_name) << " INFO " << "\033[0m"; + } template struct debug_type : std::false_type {}; } } @@ -53,38 +58,36 @@ namespace AmpGen { /// Used for printing verbose debugging messages, only if DEBUGLEVEL is defined. #define DEBUG( X ) { \ if constexpr( AmpGen::detail::debug_type::type>::value ) { \ - std::cout << "\033[2;32m" << std::left << std::setw( FCNNAMELENGTH ) << AmpGen::detail::trimmedString(__PRETTY_FUNCTION__) \ + std::cout << "\033[2;32m" << std::left << std::setw( AmpGen::detail::FCNNAMELENGTH ) << AmpGen::detail::trimmedString(__PRETTY_FUNCTION__) \ << " DEBUG " \ << "\033[0m" << X << " " << std::endl; } } /// @ingroup msgService macro INFO /// Used for printing information messages, and will always be printed. -#define INFO( X ) \ - std::cout << "\033[2;34m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ - << " INFO " \ - << "\033[0m" << X << std::endl +#define INFO( X ) \ + AmpGen::detail::labelled_stream(__PRETTY_FUNCTION__) << X << std::endl /// @ingroup msgService macro ERROR /// Used for printing errors messages, and will always be printed. #define ERROR( X ) \ - std::cout << "\033[1;31m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ + std::cout << "\033[1;31m" << std::left << std::setw( AmpGen::detail::FCNNAMELENGTH ) << AmpGen::detail::trimmedString( __PRETTY_FUNCTION__ ) \ << " ERROR " \ << "\033[0m" << X << std::endl /// @ingroup msgService macro FATAL /// Used for printing fatal errors messages, and will always be printed and will terminate the process afterwards. 
#define FATAL( X ) \ -{ std::cout << "\033[1;31m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ +{ std::cout << "\033[1;31m" << std::left << std::setw( AmpGen::detail::FCNNAMELENGTH ) << AmpGen::detail::trimmedString( __PRETTY_FUNCTION__ ) \ << " FATAL " \ << "\033[0m" << X << std::endl; \ - throw std::runtime_error( detail::trimmedString( __PRETTY_FUNCTION__)+ " FATAL" ) ;} + throw std::runtime_error( AmpGen::detail::trimmedString( __PRETTY_FUNCTION__)+ " FATAL" ) ;} /// @ingroup msgService macro FATAL /// Used for printing warning messages, can be switched off using WARNINGLEVEL. These messages are often harmless, but sometimes not! #ifdef WARNINGLEVEL #define WARNING( X ) \ - std::cout << "\033[1;35m" << std::left << std::setw( FCNNAMELENGTH ) << detail::trimmedString( __PRETTY_FUNCTION__ ) \ + std::cout << "\033[1;35m" << std::left << std::setw( AmpGen::detail::FCNNAMELENGTH ) << AmpGen::detail::trimmedString( __PRETTY_FUNCTION__ ) \ << " WARNING " \ << "\033[0m" << X << std::endl #else diff --git a/AmpGen/ProfileClock.h b/AmpGen/ProfileClock.h index 2b334861f9c..6da6bccbaa9 100644 --- a/AmpGen/ProfileClock.h +++ b/AmpGen/ProfileClock.h @@ -1,6 +1,7 @@ #ifndef AMPGEN_PROFILECLOCK_H #define AMPGEN_PROFILECLOCK_H 1 #include +#include #include "AmpGen/MsgService.h" #include "AmpGen/Utilities.h" @@ -44,7 +45,7 @@ namespace AmpGen{ tmax = pi > tmax ? pi : tmax; } t /= double(N); - t2 = sqrt( t2 / double(N) - t*t); + t2 = std::sqrt( t2 / double(N) - t*t); INFO( (name == "" ? 
typeof() : name ) << " " << t << " ± " << t2 << "[ms] per iteration << [" << tmin << ", " << tmax << "]" ); return t; } diff --git a/AmpGen/Store.h b/AmpGen/Store.h index 0a0f8f0e448..4ffccf44fb0 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -19,10 +19,10 @@ namespace AmpGen { m_nFields(nFields), m_store(m_nBlocks * m_nFields) {} - template Store( const Store& store, const std::vector& functors, const size_t& fieldsPerFunctor=0) - : Store(store.size(), functors, fieldsPerFunctor){ - for( auto& f : functors ) update(store,f); - } + // template Store( const Store& store, const std::vector& functors, const size_t& fieldsPerFunctor=0) + // : Store(store.size(), functors, fieldsPerFunctor){ + // for( auto& f : functors ) update(store,f); + // } template Store( const size_t& nEntries, const std::vector& functors, const size_t& fieldsPerFunctor = 0) { @@ -135,10 +135,11 @@ namespace AmpGen { std::map> m_index; }; } +/* using aos_store = AmpGen::Store; using soa_store = AmpGen::Store; ENABLE_DEBUG(aos_store) ENABLE_DEBUG(soa_store) - +*/ #endif diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index b5d4e70eee3..a2ef9e1a73f 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -16,6 +16,7 @@ namespace AmpGen { class EventList; + class EventListSIMD; /** @class SumPDF @brief A pdf that contains one or more terms. 
diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 594b95dcedc..6326576cdbe 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -10,14 +10,22 @@ namespace AmpGen { namespace AVX2d { + #define stl_fallback( x ) \ + inline float_t x( const float_t& v ){ auto a = v.to_array(); return float_t( std::x(a[0]), std::x(a[1]), std::x(a[2]), std::x(a[3]) ) ; } + struct float_t { __m256d data; static constexpr unsigned size = 4; typedef double scalar_type; float_t() = default; float_t(__m256d data ) : data(data) {} - float_t(const double& f ) : data( _mm256_set1_pd( f )) {} - float_t(const double* f ) : data( _mm256_loadu_pd( f ) ) {} + float_t(const double& f ) : data( _mm256_set1_pd( f )) {} + float_t(const double& x0, const double& x1, const double& x2, const double& x3 ) + { + double tmp[4] = {x0,x1,x2,x3}; + _mm256_loadu_pd(tmp); + } + float_t(const double* f ) : data( _mm256_loadu_pd( f ) ) {} void store( double* ptr ) const { _mm256_storeu_pd( ptr, data ); } std::array to_array() const { std::array b; store( &b[0] ); return b; } double at(const unsigned i) const { return to_array()[i] ; } @@ -43,27 +51,43 @@ namespace AmpGen { inline float_t operator>( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OS ); } inline float_t operator==( const float_t& lhs, const float_t& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); } inline float_t sqrt( const float_t& v ) { return _mm256_sqrt_pd(v); } + inline float_t abs ( const float_t& v ) { return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); } // inline float_t sin( const float_t& v ) { return sin256_pd(v) ; } // inline float_t cos( const float_t& v ) { return cos256_pd(v) ; } // inline float_t tan( const float_t& v ) { float_t s; float_t c; sincos256_pd(v, (__m256*)&s, (__m256*)&c) ; return s/c; } // inline float_t log( const float_t& v ) { return log256_ps(v) ; } // inline float_t exp( const float_t& v ) { return exp256_ps(v) ; } - inline float_t abs 
( const float_t& v ) { - static const __m256d sign_mask = _mm256_set1_pd(-0.); // -0. = 1 << 63 - return _mm256_andnot_pd(sign_mask, v); - } inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_pd( b, a, mask ); } inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? a : b; } inline float_t atan2( const float_t& y, const float_t& x ){ - std::array bx{x.to_array()}, by{y.to_array()}, rt; - for( unsigned i = 0 ; i != 4 ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); - return float_t (rt.data() ); + std::array bx{x.to_array()}, by{y.to_array()}; + return float_t ( + std::atan2( by[0], bx[0]) + , std::atan2( by[1], bx[1]) + , std::atan2( by[2], bx[2]) + , std::atan2( by[3], bx[3]) ); + } + inline __m256i double_to_int( const float_t& x ) + { + // based on: https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx + return _mm256_sub_epi64(_mm256_castpd_si256(x + _mm256_set1_pd(0x0018000000000000)), + _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))); } - inline float_t log( const float_t& x ) + inline float_t gather( const double* base_addr, const float_t& offsets) + { + return _mm256_i64gather_pd(base_addr, double_to_int(offsets),sizeof(double)); + } + stl_fallback( log ) + stl_fallback( exp ) + stl_fallback( tan ) + stl_fallback( sin ) + stl_fallback( cos ) + + inline float_t remainder( const float_t& a, const float_t& b ){ return a - _mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF) * b; } + inline float_t fmod( const float_t& a, const float_t& b ) { - std::array bx{x.to_array()}, rt; - for( unsigned i = 0 ; i != 4 ; ++i ) rt[i] = std::log(bx[i]); - return float_t (rt.data()); + auto r = remainder( abs(a), abs(b) ); + return select( a > 0., r, -r ); } struct complex_t { @@ -123,22 +147,17 @@ namespace AmpGen { inline complex_t select(const float_t& mask, const complex_t& a, const float_t& b ) { return complex_t( select(mask, 
a.re, b ) , select(mask, a.im, 0.f) ); } inline complex_t select(const bool& mask , const complex_t& a, const complex_t& b ) { return mask ? a : b; } inline complex_t exp( const complex_t& v ){ - // float_t s; float_t c; sincos256_ps(v.im, (__m256*)&s, (__m256*)&c); - auto vr = v.re.to_array(); - auto vi = v.im.to_array(); - std::array rt_re; - std::array rt_im; - for( unsigned i = 0 ; i != 4; ++i ){ - rt_re[i] = std::exp( vr[i] ) * cos( vi[i] ); - rt_im[i] = std::exp( vr[i] ) * sin( vi[i] ); - // rt[i] = complex_t( exp(vr[i]) * cos(vi[i]), exp(vr[i]) * sin( vi[i] ) ); - } - return complex_t( rt_re.data(), rt_im.data() ); + return exp( v.re) * complex_t( cos( v.im ), sin( v.im ) ); } inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) { return _mm256_fmadd_pd(a, b, c ); } + inline complex_t sqrt( const complex_t& v ) + { + auto r = abs(v); + return complex_t ( sqrt( 0.5 * (r + v.re) ), sqrt( 0.5*( r - v.re ) ) ); + } inline std::ostream& operator<<( std::ostream& os, const complex_t& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } #pragma omp declare reduction(+: float_t: \ diff --git a/AmpGen/simd/utils.h b/AmpGen/simd/utils.h index 094877931b2..62099046210 100644 --- a/AmpGen/simd/utils.h +++ b/AmpGen/simd/utils.h @@ -45,7 +45,7 @@ namespace AmpGen { template <> struct is_vector_type : std::false_type {}; #endif template simd_type gather( - const container_type& container, const functor_type& functor, unsigned offset=0, float df =0.) + const container_type& container, const functor_type& functor, unsigned offset=0, typename simd_type::scalar_type df =0.) { std::array rv; if( df == 0. 
) diff --git a/apps/ConvertToSourceCode.cpp b/apps/ConvertToSourceCode.cpp index c6864aca788..94ffd3f8567 100644 --- a/apps/ConvertToSourceCode.cpp +++ b/apps/ConvertToSourceCode.cpp @@ -73,8 +73,8 @@ void create_integration_tests(T& pdf, template void generate_source(T& pdf, EventList& normEvents, const std::string& sourceFile, MinuitParameterSet& mps, const double& sf) { - bool normalise = NamedParameter("Normalise",true); - std::string type = NamedParameter( "Type", "CoherentSum" ); + bool normalise = NamedParameter("Normalise",true); + std::string type = NamedParameter("Type", "CoherentSum" ); double norm = 1; if( normalise ){ diff --git a/src/Array.cpp b/src/Array.cpp index f4bdb97412a..4a135861e7e 100644 --- a/src/Array.cpp +++ b/src/Array.cpp @@ -21,7 +21,12 @@ std::string Array::to_string(const ASTResolver* resolver) const { auto head = m_top.to_string(resolver); if( is(m_address) ) return head+"["+ std::to_string(int(std::real(m_address()))) +"]"; + auto offset = m_address.to_string(resolver); + if( resolver != nullptr && resolver->enableAVX() ) + { + return " gather( &(" + head + "), " + offset + ")"; + } auto pos = head.find_last_of("]"); if( pos != std::string::npos ){ auto st1 = head.substr(0,pos); diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 75fd7a19571..910b3c73711 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -34,14 +34,17 @@ using namespace AmpGen; CoherentSum::CoherentSum() = default; +//ENABLE_DEBUG(CoherentSum) + CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, const std::string& prefix ) - : m_rules (mps) + : m_rules (mps) , m_evtType (type) , m_printFreq(NamedParameter( "CoherentSum::PrintFrequency", 100) ) , m_dbThis (NamedParameter( "CoherentSum::Debug" , false)) , m_verbosity(NamedParameter( "CoherentSum::Verbosity" , 0) ) - , m_objCache (NamedParameter("CoherentSum::ObjectCache" ,"") ) + , m_objCache (NamedParameter("CoherentSum::ObjectCache" ,"") ) , m_prefix (prefix) + , 
m_mps(&mps) { auto amplitudes = m_rules.getMatchingRules( m_evtType, prefix); if( amplitudes.size() == 0 ){ @@ -52,15 +55,19 @@ CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, m_normalisations.resize( m_matrixElements.size(), m_matrixElements.size() ); size_t nThreads = NamedParameter ("nCores" , std::thread::hardware_concurrency(), "Number of threads to use" ); ThreadPool tp(nThreads); - //#pragma omp parallel for for(size_t i = 0; i < m_matrixElements.size(); ++i){ - tp.enqueue( [i,this,&mps,&litudes]{ - m_matrixElements[i] = TransitionMatrix( amplitudes[i].first, amplitudes[i].second, mps, this->m_evtType.getEventFormat(), this->m_dbThis); - CompilerWrapper().compile( m_matrixElements[i], this->m_objCache); } ); + tp.enqueue( [i,this,&mps,&litudes]{ + auto& [p, c] = amplitudes[i]; + m_matrixElements[i] = + TransitionMatrix(p, c, + CompiledExpression( + p.getExpression(), p.decayDescriptor(), + this->m_evtType.getEventFormat(), DebugSymbols(), this->m_mps ) ); + CompilerWrapper().compile( m_matrixElements[i], this->m_objCache); + } ); } } - void CoherentSum::prepare() { transferParameters(); @@ -75,7 +82,7 @@ void CoherentSum::prepare() } clockEval.stop(); ProfileClock clockIntegral; - if ( m_integrator.isReady()) updateNorms(); + if (m_integrator.isReady()) updateNorms(); else if ( m_verbosity ) WARNING( "No simulated sample specified for " << this ); clockIntegral.stop(); if ( m_verbosity && m_prepareCalls % 100 == 0 ) { @@ -175,8 +182,13 @@ void CoherentSum::generateSourceCode(const std::string& fname, const double& nor bool includePythonBindings = NamedParameter("CoherentSum::IncludePythonBindings",false); for ( auto& p : m_matrixElements ){ - stream << p << std::endl; - p.compileWithParameters( stream ); + auto expr = CompiledExpression( + p.expression(), + p.decayDescriptor(), + m_evtType.getEventFormat(), DebugSymbols() , m_mps ); + expr.prepare(); + stream << expr << std::endl; + expr.compileWithParameters( stream ); if( 
includePythonBindings ) p.compileDetails( stream ); } Expression event = Parameter("x0",0,true); @@ -254,17 +266,18 @@ void CoherentSum::reset( bool resetEvents ) } } -void CoherentSum::setEvents( EventList_type& list ) +void CoherentSum::setEvents( const EventList_type& list ) { - if ( m_verbosity ) INFO( "Setting event list with:" << list.size() << " events for " << this ); + DEBUG( "Setting event list with:" << list.size() << " events for " << this ); reset(); + for( auto& me : m_matrixElements ){ DEBUG("Registering: " << me.name() ) ; } if( m_ownEvents && m_events != nullptr ) delete m_events; m_events = &list; m_cache = Store( m_events->size(), m_matrixElements ); } -void CoherentSum::setMC( EventList_type& sim ) +void CoherentSum::setMC( const EventList_type& sim ) { if ( m_verbosity ) INFO( "Setting norm. event list with:" << sim.size() << " events for " << this ); reset(); diff --git a/src/EventListSIMD.cpp b/src/EventListSIMD.cpp index 4f791f5edf4..3c7a644c34c 100644 --- a/src/EventListSIMD.cpp +++ b/src/EventListSIMD.cpp @@ -33,6 +33,8 @@ #include "AmpGen/simd/utils.h" using namespace AmpGen; +// ENABLE_DEBUG(EventListSIMD) + EventListSIMD::EventListSIMD( const EventType& type ) : m_eventType( type ) {} void EventListSIMD::loadFromFile( const std::string& fname, const ArgumentPack& args ) @@ -134,14 +136,15 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) EventListSIMD::EventListSIMD( const EventList& other ) : EventListSIMD( other.eventType() ) { m_data = Store(other.size(), m_eventType.eventSize() ); + DEBUG("Converting EventList -> EventListSIMD, allocate: " << m_data.size() << " events in " << m_data.nBlocks() << " with: " << m_data.nFields() << " fields"); m_weights.resize( m_data.nBlocks() ); m_genPDF.resize ( m_data.nBlocks() ); for( unsigned block = 0 ; block != m_data.nBlocks(); block++ ) { for( unsigned j = 0 ; j != m_data.nFields(); ++j ) - m_data(block, j ) = utils::gather(other, [j](auto& event){ return event[j]; } 
, block * float_v::size ); + m_data(block, j) = utils::gather(other, [j](auto& event){ return event[j]; } , block * float_v::size ); m_weights[block] = utils::gather(other, [](auto& event){ return event.weight(); }, block * float_v::size, 0); - m_genPDF [block] = utils::gather(other, [](auto& event){ return event.genPdf(); }, block * float_v::size, 1 ); + m_genPDF [block] = utils::gather(other, [](auto& event){ return event.genPdf(); }, block * float_v::size, 1); } } diff --git a/src/ProgressBar.cpp b/src/ProgressBar.cpp index 84ba994f132..73e940e5855 100644 --- a/src/ProgressBar.cpp +++ b/src/ProgressBar.cpp @@ -21,8 +21,10 @@ void ProgressBar::print(const double& percentage, const std::string& message) int val = int(percentage * 100); if( val == m_lastPercent ) return; m_lastPercent = val; - std::cout << "\r\033[2;34m" << std::left << std::setw( FCNNAMELENGTH ) << m_context << " INFO " << "\033[0m"; + std::cout << "\r\033[2;34m" << std::left << std::setw( detail::FCNNAMELENGTH ) << m_context << " INFO " << "\033[0m"; std::cout << "Completed: " << std::right << std::setw(3) << val << "% " << "["; + + // detail::labelled_stream(m_context) << "Completed: " << std::right << std::setw(3) << val << "% " << "["; std::fill_n(std::ostream_iterator(std::cout), lpad, '|'); std::fill_n(std::ostream_iterator(std::cout), m_width-lpad, ' '); std::cout << "]"; diff --git a/test/test_avx2d.cpp b/test/test_avx2d.cpp new file mode 100644 index 00000000000..20119f426e8 --- /dev/null +++ b/test/test_avx2d.cpp @@ -0,0 +1,64 @@ + +#define BOOST_TEST_DYN_LINK + +#define BOOST_TEST_MODULE "test_avx2" + +#include +namespace utf = boost::unit_test; + + +#if ENABLE_AVX2 +#include "AmpGen/simd/avx2d_types.h" + +using namespace AmpGen; + +BOOST_AUTO_TEST_CASE( test_fmod ) +{ + std::vector a = {5.1, -5.1, 5.1, -5.1}; + std::vector b = {3.0, +3.0, -3.0, -3.0}; + + AVX2d::float_t av( a.data() ); + AVX2d::float_t bv( b.data() ); + + auto modv = AVX2d::fmod(av,bv); + BOOST_TEST_MESSAGE( "fmod = " 
<< modv ); + + auto mod = modv.to_array(); + BOOST_TEST( mod[0] == 2.1 , boost::test_tools::tolerance(1e-15)); + BOOST_TEST( mod[1] == -2.1 , boost::test_tools::tolerance(1e-15)); + BOOST_TEST( mod[2] == 2.1 , boost::test_tools::tolerance(1e-15)); + BOOST_TEST( mod[3] == -2.1 , boost::test_tools::tolerance(1e-15)); +} + +BOOST_AUTO_TEST_CASE( test_double_to_int ) +{ + std::vector a = {17.4, -19.2, 12.1, -4007.3}; + auto f = AVX2d::double_to_int( AVX2d::float_t( a.data() )); + alignas(32) uint64_t t[4]; + _mm256_store_si256( (__m256i*)t, f); + BOOST_TEST( t[0] == 17 ); + BOOST_TEST( t[1] == -19 ); + BOOST_TEST( t[2] == 12 ); + BOOST_TEST( t[3] == -4007 ); +} + +BOOST_AUTO_TEST_CASE( test_gather ) +{ + // 0 1 2 3 4 5 6 + std::vector data = { 15.4, 19.7, 121.8, -15.6, M_PI, sqrt(2), 5.7, 12 }; + std::vector addr = { 0, 5, 3, 3 }; + auto v = AVX2d::gather( data.data(), AVX2d::float_t(addr.data()) ).to_array(); + BOOST_TEST( v[0] == data[0] ); + BOOST_TEST( v[1] == data[5] ); + BOOST_TEST( v[2] == data[3] ); + BOOST_TEST( v[3] == data[3] ); +} + +#else +BOOST_AUTO_TEST_CASE( test_dummy ) +{ + BOOST_TEST( 1 == 1 ); +} +#endif + + From 7ad8e91a2a4ece2af87b82d1667644a64b1447d6 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 15 Apr 2020 20:25:42 +0200 Subject: [PATCH 08/67] fix for OSX --- Standalone.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Standalone.cmake b/Standalone.cmake index f0248d9c909..ad04032760d 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -166,8 +166,9 @@ if( ENABLE_AVX2 ) endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" ) - target_link_libraries(AmpGen PUBLIC stdc++) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lstdc++") + target_link_libraries(AmpGen PUBLIC stdc++ ) + message(STATUS "Using OSX specific flags: -lm -lstdc++ -lSystem") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lstdc++ -lSystem") elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") 
target_link_libraries(AmpGen PUBLIC stdc++) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lm -lstdc++") From 462ae542fb9aff04f8de902f7284d937965dbbcb Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 14:41:47 +0200 Subject: [PATCH 09/67] Fix simd'ed integration and plotting --- AmpGen/CoherentSum.h | 4 +- AmpGen/CompiledExpression.h | 6 +- AmpGen/EventListSIMD.h | 6 +- AmpGen/Expression.h | 11 +++ AmpGen/Integrator.h | 100 +++++++++------------ AmpGen/LiteSpan.h | 62 ++++--------- AmpGen/MetaUtils.h | 10 ++- AmpGen/PolarisedSum.h | 4 +- AmpGen/Projection.h | 7 +- AmpGen/Store.h | 49 ++++++----- AmpGen/SumPDF.h | 17 ++-- AmpGen/simd/avx2d_types.h | 58 ++++++++++--- Standalone.cmake | 2 +- examples/FitterWithPolarisation.cpp | 22 ++--- src/AmplitudeRules.cpp | 2 +- src/BinaryExpression.cpp | 7 +- src/CoherentSum.cpp | 43 ++++------ src/Expression.cpp | 23 +++++ src/Integrator.cpp | 33 ++----- src/Minimiser.cpp | 3 + src/PolarisedSum.cpp | 129 ++++++++++++---------------- src/Projection.cpp | 20 ++--- test/test_avx2d.cpp | 12 +++ 23 files changed, 320 insertions(+), 310 deletions(-) diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 1db5690704d..0dbd08da458 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -21,7 +21,7 @@ #include "AmpGen/Projection.h" #include "AmpGen/MinuitParameter.h" #include "AmpGen/Store.h" - +#include "AmpGen/LiteSpan.h" namespace AmpGen { class LinearErrorPropagator; @@ -93,7 +93,7 @@ namespace AmpGen Bilinears norms() const { return m_normalisations ; } std::function evaluator(const EventList_type* = nullptr) const; - KeyedView componentEvaluator(const EventList_type* = nullptr) const; + KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; protected: std::vector> m_matrixElements; ///< Vector of matrix elements diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index be6c4eecfaf..1bd41d4d1f7 100644 --- a/AmpGen/CompiledExpression.h +++ 
b/AmpGen/CompiledExpression.h @@ -220,13 +220,13 @@ namespace AmpGen rt.prepare(); return rt; } - template - CompiledExpression + template + CompiledExpression make_expression( const Expression& expression, const std::string& name, const MinuitParameterSet& mps ) { - CompiledExpression rt(expression,name,{},{},&mps); + CompiledExpression rt(expression,name,{},{},&mps); rt.compile(); rt.prepare(); return rt; diff --git a/AmpGen/EventListSIMD.h b/AmpGen/EventListSIMD.h index 61873b890c3..248deff8455 100644 --- a/AmpGen/EventListSIMD.h +++ b/AmpGen/EventListSIMD.h @@ -60,9 +60,9 @@ namespace AmpGen EventListSIMD( const EventList& other ); const float_v* data() const { return m_data.data(); } operator Store () const { return m_data ; } - const auto& store() const { return m_data; } - const Event at(const unsigned& p) const { return EventListSIMD::operator[](p) ; } - const float_v* block(const unsigned& p) { return m_data.data() + p * m_data.nFields(); } + const auto& store() const { return m_data; } + const Event at(const unsigned& p) const { return EventListSIMD::operator[](p) ; } + const float_v* block(const unsigned& p) const { return m_data.data() + p * m_data.nFields(); } float_v weight(const unsigned& p) const { return m_weights[p]; } float_v genPDF(const unsigned& p) const { return m_genPDF[p]; } const Event operator[]( const size_t&) const; diff --git a/AmpGen/Expression.h b/AmpGen/Expression.h index 85c3f90dca3..49fe25acc3f 100644 --- a/AmpGen/Expression.h +++ b/AmpGen/Expression.h @@ -194,6 +194,17 @@ namespace AmpGen double m_defaultValue; bool m_resolved; }; + class ComplexParameter : public IExpression { + public: + ComplexParameter( const Parameter& real, const Parameter& imag ); + std::string to_string(const ASTResolver* resolver = nullptr ) const override; + void resolve( ASTResolver& resolver ) const override; + operator Expression() const ; + complex_t operator()() const override; + private: + Parameter m_real; + Parameter m_imag; + }; /** @ingroup 
ExpressionEngine class Ternary @brief Evaluates the ternary operator. diff --git a/AmpGen/Integrator.h b/AmpGen/Integrator.h index cc98c208031..f1f4f7c6c40 100644 --- a/AmpGen/Integrator.h +++ b/AmpGen/Integrator.h @@ -10,55 +10,19 @@ #include "AmpGen/EventListSIMD.h" #include "AmpGen/EventList.h" -/* - * Calculates Bilinears A_i A_j^* integrated over the phase-space. - * Integrates in blocks of (i,j) such that integrals can be queued and evaluated in blocks - * to optimise cache throughput. - */ - namespace AmpGen -{ - class Bilinears - { - private: - size_t rows; - size_t cols; - std::vector norms; - std::vector markAsZero; - std::vector calculate; - public: - Bilinears( const size_t& r = 0, const size_t& c = 0 ); - complex_t get(const size_t& x, const size_t& y) const; - template - complex_t get(const size_t& x, const size_t& y, T* integ = nullptr, const size_t& kx=0, const size_t& ky=0){ - if( integ != nullptr ) integ->queueIntegral(kx, ky, &norms[x*cols+y]); - /// will return the wrong answer for now, but queues for later.. 
- return norms[x*cols+y]; - } - void set(const size_t& x, const size_t& y, const complex_t& f ); - void setZero(const size_t& x, const size_t& y); - void resetCalculateFlags(); - complex_t& operator()( const size_t& x, const size_t& y ); - bool isZero(const size_t& x, const size_t& y); - bool workToDo(const size_t& x, const size_t& y) const; - void resize(const size_t& r, const size_t& c = 1 ); - }; - - template struct Integral - { - typedef std::function TransferFCN; - size_t i = {0}; - size_t j = {0}; - TransferFCN transfer; - Integral() = default; - Integral(const size_t& i, const size_t& j, TransferFCN t) - : i(i), j(j), transfer(t) {} - }; - +{ class Integrator { - typedef std::function TransferFCN; - + struct QueuedIntegral + { + QueuedIntegral() = default; + QueuedIntegral(complex_t* result, const unsigned& i, const unsigned& j) + : result(result), i(i), j(j) {} + complex_t* result = {nullptr}; + unsigned i = {0}; + unsigned j = {0}; + }; public: Integrator() = default; @@ -73,21 +37,14 @@ namespace AmpGen float_v norm_acc = 0.; for( size_t i = 0 ; i < events->nBlocks(); ++i ) { - m_weight[i] = events->weight(i) / events->genPDF(i); - norm_acc = norm_acc + m_weight[i]; + m_weight[i] = events->weight(i) / events->genPDF(i); + norm_acc += m_weight[i]; } m_norm = utils::sum_elements(norm_acc); } bool isReady() const; - void queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim = true); - void addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ); - void queueIntegral(const size_t& i, const size_t& j, complex_t* result); + void queueIntegral( complex_t* result, const unsigned& i, const unsigned& j ); void flush(); template return_type get( const unsigned& index, const unsigned& evt ) const ; @@ -102,19 +59,44 @@ namespace AmpGen if( m_events != nullptr ) m_cache.update( static_cast(m_events)->store(), expression ); #endif } - template - const T* events() const { 
return static_cast(m_events) ; } + template const T* events() const { return static_cast(m_events) ; } + const Store& cache() const { return m_cache; } private: static constexpr size_t N = {8}; ///unroll factor size_t m_counter = {0}; /// - std::array, N> m_integrals; + std::array m_integrals; const void* m_events = {nullptr}; std::vector m_weight; Store m_cache; double m_norm = {0}; void integrateBlock(); }; + + class Bilinears + { + private: + size_t rows; + size_t cols; + std::vector norms; + std::vector markAsZero; + std::vector calculate; + public: + Bilinears( const size_t& r = 0, const size_t& c = 0 ); + complex_t get(const size_t& x, const size_t& y) const; + complex_t get(const size_t& x, const size_t& y, Integrator* integ = nullptr, const size_t& kx=0, const size_t& ky=0){ + if( integ != nullptr ) integ->queueIntegral(&norms[x*cols+y], kx, ky ); + /// will return the wrong answer for now, but queues for later.. + return norms[x*cols+y]; + } + void set(const size_t& x, const size_t& y, const complex_t& f ); + void setZero(const size_t& x, const size_t& y); + void resetCalculateFlags(); + complex_t& operator()( const size_t& x, const size_t& y ); + bool isZero(const size_t& x, const size_t& y); + bool workToDo(const size_t& x, const size_t& y) const; + void resize(const size_t& r, const size_t& c = 1 ); + }; } // namespace AmpGen #endif diff --git a/AmpGen/LiteSpan.h b/AmpGen/LiteSpan.h index ed4692a015b..e947665a4ef 100644 --- a/AmpGen/LiteSpan.h +++ b/AmpGen/LiteSpan.h @@ -17,54 +17,24 @@ namespace AmpGen { const type* m_data = {nullptr}; unsigned m_size = {0}; }; - /// functor-like object that documents what is stored in each slot; - /// This mutated into a cache-like object as was I was writing it, so - /// should rename it to something else ... 
- template class KeyedView + + template struct KeyedFunctors { - typedef typename container_type::value_type value_type; - public: - KeyedView( const container_type& container, const unsigned width ) : - m_container(&container), - m_cache( width * container.size(),0 ), - m_width(width), - m_size(container.size()), - m_keys( width, "") {} - unsigned index(const value_type& it) const { return it.index() ; } - const std::string& key(const unsigned int& column ) const { return m_keys[column] ; } - const return_type* operator()( const value_type& it ) const { - if( m_width *index(it) >= m_cache.size()) ERROR("Out-of-bounds access : " << index(it) ); - return &m_cache[m_width * index(it)]; } - const cache_type& operator()(const value_type& it, const unsigned entry ) const { - if( m_width * index(it) + entry > m_cache.size() ) ERROR("Invalid cache element: " << m_width * index(it) + entry > m_cache.size() ); - return m_cache[m_width * index(it) + entry] ; } - unsigned width() const { return m_width ; } + std::vector > functors; + std::vector keys; + std::vector titles; + template + void add(const functor_type& functor, const std::string& key, const std::string& title="") + { + functors.push_back(functor); + keys.push_back(key); + titles.push_back(title); + } + std::vector operator()( const arg_type& arg ) const { - template void set(const functor_type& functor, - unsigned int column, - const std::string& key = "") - { - for(const auto& element : *m_container) m_cache[ element.index() * m_width + column] = functor(element); - if( key != "" ) m_keys[column] = key; - } - cache_type& operator()(const value_type& it, const unsigned entry ) { - auto pos = m_width * index(it) + entry; - if( pos >= m_cache.size() ) ERROR("Out-of-bounds access: " << pos << " " << index(it) + entry); - return m_cache[pos] ; } - void setKey(const unsigned& column, const std::string& key ) { m_keys[column] = key ; } - void print() - { - INFO( "width = " << m_width << ", size = " << m_size << " keys = 
" << vectorToString( m_keys , " ") << " cache size = " << m_cache.size() ); - for( unsigned int i = 0 ; i != m_width ; ++i ) std::cout << m_cache[i] << " "; - } - private: - const container_type* m_container; - std::vector m_cache; - unsigned m_width; - unsigned m_size; - std::vector m_keys; + std::vector rt; + for( auto& f : functors ) rt.push_back( f(arg) ); + return rt; } }; } diff --git a/AmpGen/MetaUtils.h b/AmpGen/MetaUtils.h index 1450998d3e1..11b33f7ca4f 100644 --- a/AmpGen/MetaUtils.h +++ b/AmpGen/MetaUtils.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace AmpGen { @@ -24,7 +25,7 @@ namespace AmpGen return name; } - template std::string typeof( TYPE t ) { return typeof(); } + template std::string typeof( const TYPE& t ) { return typeof(); } namespace detail { template struct zeroType { typedef T type; }; @@ -43,6 +44,11 @@ namespace AmpGen f( std::get( t ) ); for_each( t, f ); } + template + void for_each_sequence( iterator begin, iterator end, transform_types... 
transforms) + { + for_each( std::tuple(transforms...), [&](auto& transform){ std::for_each( begin, end, transform ); } ); + } template typename std::enable_if_t @@ -98,6 +104,8 @@ namespace AmpGen template struct isTuple: std::false_type {}; template struct isTuple>: std::true_type {}; + template struct isVector : std::false_type {}; + template struct isVector> : std::true_type {}; } // namespace AmpGen #endif diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index 3bba2f76609..f9908ab196f 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -71,12 +71,13 @@ namespace AmpGen Tensor transitionMatrix(); const TransitionMatrix& operator[](const size_t& i) const { return m_matrixElements[i] ; } std::function evaluator(const EventList_type* = nullptr) const; - KeyedView componentEvaluator(const EventList_type* = nullptr) const; + KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; private: size_t m_nCalls = {0}; real_t m_norm = {1}; EventList_type* m_events = {nullptr}; Store m_cache = {}; + Store m_pdfCache = {}; bool m_ownEvents = {false}; MinuitParameterSet* m_mps = {nullptr}; MinuitProxy m_weight = {nullptr,1}; @@ -93,7 +94,6 @@ namespace AmpGen std::pair m_dim; std::vector> m_matrixElements; CompiledExpression m_probExpression; - std::vector m_pdfCache; std::vector> indexProduct(const std::vector>&, const std::vector&) const; std::vector polarisations(const std::string&) const ; }; diff --git a/AmpGen/Projection.h b/AmpGen/Projection.h index 2a2481693fa..d1866b373c7 100644 --- a/AmpGen/Projection.h +++ b/AmpGen/Projection.h @@ -11,6 +11,7 @@ #include "THStack.h" #include "AmpGen/ArgumentPack.h" +#include "AmpGen/Types.h" #include "AmpGen/LiteSpan.h" namespace AmpGen @@ -21,6 +22,7 @@ namespace AmpGen class Projection { + using keyedFunctors = KeyedFunctors; public: Projection(); template @@ -35,7 +37,8 @@ namespace AmpGen { return projInternal(evts, ArgumentPack(args...) 
); } - template std::tuple, THStack*> operator()(const eventlist_type& evts, const KeyedView& weightFunction, const ARGS... args ) const + template std::tuple, THStack*> operator()(const eventlist_type& evts, + const keyedFunctors& weightFunction, const ARGS... args ) const { return projInternal(evts, weightFunction, ArgumentPack(args...) ); } @@ -52,7 +55,7 @@ namespace AmpGen template TH1D* projInternal(const eventlist_type&, const ArgumentPack&) const; template - std::tuple, THStack*> projInternal(const eventlist_type&, const KeyedView&, const ArgumentPack&) const; + std::tuple, THStack*> projInternal(const eventlist_type&, const keyedFunctors&, const ArgumentPack&) const; std::function m_func; std::string m_name = {""}; std::string m_xAxisTitle = {""}; diff --git a/AmpGen/Store.h b/AmpGen/Store.h index 4ffccf44fb0..d039e2e9f23 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -18,21 +18,27 @@ namespace AmpGen { m_nBlocks(utils::aligned_size( nEntries ) / utils::size::value ), m_nFields(nFields), m_store(m_nBlocks * m_nFields) {} - - // template Store( const Store& store, const std::vector& functors, const size_t& fieldsPerFunctor=0) - // : Store(store.size(), functors, fieldsPerFunctor){ - // for( auto& f : functors ) update(store,f); - // } - + + template + void addFunctor( const functor_type& functor, unsigned fieldsPerFunctor=0 ) + { + auto vsize = fieldsPerFunctor == 0 ? functor.returnTypeSize() / sizeof(stored_type) : fieldsPerFunctor; + DEBUG("Registering: " << functor.name() << " field = " << m_nFields ); + m_index[ functor.name() ] = std::make_pair(m_nFields, vsize); + m_nFields += vsize; + } + template Store( const size_t& nEntries, const std::vector& functors, const size_t& fieldsPerFunctor = 0) { - for(const auto& functor : functors) - { - auto vsize = fieldsPerFunctor == 0 ? 
functor.returnTypeSize() / sizeof(stored_type) : fieldsPerFunctor; - DEBUG("Registering: " << functor.name() << " I = " << m_nFields << " / " << functors.size() * vsize ); - m_index[ functor.name() ] = std::make_pair(m_nFields, vsize); - m_nFields += vsize; - } + for(const auto& functor : functors) addFunctor( functor, fieldsPerFunctor); + m_nEntries = nEntries; + m_nBlocks = utils::aligned_size(nEntries)/utils::size::value; + m_store.resize(m_nBlocks * m_nFields); + } + template ::value>::type> + Store( const size_t& nEntries, const functor_type& functor, const size_t& fieldsPerFunctor=0 ) + { + addFunctor(functor); m_nEntries = nEntries; m_nBlocks = utils::aligned_size(nEntries)/utils::size::value; m_store.resize(m_nBlocks * m_nFields); @@ -51,6 +57,11 @@ namespace AmpGen { if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; else return m_store[index*m_nFields+field]; } + template + __always_inline const return_type get(const size_t& index, const size_t& field ) const + { + return utils::at( operator()( index / utils::size::value, field ), index % utils::size::value ); + } __always_inline const stored_type* data() const { return m_store.data(); } __always_inline stored_type& operator()(const size_t& index, const size_t& field) { @@ -135,11 +146,9 @@ namespace AmpGen { std::map> m_index; }; } -/* -using aos_store = AmpGen::Store; -using soa_store = AmpGen::Store; - -ENABLE_DEBUG(aos_store) -ENABLE_DEBUG(soa_store) -*/ +// using aos_store = AmpGen::Store; +// using soa_store = AmpGen::Store; +// +// ENABLE_DEBUG(aos_store) +// ENABLE_DEBUG(soa_store) #endif diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index a2ef9e1a73f..340f7aa18f9 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -45,7 +45,7 @@ namespace AmpGen private: typedef typename eventListType::value_type eventValueType; ///< The value type stored in the eventListType std::tuple m_pdfs; ///< The tuple of probability density functions - eventListType* m_events = 
{nullptr}; ///< The event list to evaluate likelihoods on + const eventListType* m_events = {nullptr}; ///< The event list to evaluate likelihoods on public: /// Default Constructor @@ -75,8 +75,9 @@ namespace AmpGen float_v LL = 0.f; for_each( m_pdfs, []( auto& f ) { f.prepare(); } ); #pragma omp parallel for reduction( +: LL ) - for ( unsigned int block = 0; block < m_events->nBlocks(); ++block ) { - LL += m_events->weight(block) * log(this->operator()(m_events->block(block), block)); + for ( size_t block = 0; block < m_events->nBlocks(); ++block ) + { + LL += m_events->weight(block) * AVX2d::log(this->operator()(m_events->block(block), block)); } return -2 * utils::sum_elements(LL); } @@ -119,12 +120,12 @@ namespace AmpGen } ); return arrayToFunctor(values); } - KeyedView componentEvaluator(const eventListType* events) const + KeyedFunctors componentEvaluator(const eventListType* events) const { - KeyedView view(*events, nPDFs() ); - unsigned pdf_counter = 0; - for_each( this->m_pdfs, [&events, &view, &pdf_counter](const auto& pdf) mutable { - view.set(pdf.evaluator(events), pdf_counter++, typeof(pdf) ); + KeyedFunctors view; + for_each( this->m_pdfs, [&view, &events]( const auto& pdf) mutable { + auto eval = pdf.evaluator(events); + view.add([eval](const auto& event){ return eval(event) ; } , typeof(pdf), "" ); } ); return view; } diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 6326576cdbe..46565686a49 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -5,7 +5,6 @@ #include #include #include -#include "AmpGen/simd/avx_mathfun.h" #include namespace AmpGen { @@ -23,7 +22,7 @@ namespace AmpGen { float_t(const double& x0, const double& x1, const double& x2, const double& x3 ) { double tmp[4] = {x0,x1,x2,x3}; - _mm256_loadu_pd(tmp); + data = _mm256_loadu_pd(tmp); } float_t(const double* f ) : data( _mm256_loadu_pd( f ) ) {} void store( double* ptr ) const { _mm256_storeu_pd( ptr, data ); } @@ -55,10 +54,10 @@ namespace 
AmpGen { // inline float_t sin( const float_t& v ) { return sin256_pd(v) ; } // inline float_t cos( const float_t& v ) { return cos256_pd(v) ; } // inline float_t tan( const float_t& v ) { float_t s; float_t c; sincos256_pd(v, (__m256*)&s, (__m256*)&c) ; return s/c; } - // inline float_t log( const float_t& v ) { return log256_ps(v) ; } // inline float_t exp( const float_t& v ) { return exp256_ps(v) ; } inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_pd( b, a, mask ); } inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? a : b; } + inline float_t sign ( const float_t& v){ return select( v > 0., +1., -1. ); } inline float_t atan2( const float_t& y, const float_t& x ){ std::array bx{x.to_array()}, by{y.to_array()}; return float_t ( @@ -77,12 +76,51 @@ namespace AmpGen { { return _mm256_i64gather_pd(base_addr, double_to_int(offsets),sizeof(double)); } - stl_fallback( log ) + + inline void frexp(const AVX2d::float_t& value, AVX2d::float_t& mant, AVX2d::float_t& exponent) + { + auto arg_as_int = _mm256_castpd_si256(value); + static const AVX2d::float_t offset(4503599627370496.0 + 1022.0); // 2^52 + 1022.0 + static const __m256i pow2_52_i = _mm256_set1_epi64x(0x4330000000000000); // *reinterpret_cast(&pow2_52_d); + auto b = _mm256_srl_epi64(arg_as_int, _mm_cvtsi32_si128(52)); + auto c = _mm256_or_si256( b , pow2_52_i); + exponent = AVX2d::float_t( _mm256_castsi256_pd(c) ) - offset; + mant = _mm256_castsi256_pd(_mm256_or_si256(_mm256_and_si256 (arg_as_int, _mm256_set1_epi64x(0x000FFFFFFFFFFFFFll) ), _mm256_set1_epi64x(0x3FE0000000000000ll))); + } + + inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) + { + return _mm256_fmadd_pd(a, b, c); + } + inline float_t log(const AVX2d::float_t& arg) + { + static const AVX2d::float_t corr = 0.693147180559945286226764; + static const AVX2d::float_t CL15 = 0.148197055177935105296783; + static const AVX2d::float_t 
CL13 = 0.153108178020442575739679; + static const AVX2d::float_t CL11 = 0.181837339521549679055568; + static const AVX2d::float_t CL9 = 0.22222194152736701733275; + static const AVX2d::float_t CL7 = 0.285714288030134544449368; + static const AVX2d::float_t CL5 = 0.399999999989941956712869; + static const AVX2d::float_t CL3 = 0.666666666666685503450651; + static const AVX2d::float_t CL1 = 2.0; + AVX2d::float_t mant, exponent; + frexp(arg, mant, exponent); + auto x = (mant - 1.) / (mant + 1.); + auto x2 = x * x; + auto p = fmadd(CL15, x2, CL13); + p = fmadd(p, x2, CL11); + p = fmadd(p, x2, CL9); + p = fmadd(p, x2, CL7); + p = fmadd(p, x2, CL5); + p = fmadd(p, x2, CL3); + p = fmadd(p, x2, CL1); + p = fmadd(p, x, corr * exponent); + return p; + } stl_fallback( exp ) stl_fallback( tan ) stl_fallback( sin ) stl_fallback( cos ) - inline float_t remainder( const float_t& a, const float_t& b ){ return a - _mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF) * b; } inline float_t fmod( const float_t& a, const float_t& b ) { @@ -149,15 +187,15 @@ namespace AmpGen { inline complex_t exp( const complex_t& v ){ return exp( v.re) * complex_t( cos( v.im ), sin( v.im ) ); } - inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) - { - return _mm256_fmadd_pd(a, b, c ); - } inline complex_t sqrt( const complex_t& v ) { auto r = abs(v); - return complex_t ( sqrt( 0.5 * (r + v.re) ), sqrt( 0.5*( r - v.re ) ) ); + return complex_t ( sqrt( 0.5 * (r + v.re) ), sign(v.im) * sqrt( 0.5*( r - v.re ) ) ); } + inline complex_t log( const complex_t& v ) + { + return complex_t( log( v.re ) , atan2(v.im, v.re) ); + } inline std::ostream& operator<<( std::ostream& os, const complex_t& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } #pragma omp declare reduction(+: float_t: \ diff --git a/Standalone.cmake b/Standalone.cmake index ad04032760d..ec5f4dbc520 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -13,7 +13,7 @@ if( NOT "${CMAKE_CXX_STANDARD}" ) endif() 
SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") -SET(ENABLE_AVX2 FALSE CACHE BOOL "ENABLE_AVX2") +SET(ENABLE_AVX2 TRUE CACHE BOOL "ENABLE_AVX2") SET(PRECISION "DOUBLE" CACHE STRING "PRECISION") set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/examples/FitterWithPolarisation.cpp b/examples/FitterWithPolarisation.cpp index 69b39fd171f..4e0abc26d69 100644 --- a/examples/FitterWithPolarisation.cpp +++ b/examples/FitterWithPolarisation.cpp @@ -58,7 +58,7 @@ int main( int argc, char* argv[] ) std::string dataFile = NamedParameter("DataSample", "" , "Name of file containing data sample to fit." ); std::string logFile = NamedParameter("LogFile" , "Fitter.log", "Name of the output log file"); std::string plotFile = NamedParameter("Plots" , "plots.root", "Name of the output plot file"); - + std::string simFile = NamedParameter("SgIntegratorFname", "" , "Name of file containing simulated sample for using in MC integration"); auto bNames = NamedParameter("Branches", std::vector() ,"List of branch names, assumed to be \033[3m daughter1_px ... daughter1_E, daughter2_px ... \033[0m" ).getVector(); @@ -68,7 +68,7 @@ int main( int argc, char* argv[] ) if( dataFile == "" ) FATAL("Must specify input with option " << italic_on << "DataSample" << italic_off ); if( pNames.size() == 0 ) FATAL("Must specify event type with option " << italic_on << " EventType" << italic_off); - size_t seed = NamedParameter ("Seed" , 0 , "Random seed used" ); + size_t seed = NamedParameter ("Seed" , 1 , "Random seed used" ); TRandom3 rndm; rndm.SetSeed( seed ); @@ -87,7 +87,7 @@ int main( int argc, char* argv[] ) the parsed options. 
For historical reasons, this is referred to as loading it from a "Stream" */ MinuitParameterSet MPS; MPS.loadFromStream(); - for( auto& p : MPS ) if( p->flag() == Flag::Free ) p->setResult( gRandom->Gaus( p->mean(), p->err() ), p->err(), 0,0 ); + // for( auto& p : MPS ) if( p->flag() == Flag::Free ) p->setResult( gRandom->Gaus( p->mean(), p->err() ), p->err(), 0,0 ); /* An EventType specifies the initial and final state particles as a vector that will be described by the fit. It is typically loaded from the interface parameter EventType. */ @@ -108,7 +108,7 @@ int main( int argc, char* argv[] ) /* Generate events to normalise the PDF with. This can also be loaded from a file, which will be the case when efficiency variations are included. Default number of normalisation events is 2 million. */ - EventList_type eventsMC = Generator<>(evtType, &rndm).generate(int(2e6)); + EventList_type eventsMC = simFile == "" ? EventList_type(Generator<>(evtType, &rndm).generate(int(3365617)) ) : EventList_type(simFile, evtType); sig.setMC( eventsMC ); @@ -146,16 +146,6 @@ FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitPar /* Make the plots for the different components in the PDF, i.e. the signal and backgrounds. The structure assumed the PDF is some SumPDF. */ -// unsigned int counter = 1; -// for_each(pdf.pdfs(), [&]( const auto& f ){ -// auto mc_plot3 = mc.makeDefaultProjections(WeightFunction(f.evaluator()), Prefix("Model_cat"+std::to_string(counter))); -// for( auto& plot : mc_plot3 ) -// { -// plot->Scale( ( data.integral() * f.getWeight() ) / plot->Integral() ); -// plot->Write(); -// } -// counter++; -// } ); /* Estimate the chi2 using an adaptive / decision tree based binning, down to a minimum bin population of 15, and add it to the output. */ // Chi2Estimator chi2( data, mc, pdf, 15 ); @@ -172,11 +162,11 @@ FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitPar /* Write out the data plots. 
This also shows the first example of the named arguments to functions, emulating python's behaviour in this area */ - + auto evaluator_per_component = std::get<0>( pdf.pdfs() ).componentEvaluator(); for( const auto& proj : projections ) { proj(mc, evaluator, PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); - //proj(mc, evaluator_per_component, PlotOptions::Prefix("amp"), PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); + proj(mc, evaluator_per_component, PlotOptions::Prefix("amp"), PlotOptions::Norm(data.size()), PlotOptions::AutoWrite() ); proj(data, PlotOptions::Prefix("Data") )->Write(); } fr->print(); diff --git a/src/AmplitudeRules.cpp b/src/AmplitudeRules.cpp index 98b66bcd496..b7a3a10e452 100644 --- a/src/AmplitudeRules.cpp +++ b/src/AmplitudeRules.cpp @@ -125,7 +125,7 @@ std::complex Coupling::operator()() const Expression Coupling::to_expression() const { - return m_expr != nullptr ? m_expr->expression() : ( m_isCartesian ? Parameter(m_re->name()) + 1i * Parameter(m_im->name()) : Parameter( m_re->name() ) * fcn::exp( 1i * m_sf * Parameter(m_im->name()) ) ); + return m_expr != nullptr ? m_expr->expression() : ( m_isCartesian ? 
ComplexParameter(Parameter(m_re->name()), Parameter(m_im->name())) : Parameter( m_re->name() ) * fcn::exp( 1i * m_sf * Parameter(m_im->name()) ) ); } std::complex TotalCoupling::operator()() const diff --git a/src/BinaryExpression.cpp b/src/BinaryExpression.cpp index f249e3bbdd9..fdd516580d4 100644 --- a/src/BinaryExpression.cpp +++ b/src/BinaryExpression.cpp @@ -36,10 +36,13 @@ complex_t Fmod::operator()() const { return 0; } complex_t Equal::operator()() const { return lval() == rval() ; } complex_t ATan2::operator()() const { return atan2( std::real(lval() ), std::real(rval() ) ); } -std::string Sum::to_string(const ASTResolver* resolver) const { +std::string Sum::to_string(const ASTResolver* resolver) const +{ return lval.to_string(resolver) + " + " + rval.to_string(resolver) ; } -std::string Sub::to_string(const ASTResolver* resolver) const { + +std::string Sub::to_string(const ASTResolver* resolver) const +{ return lval.to_string(resolver) + "-" + bracketed( rval, [](auto& expression){ return is(expression) || is(expression) ; } , resolver ) ; } std::string Equal::to_string(const ASTResolver* resolver) const { return "(" + lval.to_string(resolver) + " == "+ rval.to_string(resolver) +")"; } diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 910b3c73711..60c7d6f0a62 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -98,15 +98,12 @@ void CoherentSum::prepare() void CoherentSum::updateNorms() { - std::vector cacheIndex; - std::transform( m_matrixElements.begin(), m_matrixElements.end(), std::back_inserter(cacheIndex), - [this](auto& m){ return this->m_integrator.getCacheIndex( m ) ; } ); if(std::any_of(m_matrixElements.begin(),m_matrixElements.end(), [](auto& me){ return me.workToDo; } )) { for ( unsigned i = 0; i != m_matrixElements.size(); ++i ) for ( size_t j = i; j < size(); ++j ){ if( m_matrixElements[i].workToDo || m_matrixElements[j].workToDo ) - m_integrator.queueIntegral( cacheIndex[i], cacheIndex[j] ,i, j, &m_normalisations ); + 
m_normalisations.get(i, j, &m_integrator, i, j); } } m_integrator.flush(); @@ -294,10 +291,9 @@ real_t CoherentSum::norm(const Bilinears& norms) const complex_t acc(0, 0); for ( size_t i = 0; i < size(); ++i ) { for ( size_t j = 0; j < size(); ++j ) { - auto val = norms.get(i, j) - * m_matrixElements[i].coefficient - * std::conj(m_matrixElements[j].coefficient); - acc += val; + acc += m_matrixElements[i].coefficient + * std::conj(m_matrixElements[j].coefficient) + * ( i > j ? std::conj(norm(j,i)) : norm(i,j) ); } } return acc.real(); @@ -372,27 +368,24 @@ std::function CoherentSum::evaluator(const EventList_type* return arrayToFunctor(values); } -KeyedView CoherentSum::componentEvaluator(const EventList_type* ievents) const +KeyedFunctors CoherentSum::componentEvaluator(const EventList_type* ievents) const { - auto events = ievents == nullptr ? m_integrator.events() : ievents; - - KeyedView rt(*events, m_matrixElements.size() ); + auto& cache = m_integrator.cache(); + KeyedFunctors rt; for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) { - auto& me = m_matrixElements[i]; - rt.setKey(i, programatic_name( me.decayTree.decayDescriptor() ) ); - #ifdef _OPENMP - #pragma omp parallel for - #endif - for( unsigned evt = 0 ; evt < events->size(); ++evt ) - { - complex_t total = 0; - for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ - total += this->m_integrator.get(i, evt) * m_matrixElements[i].coefficient - * std::conj( this->m_integrator.get(j, evt) * m_matrixElements[j].coefficient ); - } - rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; + for( unsigned j = i ; j != m_matrixElements.size(); ++j ){ + auto mi = m_matrixElements[i]; + auto mj = m_matrixElements[j]; + auto ci = this->m_matrixElements[i].coefficient; + auto cj = this->m_matrixElements[j].coefficient; + double s = (i==j) ? 
1 : 2 ; + auto name = programatic_name(mi.decayTree.decayDescriptor()) + "_" + programatic_name( mj.decayTree.decayDescriptor() ); + INFO("Adding evaluator for: " << name ); + auto functor = [ci,cj,i,j,s, &cache](const Event& event){ return s * std::real( ci * cache.get( event.index(), i ) * std::conj( cj * cache.get( event.index(), j ) ) ) ;}; + rt.add(functor, name, ""); } } + INFO(" Returning: " << rt.keys.size() << " functors" ); return rt; } diff --git a/src/Expression.cpp b/src/Expression.cpp index 058ee0d342a..9433ef4c9cf 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -25,6 +25,7 @@ DEFINE_CAST(Parameter ) DEFINE_CAST(SubTree ) DEFINE_CAST(Ternary ) DEFINE_CAST(Function ) +DEFINE_CAST(ComplexParameter); Expression::Expression( const std::shared_ptr& expression ) : m_expression( expression ) {} @@ -324,3 +325,25 @@ Expression AmpGen::fcn::fpow( const Expression& x, const int& n){ for( int y=0;yenableCuda() ) complex_type = "AmpGen::CUDA::complex_t"; + if( resolver != nullptr && resolver->enableAVX() ) complex_type = "AmpGen::AVX2d::complex_t"; + return complex_type + "(" + m_real.to_string(resolver) + ", " + m_imag.to_string(resolver) +")"; +} + +void ComplexParameter::resolve( ASTResolver& resolver ) const +{ + m_real.resolve(resolver); + m_imag.resolve(resolver); +} + +complex_t ComplexParameter::operator()() const +{ + return m_real() + 1i * m_imag(); +} diff --git a/src/Integrator.cpp b/src/Integrator.cpp index 531248a016b..4632a44b764 100644 --- a/src/Integrator.cpp +++ b/src/Integrator.cpp @@ -37,7 +37,7 @@ void Bilinears::resize( const size_t& r, const size_t& c) void Integrator::integrateBlock() { #pragma omp parallel for - for ( size_t roll = 0; roll < N; ++roll ) { + for ( size_t roll = 0; roll < m_counter; ++roll ) { float_v re( 0.f ); float_v im( 0.f ); auto b1 = m_cache.data() + m_integrals[roll].i * m_cache.nBlocks(); @@ -45,43 +45,24 @@ void Integrator::integrateBlock() for ( size_t i = 0; i < m_cache.nBlocks(); ++i ) { auto c = 
b1[i] * conj(b2[i]); #if ENABLE_AVX2 - re = fmadd(re, m_weight[i], real(c) ); - im = fmadd(im, m_weight[i], imag(c) ); + re = fmadd( m_weight[i], real(c), re ); + im = fmadd( m_weight[i], imag(c), im ); #else re = re + m_weight[i] * real(c); im = im + m_weight[i] * imag(c); #endif } - m_integrals[roll].transfer( utils::sum_elements( complex_v(re, im) ) / m_norm ); + *m_integrals[roll].result = utils::sum_elements( complex_v(re, im) ) / m_norm; } m_counter = 0; } bool Integrator::isReady() const { return m_events != nullptr; } -void Integrator::queueIntegral(const size_t& c1, - const size_t& c2, - const size_t& i, - const size_t& j, - Bilinears* out, - const bool& sim) +void Integrator::queueIntegral(complex_t* result, const unsigned& i, const unsigned& j) { - if( !out->workToDo(i,j) ) return; - if( sim ) - addIntegralKeyed( c1, c2, [out,i,j]( const complex_t& val ){ out->set(i,j,val); if( i != j ) out->set(j,i, std::conj(val) ); } ); - else - addIntegralKeyed( c1, c2, [out,i,j]( const complex_t& val ){ out->set(i,j,val); } ); -} - -void Integrator::addIntegralKeyed( const size_t& c1, const size_t& c2, const TransferFCN& tFunc ) -{ - m_integrals[m_counter++] = Integral(c1,c2,tFunc); - if ( m_counter == N ) integrateBlock(); -} - -void Integrator::queueIntegral(const size_t& i, const size_t& j, complex_t* result) -{ - addIntegralKeyed(i, j, [result](const complex_t& val){ *result = val ; } ); + m_integrals[m_counter++] = QueuedIntegral(result, i, j ); + if( m_counter == N ) integrateBlock(); } void Integrator::flush() diff --git a/src/Minimiser.cpp b/src/Minimiser.cpp index 3c7f326fad8..0eaabb29f01 100644 --- a/src/Minimiser.cpp +++ b/src/Minimiser.cpp @@ -17,6 +17,7 @@ #include "Math/Factory.h" #include "Math/Functor.h" #include "Math/Minimizer.h" +#include "AmpGen/ProfileClock.h" using namespace AmpGen; using namespace ROOT; @@ -25,11 +26,13 @@ unsigned int Minimiser::nPars() const { return m_nParams; } double Minimiser::operator()( const double* xx ) { + 
ProfileClock callTime; for(size_t i = 0; i < m_mapping.size(); ++i ) { m_parSet->at( m_mapping[i] )->setCurrentFitVal( xx[i] ); } double LL = m_theFunction() ; for ( auto& extendTerm : m_extendedTerms ) LL -= 2 * extendTerm->getVal(); + callTime.stop(); return LL - m_ll_zero; } diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index ba4c1cf10e0..bc0e5df0654 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -34,6 +34,8 @@ using namespace AmpGen; using namespace std::complex_literals; +// ENABLE_DEBUG( PolarisedSum ) + namespace AmpGen { make_enum(spaceType, spin, flavour) } std::vector convertProxies(const std::vector& proxyVector, const std::function& transform) @@ -125,9 +127,7 @@ PolarisedSum::PolarisedSum(const EventType& type, DebugSymbols db; auto prob = probExpression(transitionMatrix(), convertProxies(m_pVector,[](auto& p){ return Parameter(p->name());} ), m_debug ? &db : nullptr); - m_probExpression = CompiledExpression(prob, "prob_unnormalised", {}, db, m_mps); - CompilerWrapper().compile(m_probExpression); - m_probExpression.prepare(); + m_probExpression = make_expression( prob, "prob_unnormalised", *m_mps ); } std::vector PolarisedSum::polarisations( const std::string& name ) const @@ -190,41 +190,27 @@ std::vector> PolarisedSum::matrixElements() const return m_matrixElements; } + void PolarisedSum::prepare() { - DEBUG( "Preparing: " << m_prefix << " " << m_events << " ready = " << m_integrator.isReady() ); + auto resetFlags = [](auto& t){ t.workToDo = false; t.resetExternals() ; }; + auto flagUpdate = [this](auto& t){ t.workToDo = this->m_nCalls == 0 || t.hasExternalsChanged(); }; + auto updateData = [this](auto& t) mutable { if( t.workToDo && this->m_events != nullptr ) this->m_cache.update(this->m_events->store(), t) ; }; + auto updateInteg = [this](auto& t) mutable { if( t.workToDo ) this->m_integrator.updateCache(t) ; }; + transferParameters(); - ProfileClock tEval; - size_t size_of = size() / m_matrixElements.size(); - 
for(auto& t : m_matrixElements){ - if( m_nCalls != 0 && !t.hasExternalsChanged() ) continue; - if( m_events != nullptr ) m_cache.update(m_events->store(), t); - m_integrator.updateCache(t); - t.resetExternals(); - t.workToDo = true; - } - tEval.stop(); - ProfileClock tIntegral; - m_rho = densityMatrix(m_dim.first, m_pVector); + for_each_sequence(m_matrixElements.begin(), m_matrixElements.end(), flagUpdate, updateData, updateInteg); if( m_integrator.isReady() ) updateNorms(); - tIntegral.stop(); - if( m_verbosity && m_nCalls % 100 == 0 ){ - INFO("Time to evaluate = " << tEval << " ms; " << "norm = " << tIntegral << " ms; "); - } -// debug_norm(); - m_nCalls++; - for( auto& me : m_matrixElements ) me.workToDo = false; - auto pw = m_weight / m_norm; - #pragma omp parallel for - for( unsigned block = 0 ; block < m_events->nBlocks(); ++block ) - m_pdfCache[block] = pw * m_probExpression(&m_cache(block, 0)); - - DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << pw * getValNoCache(m_events->at(0)) ); + std::for_each( m_matrixElements.begin(), m_matrixElements.end(), resetFlags ); + if( m_nCalls % 10000 == 0 ) debug_norm(); + DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << (m_weight/m_norm) * getValNoCache(m_events->at(0))); + m_pdfCache.update(m_cache, m_probExpression); + m_nCalls++; } float_v PolarisedSum::operator()( const float_v*, const unsigned index ) const { - return m_pdfCache[index]; + return ( m_weight / m_norm ) * m_pdfCache[index]; } void PolarisedSum::debug_norm() @@ -232,11 +218,8 @@ void PolarisedSum::debug_norm() double norm_slow = 0; for( auto& evt : *m_integrator.events() ) norm_slow += evt.weight() * getValNoCache(evt) / evt.genPdf(); - INFO("Norm : " << std::setprecision(10) - << "bilinears=" << m_norm - << "; exact=" << norm_slow / m_integrator.norm() - << "; d = " << m_norm - norm_slow / m_integrator.norm() - << "; sample=" << m_integrator.norm() ); + norm_slow /= m_integrator.norm(); + INFO("Norm: " << 
std::setprecision(10) << "bilinears=" << m_norm << "; Slow=" << norm_slow << "; d = " << m_norm - norm_slow); } void PolarisedSum::setEvents( EventList_type& events ) @@ -244,8 +227,8 @@ void PolarisedSum::setEvents( EventList_type& events ) reset(); if( m_events != nullptr && m_ownEvents ) delete m_events; m_events = &events; - m_cache = Store(m_events->size(), m_matrixElements, m_dim.first * m_dim.second ); - m_pdfCache.resize( m_events->nBlocks() ); + m_cache = Store(m_events->size(), m_matrixElements, m_dim.first * m_dim.second ); + m_pdfCache = Store(m_events->size(), m_probExpression ); } void PolarisedSum::setMC( EventList_type& events ) @@ -322,7 +305,7 @@ void PolarisedSum::updateNorms() complex_t z = 0; for(size_t i = 0; i < m_matrixElements.size(); ++i){ for(size_t j = 0; j < m_matrixElements.size(); ++j){ - z += m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling())* ( i > j ? std::conj(norm(j,i)) : norm(i,j) ); + z += m_matrixElements[i].coupling()*std::conj(m_matrixElements[j].coupling()) * ( i > j ? std::conj(norm(j,i)) : norm(i,j) ); } } m_norm = std::real(z); @@ -461,13 +444,14 @@ std::vector PolarisedSum::fitFractions(const LinearErrorPropagator& void PolarisedSum::transferParameters() { - if( m_probExpression.isLinked() ) m_probExpression.prepare(); + m_probExpression.prepare(); for(auto& me : m_matrixElements){ me.coefficient = me.coupling(); me.prepare(); } - for(auto& p : m_pVector ) p.update(); + for(auto& p : m_pVector) p.update(); m_weight.update(); + m_rho = densityMatrix(m_dim.first, m_pVector); } real_t PolarisedSum::getValNoCache( const Event& evt ) const @@ -487,7 +471,7 @@ double PolarisedSum::getWeight() const { return m_weight ; } std::function PolarisedSum::evaluator(const EventList_type* ievents) const { auto events = ievents == nullptr ? 
m_integrator.events() : ievents; - Store store( events->size(), m_matrixElements); + Store store(events->size(), m_matrixElements, m_dim.first * m_dim.second); for( auto& me : m_matrixElements ) store.update(events->store(), me ); std::vector values( events->aligned_size() ); @@ -503,38 +487,37 @@ std::function PolarisedSum::evaluator(const EventList_type return arrayToFunctor(values); } -KeyedView PolarisedSum::componentEvaluator(const EventList_type* events) const +KeyedFunctors PolarisedSum::componentEvaluator(const EventList_type* events) const { - KeyedView rt(*events, m_matrixElements.size() ); -// std::vector address_mapping(m_matrixElements.size()); -// for( unsigned i = 0; i != m_matrixElements.size(); ++i ) -// address_mapping[i] = m_integrator.getCacheIndex( m_matrixElements[i].amp ); -// -// for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) -// { -// auto& me = m_matrixElements[i]; -// rt.setKey(i, programatic_name( me.decayTree.decayDescriptor() ) ); -// #ifdef _OPENMP -// #pragma omp parallel for -// #endif -// for( unsigned evt = 0 ; evt != m_integrator.events()->size(); ++evt ) -// { -// complex_t total = 0; -// for( unsigned j = 0 ; j != m_matrixElements.size(); ++j ){ -// for( unsigned x = 0; x != m_norms.size(); ++x ) -// { -// auto s1 = m_dim.first; -// auto s2 = m_dim.second; -// auto f = x % s2; -// auto psiIndex = (x-f) / s2; -// auto m2 = psiIndex % s1; -// auto m1 = (psiIndex-m2)/s1; -// total += m_rho[psiIndex] * this->m_integrator.get( address_mapping[i] + m1 * s2 + f, evt ) * m_matrixElements[i].coefficient -// * std::conj( this->m_integrator.get( address_mapping[j] + m2 * s2 + f, evt ) * m_matrixElements[j].coefficient ); -// } -// } -// rt(events->at(evt), i) = m_weight * std::real( total ) / m_norm; -// } -// } + auto& cache = m_integrator.cache(); + KeyedFunctors rt; + for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) + { + for( unsigned j = i ; j != m_matrixElements.size(); ++j ){ + auto mi = m_matrixElements[i]; + 
auto mj = m_matrixElements[j]; + auto ci = this->m_matrixElements[i].coefficient; + auto cj = this->m_matrixElements[j].coefficient; + double s = (i==j) ? 1 : 2 ; + auto name = programatic_name(mi.decayTree.decayDescriptor()) + "_" + programatic_name( mj.decayTree.decayDescriptor() ); + INFO("Adding evaluator for: " << name ); + auto functor = [ci,cj,i,j,s, &cache, this](const Event& event){ + auto [s1,s2] = this->m_dim; + auto R = s1 * s2; + complex_t total = 0; + for( unsigned x = 0; x != this->m_norms.size(); ++x ) + { + auto f = x % s2; + auto psiIndex = (x-f) / s2; + auto m2 = psiIndex % s1; + auto m1 = (psiIndex-m2)/s1; + total += this->m_rho[psiIndex] * ci * cache.get(event.index(),R * i + m1 * s2 + f) + * std::conj( cj * cache.get(event.index(),R * j + m2 * s2 + f) ); + } + return s * std::real(total); + }; + rt.add(functor, name, ""); + } + } return rt; } diff --git a/src/Projection.cpp b/src/Projection.cpp index 2afa383fc4f..89b354a7d89 100644 --- a/src/Projection.cpp +++ b/src/Projection.cpp @@ -76,22 +76,22 @@ template <> TH1D* Projection::projInternal( const EventList& events, const Argum return events.makeProjection(*this, args); } -template <> std::tuple, THStack*> Projection::projInternal(const EventList& events, const KeyedView& weightFunction, const ArgumentPack& args) const +template <> std::tuple, THStack*> Projection::projInternal(const EventList& events, const Projection::keyedFunctors& weightFunction, const ArgumentPack& args) const { std::vector hists; double norm_sum = args.getArg(1).val; std::string prefix = args.getArg().val; bool autowrite = args.get() != nullptr; THStack* stack = args.getArg(new THStack()).val; - if( prefix != "" ) prefix = prefix +"_"; - for( unsigned int i = 0 ; i != weightFunction.width(); ++i ) - hists.push_back( plot(prefix + weightFunction.key(i)==""?"C"+std::to_string(i):weightFunction.key(i)) ); auto selection = args.getArg().val; + if( prefix != "" ) prefix = prefix +"_"; + for( auto& key : weightFunction.keys 
) + hists.push_back( plot(prefix + key ) ); for( const auto& evt : events ){ if( selection != nullptr && !selection(evt) ) continue; auto pos = operator()(evt); auto weights = weightFunction(evt); - for( unsigned j = 0 ; j != weightFunction.width(); ++j ) hists[j]->Fill( pos, evt.weight() * weights[j] / evt.genPdf() ); + for( unsigned j = 0 ; j != weights.size(); ++j ) hists[j]->Fill( pos, evt.weight() * weights[j] / evt.genPdf() ); } std::sort( std::begin(hists), std::end(hists), [](auto& h1, auto& h2){ return h1->Integral() < h2->Integral() ; } ); double total = std::accumulate( std::begin(hists), std::end(hists), 0.0, [](double& t, auto& h){ return t + h->Integral() ; } ); @@ -112,22 +112,22 @@ template <> TH1D* Projection::projInternal( const EventListSIMD& events, const A return events.makeProjection(*this, args); } -template <> std::tuple, THStack*> Projection::projInternal(const EventListSIMD& events, const KeyedView& weightFunction, const ArgumentPack& args) const +template <> std::tuple, THStack*> Projection::projInternal(const EventListSIMD& events, const Projection::keyedFunctors& weightFunction, const ArgumentPack& args) const { std::vector hists; double norm_sum = args.getArg(1).val; std::string prefix = args.getArg().val; bool autowrite = args.get() != nullptr; THStack* stack = args.getArg(new THStack()).val; - if( prefix != "" ) prefix = prefix +"_"; - for( unsigned int i = 0 ; i != weightFunction.width(); ++i ) - hists.push_back( plot(prefix + weightFunction.key(i)==""?"C"+std::to_string(i):weightFunction.key(i)) ); auto selection = args.getArg().val; + if( prefix != "" ) prefix = prefix +"_"; + for( auto& key : weightFunction.keys ) + hists.push_back( plot(prefix + key ) ); for( const auto& evt : events ){ if( selection != nullptr && !selection(evt) ) continue; auto pos = operator()(evt); auto weights = weightFunction(evt); - for( unsigned j = 0 ; j != weightFunction.width(); ++j ) hists[j]->Fill( pos, evt.weight() * weights[j] / evt.genPdf() ); + 
for( unsigned j = 0 ; j != weights.size(); ++j ) hists[j]->Fill( pos, evt.weight() * weights[j] / evt.genPdf() ); } std::sort( std::begin(hists), std::end(hists), [](auto& h1, auto& h2){ return h1->Integral() < h2->Integral() ; } ); double total = std::accumulate( std::begin(hists), std::end(hists), 0.0, [](double& t, auto& h){ return t + h->Integral() ; } ); diff --git a/test/test_avx2d.cpp b/test/test_avx2d.cpp index 20119f426e8..f914bf28790 100644 --- a/test/test_avx2d.cpp +++ b/test/test_avx2d.cpp @@ -12,6 +12,16 @@ namespace utf = boost::unit_test; using namespace AmpGen; +BOOST_AUTO_TEST_CASE( test_log ) +{ + AVX2d::float_t p(0.3, 0.5, 10.0, 7.0); + auto logged = AVX2d::log( p ).to_array() ; + BOOST_TEST( logged[0] == std::log(0.3), boost::test_tools::tolerance(1e-15 ) ); + BOOST_TEST( logged[1] == std::log(0.5), boost::test_tools::tolerance(1e-15 ) ); + BOOST_TEST( logged[2] == std::log(10.0), boost::test_tools::tolerance(1e-15 ) ); + BOOST_TEST( logged[3] == std::log(7.0), boost::test_tools::tolerance(1e-15 ) ); +} + BOOST_AUTO_TEST_CASE( test_fmod ) { std::vector a = {5.1, -5.1, 5.1, -5.1}; @@ -54,6 +64,8 @@ BOOST_AUTO_TEST_CASE( test_gather ) BOOST_TEST( v[3] == data[3] ); } + + #else BOOST_AUTO_TEST_CASE( test_dummy ) { From 33121a55aeb2c687f8f2e6dfb93681c5865910a3 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 14:55:22 +0200 Subject: [PATCH 10/67] fix travis... 
--- .ci/build_root_linux.sh | 1 + README.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index fd12bdea9b6..adc3f336000 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -6,6 +6,7 @@ export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge conda install --quiet --yes -c conda-forge/label/gcc8 root +conda config --set channel_priority strict source "$DEPS_DIR/miniconda/bin/thisroot.sh" export CXX="$DEPS_DIR/miniconda/bin/g++" diff --git a/README.md b/README.md index 9dd1f285f52..a862c53b3d2 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ There is at the time of writing only a master branch (FIXME) ##### Build requirements: * cmake >= 3.11.0 -* C++ compiler with CXX standard >= 14 (gcc >= 4.9.3, clang ~ 5). - Defaults to Cxx17 (enable cxx14 with cmake flag `-DCMAKE_CXX_STANDARD=14` ) +* C++ compiler with CXX standard >= 17 (gcc >= 8) + Defaults to Cxx17 * ROOT >= 6 with MathMore To (re)configure root with these options, use the additional command line options `-Dcxx14 -Dmathmore=ON` when configuring the installation of ROOT. @@ -38,7 +38,7 @@ There is at the time of writing only a master branch (FIXME) * xROOTd for network file access * OpenMP for multithreading * ROOT >= 6 with MathMore and Minuit2 enabled. The external version of Minuit2 provided as an external package of GooFit is used if the ROOT version is not unavailable. - To (re)configure root with these options, use the additional command line options `-Dcxx14 -Dminuit2=ON -Dmathmore=ON` when configuring the installation of ROOT. + To (re)configure root with these options, use the additional command line options `-Dminuit2=ON -Dmathmore=ON` when configuring the installation of ROOT. #### Building The configuration of the AmpGen build is performed by cmake. 
From b97eceba23c755822d560a54c647b239fe7c5e61 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 15:16:05 +0200 Subject: [PATCH 11/67] fix travis... --- .ci/build_root_linux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index adc3f336000..31994e26076 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -5,8 +5,8 @@ bash miniconda.sh -b -p $DEPS_DIR/miniconda export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge -conda install --quiet --yes -c conda-forge/label/gcc8 root conda config --set channel_priority strict +conda install --quiet --yes -c conda-forge/label/gcc8 root zstd source "$DEPS_DIR/miniconda/bin/thisroot.sh" export CXX="$DEPS_DIR/miniconda/bin/g++" From f1b159152c920bbfa906e8815185910167143cb4 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 15:37:22 +0200 Subject: [PATCH 12/67] fix travis... --- .ci/build_root_linux.sh | 7 ++++--- .ci/travis_linux.sh | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index 31994e26076..551e2c512e1 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -6,8 +6,9 @@ export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge conda config --set channel_priority strict -conda install --quiet --yes -c conda-forge/label/gcc8 root zstd +# conda install --quiet --yes -c conda-forge/label/gcc8 root zstd +conda create -n my_root_env root -c conda-forge/label/gcc8 -source "$DEPS_DIR/miniconda/bin/thisroot.sh" -export CXX="$DEPS_DIR/miniconda/bin/g++" +# source "$DEPS_DIR/miniconda/bin/thisroot.sh" +# export CXX="$DEPS_DIR/miniconda/bin/g++" popd diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index 9f29206a32a..df7bd9472ee 100644 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -5,6 +5,7 @@ set -evx mkdir -p build cd build +conda activate my_root_env 
cmake .. -DCMAKE_CXX_COMPILER=$CXX cmake --build . -- -j2 From e1f26c87970e8e9edd826477df917fa2b5680fc8 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 15:43:45 +0200 Subject: [PATCH 13/67] fix travis... --- .ci/build_root_linux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index 551e2c512e1..6eca271e9a0 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -7,7 +7,7 @@ hash -r conda config --add channels conda-forge conda config --set channel_priority strict # conda install --quiet --yes -c conda-forge/label/gcc8 root zstd -conda create -n my_root_env root -c conda-forge/label/gcc8 +conda create --quiet -n my_root_env root -c conda-forge/label/gcc8 # source "$DEPS_DIR/miniconda/bin/thisroot.sh" # export CXX="$DEPS_DIR/miniconda/bin/g++" From 194f473fba2c4541b8d7a95907ffe85910e27ab9 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 15:55:27 +0200 Subject: [PATCH 14/67] fix travis... --- .ci/build_root_linux.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index 6eca271e9a0..a1bfec49916 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -6,8 +6,8 @@ export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge conda config --set channel_priority strict -# conda install --quiet --yes -c conda-forge/label/gcc8 root zstd -conda create --quiet -n my_root_env root -c conda-forge/label/gcc8 +# conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen +conda create --quiet --yes -n my_root_env root doxygen -c conda-forge/label/gcc8 # source "$DEPS_DIR/miniconda/bin/thisroot.sh" # export CXX="$DEPS_DIR/miniconda/bin/g++" From 6403a294c2d7b00cf8a017432050afe96bc6d74e Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 16:07:16 +0200 Subject: [PATCH 15/67] fix travis... 
--- .ci/build_root_linux.sh | 28 ++++++++++++++-------------- .ci/travis_linux.sh | 12 +++++++++++- .travis.yml | 22 +++++++++++----------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index a1bfec49916..dd6d15db351 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -1,14 +1,14 @@ -pushd $DEPS_DIR - -wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -bash miniconda.sh -b -p $DEPS_DIR/miniconda -export PATH="$DEPS_DIR/miniconda/bin:$PATH" -hash -r -conda config --add channels conda-forge -conda config --set channel_priority strict -# conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen -conda create --quiet --yes -n my_root_env root doxygen -c conda-forge/label/gcc8 - -# source "$DEPS_DIR/miniconda/bin/thisroot.sh" -# export CXX="$DEPS_DIR/miniconda/bin/g++" -popd +# pushd $DEPS_DIR +# +# wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +# bash miniconda.sh -b -p $DEPS_DIR/miniconda +# export PATH="$DEPS_DIR/miniconda/bin:$PATH" +# hash -r +# conda config --add channels conda-forge +# conda config --set channel_priority strict +# # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen +# conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 +# +# # source "$DEPS_DIR/miniconda/bin/thisroot.sh" +# # export CXX="$DEPS_DIR/miniconda/bin/g++" +# popd diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index df7bd9472ee..1e71238f1f0 100644 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -1,3 +1,14 @@ + +wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +bash miniconda.sh -b -p $DEPS_DIR/miniconda +export PATH="$DEPS_DIR/miniconda/bin:$PATH" +hash -r +conda config --add channels conda-forge +conda config --set channel_priority strict +# conda install --quiet --yes -c 
conda-forge/label/gcc8 root_base doxygen +conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 +conda activate my_root_env + echo -en 'travis_fold:start:script.build\\r' echo "Building..." echo "Building under OS: $TRAVIS_OS_NAME, CXX =$CXX" @@ -5,7 +16,6 @@ set -evx mkdir -p build cd build -conda activate my_root_env cmake .. -DCMAKE_CXX_COMPILER=$CXX cmake --build . -- -j2 diff --git a/.travis.yml b/.travis.yml index 8dbf58565e3..13daca97f93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,17 +10,17 @@ matrix: packages: - libomp - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-8 - - doxygen - - doxygen-doc - - doxygen-gui - - graphviz - - libtbb-dev + # addons: + # apt: + # sources: + # - ubuntu-toolchain-r-test + # packages: + # - g++-8 + # - doxygen + # - doxygen-doc + # - doxygen-gui + # - graphviz + # - libtbb-dev env: - MATRIX_EVAL="CC=gcc-8 && CXX=g++-8" From 1c7d80ffdad68bae2409f879c784330b7273a682 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 16:17:26 +0200 Subject: [PATCH 16/67] fix travis... --- .ci/travis_linux.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index 1e71238f1f0..ded5c34e9d0 100644 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -7,6 +7,7 @@ conda config --add channels conda-forge conda config --set channel_priority strict # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 +conda init bash conda activate my_root_env echo -en 'travis_fold:start:script.build\\r' @@ -14,9 +15,11 @@ echo "Building..." echo "Building under OS: $TRAVIS_OS_NAME, CXX =$CXX" set -evx +# -DCMAKE_CXX_COMPILER=$CXX + mkdir -p build cd build -cmake .. -DCMAKE_CXX_COMPILER=$CXX +cmake .. cmake --build . 
-- -j2 set +evx From 7b9816dc5af2fb08609f56b8330e2e145adfd5a0 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 16:28:32 +0200 Subject: [PATCH 17/67] fix travis... --- .ci/build_root_linux.sh | 28 ++++++++++++++-------------- .ci/travis_linux.sh | 17 ++++++++--------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index dd6d15db351..0a4c760c7ac 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -1,14 +1,14 @@ -# pushd $DEPS_DIR -# -# wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -# bash miniconda.sh -b -p $DEPS_DIR/miniconda -# export PATH="$DEPS_DIR/miniconda/bin:$PATH" -# hash -r -# conda config --add channels conda-forge -# conda config --set channel_priority strict -# # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen -# conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 -# -# # source "$DEPS_DIR/miniconda/bin/thisroot.sh" -# # export CXX="$DEPS_DIR/miniconda/bin/g++" -# popd +pushd $DEPS_DIR + +wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +bash miniconda.sh -b -p $DEPS_DIR/miniconda +export PATH="$DEPS_DIR/miniconda/bin:$PATH" +hash -r +conda config --add channels conda-forge +conda config --set channel_priority strict +# conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen +conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 +conda init bash +# source "$DEPS_DIR/miniconda/bin/thisroot.sh" +# export CXX="$DEPS_DIR/miniconda/bin/g++" +popd diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index ded5c34e9d0..22690f6dd50 100644 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -1,13 +1,12 @@ -wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -bash miniconda.sh -b -p $DEPS_DIR/miniconda -export 
PATH="$DEPS_DIR/miniconda/bin:$PATH" -hash -r -conda config --add channels conda-forge -conda config --set channel_priority strict -# conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen -conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 -conda init bash +# wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +# bash miniconda.sh -b -p $DEPS_DIR/miniconda +# export PATH="$DEPS_DIR/miniconda/bin:$PATH" +# hash -r +# conda config --add channels conda-forge +# conda config --set channel_priority strict +# # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen +# conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 conda activate my_root_env echo -en 'travis_fold:start:script.build\\r' From 9c7e191f8e3664b36000ceb8e0286246f2d94675 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 19 Apr 2020 16:34:05 +0200 Subject: [PATCH 18/67] fix travis... 
--- .ci/travis_linux.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index 22690f6dd50..eb79ed00251 100644 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -7,6 +7,9 @@ # conda config --set channel_priority strict # # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen # conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 + +# from https://stackoverflow.com/questions/55342122/conda-activate-on-travis-ci +source $(conda info --root)/etc/profile.d/conda.sh conda activate my_root_env echo -en 'travis_fold:start:script.build\\r' From 77339caf4fe39c4f9a2a0500e1e98f160563cbc7 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 10:47:59 +0200 Subject: [PATCH 19/67] fix travis build --- .ci/build_root.sh | 20 ++++++++++++++++++++ .ci/build_root_linux.sh | 12 +++++++++--- .ci/travis_linux.sh | 2 +- .travis.yml | 4 ++-- 4 files changed, 32 insertions(+), 6 deletions(-) create mode 100755 .ci/build_root.sh diff --git a/.ci/build_root.sh b/.ci/build_root.sh new file mode 100755 index 00000000000..6c184476362 --- /dev/null +++ b/.ci/build_root.sh @@ -0,0 +1,20 @@ +pushd $DEPS_DIR + +if [[ $1 == "osx" ]] ; then + wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh +elif [[ $1 == "linux" ]] ; then + wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +fi + +bash miniconda.sh -b -p $DEPS_DIR/miniconda +export PATH="$DEPS_DIR/miniconda/bin:$PATH" +hash -r +conda config --add channels conda-forge +conda config --set channel_priority strict + +# conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen +conda create --quiet --yes -n my_root_env root doxygen zstd=1.3.7 -c conda-forge +# conda init bash +# source "$DEPS_DIR/miniconda/bin/thisroot.sh" +# export CXX="$DEPS_DIR/miniconda/bin/g++" +popd diff --git a/.ci/build_root_linux.sh b/.ci/build_root_linux.sh index 
0a4c760c7ac..6c184476362 100755 --- a/.ci/build_root_linux.sh +++ b/.ci/build_root_linux.sh @@ -1,14 +1,20 @@ pushd $DEPS_DIR -wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +if [[ $1 == "osx" ]] ; then + wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh +elif [[ $1 == "linux" ]] ; then + wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +fi + bash miniconda.sh -b -p $DEPS_DIR/miniconda export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge conda config --set channel_priority strict + # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen -conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 -conda init bash +conda create --quiet --yes -n my_root_env root doxygen zstd=1.3.7 -c conda-forge +# conda init bash # source "$DEPS_DIR/miniconda/bin/thisroot.sh" # export CXX="$DEPS_DIR/miniconda/bin/g++" popd diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index eb79ed00251..112d06aa2ec 100644 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -9,7 +9,7 @@ # conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 # from https://stackoverflow.com/questions/55342122/conda-activate-on-travis-ci -source $(conda info --root)/etc/profile.d/conda.sh +. $(conda info --root)/etc/profile.d/conda.sh conda activate my_root_env echo -en 'travis_fold:start:script.build\\r' diff --git a/.travis.yml b/.travis.yml index 13daca97f93..24cbaaca55e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,8 +48,8 @@ env: before_install: - eval "${MATRIX_EVAL}" - - chmod +x .ci/build_root_${TRAVIS_OS_NAME}.sh - - source .ci/build_root_${TRAVIS_OS_NAME}.sh + - chmod +x .ci/build_root.sh + - source .ci/build_root.sh ${TRAVIS_OS_NAME} # Build your code e.g. 
by calling make script: From 9b38caac5f09aa577ed4e33df5ad8debcead9a6a Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 10:58:58 +0200 Subject: [PATCH 20/67] fix control predicate for CompiledExpression batch --- AmpGen/CompiledExpression.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index 1bd41d4d1f7..f4bd9e607a7 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -132,7 +132,7 @@ namespace AmpGen stream << typeof() << " * rt, "; stream << CompiledExpressionBase::fcnSignature(typelist(), use_rto(), false) << ") {\n"; stream << "#pragma omp parallel for\n"; - stream << "for( size_t i = 0; i != N/" << utils::size::value << "; ++i ){\n"; + stream << "for( size_t i = 0; i < N/" << utils::size::value << "; ++i ){\n"; if( use_rto() ) stream << progName() + "( r + cacheSize * i, s, x0, x1 + i * eventSize);"; else stream << " rt[cacheSize*i] = " << progName() + "( x0, x1 + i * eventSize);"; stream << "}\n}"; From b18d0484296ff11648c867f16a90bb20c905c6fd Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 11:45:57 +0200 Subject: [PATCH 21/67] remove debug_norm for src/PolarisedSum --- src/PolarisedSum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index bc0e5df0654..26adb8cd13a 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -202,7 +202,7 @@ void PolarisedSum::prepare() for_each_sequence(m_matrixElements.begin(), m_matrixElements.end(), flagUpdate, updateData, updateInteg); if( m_integrator.isReady() ) updateNorms(); std::for_each( m_matrixElements.begin(), m_matrixElements.end(), resetFlags ); - if( m_nCalls % 10000 == 0 ) debug_norm(); +// if( m_nCalls % 10000 == 0 ) debug_norm(); DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << (m_weight/m_norm) * getValNoCache(m_events->at(0))); m_pdfCache.update(m_cache, m_probExpression); m_nCalls++; 
From 5c10f20bd6fcc64323b97fb468d387683551d341 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 11:48:09 +0200 Subject: [PATCH 22/67] try to fix osx ci --- .ci/build_root.sh | 13 +++++++------ .ci/travis_linux.sh | 30 +++++++----------------------- .ci/travis_osx.sh | 9 ++++++--- 3 files changed, 20 insertions(+), 32 deletions(-) mode change 100644 => 100755 .ci/travis_linux.sh diff --git a/.ci/build_root.sh b/.ci/build_root.sh index 6c184476362..e7c51c6d7b6 100755 --- a/.ci/build_root.sh +++ b/.ci/build_root.sh @@ -1,19 +1,20 @@ pushd $DEPS_DIR -if [[ $1 == "osx" ]] ; then - wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh -elif [[ $1 == "linux" ]] ; then - wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +os=$1 +if [[ $os == "osx" ]] ; then + wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda_${os}.sh +elif [[ $os == "linux" ]] ; then + wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda_${os}.sh fi -bash miniconda.sh -b -p $DEPS_DIR/miniconda +bash miniconda_${os}.sh -b -p $DEPS_DIR/miniconda_${os} export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge conda config --set channel_priority strict # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen -conda create --quiet --yes -n my_root_env root doxygen zstd=1.3.7 -c conda-forge +conda create --quiet --yes -n env_${os} root doxygen zstd=1.3.7 -c conda-forge # conda init bash # source "$DEPS_DIR/miniconda/bin/thisroot.sh" # export CXX="$DEPS_DIR/miniconda/bin/g++" diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh old mode 100644 new mode 100755 index 112d06aa2ec..a216a17c410 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -1,26 +1,18 @@ - -# wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -# bash miniconda.sh -b -p 
$DEPS_DIR/miniconda -# export PATH="$DEPS_DIR/miniconda/bin:$PATH" -# hash -r -# conda config --add channels conda-forge -# conda config --set channel_priority strict -# # conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen -# conda create --quiet --yes -n my_root_env root_base doxygen -c conda-forge/label/gcc8 +#!/bin/bash # from https://stackoverflow.com/questions/55342122/conda-activate-on-travis-ci +export HOME=/home/tim/ +export PATH="$HOME/miniconda/bin:$PATH" . $(conda info --root)/etc/profile.d/conda.sh -conda activate my_root_env +conda activate env_${TRAVIS_OS_NAME} echo -en 'travis_fold:start:script.build\\r' echo "Building..." echo "Building under OS: $TRAVIS_OS_NAME, CXX =$CXX" set -evx -# -DCMAKE_CXX_COMPILER=$CXX - -mkdir -p build -cd build +mkdir -p build.conda +cd build.conda cmake .. cmake --build . -- -j2 @@ -28,13 +20,5 @@ set +evx cd .. -./build/bin/Generator options/example_b2kstarll.opt --CompilerWrapper::Verbose --nEvents 1000 - - -# echo -e 'travis_fold:end:script.build\\r' -# echo -en 'travis_fold:start:script.test\\r' -# echo "Testing..." -# set -evx - -# ctest --output-on-failure +./build.conda/bin/Generator options/example_b2kstarll.opt --CompilerWrapper::Verbose --nEvents 10000 diff --git a/.ci/travis_osx.sh b/.ci/travis_osx.sh index 18d123b85ee..c974bbc0779 100644 --- a/.ci/travis_osx.sh +++ b/.ci/travis_osx.sh @@ -1,3 +1,8 @@ +#!/bin/bash + +. $(conda info --root)/etc/profile.d/conda.sh +conda activate env_${TRAVIS_OS_NAME} + echo -en 'travis_fold:start:script.build\\r' echo "Building..." echo "Building under OS: $TRAVIS_OS_NAME" @@ -10,9 +15,7 @@ echo "Building ..." cmake --build . -- -j2 cd .. echo "Running test job ..." 
-./build/bin/Generator options/example_b2kstarll.opt --CompilerWrapper::Verbose --nEvents 1000 - - +./build/bin/Generator options/example_b2kstarll.opt --CompilerWrapper::Verbose --nEvents 10000 # echo -e 'travis_fold:end:script.build\\r' # echo -en 'travis_fold:start:script.test\\r' # echo "Testing..." From df719858562e2e487b793b19e8e4153d2d8f7a95 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 11:54:33 +0200 Subject: [PATCH 23/67] fix travis ci --- .ci/travis_linux.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index a216a17c410..034ff5af640 100755 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -1,8 +1,6 @@ #!/bin/bash # from https://stackoverflow.com/questions/55342122/conda-activate-on-travis-ci -export HOME=/home/tim/ -export PATH="$HOME/miniconda/bin:$PATH" . $(conda info --root)/etc/profile.d/conda.sh conda activate env_${TRAVIS_OS_NAME} From 5e3fe48a714592aa77f51d5d34285077f740861f Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 11:58:24 +0200 Subject: [PATCH 24/67] fix travis ci --- .ci/travis_linux.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index 034ff5af640..7cec5a28507 100755 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -1,6 +1,7 @@ #!/bin/bash # from https://stackoverflow.com/questions/55342122/conda-activate-on-travis-ci +export PATH="$DEPS_DIR/miniconda/bin:$PATH" . 
$(conda info --root)/etc/profile.d/conda.sh conda activate env_${TRAVIS_OS_NAME} From c5d39065cc781894d025b227b8ba476885a6d4b9 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 20 Apr 2020 12:12:41 +0200 Subject: [PATCH 25/67] fix travis ci --- .ci/build_root.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/build_root.sh b/.ci/build_root.sh index e7c51c6d7b6..b0d5e2a436c 100755 --- a/.ci/build_root.sh +++ b/.ci/build_root.sh @@ -7,7 +7,7 @@ elif [[ $os == "linux" ]] ; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda_${os}.sh fi -bash miniconda_${os}.sh -b -p $DEPS_DIR/miniconda_${os} +bash miniconda_${os}.sh -b -p $DEPS_DIR/miniconda export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge From 3f193e8eb90ca1b47922696750eeff740d0db8ee Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 21 Apr 2020 13:51:42 +0200 Subject: [PATCH 26/67] increase verbosity of ci --- .ci/build_root.sh | 7 +++---- .ci/travis_linux.sh | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.ci/build_root.sh b/.ci/build_root.sh index b0d5e2a436c..2364e668a48 100755 --- a/.ci/build_root.sh +++ b/.ci/build_root.sh @@ -1,4 +1,5 @@ pushd $DEPS_DIR +set -evx os=$1 if [[ $os == "osx" ]] ; then @@ -13,9 +14,7 @@ hash -r conda config --add channels conda-forge conda config --set channel_priority strict -# conda install --quiet --yes -c conda-forge/label/gcc8 root_base doxygen conda create --quiet --yes -n env_${os} root doxygen zstd=1.3.7 -c conda-forge -# conda init bash -# source "$DEPS_DIR/miniconda/bin/thisroot.sh" -# export CXX="$DEPS_DIR/miniconda/bin/g++" + +set +evx popd diff --git a/.ci/travis_linux.sh b/.ci/travis_linux.sh index 7cec5a28507..16acae4bab8 100755 --- a/.ci/travis_linux.sh +++ b/.ci/travis_linux.sh @@ -1,5 +1,6 @@ #!/bin/bash +set -evx # from https://stackoverflow.com/questions/55342122/conda-activate-on-travis-ci export 
PATH="$DEPS_DIR/miniconda/bin:$PATH" . $(conda info --root)/etc/profile.d/conda.sh @@ -8,7 +9,6 @@ conda activate env_${TRAVIS_OS_NAME} echo -en 'travis_fold:start:script.build\\r' echo "Building..." echo "Building under OS: $TRAVIS_OS_NAME, CXX =$CXX" -set -evx mkdir -p build.conda cd build.conda From 14e6db13664aa1dc0242e6243b2546b236ab94cc Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 21 Apr 2020 14:10:36 +0200 Subject: [PATCH 27/67] remove --quiet from conda options --- .ci/build_root.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/build_root.sh b/.ci/build_root.sh index 2364e668a48..f49655f951f 100755 --- a/.ci/build_root.sh +++ b/.ci/build_root.sh @@ -14,7 +14,7 @@ hash -r conda config --add channels conda-forge conda config --set channel_priority strict -conda create --quiet --yes -n env_${os} root doxygen zstd=1.3.7 -c conda-forge +conda create --yes -n env_${os} root doxygen zstd=1.3.7 -c conda-forge set +evx popd From 804603f94afc5bf3eaf1a07d8f0639f60671376a Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 22 Apr 2020 09:33:08 +0200 Subject: [PATCH 28/67] chris's fix to build_root --- .ci/build_root.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/build_root.sh b/.ci/build_root.sh index f49655f951f..52c15b244d0 100755 --- a/.ci/build_root.sh +++ b/.ci/build_root.sh @@ -12,9 +12,9 @@ bash miniconda_${os}.sh -b -p $DEPS_DIR/miniconda export PATH="$DEPS_DIR/miniconda/bin:$PATH" hash -r conda config --add channels conda-forge -conda config --set channel_priority strict +# conda config --set channel_priority strict -conda create --yes -n env_${os} root doxygen zstd=1.3.7 -c conda-forge +conda create --yes -n env_${os} root doxygen -c conda-forge set +evx popd From a5a2e683e47d4f552903e06f9028094bccc39dcb Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 22 Apr 2020 09:55:47 +0200 Subject: [PATCH 29/67] A few random fixes for clang --- AmpGen/CoherentSum.h | 8 ++++---- 
AmpGen/CompiledExpression.h | 2 +- AmpGen/Store.h | 4 +++- AmpGen/simd/avx2d_types.h | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 0dbd08da458..1a4cfeac4df 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -56,10 +56,10 @@ namespace AmpGen std::string prefix() const { return m_prefix; } auto operator[]( const size_t& index ) { return m_matrixElements[index]; } - const auto operator[]( const size_t& index ) const { return m_matrixElements[index]; } - size_t size() const { return m_matrixElements.size(); } - real_t getWeight() const { return m_weight; } - real_t norm( const Bilinears& norms ) const; + auto operator[]( const size_t& index ) const { return m_matrixElements[index]; } + size_t size() const { return m_matrixElements.size(); } + real_t getWeight() const { return m_weight; } + real_t norm( const Bilinears& norms ) const; real_t norm() const; real_t getNorm( const Bilinears& normalisations ); diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index f4bd9e607a7..9e34ec2c07c 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -121,7 +121,7 @@ namespace AmpGen stream << " if(n == " << i << ") return " << m_externals.at( i ) << ";\n"; stream << " return 0;\n}\n"; } - void compileBatch( std::ostream& stream ) const + void compileBatch( std::ostream& stream ) const override { stream << "#include \n"; stream << "extern \"C\" void " << progName() diff --git a/AmpGen/Store.h b/AmpGen/Store.h index d039e2e9f23..85dc1ed4db5 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -113,7 +113,9 @@ namespace AmpGen { { auto f = m_index.find( fcn.name() ); if( f == m_index.end() ) FATAL("Expression: " << fcn.name() << " is not registed"); - auto [p0, s] = f->second; + //auto& [p0, s] = f->second; /// bug in the C++ standard. Such fun. 
+ auto p0 = f->second.first; + auto s = f->second.second; if constexpr( std::is_same< typename functor_type::return_type, void >::value ) { diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 46565686a49..5ed64e3101e 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -69,7 +69,7 @@ namespace AmpGen { inline __m256i double_to_int( const float_t& x ) { // based on: https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx - return _mm256_sub_epi64(_mm256_castpd_si256(x + _mm256_set1_pd(0x0018000000000000)), + return _mm256_sub_epi64(_mm256_castpd_si256(_mm256_add_pd(x, _mm256_set1_pd(0x0018000000000000))), _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))); } inline float_t gather( const double* base_addr, const float_t& offsets) @@ -121,7 +121,7 @@ namespace AmpGen { stl_fallback( tan ) stl_fallback( sin ) stl_fallback( cos ) - inline float_t remainder( const float_t& a, const float_t& b ){ return a - _mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF) * b; } + inline float_t remainder( const float_t& a, const float_t& b ){ return a - float_t(_mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF)) * b; } inline float_t fmod( const float_t& a, const float_t& b ) { auto r = remainder( abs(a), abs(b) ); From 14937c475d473e3fd338c40165617b2f3c75f664 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 22 Apr 2020 10:42:49 +0200 Subject: [PATCH 30/67] fixes for osx --- AmpGen/Store.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/AmpGen/Store.h b/AmpGen/Store.h index 85dc1ed4db5..da17cc18911 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -3,6 +3,9 @@ #include "AmpGen/simd/utils.h" #include "AmpGen/EventList.h" +#ifdef _OPENMP +#include +#endif namespace AmpGen { @@ -44,26 +47,26 @@ namespace AmpGen { m_store.resize(m_nBlocks * m_nFields); } - __always_inline stored_type operator[]( const size_t& index ) const { return m_store[index]; } - 
__always_inline stored_type& operator[]( const size_t& index ) { return m_store[index]; } + inline stored_type operator[]( const size_t& index ) const { return m_store[index]; } + inline stored_type& operator[]( const size_t& index ) { return m_store[index]; } template unsigned find( const T& t ) const { return m_index.find( t.name() )->second.first; } - __always_inline size_t size() const { return m_nEntries; } - __always_inline size_t nBlocks() const { return m_nBlocks; } - __always_inline size_t nFields() const { return m_nFields; } - __always_inline size_t aligned_size() const { return m_nBlocks * utils::size::value ; } - __always_inline const stored_type& operator()(const size_t& index, const size_t& field) const + inline size_t size() const { return m_nEntries; } + inline size_t nBlocks() const { return m_nBlocks; } + inline size_t nFields() const { return m_nFields; } + inline size_t aligned_size() const { return m_nBlocks * utils::size::value ; } + inline const stored_type& operator()(const size_t& index, const size_t& field) const { if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; else return m_store[index*m_nFields+field]; } template - __always_inline const return_type get(const size_t& index, const size_t& field ) const + inline const return_type get(const size_t& index, const size_t& field ) const { return utils::at( operator()( index / utils::size::value, field ), index % utils::size::value ); } - __always_inline const stored_type* data() const { return m_store.data(); } - __always_inline stored_type& operator()(const size_t& index, const size_t& field) + inline const stored_type* data() const { return m_store.data(); } + inline stored_type& operator()(const size_t& index, const size_t& field) { if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; else return m_store[index*m_nFields+field]; @@ -133,7 +136,8 @@ namespace AmpGen { #ifdef _OPENMP #pragma omp parallel for #endif - for ( 
size_t evt = 0; evt < events.size(); ++evt ){ + for ( size_t evt = 0; evt < events.size(); ++evt ) + { auto tmp = fcn( events[evt].address() ); store( evt, p0, &tmp, s); } From d34fd5499e99f43babfcc6b2e2d5b88223ec610f Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 22 Apr 2020 11:01:35 +0200 Subject: [PATCH 31/67] add -fma flag --- Standalone.cmake | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Standalone.cmake b/Standalone.cmake index ec5f4dbc520..600bb8314b7 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -155,14 +155,17 @@ target_compile_options(AmpGen if( ENABLE_AVX2 ) if( "${PRECISION}" MATCHES "DOUBLE" ) - message(STATUS "Enabling AVX2 [double precision]") - target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=1") + message(STATUS "Enabling AVX2 [double precision]") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=1") elseif( "${PRECISION}" MATCHES "SINGLE" ) - message(STATUS "Enabling AVX2 [single precision]") - target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=0") - + message(STATUS "Enabling AVX2 [single precision]") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=0") endif() target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -DHAVE_AVX2_INSTRUCTIONS) + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" OR + "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) + target_compile_options(AmpGen PUBLIC -mfma) + endif() endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" ) From 324b89616c3ec4f77f231a9b56d8883b8226bd10 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 22 Apr 2020 13:09:25 +0200 Subject: [PATCH 32/67] remove std::fabs from unary expression --- src/UnaryExpression.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/UnaryExpression.cpp b/src/UnaryExpression.cpp index 17fb1ae8882..94b0d0db346 100644 --- a/src/UnaryExpression.cpp +++ 
b/src/UnaryExpression.cpp @@ -28,7 +28,7 @@ DEFINE_UNARY_OPERATOR_NO_RESOLVER( Real, std::real ) DEFINE_UNARY_OPERATOR_NO_RESOLVER( Imag, std::imag ) DEFINE_UNARY_OPERATOR_NO_RESOLVER( ISqrt, rsqrt ) DEFINE_UNARY_OPERATOR_NO_RESOLVER( Conj, std::conj ) -DEFINE_UNARY_OPERATOR_NO_RESOLVER( Abs , std::fabs ) +DEFINE_UNARY_OPERATOR_NO_RESOLVER( Abs , std::abs ) LGamma::LGamma( const Expression& expression) : IUnaryExpression(expression) {} LGamma::operator Expression() const { return Expression( std::make_shared(*this) ) ; } @@ -47,7 +47,7 @@ std::string Abs::to_string( const ASTResolver* resolver ) const { return resolver != nullptr && resolver->enableAVX() ? "abs(" + m_expression.to_string(resolver) +")" : - "std::fabs("+m_expression.to_string(resolver) +")"; + "std::abs("+m_expression.to_string(resolver) +")"; } std::string Conj::to_string( const ASTResolver* resolver ) const From 94ae6402ef519f47f20ee85a164faf093e83a428 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 22 Apr 2020 13:52:04 +0200 Subject: [PATCH 33/67] still applying fixes for osx --- AmpGen/CompilerWrapper.h | 2 ++ AmpGen/Particle.h | 2 +- src/CompilerWrapper.cpp | 10 +++++++++- src/Particle.cpp | 3 ++- src/PolarisedSum.cpp | 12 +++++------- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/AmpGen/CompilerWrapper.h b/AmpGen/CompilerWrapper.h index 1e17d9eb03c..a7f20be2439 100644 --- a/AmpGen/CompilerWrapper.h +++ b/AmpGen/CompilerWrapper.h @@ -22,11 +22,13 @@ namespace AmpGen void setVerbose() { m_verbose = true ; } void preamble(std::ostream& os ) const ; void addHeader(const std::string& include ) { m_includes.push_back(include); } + private: std::vector m_includes = {"array","complex","math.h","vector"}; bool m_verbose; std::string m_cxx; std::string generateFilename(); + bool isClang() const; }; } // namespace AmpGen #endif diff --git a/AmpGen/Particle.h b/AmpGen/Particle.h index 9dd3609d1b6..593fc2663b5 100644 --- a/AmpGen/Particle.h +++ b/AmpGen/Particle.h @@ -258,7 +258,7 @@ 
namespace AmpGen Expression propagator( DebugSymbols* db = nullptr ) const; /// Calculates the total expression for this particle, including symmetrisation and the current polarisation state - Expression getExpression( DebugSymbols* db = nullptr, const unsigned int& index = 0 ); + Expression getExpression( DebugSymbols* db = nullptr, const std::vector& = {} ); /// Calculate the transition matrix for this decay Tensor transitionMatrix( DebugSymbols* db = nullptr ); diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index b018506382e..b27e031bb34 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -124,6 +124,11 @@ bool CompilerWrapper::compile( std::vector& expressions return true; } +bool CompilerWrapper::isClang() const +{ + return m_cxx.find("clang") != std::string::npos || m_cxx.find("llvm-g++") != std::string::npos; +} + std::string get_cpp_version(){ if( __cplusplus >= 201703L ) return "c++17"; if( __cplusplus >= 201402L ) return "c++14"; @@ -151,12 +156,15 @@ void CompilerWrapper::compileSource( const std::string& fname, const std::string "-rdynamic", "-fPIC"}; std::transform( compile_flags.begin(), compile_flags.end(), std::back_inserter(argp), [](const auto& flag ){return flag.c_str() ; } ); - if( m_cxx.find("clang") != std::string::npos || m_cxx.find("llvm-g++") != std::string::npos) + if(isClang()) { argp.push_back( "-Wno-return-type-c-linkage"); #if __APPLE__ argp.push_back("-lstdc++"); #endif + #ifdef _OPENMP + argp.push_back("-fopenmp=libiomp5"); + #endif } argp.push_back( fname.c_str() ); diff --git a/src/Particle.cpp b/src/Particle.cpp index fdd239cc255..caa73e54681 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -372,8 +372,9 @@ Tensor Particle::transitionMatrix( DebugSymbols* db ) return spinTensor(); } -Expression Particle::getExpression( DebugSymbols* db, const unsigned int& index ) +Expression Particle::getExpression( DebugSymbols* db, const std::vector& state) { + if( state.size() !=0 ) 
setPolarisationState( state ); if( db != nullptr && !isStable() ) db->emplace_back( uniqueString() , Parameter( "NULL", 0, true ) ); Expression total = 0; diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 26adb8cd13a..86e748a55e0 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "AmpGen/CompilerWrapper.h" #include "AmpGen/NamedParameter.h" @@ -68,17 +69,14 @@ PolarisedSum::PolarisedSum(const EventType& type, auto protoAmps = m_rules.getMatchingRules(m_eventType); for(const auto& m : protoAmps ) INFO( m.first.uniqueString() ); m_matrixElements.resize( protoAmps.size() ); - ThreadPool tp(8); + ThreadPool tp( std::thread::hardware_concurrency() ); for(unsigned i = 0; i < m_matrixElements.size(); ++i) { - tp.enqueue( [i, &protoAmps, &polStates, this]{ + // tp.enqueue( [i, &protoAmps, &polStates, this]{ Tensor thisExpression( Tensor::dim(polStates.size()) ); auto& [p, coupling] = protoAmps[i]; DebugSymbols syms; - for(unsigned j = 0; j != polStates.size(); ++j){ - p.setPolarisationState( polStates[j] ); - thisExpression[j] = make_cse( p.getExpression(&syms) ); - } + for(unsigned j = 0; j != polStates.size(); ++j) thisExpression[j] = make_cse( p.getExpression(&syms, polStates[j] ) ); m_matrixElements[i] = TransitionMatrix( p, coupling, @@ -87,7 +85,7 @@ PolarisedSum::PolarisedSum(const EventType& type, p.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? 
syms : DebugSymbols() ,this->m_mps ) ); CompilerWrapper().compile( m_matrixElements[i] ); - }); + // }); } } if ( stype == spaceType::flavour ) From 2f14291cdc803d7e8c10471c57391c208006370c Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 28 Apr 2020 13:21:00 +0200 Subject: [PATCH 34/67] cleanup of AVX code, add AVX512, fixes to spline shapes --- .ci/travis_osx.sh | 2 +- AmpGen/AmplitudeRules.h | 10 +- AmpGen/Array.h | 4 +- AmpGen/Chi2Estimator.h | 4 +- AmpGen/CoherentSum.h | 4 +- AmpGen/CompiledExpression.h | 47 +++--- AmpGen/Event.h | 1 + AmpGen/EventList.h | 12 +- AmpGen/EventType.h | 1 + AmpGen/Generator.h | 64 +++++---- AmpGen/Integrator.h | 2 +- AmpGen/PolarisedSum.h | 4 +- AmpGen/Spline.h | 6 +- AmpGen/SumPDF.h | 7 +- AmpGen/TreeReader.h | 192 ++++++++++++------------- AmpGen/Units.h | 13 ++ AmpGen/simd/avx2_types.h | 127 ----------------- AmpGen/simd/avx2d_types.h | 206 ++++++++++++++------------- AmpGen/simd/avx2f_types.h | 128 +++++++++++++++++ AmpGen/simd/avx512d_types.h | 212 ++++++++++++++++++++++++++++ AmpGen/simd/utils.h | 68 ++++----- Standalone.cmake | 33 +++-- apps/DataConverter.cpp | 31 ++-- doc/release.notes | 5 + examples/FitterWithPolarisation.cpp | 2 +- examples/SignalOnlyFitter.cpp | 2 +- src/Array.cpp | 17 +-- src/CoherentSum.cpp | 19 +-- src/CompiledExpressionBase.cpp | 7 +- src/CompilerWrapper.cpp | 12 +- src/Event.cpp | 10 +- src/EventList.cpp | 78 +++++----- src/EventListSIMD.cpp | 4 +- src/EventType.cpp | 39 ++++- src/Expression.cpp | 20 +-- src/Integrator.cpp | 2 +- src/PolarisedSum.cpp | 4 +- src/Projection.cpp | 2 +- src/Spline.cpp | 23 +-- src/TreeReader.cpp | 34 +++++ src/Units.cpp | 13 ++ src/Utilities.cpp | 13 +- test/test_avx2d.cpp | 34 ++--- 43 files changed, 916 insertions(+), 602 deletions(-) delete mode 100644 AmpGen/simd/avx2_types.h create mode 100644 AmpGen/simd/avx2f_types.h create mode 100644 AmpGen/simd/avx512d_types.h create mode 100644 src/TreeReader.cpp create mode 100644 src/Units.cpp diff --git 
a/.ci/travis_osx.sh b/.ci/travis_osx.sh index c974bbc0779..2ca3917298e 100644 --- a/.ci/travis_osx.sh +++ b/.ci/travis_osx.sh @@ -10,7 +10,7 @@ echo "Building under OS: $TRAVIS_OS_NAME" mkdir -p build cd build echo "CMake-ing, CXX = $CXX" -cmake .. -DCMAKE_CXX_COMPILER=clang +cmake .. -DCMAKE_CXX_COMPILER=clang -DUSE_SIMD="" echo "Building ..." cmake --build . -- -j2 cd .. diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index 27a3be5991c..f7d0be128a2 100644 --- a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -14,7 +14,7 @@ #include "AmpGen/Event.h" #include "AmpGen/Particle.h" #include "AmpGen/ExpressionParser.h" -#if ENABLE_AVX2 +#if ENABLE_AVX #include "AmpGen/EventListSIMD.h" #endif @@ -105,14 +105,18 @@ namespace AmpGen decayTree(dt), coupling(coupling) {} - #if ENABLE_AVX2 + #if ENABLE_AVX const RT operator()(const Event& event) const { return amp_type::operator()(EventListSIMD::makeEvent(event).data()); } + void debug( const Event& event ) const { amp_type::debug(EventListSIMD::makeEvent(event).data() ) ; } + #else const RT operator()(const Event& event) const { return amp_type::operator()(event.address()) ; } + void debug( const Event& event ) const { amp_type::debug(event.address()) ; } #endif template auto operator()(arg_types... args ) const { return amp_type::operator()(args...) 
; } const RT operator()(const float_v* t) const { return amp_type::operator()(t) ; } + void debug( const float_v* t ) const { amp_type::debug(t) ; } const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } Particle decayTree; @@ -174,7 +178,7 @@ namespace AmpGen const std::vector operator()(const Event& event) const { std::vector rt(4); - #if ENABLE_AVX2 + #if ENABLE_AVX amp_type::operator()(rt.data(), 1, externBuffer().data(), EventListSIMD::makeEvent(event).data()); #else amp_type::operator()(rt.data(), 1, externBuffer().data(), event.address()); diff --git a/AmpGen/Array.h b/AmpGen/Array.h index f53d5ea39ab..312cd465e08 100644 --- a/AmpGen/Array.h +++ b/AmpGen/Array.h @@ -31,11 +31,11 @@ namespace AmpGen complex_t operator()() const override; Expression operator[]( const Expression& address ) const; Expression top() const { return m_top ; } - + unsigned size() const { return m_size; } private: Expression m_top; Expression m_address; - size_t m_size; + unsigned m_size; }; } // namespace AmpGen diff --git a/AmpGen/Chi2Estimator.h b/AmpGen/Chi2Estimator.h index 841cfa76cc7..ea92e4d09de 100644 --- a/AmpGen/Chi2Estimator.h +++ b/AmpGen/Chi2Estimator.h @@ -7,7 +7,7 @@ #include "AmpGen/BinDT.h" -#if ENABLE_AVX2 +#if ENABLE_AVX #include "AmpGen/EventListSIMD.h" #else #include "AmpGen/EventList.h" @@ -21,7 +21,7 @@ namespace AmpGen class Chi2Estimator { - #if ENABLE_AVX2 + #if ENABLE_AVX typedef EventListSIMD EventList_type; #else typedef EventList EventList_type; diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 1a4cfeac4df..c50404fa02e 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -43,7 +43,7 @@ namespace AmpGen class CoherentSum { public: - #if ENABLE_AVX2 + #if ENABLE_AVX using EventList_type = EventListSIMD; #else using EventList_type = EventList; @@ -76,7 +76,7 @@ namespace AmpGen void reset( bool resetEvents = false ); void setEvents( const EventList_type& list ); void setMC( const EventList_type& sim ); - 
#if ENABLE_AVX2 + #if ENABLE_AVX void setEvents( const EventList& list) { m_ownEvents = true; setEvents( *(new EventListSIMD(list)) ) ; } void setMC( const EventList& list) { setMC( *(new EventListSIMD(list)) ) ; } #endif diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index 9e34ec2c07c..eaf86b1c5d2 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -36,7 +36,7 @@ namespace AmpGen private: DynamicFCN m_fcn; DynamicFCN m_batchFcn; - DynamicFCN>(ARGS...)> m_fdb; + DynamicFCN>(ARGS...)> m_fdb; std::vector m_externals = {}; bool m_hasExternalsChanged = {false}; @@ -100,10 +100,9 @@ namespace AmpGen m_externals[address] = value; m_hasExternalsChanged = true; } - void resizeExternalCache(const size_t& N ) override { - if( m_externals.size() < N ){ - m_externals.resize(N); - } + void resizeExternalCache(const size_t& N ) override + { + if( m_externals.size() < N ) m_externals.resize(N); } bool hasExternalsChanged() { return m_hasExternalsChanged; } void resetExternals() { m_hasExternalsChanged = false; } @@ -165,26 +164,26 @@ namespace AmpGen m_batchFcn(args...); } - template < class T> - void debug( const T* event ) const - { - if ( !m_fcn.isLinked() ) { - FATAL( "Function " << name() << " not linked" ); - } - if ( !m_fdb.isLinked() ) { - FATAL( "Function" << name() << " debugging symbols not linked" ); - } - std::vector> debug_results; - if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, &( m_externals[0] ), event ); - else debug_results = m_fdb( &(m_externals[0]), event); - for( auto& debug_result : debug_results ){ - auto val = debug_result.second; - auto label = debug_result.first; - if( std::real(val) == -999. 
) std::cout << bold_on << std::setw(50) << std::left << label << bold_off << std::endl; - else if( std::imag(val) == 0 ) std::cout << " " << std::setw(50) << std::left << label << " = " << std::real(val) << std::endl; - else std::cout << " " << std::setw(50) << std::left << label << " = " << val << std::endl; - } + template < class T> void debug( const T* event ) const + { + if ( !m_fcn.isLinked() ) { + FATAL( "Function " << name() << " not linked" ); } + if ( !m_fdb.isLinked() ) { + FATAL( "Function" << name() << " debugging symbols not linked" ); + } + std::vector> debug_results; + if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, &( m_externals[0] ), event ); + else debug_results = m_fdb( &(m_externals[0]), event); + for( auto& debug_result : debug_results ){ + auto val = debug_result.second; + auto label = debug_result.first; + if( utils::all_of(val.real(), -999.) ) std::cout << bold_on << std::setw(50) << std::left << label << bold_off << std::endl; + else if( utils::all_of(val.imag(), 0.) 
) std::cout << " " << std::setw(50) << std::left << label << " = " << val.real() << std::endl; + else + std::cout << " " << std::setw(50) << std::left << label << " = " << val << std::endl; + } + } bool link( void* handle ) override { diff --git a/AmpGen/Event.h b/AmpGen/Event.h index 01bc5139f16..5e420599f1b 100644 --- a/AmpGen/Event.h +++ b/AmpGen/Event.h @@ -53,6 +53,7 @@ namespace AmpGen { real_t s( const unsigned& index1, const unsigned& index2 ) const ; real_t s( const unsigned& index1, const unsigned& index2, const unsigned& index3 ) const; real_t s( const std::vector& indices ) const ; + void reorder( const std::vector& addresses); private: std::vector m_event; real_t m_genPdf = {1}; diff --git a/AmpGen/EventList.h b/AmpGen/EventList.h index d96f5326c91..aadd68eea37 100644 --- a/AmpGen/EventList.h +++ b/AmpGen/EventList.h @@ -8,6 +8,7 @@ #include "AmpGen/Projection.h" #include "AmpGen/Utilities.h" #include "AmpGen/MetaUtils.h" +#include "AmpGen/Units.h" #include #include @@ -71,10 +72,13 @@ namespace AmpGen size_t aligned_size() const { return m_data.size() ; } size_t nBlocks() const { return m_data.size() ; } double integral() const; + const double* block(const unsigned pos) const { return m_data[pos].address(); } real_t weight( const size_t& pos) const { return m_data[pos].weight(); } real_t genPDF( const size_t& pos) const { return m_data[pos].genPdf(); } void reserve( const size_t& size ) { m_data.reserve( size ); } + void resize ( const size_t& size ) { m_data.resize(size) ; } void push_back( const Event& evt ) { m_data.push_back( evt ); } + void emplace_back( const Event& evt) { m_data.emplace_back(evt) ; } void setEventType( const EventType& type ) { m_eventType = type; } void add( const EventList& evts ); void loadFromTree( TTree* tree, const ArgumentPack& args ); @@ -132,15 +136,17 @@ namespace AmpGen { return std::count_if( std::begin(*this), std::end(*this), fcn ); } - }; - DECLARE_ARGUMENT(Branches, std::vector); + }; + 
DECLARE_ARGUMENT(Branches, std::vector); /// Branch names containing kinematic information + DECLARE_ARGUMENT(ExtraBranches, std::vector); /// additional information about the event to include + DECLARE_ARGUMENT(IdBranches, std::vector); /// Branches containing PID information, used if the names of particles are incorrect (looking at you, DTF) DECLARE_ARGUMENT(EntryList, std::vector); DECLARE_ARGUMENT(GetGenPdf, bool); DECLARE_ARGUMENT(Filter, std::string); DECLARE_ARGUMENT(WeightBranch, std::string); DECLARE_ARGUMENT(ApplySym, bool); DECLARE_ARGUMENT(WeightFunction, std::function); - + DECLARE_ARGUMENT(InputUnits, AmpGen::Units); } // namespace AmpGen #endif diff --git a/AmpGen/EventType.h b/AmpGen/EventType.h index 93fa1467f03..d2777c0143a 100644 --- a/AmpGen/EventType.h +++ b/AmpGen/EventType.h @@ -62,6 +62,7 @@ namespace AmpGen /// Functor to randomly symmetrise data of this event type, using the Fisher-Yates shuffle. std::function symmetriser() const; + std::function& ids)> automaticOrdering() const; /// Calculates the number of spin indices associated with the initial and final state, i.e. the rank of the relevant transition matrix. 
std::pair dim() const; diff --git a/AmpGen/Generator.h b/AmpGen/Generator.h index 4a97856dadd..670c6838850 100644 --- a/AmpGen/Generator.h +++ b/AmpGen/Generator.h @@ -33,37 +33,40 @@ namespace AmpGen m_rnd = rand; m_gps.setRandom( m_rnd ); } - void fillEventListPhaseSpace( EventList& list, const size_t& N) - { - list.reserve( N ); - while( list.size() < N ){ - Event newEvent = m_gps.makeEvent(); - newEvent.setWeight( 1 ); - newEvent.setIndex( list.size() ); - list.push_back( newEvent ); - } - } void setBlockSize( const size_t& blockSize ) { m_generatorBlock = blockSize; } void setNormFlag( const bool& normSetting ) { m_normalise = normSetting; } - template void fillEventListPhaseSpace( EventList& list, const size_t& N, HARD_CUT cut) + template void fillEventListPhaseSpace( EventList& list, const size_t& N, cut_type cut = nullptr) { - list.reserve( N ); - while( list.size() < N ){ - Event newEvent = m_gps.makeEvent(); - newEvent.setWeight( 1 ); - if ( cut( newEvent ) ){ - newEvent.setIndex( list.size() ); - list.push_back( newEvent ); + if constexpr( std::is_same::value ) + { + if( cut != nullptr ) FATAL("This shouldn't happen..."); + list.resize(N); +// #pragma omp parallel for + for( unsigned int i = 0 ; i != N; ++i ) + { + list[i] = m_gps.makeEvent(); + list[i].setWeight( 1 ); + list[i].setIndex(i); + } + } + else { + list.reserve( N ); + while( list.size() < N ){ + Event newEvent = m_gps.makeEvent(); + newEvent.setWeight( 1 ); + if ( cut( newEvent ) ){ + newEvent.setIndex( list.size() ); + list.push_back( newEvent ); + } } } } - template - void fillEventList( PDF& pdf, EventList& list, const size_t& N ) - { - fillEventList( pdf, list, N, []( const Event& /*evt*/ ) { return 1; } ); - } + template void fillEventList( PDF& pdf, EventList& list, const size_t& N ) + { + fillEventList( pdf, list, N, nullptr); + } template void fillEventList( PDF& pdf, EventList& list, const size_t& N, HARD_CUT cut ) @@ -77,7 +80,7 @@ namespace AmpGen auto tStartTotal = 
std::chrono::high_resolution_clock::now(); pdf.reset( true ); ProgressBar pb(60, detail::trimmedString(__PRETTY_FUNCTION__) ); - ProfileClock t_phsp, t_eval, t_acceptReject; + ProfileClock t_phsp, t_eval, t_acceptReject, t_gather; std::vector efficiencyReport(m_generatorBlock,false); while ( list.size() - size0 < N ) { @@ -85,8 +88,10 @@ namespace AmpGen t_phsp.start(); fillEventListPhaseSpace(mc, m_generatorBlock, cut); t_phsp.stop(); - t_eval.start(); + t_gather.start(); pdf.setEvents( mc ); + t_gather.stop(); + t_eval.start(); pdf.prepare(); t_eval.stop(); if ( maxProb == 0 ) { @@ -133,10 +138,11 @@ namespace AmpGen } pb.finish(); double time = std::chrono::duration( std::chrono::high_resolution_clock::now() - tStartTotal ).count(); - INFO( "Generated " << N << " events in " << time << " ms" ); - INFO( "Generating phase space : " << t_phsp << " ms"); - INFO( "Evaluating PDF : " << t_eval << " ms"); - INFO( "Doing accept/reject : " << t_acceptReject << " ms"); + INFO("Generated " << N << " events in " << time << " ms"); + INFO("Generating phase space : " << t_phsp << " ms"); + INFO("Evaluating PDF : " << t_eval << " ms"); + INFO("Doing accept/reject : " << t_acceptReject << " ms"); + INFO("Gathering : " << t_gather << " ms"); } template ::value>::type> EventList generate(PDF& pdf, const size_t& nEvents ) diff --git a/AmpGen/Integrator.h b/AmpGen/Integrator.h index f1f4f7c6c40..d7856376028 100644 --- a/AmpGen/Integrator.h +++ b/AmpGen/Integrator.h @@ -53,7 +53,7 @@ namespace AmpGen template void updateCache(const T& expression) { - #if ENABLE_AVX2 + #if ENABLE_AVX if( m_events != nullptr ) m_cache.update( static_cast(m_events)->store(), expression ); #else if( m_events != nullptr ) m_cache.update( static_cast(m_events)->store(), expression ); diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index f9908ab196f..a2d0a555e52 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -36,7 +36,7 @@ namespace AmpGen class PolarisedSum { public: - #if 
ENABLE_AVX2 + #if ENABLE_AVX using EventList_type = EventListSIMD; #else using EventList_type = EventList; @@ -47,7 +47,7 @@ namespace AmpGen void prepare(); void setEvents(EventList_type&); void setMC(EventList_type&); - #if ENABLE_AVX2 + #if ENABLE_AVX void setEvents(EventList& evts){ m_ownEvents = true; setEvents( *new EventList_type(evts)) ; }; void setMC(EventList& evts){ setMC( *new EventList_type(evts)) ; }; #endif diff --git a/AmpGen/Spline.h b/AmpGen/Spline.h index 4ce5696911b..7ceadd03b6d 100644 --- a/AmpGen/Spline.h +++ b/AmpGen/Spline.h @@ -49,13 +49,13 @@ namespace AmpGen{ const double& min, const double& max ); - Spline( const Spline& spline, const Expression& x ); + Spline( const Spline& spline, const Expression& x, DebugSymbols* db =nullptr ); void resolve( ASTResolver& resolver ) const override ; std::string to_string(const ASTResolver* resolver=nullptr) const override; operator Expression() ; complex_t operator()() const override ; - Expression operator()( const Expression& x ); - Expression eval() const ; + Expression operator()( const Expression& x, DebugSymbols* db); + Expression eval(DebugSymbols* db=nullptr) const ; Array m_points; std::string m_name; diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index 340f7aa18f9..dd096cbffca 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -8,8 +8,7 @@ #include "AmpGen/LiteSpan.h" #include -#if ENABLE_AVX2 - #include "AmpGen/simd/avx2_types.h" +#if ENABLE_AVX #include "AmpGen/simd/utils.h" #endif @@ -69,7 +68,7 @@ namespace AmpGen } return -2 * LL; } - #if ENABLE_AVX2 + #if ENABLE_AVX if constexpr( std::is_same::value ) { float_v LL = 0.f; @@ -77,7 +76,7 @@ namespace AmpGen #pragma omp parallel for reduction( +: LL ) for ( size_t block = 0; block < m_events->nBlocks(); ++block ) { - LL += m_events->weight(block) * AVX2d::log(this->operator()(m_events->block(block), block)); + LL += m_events->weight(block) * AVX::log(this->operator()(m_events->block(block), block)); } return -2 * 
utils::sum_elements(LL); } diff --git a/AmpGen/TreeReader.h b/AmpGen/TreeReader.h index 64e9924f8cf..d808025d423 100644 --- a/AmpGen/TreeReader.h +++ b/AmpGen/TreeReader.h @@ -5,115 +5,105 @@ #include "TLeaf.h" #include "TTree.h" #include - +#include "AmpGen/MetaUtils.h" namespace AmpGen { - template - class TreeReader - { - private: - struct IReadBranch { - std::string name; - IReadBranch( const std::string& name = "" ) : name( name ) {} - virtual void* address() const = 0; - virtual void transfer() = 0; - virtual ~IReadBranch() = default; - }; - - template struct ReadBranch : public IReadBranch { - InputType thing; - OutputType* output; - void* address() const override { return (void*)&thing; } - ReadBranch( const std::string& name, OutputType* outputBranch ) : IReadBranch( name ), output( outputBranch ) {} - void transfer() override { *output = thing; } - }; - - template struct ReinterpretBranch : public IReadBranch { - InputType thing; - OutputType* output; - void* address() const override { return (void*)&thing; } - ReinterpretBranch( const std::string& name, OutputType* outputBranch ) : IReadBranch( name ), output( outputBranch ) {} - void transfer() override { *output = reinterpret_cast(thing); } - }; - - struct Branch { - OutputType* value; - Branch() : value( new OutputType() ) {} - ~Branch() { delete value; } - operator OutputType() const { return *value; } - operator OutputType&() { return *value; } - OutputType* operator&() { return value; } - }; + class TreeReader + { + private: + struct IReadBranch { + std::string name; + IReadBranch( const std::string& name = "" ) : name( name ) {} + virtual void* address() const = 0; + virtual void transfer() = 0; + virtual ~IReadBranch() = default; + }; - struct Iterator { - size_t m_position; - TreeReader* m_parent; - Iterator( const size_t& pos, TreeReader* parent ) : m_position( pos ), m_parent( parent ) {} - Iterator& operator++() - { - m_position++; - m_parent->getEntry( m_position ); - return *this; - } - 
bool operator==( const Iterator& rhs ) const { return m_position == rhs.m_position; } - bool operator!=( const Iterator& rhs ) const { return m_position != rhs.m_position; } - size_t operator*() const { return m_position; } - }; - TTree* tree = {nullptr}; - bool ready = {false}; - std::vector branches = {}; + template struct ReadBranch : public IReadBranch + { + InputType thing; + OutputType* output; + void* address() const override { return (void*)&thing; } + ReadBranch( const std::string& name, OutputType* outputBranch ) : IReadBranch( name ), output( outputBranch ) {} + void transfer() override { *output = thing; } + }; - public: - explicit TreeReader( TTree* tree ) : tree( tree ) {} - void setBranch( const std::string& name, OutputType* ptr ) + template struct ReinterpretBranch : public IReadBranch + { + InputType thing; + OutputType* output; + void* address() const override { return (void*)&thing; } + ReinterpretBranch( const std::string& name, OutputType* outputBranch ) : IReadBranch( name ), output( outputBranch ) {} + void transfer() override { *output = reinterpret_cast(thing); } + }; + struct Iterator { + size_t m_position; + TreeReader* m_parent; + Iterator( const size_t& pos, TreeReader* parent ) : m_position( pos ), m_parent( parent ) {} + Iterator& operator++() { - IReadBranch* new_branch = nullptr; - TLeaf* leaf = tree->GetLeaf( name.c_str() ); - if( leaf == nullptr ){ - ERROR( "Leaf: " << name << " not found"); - return; - } - std::string branchType = leaf->GetTypeName(); - if( branchType == "Double_t" ) new_branch = new ReadBranch( name, ptr ); - if( branchType == "Float_t" ) new_branch = new ReadBranch( name, ptr ); - if( branchType == "Bool_t" ) new_branch = new ReadBranch( name, ptr ); - if( branchType == "Int_t" ) new_branch = new ReadBranch( name, ptr ); - if( branchType == "UInt_t" ) new_branch = new ReadBranch( name, ptr ); - if( branchType == "ULong64_t") new_branch = new ReadBranch( name, ptr ); - if( new_branch == nullptr ){ - ERROR( 
"Branch type:" << branchType << " not recognised" ); - return; - } - ready = false; - branches.push_back( new_branch ); + m_position++; + m_parent->getEntry( m_position ); + return *this; } - Branch bind( const std::string& name ) - { - Branch rt; - setBranch( name, &rt ); - return rt; + bool operator==( const Iterator& rhs ) const { return m_position == rhs.m_position; } + bool operator!=( const Iterator& rhs ) const { return m_position != rhs.m_position; } + size_t operator*() const { return m_position; } + }; + TTree* m_tree = {nullptr}; + bool m_ready = {false}; + std::vector m_branches = {}; + std::vector m_entryList = {}; + public: + template struct Proxy + { + OutputType* data; + Proxy() : data( new OutputType() ) {} + operator OutputType() const { return *data; } + ~Proxy(){ delete data ; } + }; + explicit TreeReader( TTree* tree ); + template Proxy make_proxy( const std::string& name ) + { + Proxy rt; + setBranch( name, rt.data ); + return rt; + } + template void setBranch( const std::string& name, OutputType& thing ) { + setBranch(name,&thing) ; } + template void setBranch( const std::string& name, OutputType* ptr ) + { + IReadBranch* new_branch = nullptr; + TLeaf* leaf = m_tree->GetLeaf( name.c_str() ); + if( leaf == nullptr ){ + ERROR( "Leaf: " << name << " not found"); + return; } - void getEntry( const unsigned int& entry ) - { - if ( !ready ) prepare(); - tree->GetEntry( entry ); - for ( auto& branch : branches ) branch->transfer(); - } - void prepare() - { - for ( auto& branch : branches ) { - tree->SetBranchStatus( branch->name.c_str(), "1" ); - tree->SetBranchAddress( branch->name.c_str(), branch->address() ); - } - ready = true; - } - ~TreeReader() - { - for ( auto& branch : branches ) delete branch; + std::string branchType = leaf->GetTypeName(); + if( branchType == "Double_t" ) new_branch = new ReadBranch( name, ptr ); + if( branchType == "Float_t" ) new_branch = new ReadBranch( name, ptr ); + if( branchType == "Bool_t" ) new_branch = new 
ReadBranch( name, ptr ); + if( branchType == "Int_t" ) new_branch = new ReadBranch( name, ptr ); + if( branchType == "UInt_t" ) new_branch = new ReadBranch( name, ptr ); + if( branchType == "ULong64_t") new_branch = new ReadBranch( name, ptr ); + if( new_branch == nullptr ){ + ERROR( "Branch type:" << branchType << " not recognised" ); + return; } - Iterator begin() { return Iterator( 0, this ); } - Iterator end() { return Iterator( tree->GetEntries(), this ); } - }; + DEBUG("Making branch with properties: [name = " << name << ", input type = " << branchType << " output type = " << typeof() << "]" ); + m_ready = false; + m_branches.push_back( new_branch ); + } + void setEntryList( const std::vector& entryList ); + void unsetEntryList(); + void getEntry( const unsigned int& entry ); + void prepare(); + size_t nEntries() const; + ~TreeReader(); + Iterator begin(); + Iterator end(); + }; +// ENABLE_DEBUG(TreeReader); } // namespace AmpGen #endif diff --git a/AmpGen/Units.h b/AmpGen/Units.h index fc9db3fddd2..9e376d122d7 100644 --- a/AmpGen/Units.h +++ b/AmpGen/Units.h @@ -1,10 +1,23 @@ #ifndef AMPGEN_UNITS_H #define AMPGEN_UNITS_H 1 +#include "AmpGen/enum.h" namespace AmpGen { +/* + struct NewUnits { + static unsigned TeV = 0; + static unsigned GeV = 1; + static unsigned MeV = 2; + static unsigned KeV = 3; + + }; +*/ static const double TeV = 1000; static const double GeV = 1; static const double MeV = 0.001; static const double KeV = 0.001*0.001; + + declare_enum( Units, TeV, GeV, MeV, KeV ) + double to_double(const Units& unit ); } #endif diff --git a/AmpGen/simd/avx2_types.h b/AmpGen/simd/avx2_types.h deleted file mode 100644 index 04579d4ad73..00000000000 --- a/AmpGen/simd/avx2_types.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef AMPGEN_AVX_TYPES -#define AMPGEN_AVX_TYPES 1 - -#include -#include -#include -#include -#include "AmpGen/simd/avx_mathfun.h" -#include - -namespace AmpGen { - namespace AVX2 { - struct float_t { - __m256 data; - static constexpr unsigned size 
= 8 ; - typedef float scalar_type; - float_t() = default; - float_t(__m256 data ) : data(data) {} - float_t(const float& f ) : data( _mm256_set1_ps(f) ) {} - float_t(const double& f ) : data( _mm256_set1_ps( float(f) )) {} - float_t(const float* f ) : data( _mm256_loadu_ps( f ) ) {} - void store( float* ptr ) const { _mm256_storeu_ps( ptr, data ); } - std::array to_array() const { std::array b; store( &b[0] ); return b; } - float at(const unsigned i) const { return to_array()[i] ; } - operator __m256() const { return data ; } - }; - - inline float_t operator+( const float_t& lhs, const float_t& rhs ) { return _mm256_add_ps(lhs, rhs); } - inline float_t operator-( const float_t& lhs, const float_t& rhs ) { return _mm256_sub_ps(lhs, rhs); } - inline float_t operator*( const float_t& lhs, const float_t& rhs ) { return _mm256_mul_ps(lhs, rhs); } - inline float_t operator/( const float_t& lhs, const float_t& rhs ) { return _mm256_div_ps(lhs, rhs); } - inline float_t operator-( const float_t& x ) { return -1.f * x; } - inline float_t operator&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_ps( lhs, rhs ); } - inline float_t operator|( const float_t& lhs, const float_t& rhs ) { return _mm256_or_ps( lhs, rhs ); } - inline float_t operator^( const float_t& lhs, const float_t& rhs ) { return _mm256_xor_ps( lhs, rhs ); } - inline float_t operator+=(float_t& lhs, const float_t& rhs ){ lhs = lhs + rhs; return lhs; } - inline float_t operator-=(float_t& lhs, const float_t& rhs ){ lhs = lhs - rhs; return lhs; } - inline float_t operator*=(float_t& lhs, const float_t& rhs ){ lhs = lhs * rhs; return lhs; } - inline float_t operator/=(float_t& lhs, const float_t& rhs ){ lhs = lhs / rhs; return lhs; } - inline float_t operator&&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_ps( lhs, rhs ); } - inline float_t operator||( const float_t& lhs, const float_t& rhs ) { return _mm256_or_ps( lhs, rhs ); } - inline float_t operator!( const float_t& x ) { return x 
^ _mm256_castsi256_ps( _mm256_set1_epi32( -1 ) ); } - inline float_t operator<( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_LT_OS ); } - inline float_t operator>( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OS ); } - inline float_t operator==( const float_t& lhs, const float_t& rhs ){ return _mm256_cmp_ps( lhs, rhs, _CMP_EQ_OS ); } - inline float_t sqrt( const float_t& v ) { return _mm256_sqrt_ps(v); } - inline float_t sin( const float_t& v ) { return sin256_ps(v) ; } - inline float_t cos( const float_t& v ) { return cos256_ps(v) ; } - inline float_t tan( const float_t& v ) { float_t s; float_t c; sincos256_ps(v, (__m256*)&s, (__m256*)&c) ; return s/c; } - inline float_t log( const float_t& v ) { return log256_ps(v) ; } - inline float_t exp( const float_t& v ) { return exp256_ps(v) ; } - inline float_t abs ( const float_t& v ) { return v & _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); } - inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_ps( b, a, mask ); } - inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? 
a : b; } - inline float_t atan2( const float_t& y, const float_t& x ){ - std::array bx{x.to_array()}, by{y.to_array()}, rt; - for( unsigned i = 0 ; i != 8 ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); - return float_t (rt.data() ); - } - inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) - { - return _mm256_fmadd_ps(a, b, c ); - } - struct complex_t { - float_t re; - float_t im; - typedef std::complex scalar_type; - - float_t real() const { return re; } - float_t imag() const { return im; } - float_t norm() const { return re*re + im *im ; } - complex_t() = default; - complex_t( const float_t& re, const float_t& im) : re(re), im(im) {} - complex_t( const float& re, const float& im) : re(re), im(im) {} - complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} - complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} - const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } - void store( float* sre, float* sim ){ re.store(sre); im.store(sim); } - void store( std::complex* r ){ - auto re_arr = re.to_array(); - auto im_arr = im.to_array(); - for( unsigned i = 0 ; i != re_arr.size(); ++i ) r[i] = std::complex( re_arr[i], im_arr[i] ); - } - }; - - inline std::ostream& operator<<( std::ostream& os, const float_t& obj ) { - auto buffer = obj.to_array(); - for( unsigned i = 0 ; i != 8; ++i ) os << buffer[i] << " "; - return os; - } - inline float_t real(const complex_t& arg ){ return arg.re ; } - inline float_t imag(const complex_t& arg ){ return arg.im ; } - inline complex_t conj(const complex_t& arg ){ return complex_t(arg.re, -arg.im) ; } - inline float_t conj(const float_t& arg ){ return arg ; } - inline complex_t operator+( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re + rhs, lhs.im); } - inline complex_t operator-( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re - rhs, lhs.im); } - inline complex_t operator*( const 
complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re*rhs, lhs.im*rhs); } - inline complex_t operator/( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re/rhs, lhs.im/rhs); } - inline complex_t operator+( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs + rhs.re, rhs.im); } - inline complex_t operator-( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs - rhs.re, - rhs.im); } - inline complex_t operator*( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs*rhs.re, lhs*rhs.im); } - inline complex_t operator/( const float_t& lhs, const complex_t& rhs ) { return complex_t( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } - inline complex_t operator+( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re + rhs.re, lhs.im + rhs.im); } - inline complex_t operator-( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re - rhs.re, lhs.im - rhs.im); } - inline complex_t operator*( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } - inline complex_t operator/( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / (rhs.re * rhs.re + rhs.im * rhs.im ); } - inline complex_t operator-( const complex_t& x ) { return -1.f * x; } - inline float_t abs( const complex_t& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } - inline float_t norm( const complex_t& v ) { return ( v.re * v.re + v.im * v.im ) ; } - inline complex_t select(const float_t& mask, const complex_t& a, const complex_t& b ) { return complex_t( _mm256_blendv_ps( b.re, a.re, mask ), _mm256_blendv_ps( b.im, a.im, mask ) ); } - inline complex_t select(const float_t& mask, const float_t& a, const complex_t& b ) { return complex_t( _mm256_blendv_ps( b.re, a , mask ), _mm256_blendv_ps( b.im, float_t(0.f), mask ) ); } - inline complex_t 
select(const float_t& mask, const complex_t& a, const float_t& b ) { return complex_t( _mm256_blendv_ps( b, a.re, mask ), _mm256_blendv_ps( float_t(0.f), a.im, mask ) ); } - inline complex_t select(const bool& mask , const complex_t& a, const complex_t& b ) { return mask ? a : b; } - inline complex_t exp( const complex_t& v ){ - float_t s; float_t c; sincos256_ps(v.im, (__m256*)&s, (__m256*)&c) ; - return exp( v.re ) * complex_t(c, s); - } - inline std::ostream& operator<<( std::ostream& os, const complex_t& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } - #pragma omp declare reduction(+: float_t: \ - omp_out = omp_out + omp_in) - #pragma omp declare reduction(+: complex_t: \ - omp_out = omp_out + omp_in) - - } -} - -#endif diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 5ed64e3101e..f478f365d6f 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -10,100 +10,101 @@ namespace AmpGen { namespace AVX2d { #define stl_fallback( x ) \ - inline float_t x( const float_t& v ){ auto a = v.to_array(); return float_t( std::x(a[0]), std::x(a[1]), std::x(a[2]), std::x(a[3]) ) ; } + inline real_v x( const real_v& v ){ auto a = v.to_array(); return real_v( std::x(a[0]), std::x(a[1]), std::x(a[2]), std::x(a[3]) ) ; } - struct float_t { + struct real_v { __m256d data; static constexpr unsigned size = 4; typedef double scalar_type; - float_t() = default; - float_t(__m256d data ) : data(data) {} - float_t(const double& f ) : data( _mm256_set1_pd( f )) {} - float_t(const double& x0, const double& x1, const double& x2, const double& x3 ) + real_v() = default; + real_v(__m256d data ) : data(data) {} + real_v(const double& f ) : data( _mm256_set1_pd( f )) {} + real_v(const double& x0, const double& x1, const double& x2, const double& x3 ) { double tmp[4] = {x0,x1,x2,x3}; data = _mm256_loadu_pd(tmp); } - float_t(const double* f ) : data( _mm256_loadu_pd( f ) ) {} + real_v(const double* f ) : data( _mm256_loadu_pd( f ) ) {} 
void store( double* ptr ) const { _mm256_storeu_pd( ptr, data ); } std::array to_array() const { std::array b; store( &b[0] ); return b; } double at(const unsigned i) const { return to_array()[i] ; } operator __m256d() const { return data ; } }; - inline float_t operator+( const float_t& lhs, const float_t& rhs ) { return _mm256_add_pd(lhs, rhs); } - inline float_t operator-( const float_t& lhs, const float_t& rhs ) { return _mm256_sub_pd(lhs, rhs); } - inline float_t operator*( const float_t& lhs, const float_t& rhs ) { return _mm256_mul_pd(lhs, rhs); } - inline float_t operator/( const float_t& lhs, const float_t& rhs ) { return _mm256_div_pd(lhs, rhs); } - inline float_t operator-( const float_t& x ) { return -1.f * x; } - inline float_t operator&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_pd( lhs, rhs ); } - inline float_t operator|( const float_t& lhs, const float_t& rhs ) { return _mm256_or_pd( lhs, rhs ); } - inline float_t operator^( const float_t& lhs, const float_t& rhs ) { return _mm256_xor_pd( lhs, rhs ); } - inline float_t operator+=(float_t& lhs, const float_t& rhs ){ lhs = lhs + rhs; return lhs; } - inline float_t operator-=(float_t& lhs, const float_t& rhs ){ lhs = lhs - rhs; return lhs; } - inline float_t operator*=(float_t& lhs, const float_t& rhs ){ lhs = lhs * rhs; return lhs; } - inline float_t operator/=(float_t& lhs, const float_t& rhs ){ lhs = lhs / rhs; return lhs; } - inline float_t operator&&( const float_t& lhs, const float_t& rhs ) { return _mm256_and_pd( lhs, rhs ); } - inline float_t operator||( const float_t& lhs, const float_t& rhs ) { return _mm256_or_pd( lhs, rhs ); } - inline float_t operator!( const float_t& x ) { return x ^ _mm256_castsi256_pd( _mm256_set1_epi32( -1 ) ); } - inline float_t operator<( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_LT_OS ); } - inline float_t operator>( const float_t& lhs, const float_t& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OS ); } - 
inline float_t operator==( const float_t& lhs, const float_t& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); } - inline float_t sqrt( const float_t& v ) { return _mm256_sqrt_pd(v); } - inline float_t abs ( const float_t& v ) { return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); } - // inline float_t sin( const float_t& v ) { return sin256_pd(v) ; } - // inline float_t cos( const float_t& v ) { return cos256_pd(v) ; } - // inline float_t tan( const float_t& v ) { float_t s; float_t c; sincos256_pd(v, (__m256*)&s, (__m256*)&c) ; return s/c; } - // inline float_t exp( const float_t& v ) { return exp256_ps(v) ; } - inline float_t select(const float_t& mask, const float_t& a, const float_t& b ) { return _mm256_blendv_pd( b, a, mask ); } - inline float_t select(const bool& mask , const float_t& a, const float_t& b ) { return mask ? a : b; } - inline float_t sign ( const float_t& v){ return select( v > 0., +1., -1. ); } - inline float_t atan2( const float_t& y, const float_t& x ){ + inline real_v operator+( const real_v& lhs, const real_v& rhs ) { return _mm256_add_pd(lhs, rhs); } + inline real_v operator-( const real_v& lhs, const real_v& rhs ) { return _mm256_sub_pd(lhs, rhs); } + inline real_v operator*( const real_v& lhs, const real_v& rhs ) { return _mm256_mul_pd(lhs, rhs); } + inline real_v operator/( const real_v& lhs, const real_v& rhs ) { return _mm256_div_pd(lhs, rhs); } + inline real_v operator-( const real_v& x ) { return -1.f * x; } + inline real_v operator&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_pd( lhs, rhs ); } + inline real_v operator|( const real_v& lhs, const real_v& rhs ) { return _mm256_or_pd( lhs, rhs ); } + inline real_v operator^( const real_v& lhs, const real_v& rhs ) { return _mm256_xor_pd( lhs, rhs ); } + inline real_v operator+=(real_v& lhs, const real_v& rhs ){ lhs = lhs + rhs; return lhs; } + inline real_v operator-=(real_v& lhs, const real_v& rhs ){ lhs = lhs - rhs; return lhs; } + inline real_v operator*=(real_v& 
lhs, const real_v& rhs ){ lhs = lhs * rhs; return lhs; } + inline real_v operator/=(real_v& lhs, const real_v& rhs ){ lhs = lhs / rhs; return lhs; } + inline real_v operator&&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_pd( lhs, rhs ); } + inline real_v operator||( const real_v& lhs, const real_v& rhs ) { return _mm256_or_pd( lhs, rhs ); } + inline real_v operator!( const real_v& x ) { return x ^ _mm256_castsi256_pd( _mm256_set1_epi32( -1 ) ); } + inline real_v operator<( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_LT_OS ); } + inline real_v operator>( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OS ); } + inline real_v operator==( const real_v& lhs, const real_v& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); } + inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_pd(v); } + inline real_v abs ( const real_v& v ) { return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); } + // inline real_v sin( const real_v& v ) { return sin256_pd(v) ; } + // inline real_v cos( const real_v& v ) { return cos256_pd(v) ; } + // inline real_v tan( const real_v& v ) { real_v s; real_v c; sincos256_pd(v, (__m256*)&s, (__m256*)&c) ; return s/c; } + // inline real_v exp( const real_v& v ) { return exp256_ps(v) ; } + inline real_v select(const real_v& mask, const real_v& a, const real_v& b ) { return _mm256_blendv_pd( b, a, mask ); } + inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? a : b; } + inline real_v sign ( const real_v& v){ return select( v > 0., +1., -1. 
); } + inline real_v atan2( const real_v& y, const real_v& x ){ std::array bx{x.to_array()}, by{y.to_array()}; - return float_t ( + return real_v ( std::atan2( by[0], bx[0]) , std::atan2( by[1], bx[1]) , std::atan2( by[2], bx[2]) , std::atan2( by[3], bx[3]) ); } - inline __m256i double_to_int( const float_t& x ) + inline __m256i udouble_to_uint( const real_v& x ) { + auto xr = _mm256_round_pd(x, _MM_FROUND_TO_NEG_INF); // based on: https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx - return _mm256_sub_epi64(_mm256_castpd_si256(_mm256_add_pd(x, _mm256_set1_pd(0x0018000000000000))), + return _mm256_sub_epi64(_mm256_castpd_si256(_mm256_add_pd(xr, _mm256_set1_pd(0x0018000000000000))), _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))); } - inline float_t gather( const double* base_addr, const float_t& offsets) + inline real_v gather( const double* base_addr, const real_v& offsets) { - return _mm256_i64gather_pd(base_addr, double_to_int(offsets),sizeof(double)); + return _mm256_i64gather_pd(base_addr, udouble_to_uint(offsets),sizeof(double)); } - inline void frexp(const AVX2d::float_t& value, AVX2d::float_t& mant, AVX2d::float_t& exponent) + inline void frexp(const real_v& value, real_v& mant, real_v& exponent) { auto arg_as_int = _mm256_castpd_si256(value); - static const AVX2d::float_t offset(4503599627370496.0 + 1022.0); // 2^52 + 1022.0 + static const real_v offset(4503599627370496.0 + 1022.0); // 2^52 + 1022.0 static const __m256i pow2_52_i = _mm256_set1_epi64x(0x4330000000000000); // *reinterpret_cast(&pow2_52_d); auto b = _mm256_srl_epi64(arg_as_int, _mm_cvtsi32_si128(52)); auto c = _mm256_or_si256( b , pow2_52_i); - exponent = AVX2d::float_t( _mm256_castsi256_pd(c) ) - offset; + exponent = real_v( _mm256_castsi256_pd(c) ) - offset; mant = _mm256_castsi256_pd(_mm256_or_si256(_mm256_and_si256 (arg_as_int, _mm256_set1_epi64x(0x000FFFFFFFFFFFFFll) ), _mm256_set1_epi64x(0x3FE0000000000000ll))); } - 
inline float_t fmadd( const float_t& a, const float_t& b, const float_t& c ) + inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c ) { return _mm256_fmadd_pd(a, b, c); } - inline float_t log(const AVX2d::float_t& arg) + inline real_v log(const real_v& arg) { - static const AVX2d::float_t corr = 0.693147180559945286226764; - static const AVX2d::float_t CL15 = 0.148197055177935105296783; - static const AVX2d::float_t CL13 = 0.153108178020442575739679; - static const AVX2d::float_t CL11 = 0.181837339521549679055568; - static const AVX2d::float_t CL9 = 0.22222194152736701733275; - static const AVX2d::float_t CL7 = 0.285714288030134544449368; - static const AVX2d::float_t CL5 = 0.399999999989941956712869; - static const AVX2d::float_t CL3 = 0.666666666666685503450651; - static const AVX2d::float_t CL1 = 2.0; - AVX2d::float_t mant, exponent; + static const real_v corr = 0.693147180559945286226764; + static const real_v CL15 = 0.148197055177935105296783; + static const real_v CL13 = 0.153108178020442575739679; + static const real_v CL11 = 0.181837339521549679055568; + static const real_v CL9 = 0.22222194152736701733275; + static const real_v CL7 = 0.285714288030134544449368; + static const real_v CL5 = 0.399999999989941956712869; + static const real_v CL3 = 0.666666666666685503450651; + static const real_v CL1 = 2.0; + real_v mant, exponent; frexp(arg, mant, exponent); auto x = (mant - 1.) 
/ (mant + 1.); auto x2 = x * x; @@ -121,26 +122,29 @@ namespace AmpGen { stl_fallback( tan ) stl_fallback( sin ) stl_fallback( cos ) - inline float_t remainder( const float_t& a, const float_t& b ){ return a - float_t(_mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF)) * b; } - inline float_t fmod( const float_t& a, const float_t& b ) + inline real_v remainder( const real_v& a, const real_v& b ){ return a - real_v(_mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF)) * b; } + inline real_v fmod( const real_v& a, const real_v& b ) { auto r = remainder( abs(a), abs(b) ); return select( a > 0., r, -r ); } - struct complex_t { - float_t re; - float_t im; + struct complex_v { + real_v re; + real_v im; typedef std::complex scalar_type; + static constexpr unsigned size = 4; - float_t real() const { return re; } - float_t imag() const { return im; } - float_t norm() const { return re*re + im *im ; } - complex_t() = default; - complex_t( const float_t& re, const float_t& im) : re(re), im(im) {} - complex_t( const float& re, const float& im) : re(re), im(im) {} - complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} - complex_t( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + real_v real() const { return re; } + real_v imag() const { return im; } + real_v norm() const { return re*re + im *im ; } + complex_v() = default; + complex_v( const real_v& re, const real_v& im) : re(re), im(im) {} + complex_v( const float& re, const float& im) : re(re), im(im) {} + complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + explicit complex_v( const real_v& arg ) : re(arg) {}; + explicit complex_v( const double& arg ) : re(arg) {}; const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } void store( double* sre, double* sim ){ re.store(sre); im.store(sim); } void store( scalar_type* r ) const { @@ -156,51 +160,51 @@ namespace AmpGen { 
} }; - inline std::ostream& operator<<( std::ostream& os, const float_t& obj ) { + inline std::ostream& operator<<( std::ostream& os, const real_v& obj ) { auto buffer = obj.to_array(); for( unsigned i = 0 ; i != 4; ++i ) os << buffer[i] << " "; return os; } - inline float_t real(const complex_t& arg ){ return arg.re ; } - inline float_t imag(const complex_t& arg ){ return arg.im ; } - inline complex_t conj(const complex_t& arg ){ return complex_t(arg.re, -arg.im) ; } - inline float_t conj(const float_t& arg ){ return arg ; } - inline complex_t operator+( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re + rhs, lhs.im); } - inline complex_t operator-( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re - rhs, lhs.im); } - inline complex_t operator*( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re*rhs, lhs.im*rhs); } - inline complex_t operator/( const complex_t& lhs, const float_t& rhs ) { return complex_t(lhs.re/rhs, lhs.im/rhs); } - inline complex_t operator+( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs + rhs.re, rhs.im); } - inline complex_t operator-( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs - rhs.re, - rhs.im); } - inline complex_t operator*( const float_t& lhs, const complex_t& rhs ) { return complex_t(lhs*rhs.re, lhs*rhs.im); } - inline complex_t operator/( const float_t& lhs, const complex_t& rhs ) { return complex_t( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } - inline complex_t operator+( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re + rhs.re, lhs.im + rhs.im); } - inline complex_t operator-( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re - rhs.re, lhs.im - rhs.im); } - inline complex_t operator*( const complex_t& lhs, const complex_t& rhs ) { return complex_t(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } - inline complex_t operator/( const complex_t& lhs, 
const complex_t& rhs ) { return complex_t(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / (rhs.re * rhs.re + rhs.im * rhs.im ); } - inline complex_t operator-( const complex_t& x ) { return -1.f * x; } - inline float_t abs( const complex_t& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } - inline float_t norm( const complex_t& v ) { return ( v.re * v.re + v.im * v.im ) ; } - inline complex_t select(const float_t& mask, const complex_t& a, const complex_t& b ) { return complex_t( select(mask, a.re, b.re), select(mask, a.im, b.im ) ) ; } - inline complex_t select(const float_t& mask, const float_t& a, const complex_t& b ) { return complex_t( select(mask, a , b.re), select(mask, 0.f, b.im) ); } - inline complex_t select(const float_t& mask, const complex_t& a, const float_t& b ) { return complex_t( select(mask, a.re, b ) , select(mask, a.im, 0.f) ); } - inline complex_t select(const bool& mask , const complex_t& a, const complex_t& b ) { return mask ? a : b; } - inline complex_t exp( const complex_t& v ){ - return exp( v.re) * complex_t( cos( v.im ), sin( v.im ) ); + inline real_v real(const complex_v& arg ){ return arg.re ; } + inline real_v imag(const complex_v& arg ){ return arg.im ; } + inline complex_v conj(const complex_v& arg ){ return complex_v(arg.re, -arg.im) ; } + inline real_v conj(const real_v& arg ){ return arg ; } + inline complex_v operator+( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re + rhs, lhs.im); } + inline complex_v operator-( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re - rhs, lhs.im); } + inline complex_v operator*( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re*rhs, lhs.im*rhs); } + inline complex_v operator/( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re/rhs, lhs.im/rhs); } + inline complex_v operator+( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs + rhs.re, rhs.im); } + inline complex_v operator-( const real_v& 
lhs, const complex_v& rhs ) { return complex_v(lhs - rhs.re, - rhs.im); } + inline complex_v operator*( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs*rhs.re, lhs*rhs.im); } + inline complex_v operator/( const real_v& lhs, const complex_v& rhs ) { return complex_v( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_v operator+( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re + rhs.re, lhs.im + rhs.im); } + inline complex_v operator-( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re - rhs.re, lhs.im - rhs.im); } + inline complex_v operator*( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } + inline complex_v operator/( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_v operator-( const complex_v& x ) { return -1.f * x; } + inline real_v abs( const complex_v& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } + inline real_v norm( const complex_v& v ) { return ( v.re * v.re + v.im * v.im ) ; } + inline complex_v select(const real_v& mask, const complex_v& a, const complex_v& b ) { return complex_v( select(mask, a.re, b.re), select(mask, a.im, b.im ) ) ; } + inline complex_v select(const real_v& mask, const real_v& a, const complex_v& b ) { return complex_v( select(mask, a , b.re), select(mask, 0.f, b.im) ); } + inline complex_v select(const real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( select(mask, a.re, b ) , select(mask, a.im, 0.f) ); } + inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? 
a : b; } + inline complex_v exp( const complex_v& v ){ + return exp( v.re) * complex_v( cos( v.im ), sin( v.im ) ); } - inline complex_t sqrt( const complex_t& v ) + inline complex_v sqrt( const complex_v& v ) { auto r = abs(v); - return complex_t ( sqrt( 0.5 * (r + v.re) ), sign(v.im) * sqrt( 0.5*( r - v.re ) ) ); + return complex_v ( sqrt( 0.5 * (r + v.re) ), sign(v.im) * sqrt( 0.5*( r - v.re ) ) ); } - inline complex_t log( const complex_t& v ) + inline complex_v log( const complex_v& v ) { - return complex_t( log( v.re ) , atan2(v.im, v.re) ); + return complex_v( log( v.re ) , atan2(v.im, v.re) ); } - inline std::ostream& operator<<( std::ostream& os, const complex_t& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } - #pragma omp declare reduction(+: float_t: \ + inline std::ostream& operator<<( std::ostream& os, const complex_v& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } + #pragma omp declare reduction(+: real_v: \ omp_out = omp_out + omp_in) - #pragma omp declare reduction(+: complex_t: \ + #pragma omp declare reduction(+: complex_v: \ omp_out = omp_out + omp_in) } diff --git a/AmpGen/simd/avx2f_types.h b/AmpGen/simd/avx2f_types.h new file mode 100644 index 00000000000..6ddd78327fd --- /dev/null +++ b/AmpGen/simd/avx2f_types.h @@ -0,0 +1,128 @@ +#ifndef AMPGEN_AVX_TYPES +#define AMPGEN_AVX_TYPES 1 + +#include +#include +#include +#include +#include "AmpGen/simd/avx_mathfun.h" +#include + +namespace AmpGen { + namespace AVX2 { + struct real_v { + __m256 data; + static constexpr unsigned size = 8 ; + typedef float scalar_type; + real_v() = default; + real_v(__m256 data ) : data(data) {} + real_v(const float& f ) : data( _mm256_set1_ps(f) ) {} + real_v(const double& f ) : data( _mm256_set1_ps( float(f) )) {} + real_v(const float* f ) : data( _mm256_loadu_ps( f ) ) {} + void store( float* ptr ) const { _mm256_storeu_ps( ptr, data ); } + std::array to_array() const { std::array b; store( &b[0] ); return b; } + float at(const 
unsigned i) const { return to_array()[i] ; } + operator __m256() const { return data ; } + }; + + inline real_v operator+( const real_v& lhs, const real_v& rhs ) { return _mm256_add_ps(lhs, rhs); } + inline real_v operator-( const real_v& lhs, const real_v& rhs ) { return _mm256_sub_ps(lhs, rhs); } + inline real_v operator*( const real_v& lhs, const real_v& rhs ) { return _mm256_mul_ps(lhs, rhs); } + inline real_v operator/( const real_v& lhs, const real_v& rhs ) { return _mm256_div_ps(lhs, rhs); } + inline real_v operator-( const real_v& x ) { return -1.f * x; } + inline real_v operator&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_ps( lhs, rhs ); } + inline real_v operator|( const real_v& lhs, const real_v& rhs ) { return _mm256_or_ps( lhs, rhs ); } + inline real_v operator^( const real_v& lhs, const real_v& rhs ) { return _mm256_xor_ps( lhs, rhs ); } + inline real_v operator+=(real_v& lhs, const real_v& rhs ){ lhs = lhs + rhs; return lhs; } + inline real_v operator-=(real_v& lhs, const real_v& rhs ){ lhs = lhs - rhs; return lhs; } + inline real_v operator*=(real_v& lhs, const real_v& rhs ){ lhs = lhs * rhs; return lhs; } + inline real_v operator/=(real_v& lhs, const real_v& rhs ){ lhs = lhs / rhs; return lhs; } + inline real_v operator&&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_ps( lhs, rhs ); } + inline real_v operator||( const real_v& lhs, const real_v& rhs ) { return _mm256_or_ps( lhs, rhs ); } + inline real_v operator!( const real_v& x ) { return x ^ _mm256_castsi256_ps( _mm256_set1_epi32( -1 ) ); } + inline real_v operator<( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_LT_OS ); } + inline real_v operator>( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OS ); } + inline real_v operator==( const real_v& lhs, const real_v& rhs ){ return _mm256_cmp_ps( lhs, rhs, _CMP_EQ_OS ); } + inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_ps(v); } + inline real_v 
sin( const real_v& v ) { return sin256_ps(v) ; } + inline real_v cos( const real_v& v ) { return cos256_ps(v) ; } + inline real_v tan( const real_v& v ) { real_v s; real_v c; sincos256_ps(v, (__m256*)&s, (__m256*)&c) ; return s/c; } + inline real_v log( const real_v& v ) { return log256_ps(v) ; } + inline real_v exp( const real_v& v ) { return exp256_ps(v) ; } + inline real_v abs ( const real_v& v ) { return v & _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); } + inline real_v select(const real_v& mask, const real_v& a, const real_v& b ) { return _mm256_blendv_ps( b, a, mask ); } + inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? a : b; } + inline real_v atan2( const real_v& y, const real_v& x ){ + std::array bx{x.to_array()}, by{y.to_array()}, rt; + for( unsigned i = 0 ; i != 8 ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); + return real_v (rt.data() ); + } + inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c ) + { + return _mm256_fmadd_ps(a, b, c ); + } + struct complex_v { + real_v re; + real_v im; + typedef std::complex scalar_type; + static constexpr unsigned size = 8 ; + + real_v real() const { return re; } + real_v imag() const { return im; } + real_v norm() const { return re*re + im *im ; } + complex_v() = default; + complex_v( const real_v& re, const real_v& im) : re(re), im(im) {} + complex_v( const float& re, const float& im) : re(re), im(im) {} + complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } + void store( float* sre, float* sim ){ re.store(sre); im.store(sim); } + void store( std::complex* r ){ + auto re_arr = re.to_array(); + auto im_arr = im.to_array(); + for( unsigned i = 0 ; i != re_arr.size(); ++i ) r[i] = std::complex( re_arr[i], im_arr[i] ); + } + }; + + inline std::ostream& 
operator<<( std::ostream& os, const real_v& obj ) { + auto buffer = obj.to_array(); + for( unsigned i = 0 ; i != 8; ++i ) os << buffer[i] << " "; + return os; + } + inline real_v real(const complex_v& arg ){ return arg.re ; } + inline real_v imag(const complex_v& arg ){ return arg.im ; } + inline complex_v conj(const complex_v& arg ){ return complex_v(arg.re, -arg.im) ; } + inline real_v conj(const real_v& arg ){ return arg ; } + inline complex_v operator+( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re + rhs, lhs.im); } + inline complex_v operator-( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re - rhs, lhs.im); } + inline complex_v operator*( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re*rhs, lhs.im*rhs); } + inline complex_v operator/( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re/rhs, lhs.im/rhs); } + inline complex_v operator+( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs + rhs.re, rhs.im); } + inline complex_v operator-( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs - rhs.re, - rhs.im); } + inline complex_v operator*( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs*rhs.re, lhs*rhs.im); } + inline complex_v operator/( const real_v& lhs, const complex_v& rhs ) { return complex_v( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_v operator+( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re + rhs.re, lhs.im + rhs.im); } + inline complex_v operator-( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re - rhs.re, lhs.im - rhs.im); } + inline complex_v operator*( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } + inline complex_v operator/( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / 
(rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_v operator-( const complex_v& x ) { return -1.f * x; } + inline real_v abs( const complex_v& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } + inline real_v norm( const complex_v& v ) { return ( v.re * v.re + v.im * v.im ) ; } + inline complex_v select(const real_v& mask, const complex_v& a, const complex_v& b ) { return complex_v( _mm256_blendv_ps( b.re, a.re, mask ), _mm256_blendv_ps( b.im, a.im, mask ) ); } + inline complex_v select(const real_v& mask, const real_v& a, const complex_v& b ) { return complex_v( _mm256_blendv_ps( b.re, a , mask ), _mm256_blendv_ps( b.im, real_v(0.f), mask ) ); } + inline complex_v select(const real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( _mm256_blendv_ps( b, a.re, mask ), _mm256_blendv_ps( real_v(0.f), a.im, mask ) ); } + inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? a : b; } + inline complex_v exp( const complex_v& v ){ + real_v s; real_v c; sincos256_ps(v.im, (__m256*)&s, (__m256*)&c) ; + return exp( v.re ) * complex_v(c, s); + } + inline std::ostream& operator<<( std::ostream& os, const complex_v& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } + #pragma omp declare reduction(+: real_v: \ + omp_out = omp_out + omp_in) + #pragma omp declare reduction(+: complex_v: \ + omp_out = omp_out + omp_in) + + } +} + +#endif diff --git a/AmpGen/simd/avx512d_types.h b/AmpGen/simd/avx512d_types.h new file mode 100644 index 00000000000..a43af591688 --- /dev/null +++ b/AmpGen/simd/avx512d_types.h @@ -0,0 +1,212 @@ +#ifndef AMPGEN_AVXd_TYPES +#define AMPGEN_AVXd_TYPES 1 + +#include +#include +#include +#include +#include + +namespace AmpGen { + namespace AVX512d { + #define stl_fallback( x ) \ + inline real_v x( const real_v& v ){ auto a = v.to_array(); return real_v( std::x(a[0]), std::x(a[1]), std::x(a[2]), std::x(a[3]), std::x(a[4]), std::x(a[5]), std::x(a[6]), std::x(a[7]) ) ; } + + 
struct real_v { + __m512d data; + static constexpr unsigned size = 8; + typedef double scalar_type; + real_v() = default; + real_v(__m512d data ) : data(data) {} + real_v(const double& f ) : data( _mm512_set1_pd( f )) {} + real_v( + const double& x0, const double& x1, const double& x2, const double& x3, + const double& x4, const double& x5, const double& x6, const double& x7) + { + double tmp[8] = {x0,x1,x2,x3,x4,x5,x6,x7}; + data = _mm512_loadu_pd(tmp); + } + real_v(const double* f ) : data( _mm512_loadu_pd( f ) ) {} + void store( double* ptr ) const { _mm512_storeu_pd( ptr, data ); } + std::array to_array() const { std::array b; store( &b[0] ); return b; } + double at(const unsigned i) const { return to_array()[i] ; } + operator __m512d() const { return data ; } + }; + + inline real_v operator+( const real_v& lhs, const real_v& rhs ) { return _mm512_add_pd(lhs, rhs); } + inline real_v operator-( const real_v& lhs, const real_v& rhs ) { return _mm512_sub_pd(lhs, rhs); } + inline real_v operator*( const real_v& lhs, const real_v& rhs ) { return _mm512_mul_pd(lhs, rhs); } + inline real_v operator/( const real_v& lhs, const real_v& rhs ) { return _mm512_div_pd(lhs, rhs); } + inline real_v operator-( const real_v& x ) { return -1.f * x; } + inline real_v operator&( const real_v& lhs, const real_v& rhs ) { return _mm512_and_pd( lhs, rhs ); } + inline real_v operator|( const real_v& lhs, const real_v& rhs ) { return _mm512_or_pd( lhs, rhs ); } + inline real_v operator^( const real_v& lhs, const real_v& rhs ) { return _mm512_xor_pd( lhs, rhs ); } + inline real_v operator+=(real_v& lhs, const real_v& rhs ){ lhs = lhs + rhs; return lhs; } + inline real_v operator-=(real_v& lhs, const real_v& rhs ){ lhs = lhs - rhs; return lhs; } + inline real_v operator*=(real_v& lhs, const real_v& rhs ){ lhs = lhs * rhs; return lhs; } + inline real_v operator/=(real_v& lhs, const real_v& rhs ){ lhs = lhs / rhs; return lhs; } + inline real_v operator&&( const real_v& lhs, const real_v& rhs 
) { return _mm512_and_pd( lhs, rhs ); } + inline real_v operator||( const real_v& lhs, const real_v& rhs ) { return _mm512_or_pd( lhs, rhs ); } + inline real_v operator!( const real_v& x ) { return x ^ _mm512_castsi512_pd( _mm512_set1_epi32( -1 ) ); } + inline __mmask8 operator<( const real_v& lhs, const real_v& rhs ) { return _mm512_cmp_pd_mask( lhs, rhs, _CMP_LT_OS ); } + inline __mmask8 operator>( const real_v& lhs, const real_v& rhs ) { return _mm512_cmp_pd_mask( lhs, rhs, _CMP_GT_OS ); } + inline __mmask8 operator==( const real_v& lhs, const real_v& rhs ){ return _mm512_cmp_pd_mask( lhs, rhs, _CMP_EQ_OS ); } + inline real_v sqrt( const real_v& v ) { return _mm512_sqrt_pd(v); } + inline real_v abs ( const real_v& v ) { return _mm512_andnot_pd(_mm512_set1_pd(-0.), v); } + // inline real_v sin( const real_v& v ) { return sin512_pd(v) ; } + // inline real_v cos( const real_v& v ) { return cos512_pd(v) ; } + // inline real_v tan( const real_v& v ) { real_v s; real_v c; sincos512_pd(v, (__m512*)&s, (__m512*)&c) ; return s/c; } + // inline real_v exp( const real_v& v ) { return exp512_ps(v) ; } + inline real_v select(const __mmask8& mask, const real_v& a, const real_v& b ) { return _mm512_mask_mov_pd( b, mask, a ); } + inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? a : b; } + inline real_v sign ( const real_v& v){ return select( v > 0., +1., -1. 
); } + inline real_v atan2( const real_v& y, const real_v& x ){ + std::array bx{x.to_array()}, by{y.to_array()}; + return real_v ( + std::atan2(by[0], bx[0]) , std::atan2( by[1], bx[1]), std::atan2( by[2], bx[2]), std::atan2( by[3], bx[3]) , + std::atan2(by[4], bx[4]) , std::atan2( by[5], bx[5]), std::atan2( by[6], bx[6]), std::atan2( by[7], bx[7]) ); + } + inline __m512i double_to_int( const real_v& x ) + { + auto xr = _mm512_roundscale_pd(x, _MM_FROUND_TO_ZERO); + // based on: https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx + return _mm512_sub_epi64(_mm512_castpd_si512(_mm512_add_pd(xr, _mm512_set1_pd(0x0018000000000000))), + _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000))); + } + inline real_v gather( const double* base_addr, const real_v& offsets) + { + return _mm512_i64gather_pd(double_to_int(offsets), base_addr, sizeof(double)); + } + + inline void frexp(const real_v& value, real_v& mant, real_v& exponent) + { + auto arg_as_int = _mm512_castpd_si512(value); + static const real_v offset(4503599627370496.0 + 1022.0); // 2^52 + 1022.0 + static const __m512i pow2_52_i = _mm512_set1_epi64(0x4330000000000000); // *reinterpret_cast(&pow2_52_d); + auto b = _mm512_srl_epi64(arg_as_int, _mm_cvtsi32_si128(52)); + auto c = _mm512_or_si512( b , pow2_52_i); + exponent = real_v( _mm512_castsi512_pd(c) ) - offset; + mant = _mm512_castsi512_pd(_mm512_or_si512(_mm512_and_si512 (arg_as_int, _mm512_set1_epi64(0x000FFFFFFFFFFFFFll) ), _mm512_set1_epi64(0x3FE0000000000000ll))); + } + + inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c ) + { + return _mm512_fmadd_pd(a, b, c); + } + inline real_v log(const real_v& arg) + { + static const real_v corr = 0.693147180559945286226764; + static const real_v CL15 = 0.148197055177935105296783; + static const real_v CL13 = 0.153108178020442575739679; + static const real_v CL11 = 0.181837339521549679055568; + static const real_v CL9 = 
0.22222194152736701733275; + static const real_v CL7 = 0.285714288030134544449368; + static const real_v CL5 = 0.399999999989941956712869; + static const real_v CL3 = 0.666666666666685503450651; + static const real_v CL1 = 2.0; + real_v mant, exponent; + frexp(arg, mant, exponent); + auto x = (mant - 1.) / (mant + 1.); + auto x2 = x * x; + auto p = fmadd(CL15, x2, CL13); + p = fmadd(p, x2, CL11); + p = fmadd(p, x2, CL9); + p = fmadd(p, x2, CL7); + p = fmadd(p, x2, CL5); + p = fmadd(p, x2, CL3); + p = fmadd(p, x2, CL1); + p = fmadd(p, x, corr * exponent); + return p; + } + stl_fallback( exp ) + stl_fallback( tan ) + stl_fallback( sin ) + stl_fallback( cos ) + inline real_v remainder( const real_v& a, const real_v& b ){ return a - real_v(_mm512_roundscale_pd(a/b, _MM_FROUND_TO_NEG_INF)) * b; } + inline real_v fmod( const real_v& a, const real_v& b ) + { + auto r = remainder( abs(a), abs(b) ); + return select( a > 0., r, -r ); + } + + struct complex_v { + real_v re; + real_v im; + typedef std::complex scalar_type; + + real_v real() const { return re; } + real_v imag() const { return im; } + real_v norm() const { return re*re + im *im ; } + complex_v() = default; + complex_v( const real_v& re, const real_v& im) : re(re), im(im) {} + complex_v( const float& re, const float& im) : re(re), im(im) {} + complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + explicit complex_v( const real_v& arg ) : re(arg), im(0.) {}; /* purely real value: zero the imaginary lanes, since the defaulted real_v() leaves them uninitialised */ + explicit complex_v( const double& arg ) : re(arg), im(0.) {}; /* as above, for a broadcast scalar */ + const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } + void store( double* sre, double* sim ){ re.store(sre); im.store(sim); } + void store( scalar_type* r ) const { + auto re_arr = re.to_array(); + auto im_arr = im.to_array(); + for( unsigned i = 0 ; i != re_arr.size(); ++i ) r[i] = scalar_type( re_arr[i], im_arr[i] ); + } + auto to_array() const + { + std::array rt; 
+ store( rt.data() ); + return rt; + } + }; + + inline std::ostream& operator<<( std::ostream& os, const real_v& obj ) { + auto buffer = obj.to_array(); + for( unsigned i = 0 ; i != 8; ++i ) os << buffer[i] << " "; + return os; + } + inline real_v real(const complex_v& arg ){ return arg.re ; } + inline real_v imag(const complex_v& arg ){ return arg.im ; } + inline complex_v conj(const complex_v& arg ){ return complex_v(arg.re, -arg.im) ; } + inline real_v conj(const real_v& arg ){ return arg ; } + inline complex_v operator+( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re + rhs, lhs.im); } + inline complex_v operator-( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re - rhs, lhs.im); } + inline complex_v operator*( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re*rhs, lhs.im*rhs); } + inline complex_v operator/( const complex_v& lhs, const real_v& rhs ) { return complex_v(lhs.re/rhs, lhs.im/rhs); } + inline complex_v operator+( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs + rhs.re, rhs.im); } + inline complex_v operator-( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs - rhs.re, - rhs.im); } + inline complex_v operator*( const real_v& lhs, const complex_v& rhs ) { return complex_v(lhs*rhs.re, lhs*rhs.im); } + inline complex_v operator/( const real_v& lhs, const complex_v& rhs ) { return complex_v( lhs * rhs.re , -lhs *rhs.im) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_v operator+( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re + rhs.re, lhs.im + rhs.im); } + inline complex_v operator-( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re - rhs.re, lhs.im - rhs.im); } + inline complex_v operator*( const complex_v& lhs, const complex_v& rhs ) { return complex_v(lhs.re*rhs.re - lhs.im*rhs.im, lhs.re*rhs.im + lhs.im*rhs.re); } + inline complex_v operator/( const complex_v& lhs, const complex_v& rhs ) { return 
complex_v(lhs.re*rhs.re + lhs.im*rhs.im, -lhs.re*rhs.im + lhs.im*rhs.re) / (rhs.re * rhs.re + rhs.im * rhs.im ); } + inline complex_v operator-( const complex_v& x ) { return -1.f * x; } + inline real_v abs( const complex_v& v ) { return sqrt( v.re * v.re + v.im * v.im ) ; } + inline real_v norm( const complex_v& v ) { return ( v.re * v.re + v.im * v.im ) ; } + inline complex_v select(const __mmask8& mask, const complex_v& a, const complex_v& b ) { return complex_v( select(mask, a.re, b.re), select(mask, a.im, b.im ) ) ; } + inline complex_v select(const __mmask8& mask, const real_v& a, const complex_v& b ) { return complex_v( select(mask, a , b.re), select(mask, 0.f, b.im) ); } + inline complex_v select(const __mmask8& mask, const complex_v& a, const real_v& b ) { return complex_v( select(mask, a.re, b ) , select(mask, a.im, 0.f) ); } + inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? a : b; } + inline complex_v exp( const complex_v& v ){ + return exp( v.re) * complex_v( cos( v.im ), sin( v.im ) ); + } + inline complex_v sqrt( const complex_v& v ) + { + auto r = abs(v); + return complex_v ( sqrt( 0.5 * (r + v.re) ), sign(v.im) * sqrt( 0.5*( r - v.re ) ) ); + } + inline complex_v log( const complex_v& v ) + { + return complex_v( 0.5 * log( v.norm() ) , atan2(v.im, v.re) ); /* log(z) = log|z| + i*arg(z); 0.5*log(|z|^2) gives log|z| without a sqrt */ + } + + inline std::ostream& operator<<( std::ostream& os, const complex_v& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } + #pragma omp declare reduction(+: real_v: \ + omp_out = omp_out + omp_in) + #pragma omp declare reduction(+: complex_v: \ + omp_out = omp_out + omp_in) + + } +} + +#endif diff --git a/AmpGen/simd/utils.h b/AmpGen/simd/utils.h index 62099046210..b10b3580c6c 100644 --- a/AmpGen/simd/utils.h +++ b/AmpGen/simd/utils.h @@ -2,47 +2,41 @@ #define AMPGEN_SIMD_UTILS_H #include -#if ENABLE_AVX2 -#if DOUBLE_PRECISION + +#if ENABLE_AVX512 + #include "AmpGen/simd/avx512d_types.h" +#elif ENABLE_AVX2d #include 
"AmpGen/simd/avx2d_types.h" -#else - #include "AmpGen/simd/avx2_types.h" -#endif +#elif ENABLE_AVX2f + #include "AmpGen/simd/avx2f_types.h" #endif namespace AmpGen { -#if ENABLE_AVX2 -#if DOUBLE_PRECISION - using float_v = AVX2d::float_t; - using complex_v = AVX2d::complex_t; -#else - using float_v = AVX2::float_t; - using complex_v = AVX2::complex_t; +#if ENABLE_AVX512 + namespace AVX = AVX512d; +#elif ENABLE_AVX2d + namespace AVX = AVX2d; +#elif ENABLE_AVX2f + namespace AVX = AVX2f; #endif + +#if ENABLE_AVX + using float_v = AVX::real_v; + using complex_v = AVX::complex_v; #else - using float_v = double; - using complex_v = std::complex; + using float_v = double; + using complex_v = std::complex; #endif namespace utils { template struct is_vector_type : std::false_type {}; template struct size { static constexpr unsigned value = 1; } ; -#if ENABLE_AVX2 -#if DOUBLE_PRECISION - template <> struct is_vector_type : std::true_type {}; - template <> struct is_vector_type : std::true_type {}; - template <> struct size { static constexpr unsigned value = 4; }; - template <> struct size { static constexpr unsigned value = 4; }; -#else - template <> struct is_vector_type : std::true_type {}; - template <> struct is_vector_type : std::true_type {}; - template <> struct size { static constexpr unsigned value = 8; }; - template <> struct size { static constexpr unsigned value = 8; }; -#endif -#else - template <> struct is_vector_type : std::false_type {}; - template <> struct is_vector_type : std::false_type {}; +#if ENABLE_AVX + template <> struct is_vector_type : std::true_type {}; + template <> struct is_vector_type : std::true_type {}; + template <> struct size { static constexpr unsigned value = AVX::complex_v::size; }; + template <> struct size { static constexpr unsigned value = AVX::complex_v::size; }; #endif template simd_type gather( const container_type& container, const functor_type& functor, unsigned offset=0, typename simd_type::scalar_type df =0.) 
@@ -69,6 +63,15 @@ namespace AmpGen { } else return obj; } + template bool all_of( const simd_type& obj, const value_type& v ) + { + if constexpr( ! is_vector_type::value ) return obj == v; + else { + auto arr = obj.to_array(); + for( unsigned i = 0 ; i != size::value; ++i ) if( arr[i] != v ) return false; + return true; + } + } template auto get( vtype v ) { if constexpr ( is_vector_type::value ) return v.at(p); @@ -81,11 +84,8 @@ namespace AmpGen { } template auto norm( const ctype& v ) { - #if ENABLE_AVX2 && DOUBLE_PRECISION - if constexpr( is_vector_type::value ) return AVX2d::norm(v); - #endif - #if ENABLE_AVX2 && ! DOUBLE_PRECISION - if constexpr( is_vector_type::value ) return AVX2::norm(v); + #if ENABLE_AVX + if constexpr( is_vector_type::value ) return AVX::norm(v); #endif if constexpr( ! is_vector_type::value ) return std::norm(v); } diff --git a/Standalone.cmake b/Standalone.cmake index 600bb8314b7..254d52f89af 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -12,9 +12,8 @@ if( NOT "${CMAKE_CXX_STANDARD}" ) set(CMAKE_CXX_STANDARD 17) endif() -SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") -SET(ENABLE_AVX2 TRUE CACHE BOOL "ENABLE_AVX2") -SET(PRECISION "DOUBLE" CACHE STRING "PRECISION") +SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") +SET(USE_SIMD "AVX2d" CACHE STRING "USE_SIMD") set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -153,20 +152,26 @@ target_compile_options(AmpGen -Wno-unknown-pragmas $<$:-O3>) -if( ENABLE_AVX2 ) - if( "${PRECISION}" MATCHES "DOUBLE" ) - message(STATUS "Enabling AVX2 [double precision]") - target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=1") - elseif( "${PRECISION}" MATCHES "SINGLE" ) - message(STATUS "Enabling AVX2 [single precision]") - target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX2=1" "DOUBLE_PRECISION=0") - endif() +# if( NOT ${USE_SIMD} MATCHES "" ) + if ( ${USE_SIMD} MATCHES "AVX2d" ) + message(STATUS "Enabling AVX2 [double precision]") + 
target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=1" "ENABLE_AVX2d=1") + target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -DHAVE_AVX2_INSTRUCTIONS) + elseif ( ${USE_SIMD} MATCHES "AVX2f" ) + message(STATUS "Enabling AVX2 [single precision]") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=1" "ENABLE_AVX2f=1") target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -DHAVE_AVX2_INSTRUCTIONS) - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" OR - "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) + elseif ( ${USE_SIMD} MATCHES "AVX512d" ) + message(STATUS "Enabling AVX512 [double precision]") + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=1" "ENABLE_AVX512=1") + target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx512f -DHAVE_AVX512_INSTRUCTIONS) + endif() + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) target_compile_options(AmpGen PUBLIC -mfma) endif() -endif() + # else() + # message("SIMD disabled, resorting to scalar build : ${USE_SIMD}") + # endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" ) target_link_libraries(AmpGen PUBLIC stdc++ ) diff --git a/apps/DataConverter.cpp b/apps/DataConverter.cpp index 11081dc2a08..438a1e4fbd1 100644 --- a/apps/DataConverter.cpp +++ b/apps/DataConverter.cpp @@ -36,11 +36,10 @@ void invertParity( Event& event, const size_t& nParticles=0) } } - int main( int argc, char* argv[] ) { OptionsParser::setArgs( argc, argv ); - std::string inputFilename = NamedParameter("Input" , "", "Input ROOT file" ); + std::string inputFilename = NamedParameter("Input" , "", "Input ROOT file(s)" ); std::string treeName = NamedParameter("Tree" , "", "Input ROOT tree." 
); std::string outputFilename = NamedParameter("Output" , "", "Output ROOT file" ); std::string pdfLibrary = NamedParameter("PdfLibrary", "", "PDF Library that used to generate this sample for MC reweighting (MC only)" ); @@ -50,10 +49,14 @@ int main( int argc, char* argv[] ) std::vector monitorBranches = NamedParameter("Monitors" , std::vector() ).getVector(); std::vector branchFormat = NamedParameter("BranchFormat" , std::vector() ).getVector(); std::vector friends = NamedParameter("Friends" , std::vector() ).getVector(); - bool usePIDCalib = NamedParameter("usePIDCalib" , false ); - bool rejectMultipleCandidates = NamedParameter("rejectMultipleCandidates", true ); + std::vector idBranches = NamedParameter("IdBranches" , std::vector() ).getVector(); + bool usePIDCalib = NamedParameter("usePIDCalib" , false); + bool rejectMultipleCandidates = NamedParameter("rejectMultipleCandidates", true ); std::string cuts = vectorToString( NamedParameter("Cut","").getVector() , " && "); EventType evtType( NamedParameter( "EventType" ).getVector() ); + + std::vector branches; + for( auto& particle : particles ) for(auto& bf : branchFormat) branches.push_back( mysprintf(bf, particle.c_str())); INFO( "Reading file " << inputFilename ); INFO( "Outputting file: " << outputFilename); @@ -83,12 +86,6 @@ int main( int argc, char* argv[] ) std::vector eventsToTake; - std::vector branches; - for( auto& particle : particles ) { - for(size_t i = 0 ; i < 4; ++i ) branches.push_back( mysprintf(branchFormat[i], particle.c_str())); - } - for(auto& branch : monitorBranches) branches.push_back( branch ); - if ( rejectMultipleCandidates ) { ULong64_t totCandidate; ULong64_t eventNumber; @@ -130,7 +127,13 @@ int main( int argc, char* argv[] ) } } - EventList evts( in_tree, evtType, Branches(branches), EntryList(eventsToTake), GetGenPdf(false), ApplySym(true) ); + EventList evts( in_tree, evtType, Branches(branches), + EntryList(eventsToTake), + GetGenPdf(false), + ApplySym(true) , + 
ExtraBranches(monitorBranches), + IdBranches(idBranches), + InputUnits(Units::MeV) ); INFO( "Branches = ["<< vectorToString(branches, ", " ) << "]" ); @@ -182,8 +185,6 @@ int main( int argc, char* argv[] ) evts[i].setWeight( weight ); } } - evts.transform( [=](auto& event){ for( size_t i = 0 ; i < 4*evtType.size(); ++i ) event[i] /= 1000. ; } ); - if ( pdfLibrary != "" ) { INFO( "Setting generator level PDF from " << pdfLibrary ); void* handle = dlopen( pdfLibrary.c_str(), RTLD_NOW ); @@ -208,9 +209,13 @@ int main( int argc, char* argv[] ) outputFile->Close(); TFile* outputPlotFile = TFile::Open( plotsName.c_str(), "RECREATE" ); auto plots = evts.makeDefaultProjections(); + auto proj = Projection([](auto& event){ return sqrt(event.s({0,1,2,3})) ; }, "m_D", "m_D",100, 1.8, 1.9 ); for ( auto& plot : plots ) { INFO( "Writing plot " << plot->GetName() << " to file" ); plot->Write(); } + proj( evts )->Write(); + for( int i = 0 ;i != 4; ++i ) + Projection([i](auto& event){ return sqrt(event.s(i)) ; }, "m_"+std::to_string(i), "m_D",100, 0, 1.0 )(evts)->Write(); outputPlotFile->Close(); } diff --git a/doc/release.notes b/doc/release.notes index 186a1246b62..6b1c47a5a5d 100644 --- a/doc/release.notes +++ b/doc/release.notes @@ -3,6 +3,11 @@ ! Responsible : Tim Evans ! Purpose : !----------------------------------------------------------------------------- +!=================== AmpGen v2r0 2020-04-25 ================== + - Refactored caching logic so that computations are no longer stored with event data. + - Support for AVX2 for amplitude evaluation / integration in single/double precision. + - Updated plotting for making component amplitude plots. + - Better thread safety of integration should improve fit stability. !=================== AmpGen v1r2 2019-11-12 ================== - New phase space Generator TreePhaseSpace for producing narrow resonances. - Improved handling of CP conjugated amplitudes. 
diff --git a/examples/FitterWithPolarisation.cpp b/examples/FitterWithPolarisation.cpp index 4e0abc26d69..79620f5d706 100644 --- a/examples/FitterWithPolarisation.cpp +++ b/examples/FitterWithPolarisation.cpp @@ -32,7 +32,7 @@ #include #include -#if ENABLE_AVX2 +#if ENABLE_AVX #include "AmpGen/EventListSIMD.h" using EventList_type = AmpGen::EventListSIMD; #else diff --git a/examples/SignalOnlyFitter.cpp b/examples/SignalOnlyFitter.cpp index ef256081c11..611ae7dd6da 100644 --- a/examples/SignalOnlyFitter.cpp +++ b/examples/SignalOnlyFitter.cpp @@ -26,7 +26,7 @@ #include #endif -#if ENABLE_AVX2 +#if ENABLE_AVX #include "AmpGen/EventListSIMD.h" using EventList_type = AmpGen::EventListSIMD; #else diff --git a/src/Array.cpp b/src/Array.cpp index 4a135861e7e..fe66b64b347 100644 --- a/src/Array.cpp +++ b/src/Array.cpp @@ -19,21 +19,10 @@ Array::Array( const Expression& top, std::string Array::to_string(const ASTResolver* resolver) const { auto head = m_top.to_string(resolver); - if( is(m_address) ) - return head+"["+ std::to_string(int(std::real(m_address()))) +"]"; - auto offset = m_address.to_string(resolver); - if( resolver != nullptr && resolver->enableAVX() ) - { - return " gather( &(" + head + "), " + offset + ")"; - } - auto pos = head.find_last_of("]"); - if( pos != std::string::npos ){ - auto st1 = head.substr(0,pos); - auto st2 = head.substr(pos,head.size() - pos ); - return st1 + "+int("+offset+")" + st2; - } - else return head +"[ int("+offset+")]"; + if( resolver != nullptr && resolver->enableAVX() ) return " gather( &(" + head + "), " + offset + ")"; + if( head.find("[") == std::string::npos ) return head + "[int("+offset+")]"; + else return " * ( & (" + head + ") + int("+offset+") )"; } void Array::resolve( ASTResolver& resolver ) const diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 60c7d6f0a62..e3240a318d1 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -34,8 +34,6 @@ using namespace AmpGen; CoherentSum::CoherentSum() = default; 
-//ENABLE_DEBUG(CoherentSum) - CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, const std::string& prefix ) : m_rules (mps) , m_evtType (type) @@ -57,12 +55,12 @@ CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, ThreadPool tp(nThreads); for(size_t i = 0; i < m_matrixElements.size(); ++i){ tp.enqueue( [i,this,&mps,&litudes]{ - auto& [p, c] = amplitudes[i]; + auto& [p, c] = amplitudes[i]; + DebugSymbols db; m_matrixElements[i] = TransitionMatrix(p, c, - CompiledExpression( - p.getExpression(), p.decayDescriptor(), - this->m_evtType.getEventFormat(), DebugSymbols(), this->m_mps ) ); + CompiledExpression( p.getExpression(m_dbThis ? &db : nullptr), p.decayDescriptor(), + this->m_evtType.getEventFormat(), db, this->m_mps ) ); CompilerWrapper().compile( m_matrixElements[i], this->m_objCache); } ); } @@ -121,13 +119,8 @@ void CoherentSum::debug( const Event& evt, const std::string& nameMustContain ) << " ] g = [ "<< me.coupling().real() << " " << me.coupling().imag() << " ] " << m_cache( evt.index(), std::distance(&m_matrixElements[0], &me ) ) << me.decayTree.CP() ); - - // if( m_dbThis ) me.amp.debug( evt.address() ); } - //else - // for ( auto& me : m_matrixElements ) - // if ( me.amp.name().find( nameMustContain ) != std::string::npos ) me.amp.debug( evt.address() ); - // if( evt.cacheSize() != 0 ) INFO( "Pdf = " << prob_unnormalised( evt ) ); + if( m_dbThis ) for ( auto& me : m_matrixElements ) me.debug( m_events->block(0) ); INFO( "A(x) = " << getVal(evt) << " without cache: " << getValNoCache(evt) ); } @@ -330,7 +323,7 @@ complex_t CoherentSum::getVal( const Event& evt ) const for (unsigned int i = 0 ; i != m_matrixElements.size(); ++i ) { value = value + m_matrixElements[i].coefficient * m_cache(evt.index() / utils::size::value, i ); } -#if ENABLE_AVX2 +#if ENABLE_AVX return value.at(evt.index() % utils::size::value ); #else return value; diff --git a/src/CompiledExpressionBase.cpp 
b/src/CompiledExpressionBase.cpp index acf12e97540..d9c10a323ff 100644 --- a/src/CompiledExpressionBase.cpp +++ b/src/CompiledExpressionBase.cpp @@ -13,6 +13,7 @@ #include "AmpGen/ASTResolver.h" #include "AmpGen/ProfileClock.h" #include "AmpGen/Tensor.h" +#include "AmpGen/simd/utils.h" using namespace AmpGen; CompiledExpressionBase::CompiledExpressionBase() = default; @@ -143,7 +144,7 @@ void CompiledExpressionBase::compile(const std::string& fname) void CompiledExpressionBase::addDebug( std::ostream& stream ) const { stream << "#include\n"; - stream << "extern \"C\" std::vector>> " + stream << "extern \"C\" std::vector() << " >> " << m_progName << "_DB(" << fcnSignature() << "){\n"; for ( auto& dep : m_debugSubexpressions ) { std::string rt = "auto v" + std::to_string(dep.first) + " = " + dep.second.to_string(m_resolver.get()) +";"; @@ -155,8 +156,8 @@ void CompiledExpressionBase::addDebug( std::ostream& stream ) const const auto expression = m_db[i].second; stream << std::endl << "{\"" << m_db[i].first << "\","; if ( expression.to_string(m_resolver.get()) != "NULL" ) - stream << expression.to_string(m_resolver.get()) << "}" << comma; - else stream << "-999}" << comma ; + stream << typeof() << "("<< expression.to_string(m_resolver.get()) << ")}" << comma; + else stream << typeof() << "(-999.,0.)}" << comma ; } } diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index b27e031bb34..1cc55061d28 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -53,8 +53,10 @@ void CompilerWrapper::generateSource( const CompiledExpressionBase& expression, { std::ofstream output( filename ); for ( auto& include : m_includes ) output << "#include <" << include << ">\n"; - if( expression.fcnSignature().find("AVX2d") != std::string::npos ) output << "#include \"AmpGen/simd/avx2d_types.h\"\n; using namespace AmpGen::AVX2d;\n" ; - else if( expression.fcnSignature().find("AVX2") != std::string::npos ) output << "#include \"AmpGen/simd/avx2_types.h\"\n; using 
namespace AmpGen::AVX2;\n;" ; + if( expression.fcnSignature().find("AVX2d") != std::string::npos ) output << "#include \"AmpGen/simd/avx2d_types.h\"\n; using namespace AmpGen::AVX2d;\n" ; + else if( expression.fcnSignature().find("AVX2") != std::string::npos ) output << "#include \"AmpGen/simd/avx2_types.h\"\n; using namespace AmpGen::AVX2;\n;" ; + else if( expression.fcnSignature().find("AVX512d") != std::string::npos ) output << "#include \"AmpGen/simd/avx512d_types.h\"\n; using namespace AmpGen::AVX512d;\n;" ; + else if( expression.fcnSignature().find("AVX512") != std::string::npos ) output << "#include \"AmpGen/simd/avx512_types.h\"\n; using namespace AmpGen::AVX512;\n;" ; output << expression << std::endl; output.close(); } @@ -141,11 +143,13 @@ void CompilerWrapper::compileSource( const std::string& fname, const std::string std::vector compile_flags = NamedParameter("CompilerWrapper::Flags", {"-Ofast", "--std="+get_cpp_version()}); - #if ENABLE_AVX2 + #if ENABLE_AVX compile_flags.push_back("-march=native"); + compile_flags.push_back( std::string("-I") + AMPGENROOT) ; + #endif + #if ENABLE_AVX2d compile_flags.push_back("-mavx2"); compile_flags.push_back("-DHAVE_AVX2_INSTRUCTIONS"); - compile_flags.push_back( std::string("-I") + AMPGENROOT) ; #endif #ifdef _OPENMP compile_flags.push_back("-fopenmp"); diff --git a/src/Event.cpp b/src/Event.cpp index 3f1bf940bd9..04043c21503 100644 --- a/src/Event.cpp +++ b/src/Event.cpp @@ -89,12 +89,8 @@ void Event::swap( const unsigned& i , const unsigned& j ) std::memmove( &m_event[4*i], &tmp,sizeof(tmp)); } -/* -void Event::setCache(const complex_t& value, const unsigned& pos){ m_cache[pos] = value; } -void Event::setCache( const std::vector& value, const unsigned& pos ) +void Event::reorder( const std::vector& addresses ) { - std::memmove( m_cache.data() + pos, value.data(), sizeof(complex_t) * value.size() ); + auto copy = m_event; + for( unsigned i = 0; i != addresses.size(); ++i ) set( addresses[i], copy.data() + 4 *i ); 
} - -void Event::resizeCache( const unsigned int& new_size ){ m_cache.resize(new_size); } -*/ diff --git a/src/EventList.cpp b/src/EventList.cpp index 4ab59d90009..86727861ad8 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -54,6 +54,12 @@ void EventList::loadFromFile( const std::string& fname, const ArgumentPack& args gFile = current_file; } +template key_type inv_map(const std::map& mp, const value_type& k, const default_type& df = default_type()) +{ + for( const auto& [key,val] : mp ) if( val == k ) return key; + return df; +} + void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) { ProfileClock read_time; @@ -66,32 +72,36 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) auto getGenPdf = args.getArg(false).val; auto weightBranch = args.getArg(std::string("")).val; auto branches = args.getArg().val; + auto extraBranches= args.getArg().val; auto applySym = args.getArg(false).val; auto entryList = args.getArg().val; auto eventFormat = m_eventType.getEventFormat( true ); - - Event temp( branches.size() == 0 ? 
eventFormat.size() : branches.size()); - + auto inputUnits = args.getArg(Units::GeV); + auto idBranches = args.getArg({}).val; + Event temp( eventFormat.size() + extraBranches.size()); temp.setWeight( 1 ); temp.setGenPdf( 1 ); tree->SetBranchStatus( "*", 0 ); - - TreeReader tr( tree ); - if( branches.size() != 0 ){ - INFO("Branches = [" << vectorToString(branches, ", ") << "]" ); - for ( auto branch = branches.begin(); branch != branches.end(); ++branch ) { - unsigned int pos = std::distance( branches.begin(), branch ); - tr.setBranch( *branch, &(temp[pos]) ); - if( pos >= eventFormat.size() ){ - INFO("Specifiying event extension: " << *branch << " " << pos << " " << eventFormat.size() ); - m_extensions[ *branch ] = pos; - } + TreeReader tr( tree ); + bool hasEnergy = branches.size() == 0 || branches.size() == 4 * m_eventType.size(); // if the energy of the particle has been explicitly specified // + if( branches.size() != 0 ) + { + DEBUG("Branches = [" << vectorToString(branches, ", ") << "]" ); + for (unsigned p = 0 ; p != branches.size(); ++p ) + { + auto pos = hasEnergy ? p : 4 * int(p/3) + p % 3 ; + DEBUG("Setting branch: " << branches[p] << " pos: " << pos << " fmt = " << inv_map( eventFormat, pos, "NOT FOUND" ) << " has energy? 
" << hasEnergy ); + tr.setBranch( branches[p], &(temp[pos]) ); } + auto pos = eventFormat.size(); + for( const auto& branch : extraBranches ) m_extensions[branch] = pos++; } - else { - for ( auto& branch : eventFormat ){ - tr.setBranch( branch.first, &(temp[branch.second]) ); - } + else for ( auto& branch : eventFormat ) tr.setBranch( branch.first, &(temp[branch.second]) ); + std::vector ids( m_eventType.size() ); + if( idBranches.size() != 0 ) + { + if( idBranches.size() != m_eventType.size() ){ FATAL("Number of ID branches should be number of final state particles"); } + for( int i = 0; i != ids.size(); ++i ) tr.setBranch( idBranches[i], ids.data() + i); } if( getGenPdf ) tr.setBranch( "genPdf", temp.pGenPdf() ); if( weightBranch != "" ) tr.setBranch( weightBranch, temp.pWeight() ); @@ -105,13 +115,18 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) for( int i = 0 ; i < evtList->GetN(); ++i ) entryList.push_back( evtList->GetEntry(i) ); } - bool hasEventList = entryList.size() != 0; - unsigned int nEvents = hasEventList ? entryList.size() : tree->GetEntries(); - m_data.reserve( nEvents ); + if( entryList.size() != 0 ) tr.setEntryList( entryList ); + m_data.reserve( tr.nEntries() ); auto symmetriser = m_eventType.symmetriser(); - for ( unsigned int evt = 0; evt < nEvents; ++evt ) { - tr.getEntry( hasEventList ? entryList[evt] : evt ); - if( applySym ) symmetriser( temp ); + auto automaticOrdering = m_eventType.automaticOrdering(); + for (const auto& evt : tr) { + if( inputUnits != Units::GeV ) for( unsigned k = 0; k != eventFormat.size(); ++k ) temp[k] *= to_double(inputUnits); + if( idBranches.size() != 0 ) automaticOrdering(temp, ids); + if( applySym ) symmetriser(temp); + if( ! 
hasEnergy ){ + for( unsigned int k = 0 ; k != m_eventType.size(); ++k ) + temp[4*k + 3] = sqrt( m_eventType.mass(k) * m_eventType.mass(k) + temp[4*k+0]*temp[4*k+0] + temp[4*k+1]*temp[4*k+1] + temp[4*k+2]*temp[4*k+2] ); + } temp.setIndex( m_data.size() ); m_data.push_back( temp ); } @@ -132,10 +147,8 @@ TTree* EventList::tree( const std::string& name, const std::vector& double genPdf = 1; double weight = 1; auto format = m_eventType.getEventFormat( true ); - for ( const auto& f : format ) outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); for ( const auto& f : m_extensions ) outputTree->Branch( f.first.c_str(), tmp.address( f.second ) ); - outputTree->Branch( "genPdf", &genPdf ); outputTree->Branch( "weight", &weight ); for ( const auto& evt : *this ) { @@ -185,12 +198,6 @@ TH2D* EventList::makeProjection( const Projection2D& projection, const ArgumentP return plot; } -// void EventList::resetCache() -// { -// m_index.clear(); -// //m_cache.clear(); -// } - double EventList::integral() const { return std::accumulate( std::begin(*this), std::end(*this), 0, [](double rv, const auto& evt){ return rv + evt.weight(); } ); @@ -211,10 +218,3 @@ void EventList::erase(const std::vector::iterator& begin, { m_data.erase( begin, end ); } - -//void EventList::reserveCache(const size_t& size) -//{ -// if ( size * m_data.size() >= m_cache.size() ) -// m_cache.reserve( m_data.size() * m_cache.size() ); -//} - diff --git a/src/EventListSIMD.cpp b/src/EventListSIMD.cpp index 3c7a644c34c..2ba7040bcb4 100644 --- a/src/EventListSIMD.cpp +++ b/src/EventListSIMD.cpp @@ -1,4 +1,4 @@ -#if ENABLE_AVX2 +#if ENABLE_AVX #include #include @@ -81,7 +81,7 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) temp.setGenPdf( 1 ); tree->SetBranchStatus( "*", 0 ); - TreeReader tr( tree ); + TreeReader tr( tree ); if( branches.size() != 0 ){ INFO("Branches = [" << vectorToString(branches, ", ") << "]" ); for ( auto branch = branches.begin(); branch != 
branches.end(); ++branch ) { diff --git a/src/EventType.cpp b/src/EventType.cpp index ceff945be56..9adc9b788a8 100644 --- a/src/EventType.cpp +++ b/src/EventType.cpp @@ -230,7 +230,7 @@ std::function EventType::symmetriser() const for ( auto& s : shuffle ) shuffle_string += std::to_string( s ) + " "; DEBUG( "Shuffle = " << shuffle_string ); } - auto fcn = [shuffles, rng]( auto& event ) mutable -> void { + return [shuffles, rng]( auto& event ) mutable -> void { for ( auto shuffled : shuffles ) { for ( unsigned int index = 0; index < shuffled.size(); ++index ) { unsigned int j = std::uniform_int_distribution( 0, index )( rng ); @@ -240,9 +240,44 @@ std::function EventType::symmetriser() const } } }; - return fcn; } +std::function&)> EventType::automaticOrdering() const +{ + std::vector ids; + for( unsigned i = 0 ; i != m_particleNames.size(); ++i ) ids.push_back( ParticleProperties::get(m_particleNames[i])->pdgID() ); + auto matches = [](const auto& c1, const auto& c2 , unsigned sgn = +1) + { + std::vector used( c1.size(), false ); + for(unsigned i = 0; i != c1.size(); ++i ) + { + for( unsigned j = 0; j != c2.size(); ++j ) + { + if( c1[i] == sgn * c2[j] && ! 
used[j] ) used[j] = true; + } + } + return std::all_of( std::begin(used), std::end(used), [](auto b) { return b; } ) ; + }; + + return [ids, matches](auto& event, const auto& actual_ids) -> void { + std::vector new_addresses( ids.size(), 999 ); + int sgn = +1; + if( matches(ids, actual_ids ) ) sgn = +1; + else if( matches(ids, actual_ids, -1 ) ) sgn = -1; + else { FATAL("Ids: " << vectorToString(actual_ids, " ") << " do not match either particle or antiparticle ["<< vectorToString(ids, " ") << "]" );} + + for( unsigned i = 0 ; i != ids.size(); ++i ) + { + for( unsigned j = 0 ; j != actual_ids.size(); ++j ) + { + if( actual_ids[j] == sgn * ids[i] && new_addresses[j]==999 ){ new_addresses[j] = i; break ; } + } + } + event.reorder( new_addresses ); + }; +} + + bool EventType::has( const std::string& name ) const { return std::any_of( m_particleNames.begin(), m_particleNames.end(), [&name](auto& it) { return it == name ; } ); diff --git a/src/Expression.cpp b/src/Expression.cpp index 9433ef4c9cf..1584bcb9621 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -15,7 +15,7 @@ #include "AmpGen/MetaUtils.h" #include "AmpGen/MsgService.h" #include "AmpGen/Types.h" - +#include "AmpGen/simd/utils.h" using namespace AmpGen; using namespace AmpGen::fcn; using namespace std::complex_literals; @@ -58,15 +58,10 @@ std::string Constant::to_string(const ASTResolver* resolver) const { }; std::string complex_type = typeof(); std::string literalSuffix = ""; - if( resolver != nullptr && resolver->enableCuda() ) - { - complex_type = "AmpGen::CUDA::complex_t"; - literalSuffix = "f"; - } - if( resolver != nullptr && resolver->enableAVX() ) - { - complex_type = "AmpGen::AVX2d::complex_t"; + if( resolver != nullptr && (resolver->enableCuda() || resolver->enableAVX()) ) + { literalSuffix = "f"; + complex_type = typeof(); } return std::imag(m_value) == 0 ? 
"(" + rounded_string(std::real(m_value)) +literalSuffix + ")" : complex_type +"("+rounded_string(std::real(m_value))+literalSuffix+","+rounded_string(std::imag(m_value))+literalSuffix+")"; @@ -216,7 +211,7 @@ Ternary::Ternary( const Expression& cond, const Expression& v1, const Expression } std::string Ternary::to_string(const ASTResolver* resolver) const { - return resolver != nullptr && resolver->enableAVX() ? "AmpGen::AVX2d::select(" + m_cond.to_string(resolver) + ", " + + return resolver != nullptr && resolver->enableAVX() ? "select(" + m_cond.to_string(resolver) + ", " + m_v1.to_string(resolver) + ", " + m_v2.to_string(resolver) +")" : "(" + m_cond.to_string(resolver) + "?" + m_v1.to_string(resolver) + ":" + m_v2.to_string(resolver) + ")"; } @@ -331,9 +326,8 @@ ComplexParameter::ComplexParameter( const Parameter& real, const Parameter& imag std::string ComplexParameter::to_string(const ASTResolver* resolver) const { - std::string complex_type = "std::complex"; - if( resolver != nullptr && resolver->enableCuda() ) complex_type = "AmpGen::CUDA::complex_t"; - if( resolver != nullptr && resolver->enableAVX() ) complex_type = "AmpGen::AVX2d::complex_t"; + std::string complex_type = typeof(); + if( resolver != nullptr && (resolver->enableCuda() || resolver->enableAVX()) ) complex_type = typeof(); return complex_type + "(" + m_real.to_string(resolver) + ", " + m_imag.to_string(resolver) +")"; } diff --git a/src/Integrator.cpp b/src/Integrator.cpp index 4632a44b764..70da64875db 100644 --- a/src/Integrator.cpp +++ b/src/Integrator.cpp @@ -44,7 +44,7 @@ void Integrator::integrateBlock() auto b2 = m_cache.data() + m_integrals[roll].j * m_cache.nBlocks(); for ( size_t i = 0; i < m_cache.nBlocks(); ++i ) { auto c = b1[i] * conj(b2[i]); - #if ENABLE_AVX2 + #if ENABLE_AVX re = fmadd( m_weight[i], real(c), re ); im = fmadd( m_weight[i], imag(c), im ); #else diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 86e748a55e0..bc2211757ed 100644 --- 
a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -72,7 +72,7 @@ PolarisedSum::PolarisedSum(const EventType& type, ThreadPool tp( std::thread::hardware_concurrency() ); for(unsigned i = 0; i < m_matrixElements.size(); ++i) { - // tp.enqueue( [i, &protoAmps, &polStates, this]{ + tp.enqueue( [i, &protoAmps, &polStates, this]{ Tensor thisExpression( Tensor::dim(polStates.size()) ); auto& [p, coupling] = protoAmps[i]; DebugSymbols syms; @@ -85,7 +85,7 @@ PolarisedSum::PolarisedSum(const EventType& type, p.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); CompilerWrapper().compile( m_matrixElements[i] ); - // }); + }); } } if ( stype == spaceType::flavour ) diff --git a/src/Projection.cpp b/src/Projection.cpp index 89b354a7d89..6d14b4b9a2f 100644 --- a/src/Projection.cpp +++ b/src/Projection.cpp @@ -106,7 +106,7 @@ template <> std::tuple, THStack*> Projection::projInternal(co return {hists, stack}; } -#if ENABLE_AVX2 +#if ENABLE_AVX template <> TH1D* Projection::projInternal( const EventListSIMD& events, const ArgumentPack& args) const { return events.makeProjection(*this, args); diff --git a/src/Spline.cpp b/src/Spline.cpp index 81e7de93045..c7c3de27547 100644 --- a/src/Spline.cpp +++ b/src/Spline.cpp @@ -24,20 +24,18 @@ Spline::Spline(const std::string& name, m_min(min), m_max(max) {} -Spline::Spline(const Spline& spline, - const Expression& x ) - : +Spline::Spline(const Spline& spline, const Expression& x, DebugSymbols* db ) : m_points( spline.m_points ), m_name( spline.m_name), m_nKnots( spline.m_nKnots), m_min( spline.m_min ), m_max( spline.m_max ), m_x ( x ), - m_eval( eval() ) { - } -Expression Spline::operator()( const Expression& x ) + m_eval( eval(db) ) {} + +Expression Spline::operator()( const Expression& x, DebugSymbols* db ) { - return Spline(*this,x); + return Spline(*this,x, db); } Expression AmpGen::getSpline( const std::string& name, const Expression& x, const std::string& arrayName, @@ 
-56,10 +54,10 @@ Expression AmpGen::getSpline( const std::string& name, const Expression& x, cons max = NamedParameter( name + "::Spline::Max", 0. ); } std::string spline_name = name + "::Spline::"+arrayName; - return Spline( spline_name, nBins, min,max )(x); + return Spline(spline_name, nBins, min, max)(x, dbexpressions); } -Expression Spline::eval() const +Expression Spline::eval(DebugSymbols* db) const { Expression x = make_cse(m_x); double spacing = ( m_max - m_min ) / ( (double)m_nKnots - 1. ); @@ -73,6 +71,13 @@ Expression Spline::eval() const + m_points[bin+m_nKnots] * dx * dx / 2. + dx * dx * dx * ( m_points[bin+1+m_nKnots] - m_points[bin+m_nKnots] ) / ( 6. * spacing ), continuedValue ); + ADD_DEBUG(x, db ); + ADD_DEBUG(dx, db ); + ADD_DEBUG(bin, db ); + ADD_DEBUG(returnValue, db ); + ADD_DEBUG( m_points[bin], db ); + for( int i = 0 ; i != m_points.size(); ++i ) + ADD_DEBUG( m_points[Constant(i) + 0.1], db ); return make_cse(returnValue); } diff --git a/src/TreeReader.cpp b/src/TreeReader.cpp new file mode 100644 index 00000000000..42ec42513ae --- /dev/null +++ b/src/TreeReader.cpp @@ -0,0 +1,34 @@ +#include "AmpGen/TreeReader.h" +using namespace AmpGen; + +TreeReader::TreeReader( TTree* tree ) : m_tree( tree ) {} + +void TreeReader::setEntryList( const std::vector& entryList ){ m_entryList = entryList ; } + +void TreeReader::unsetEntryList(){ m_entryList.clear() ; } + +void TreeReader::getEntry( const unsigned int& entry ) +{ + if(!m_ready ) prepare(); + m_tree->GetEntry( m_entryList.size() == 0 ? entry : m_entryList[entry] ); + for ( auto& branch : m_branches ) branch->transfer(); +} + +void TreeReader::prepare() +{ + for ( auto& branch : m_branches ) { + m_tree->SetBranchStatus( branch->name.c_str(), "1" ); + m_tree->SetBranchAddress( branch->name.c_str(), branch->address() ); + } + m_ready = true; +} + +size_t TreeReader::nEntries() const { return m_entryList.size() == 0 ? 
m_tree->GetEntries() : m_entryList.size(); } + +TreeReader::~TreeReader() +{ + for ( auto& branch : m_branches ) delete branch; +} + +TreeReader::Iterator TreeReader::begin() { getEntry(0); return Iterator( 0, this ); } +TreeReader::Iterator TreeReader::end() { return Iterator( nEntries(), this ); } diff --git a/src/Units.cpp b/src/Units.cpp new file mode 100644 index 00000000000..121791d3ddc --- /dev/null +++ b/src/Units.cpp @@ -0,0 +1,13 @@ + +#include "AmpGen/Units.h" +#include "AmpGen/Utilities.h" + +namespace AmpGen { + complete_enum(Units, TeV, GeV, MeV, KeV) +} + +double AmpGen::to_double(const AmpGen::Units& unit) +{ + static constexpr std::array value_table = {TeV, GeV, MeV, KeV}; + return value_table[unsigned(unit)]; +} diff --git a/src/Utilities.cpp b/src/Utilities.cpp index e5f135bfe39..a03553b20fc 100644 --- a/src/Utilities.cpp +++ b/src/Utilities.cpp @@ -281,13 +281,12 @@ void AmpGen::printSplash() #elif defined(__GNUC__) || defined(__GNUG__) std::cout << "gcc " << __GNUC__ << "." << __GNUC_MINOR__ << "." 
<< __GNUC_PATCHLEVEL__; #endif - #if ENABLE_AVX2 - std::cout << " (avx2;"; - #if DOUBLE_PRECISION - std::cout << " double)"; - #else - std::cout << " single)"; - #endif + #if ENABLE_AVX2d + std::cout << " (avx2; double)"; + #elif ENABLE_AVX2f + std::cout << " (avx2; single)"; + #elif ENABLE_AVX512 + std::cout << " (avx512; double)"; #endif std::cout << " " << __DATE__ << " " << __TIME__ << bold_off << "\n\n"; diff --git a/test/test_avx2d.cpp b/test/test_avx2d.cpp index f914bf28790..d1ce7d48435 100644 --- a/test/test_avx2d.cpp +++ b/test/test_avx2d.cpp @@ -8,18 +8,19 @@ namespace utf = boost::unit_test; #if ENABLE_AVX2 -#include "AmpGen/simd/avx2d_types.h" +#include "AmpGen/simd/utils.h" using namespace AmpGen; +using namespace AmpGen::AVX2d; BOOST_AUTO_TEST_CASE( test_log ) { - AVX2d::float_t p(0.3, 0.5, 10.0, 7.0); + AVX2d::real_v p(0.3, 0.5, 10.0, 7.0); auto logged = AVX2d::log( p ).to_array() ; - BOOST_TEST( logged[0] == std::log(0.3), boost::test_tools::tolerance(1e-15 ) ); - BOOST_TEST( logged[1] == std::log(0.5), boost::test_tools::tolerance(1e-15 ) ); - BOOST_TEST( logged[2] == std::log(10.0), boost::test_tools::tolerance(1e-15 ) ); - BOOST_TEST( logged[3] == std::log(7.0), boost::test_tools::tolerance(1e-15 ) ); + BOOST_TEST( logged[0] == std::log(0.3), boost::test_tools::tolerance(5e-10 ) ); + BOOST_TEST( logged[1] == std::log(0.5), boost::test_tools::tolerance(5e-10 ) ); + BOOST_TEST( logged[2] == std::log(10.0), boost::test_tools::tolerance(5e-10 ) ); + BOOST_TEST( logged[3] == std::log(7.0), boost::test_tools::tolerance(5e-10 ) ); } BOOST_AUTO_TEST_CASE( test_fmod ) @@ -27,8 +28,8 @@ BOOST_AUTO_TEST_CASE( test_fmod ) std::vector a = {5.1, -5.1, 5.1, -5.1}; std::vector b = {3.0, +3.0, -3.0, -3.0}; - AVX2d::float_t av( a.data() ); - AVX2d::float_t bv( b.data() ); + AVX2d::real_v av( a.data() ); + AVX2d::real_v bv( b.data() ); auto modv = AVX2d::fmod(av,bv); BOOST_TEST_MESSAGE( "fmod = " << modv ); @@ -42,30 +43,29 @@ BOOST_AUTO_TEST_CASE( test_fmod ) 
BOOST_AUTO_TEST_CASE( test_double_to_int ) { - std::vector a = {17.4, -19.2, 12.1, -4007.3}; - auto f = AVX2d::double_to_int( AVX2d::float_t( a.data() )); - alignas(32) uint64_t t[4]; + std::vector a = {17.4, 19.2, 12.1, 4007.3}; + auto f = udouble_to_uint( real_v( a.data() )); + alignas(32) uint64_t t[ utils::size::value ]; _mm256_store_si256( (__m256i*)t, f); BOOST_TEST( t[0] == 17 ); - BOOST_TEST( t[1] == -19 ); + BOOST_TEST( t[1] == 19 ); BOOST_TEST( t[2] == 12 ); - BOOST_TEST( t[3] == -4007 ); + BOOST_TEST( t[3] == 4007 ); } BOOST_AUTO_TEST_CASE( test_gather ) { // 0 1 2 3 4 5 6 std::vector data = { 15.4, 19.7, 121.8, -15.6, M_PI, sqrt(2), 5.7, 12 }; - std::vector addr = { 0, 5, 3, 3 }; - auto v = AVX2d::gather( data.data(), AVX2d::float_t(addr.data()) ).to_array(); + std::vector addr = { 0.2, 5.3, 3.1, 4.1 }; + auto v = AVX2d::gather( data.data(), AVX2d::real_v(addr.data()) ).to_array(); BOOST_TEST( v[0] == data[0] ); BOOST_TEST( v[1] == data[5] ); BOOST_TEST( v[2] == data[3] ); - BOOST_TEST( v[3] == data[3] ); + BOOST_TEST( v[3] == data[4] ); } - #else BOOST_AUTO_TEST_CASE( test_dummy ) { From e8180a048f9a6059bc3017db74a329135de0f856 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 28 Apr 2020 14:04:35 +0200 Subject: [PATCH 35/67] fix avx512d types, units for OSX --- AmpGen/Units.h | 15 ++++----------- AmpGen/simd/avx512d_types.h | 1 + src/Units.cpp | 4 ++-- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/AmpGen/Units.h b/AmpGen/Units.h index 9e376d122d7..c7e88da4570 100644 --- a/AmpGen/Units.h +++ b/AmpGen/Units.h @@ -3,21 +3,14 @@ #include "AmpGen/enum.h" namespace AmpGen { -/* - struct NewUnits { - static unsigned TeV = 0; - static unsigned GeV = 1; - static unsigned MeV = 2; - static unsigned KeV = 3; - - }; -*/ + static const double TeV = 1000; static const double GeV = 1; static const double MeV = 0.001; static const double KeV = 0.001*0.001; - - declare_enum( Units, TeV, GeV, MeV, KeV ) + static const double eV = 0.001*0.001*0.001; 
+ + declare_enum( Units, TeV, GeV, MeV, KeV, eV ) double to_double(const Units& unit ); } #endif diff --git a/AmpGen/simd/avx512d_types.h b/AmpGen/simd/avx512d_types.h index a43af591688..36d9695af77 100644 --- a/AmpGen/simd/avx512d_types.h +++ b/AmpGen/simd/avx512d_types.h @@ -132,6 +132,7 @@ namespace AmpGen { struct complex_v { real_v re; real_v im; + static constexpr unsigned size = 8; typedef std::complex scalar_type; real_v real() const { return re; } diff --git a/src/Units.cpp b/src/Units.cpp index 121791d3ddc..ff8909af72c 100644 --- a/src/Units.cpp +++ b/src/Units.cpp @@ -3,11 +3,11 @@ #include "AmpGen/Utilities.h" namespace AmpGen { - complete_enum(Units, TeV, GeV, MeV, KeV) + complete_enum(Units, TeV, GeV, MeV, KeV, eV) } double AmpGen::to_double(const AmpGen::Units& unit) { - static constexpr std::array value_table = {TeV, GeV, MeV, KeV}; + static constexpr double value_table[5] = {TeV, GeV, MeV, KeV, eV}; return value_table[unsigned(unit)]; } From c0c7f1cd3f0448729577b365ae305d2092f0ed13 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 28 Apr 2020 14:17:21 +0200 Subject: [PATCH 36/67] fix units for osx --- src/Units.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Units.cpp b/src/Units.cpp index ff8909af72c..9ae73ad916d 100644 --- a/src/Units.cpp +++ b/src/Units.cpp @@ -8,6 +8,6 @@ namespace AmpGen { double AmpGen::to_double(const AmpGen::Units& unit) { - static constexpr double value_table[5] = {TeV, GeV, MeV, KeV, eV}; + static const double value_table[5] = {TeV, GeV, MeV, KeV, eV}; return value_table[unsigned(unit)]; } From 867cf2a9a7132c9bcec622524ce453124ece0b9e Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sat, 16 May 2020 14:19:40 +0200 Subject: [PATCH 37/67] Progress towards SIMD compatible generation, various fixes --- AmpGen/ASTResolver.h | 4 +- AmpGen/AmplitudeRules.h | 6 +- AmpGen/CacheTransfer.h | 10 +- AmpGen/CoherentSum.h | 10 +- AmpGen/CompiledExpression.h | 21 ++-- AmpGen/EventList.h | 13 +- 
AmpGen/EventListSIMD.h | 13 ++ AmpGen/EventType.h | 6 +- AmpGen/ExpressionParser.h | 1 + AmpGen/Factory.h | 2 +- AmpGen/Generator.h | 219 ++++++++++++++++----------------- AmpGen/MetaUtils.h | 18 ++- AmpGen/Minimiser.h | 22 +--- AmpGen/NamedParameter.h | 2 +- AmpGen/PolarisedSum.h | 3 +- AmpGen/ProfileClock.h | 11 +- AmpGen/Projection.h | 7 +- AmpGen/Spline.h | 2 +- AmpGen/Store.h | 43 ++++--- AmpGen/SumPDF.h | 2 +- AmpGen/TreePhaseSpace.h | 23 ++-- AmpGen/TreeReader.h | 3 +- AmpGen/Units.h | 11 +- AmpGen/Utilities.h | 2 +- AmpGen/simd/avx2d_types.h | 82 +++++------- AmpGen/simd/utils.h | 14 ++- CMakeLists.txt | 4 +- Standalone.cmake | 20 +-- apps/DataConverter.cpp | 14 +-- apps/Fitter.cpp | 2 +- apps/Generator.cpp | 142 +++++++++++++-------- examples/QcGenerator.cpp | 2 +- src/ASTResolver.cpp | 3 +- src/CacheTransfer.cpp | 14 +-- src/CoherentSum.cpp | 9 +- src/CompiledExpressionBase.cpp | 6 +- src/EventList.cpp | 48 ++++++-- src/EventListSIMD.cpp | 15 +-- src/EventType.cpp | 19 +-- src/Expression.cpp | 8 +- src/ExpressionParser.cpp | 14 ++- src/Kinematics.cpp | 4 +- src/Lineshapes/BW.cpp | 6 +- src/PhaseSpace.cpp | 27 ++-- src/PolarisedSum.cpp | 24 +++- src/Projection.cpp | 20 ++- src/Spline.cpp | 6 +- src/TreePhaseSpace.cpp | 130 +++++++++---------- src/Units.cpp | 4 +- test/test_avx2d.cpp | 33 ++++- 50 files changed, 638 insertions(+), 486 deletions(-) diff --git a/AmpGen/ASTResolver.h b/AmpGen/ASTResolver.h index aa978026649..bbad189fb1c 100644 --- a/AmpGen/ASTResolver.h +++ b/AmpGen/ASTResolver.h @@ -36,7 +36,7 @@ namespace AmpGen { { auto it = m_cacheFunctions.find(name); if( it != m_cacheFunctions.end() ) return it->second->address(); - auto cacheFunction = std::make_shared(m_nParameters, args... ); + auto cacheFunction = std::make_shared(m_nParameters, name, args... 
); m_cacheFunctions[name] = cacheFunction; m_nParameters += cacheFunction->size(); return m_nParameters - cacheFunction->size(); @@ -63,8 +63,8 @@ namespace AmpGen { std::map m_tempTrees; /// temporary store of sub-trees for performing cse reduction unsigned int m_nParameters; /// Number of parameters bool m_enable_cuda {false}; /// flag to generate CUDA code <> - bool m_enable_avx {false};/// flag to generate vectorised code <> bool m_enable_compileTimeConstants {false}; /// flag to enable compile time constants <> + bool m_enable_avx {false}; /// flag to generate code using AVX instructions <> }; template <> void ASTResolver::resolve( const Parameter& obj ); diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index f7d0be128a2..ed44420896f 100644 --- a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -177,7 +177,7 @@ namespace AmpGen const std::vector operator()(const Event& event) const { - std::vector rt(4); + std::vector rt(size); #if ENABLE_AVX amp_type::operator()(rt.data(), 1, externBuffer().data(), EventListSIMD::makeEvent(event).data()); #else @@ -186,7 +186,9 @@ namespace AmpGen return rt; } template auto operator()(arg_types... args ) const { return amp_type::operator()(args...) 
; } - + #if ENABLE_AVX + void debug( const Event& event ) const { amp_type::debug(EventListSIMD::makeEvent(event).data() ) ; } + #endif const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } Particle decayTree; diff --git a/AmpGen/CacheTransfer.h b/AmpGen/CacheTransfer.h index 6a4e162adc8..4a2748e60dc 100644 --- a/AmpGen/CacheTransfer.h +++ b/AmpGen/CacheTransfer.h @@ -3,6 +3,7 @@ #include #include +#include namespace AmpGen { @@ -13,7 +14,7 @@ namespace AmpGen { public: CacheTransfer(); - CacheTransfer( const size_t& address, const double& value=0, const size_t& size=1); + CacheTransfer( const size_t& address, const std::string& name, const double& value=0, const size_t& size=1); virtual ~CacheTransfer() = default; size_t address() const { return m_address ; } @@ -25,13 +26,14 @@ namespace AmpGen protected: size_t m_address = {0}; size_t m_size = {0}; - double m_value = {0}; + double m_value = {0}; + std::string m_name = {""}; }; class ParameterTransfer : public CacheTransfer { public: - ParameterTransfer( const size_t& address, AmpGen::MinuitParameter* source ); + ParameterTransfer( const size_t& address, const std::string& name, AmpGen::MinuitParameter* source ); virtual ~ParameterTransfer() = default; size_t size() const override { return 1 ; } @@ -40,7 +42,7 @@ namespace AmpGen void print() const override; protected: - AmpGen::MinuitParameter* m_source = {nullptr}; + MinuitParameter* m_source = {nullptr}; }; } // namespace AmpGen diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index c50404fa02e..9b0bf903f7c 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -77,9 +77,15 @@ namespace AmpGen void setEvents( const EventList_type& list ); void setMC( const EventList_type& sim ); #if ENABLE_AVX - void setEvents( const EventList& list) { m_ownEvents = true; setEvents( *(new EventListSIMD(list)) ) ; } - void setMC( const EventList& list) { setMC( *(new EventListSIMD(list)) ) ; } + void setEvents( const EventList& 
list) { + WARNING("Setting events from a AoS container, will need to make a copy"); + m_ownEvents = true; setEvents( *(new EventListSIMD(list)) ) ; } + void setMC ( const EventList& list) { + WARNING("Setting integration events from a AoS container, will need to make a copy"); + setMC( *(new EventListSIMD(list)) ) ; } + double operator()(const double*, const unsigned) const; #endif + float_v operator()(const float_v*, const unsigned) const; real_t operator()(const Event& evt ) const { return m_weight*std::norm(getVal(evt))/m_norm; } diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index eaf86b1c5d2..6b3812564b6 100644 --- a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -22,10 +22,8 @@ namespace AmpGen but in principal support also exists for computing coupled channel propagators (i.e. returning array types) */ namespace detail { - template struct size_of { unsigned operator()(){ return sizeof(T); } }; - template <> struct size_of { unsigned operator()(){ - WARNING("Asking for the size_of the return buffer of an RTO expression"); - return 0; } }; + template struct size_of { static constexpr unsigned value = sizeof(T); }; + template <> struct size_of { static constexpr unsigned value = 0; } ; } template class CompiledExpression; @@ -56,7 +54,7 @@ namespace AmpGen CompiledExpression( const std::string& name = "" ) : CompiledExpressionBase( name ) {}; std::vector externBuffer() const { return m_externals ; } - std::string returnTypename() const override { return typeof(); } + std::string returnTypename() const override { return type_string(); } std::string fcnSignature() const override { return CompiledExpressionBase::fcnSignature(typelist(), use_rto()); @@ -90,7 +88,12 @@ namespace AmpGen INFO( "Hash = " << hash() ); INFO( "IsReady? = " << isReady() << " IsLinked? 
" << (m_fcn.isLinked() ) ); INFO( "args = ["<< vectorToString( m_externals, ", ") <<"]"); - for( auto& c : m_cacheTransfers ){ c->print() ; } + std::vector ordered_cache_functors; + for( const auto& c : m_cacheTransfers ) ordered_cache_functors.push_back( c.get() ); + std::sort( ordered_cache_functors.begin(), + ordered_cache_functors.end(), + [](auto& c1, auto& c2 ) { return c1->address() < c2->address() ; } ); + for( auto& c : ordered_cache_functors ) c->print() ; } void setExternal( const double& value, const unsigned int& address ) override @@ -128,7 +131,7 @@ namespace AmpGen stream << " const size_t& N, " << " const size_t& eventSize, " << " const size_t& cacheSize, "; - stream << typeof() << " * rt, "; + stream << type_string() << " * rt, "; stream << CompiledExpressionBase::fcnSignature(typelist(), use_rto(), false) << ") {\n"; stream << "#pragma omp parallel for\n"; stream << "for( size_t i = 0; i < N/" << utils::size::value << "; ++i ){\n"; @@ -149,7 +152,7 @@ namespace AmpGen bool isReady() const override { return m_fcn.isLinked(); } bool isLinked() const { return m_fcn.isLinked() ; } - unsigned returnTypeSize() const override { return detail::size_of()(); } + unsigned returnTypeSize() const override { return detail::size_of::value; } template < class T > RETURN_TYPE operator()( const T* event ) const @@ -173,7 +176,7 @@ namespace AmpGen FATAL( "Function" << name() << " debugging symbols not linked" ); } std::vector> debug_results; - if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, &( m_externals[0] ), event ); + if constexpr(std::is_same::value) debug_results = m_fdb( nullptr, 0, &( m_externals[0] ), event ); else debug_results = m_fdb( &(m_externals[0]), event); for( auto& debug_result : debug_results ){ auto val = debug_result.second; diff --git a/AmpGen/EventList.h b/AmpGen/EventList.h index aadd68eea37..8d3f742cf6c 100644 --- a/AmpGen/EventList.h +++ b/AmpGen/EventList.h @@ -75,15 +75,20 @@ namespace AmpGen const double* block(const 
unsigned pos) const { return m_data[pos].address(); } real_t weight( const size_t& pos) const { return m_data[pos].weight(); } real_t genPDF( const size_t& pos) const { return m_data[pos].genPdf(); } - void reserve( const size_t& size ) { m_data.reserve( size ); } - void resize ( const size_t& size ) { m_data.resize(size) ; } - void push_back( const Event& evt ) { m_data.push_back( evt ); } - void emplace_back( const Event& evt) { m_data.emplace_back(evt) ; } + void reserve( const size_t& size ); + void resize ( const size_t& size ); + void push_back( const Event& evt ); + void emplace_back( const Event& evt); void setEventType( const EventType& type ) { m_eventType = type; } void add( const EventList& evts ); void loadFromTree( TTree* tree, const ArgumentPack& args ); void loadFromFile( const std::string& fname, const ArgumentPack& args ); void clear(); + void setWeight( const unsigned int& pos, const double& w, const double&g=+1) + { + m_data[pos].setWeight(w); + m_data[pos].setGenPdf(g); + } void erase( const std::vector::iterator& begin, const std::vector::iterator& end ); TTree* tree( const std::string& name, const std::vector& extraBranches = {} ) const; diff --git a/AmpGen/EventListSIMD.h b/AmpGen/EventListSIMD.h index 248deff8455..9c7900dd507 100644 --- a/AmpGen/EventListSIMD.h +++ b/AmpGen/EventListSIMD.h @@ -63,8 +63,21 @@ namespace AmpGen const auto& store() const { return m_data; } const Event at(const unsigned& p) const { return EventListSIMD::operator[](p) ; } const float_v* block(const unsigned& p) const { return m_data.data() + p * m_data.nFields(); } + float_v* block(const unsigned& p) { return m_data.data() + p * m_data.nFields(); } float_v weight(const unsigned& p) const { return m_weights[p]; } float_v genPDF(const unsigned& p) const { return m_genPDF[p]; } + + void setWeight( const unsigned& block, const float_v& w, const float_v& g=1) + { + m_weights[block] = w; + m_genPDF[block] = g; + } + void resize( const unsigned nEvents ) + { + m_data = 
Store( nEvents, m_eventType.eventSize() ); + m_weights.resize( aligned_size(), 1); + m_genPDF.resize( aligned_size(), 1 ); + } const Event operator[]( const size_t&) const; std::array::value> scatter(unsigned) const; void gather(const std::array::value>&, unsigned); diff --git a/AmpGen/EventType.h b/AmpGen/EventType.h index d2777c0143a..191cf35f976 100644 --- a/AmpGen/EventType.h +++ b/AmpGen/EventType.h @@ -38,7 +38,7 @@ namespace AmpGen /// Counts the number of particles in this event type with /// the same name as the index'th name. std::pair count(const unsigned& index) const; - std::pair minmax( const std::vector& indices, bool isGeV = false ) const; + std::pair minmax( const std::vector& indices) const; std::vector masses() const; std::string mother() const; double mass( const unsigned& index ) const; @@ -52,7 +52,7 @@ namespace AmpGen std::string decayDescriptor() const; std::string label( const unsigned& index, bool isRoot = true ) const; std::string label( const std::vector& index, bool isRoot = true ) const; - std::vector defaultProjections(const unsigned& nBins) const; + std::vector defaultProjections(const unsigned& nBins=100) const; Projection projection(const unsigned& nBins, const std::vector& indices, const std::string& observable = "mass2") const; bool operator==( const EventType& other ) const; @@ -62,7 +62,7 @@ namespace AmpGen /// Functor to randomly symmetrise data of this event type, using the Fisher-Yates shuffle. std::function symmetriser() const; - std::function& ids)> automaticOrdering() const; + std::function& ids)> automaticOrdering() const; /// Calculates the number of spin indices associated with the initial and final state, i.e. the rank of the relevant transition matrix. 
std::pair dim() const; diff --git a/AmpGen/ExpressionParser.h b/AmpGen/ExpressionParser.h index 0c69776b8b0..6becfbf086d 100644 --- a/AmpGen/ExpressionParser.h +++ b/AmpGen/ExpressionParser.h @@ -1,4 +1,5 @@ #ifndef AMPGEN_EXPRESSIONPARSER_H +#define AMPGEN_EXPRESSIONPARSER_H 1 #include #include diff --git a/AmpGen/Factory.h b/AmpGen/Factory.h index 0a76926c6e4..3323914be87 100644 --- a/AmpGen/Factory.h +++ b/AmpGen/Factory.h @@ -34,7 +34,7 @@ namespace AmpGen auto ptrToStatic = getMe(); auto raw_base = ptrToStatic->m_terms.find( type ); if ( raw_base == ptrToStatic->m_terms.end() ) { - if ( !quiet ) ERROR( type << " not found in Factory<" << typeof() << typeof() << " >" ); + if ( !quiet ) ERROR( type << " not found in Factory<" << type_string() << type_string() << " >" ); return nullptr; } auto objectToReturn = raw_base->second->create(); diff --git a/AmpGen/Generator.h b/AmpGen/Generator.h index 670c6838850..4ab6e0676a1 100644 --- a/AmpGen/Generator.h +++ b/AmpGen/Generator.h @@ -7,145 +7,136 @@ #include "AmpGen/Utilities.h" #include "AmpGen/ProfileClock.h" #include "AmpGen/ProgressBar.h" +#include "AmpGen/NamedParameter.h" +#include "AmpGen/MetaUtils.h" + namespace AmpGen { - template + template class Generator { private: - EventType m_eventType; - PHASESPACE m_gps; - size_t m_generatorBlock = {5000000}; - TRandom* m_rnd = {gRandom}; - bool m_normalise = {true}; + EventType m_eventType; + phaseSpace_t m_gps; + size_t m_generatorBlock = {5000000}; + TRandom* m_rnd = {gRandom}; + bool m_normalise = {true}; public: - template - explicit Generator( const ARGS&... args ) - : m_gps(args...) - { - m_eventType = m_gps.eventType(); - setRandom( m_rnd ); - } - PHASESPACE phsp() { return m_gps; } + template explicit Generator( const ARGS&... args ) : m_gps(args...) 
+ { + m_eventType = m_gps.eventType(); + setRandom( m_rnd ); + DEBUG("Creating generator, using: " << type_string() << " and internal store type: " << type_string() ); + } + + phaseSpace_t phsp() { return m_gps; } + void setRandom( TRandom* rand ) { m_rnd = rand; m_gps.setRandom( m_rnd ); } void setBlockSize( const size_t& blockSize ) { m_generatorBlock = blockSize; } - void setNormFlag( const bool& normSetting ) { m_normalise = normSetting; } + void setNormFlag( const bool& normSetting ) { m_normalise = normSetting; } - template void fillEventListPhaseSpace( EventList& list, const size_t& N, cut_type cut = nullptr) + template void fillEventListPhaseSpace( eventListInternal_t& events, const size_t& N, [[maybe_unused]] cut_t cut = nullptr) { - if constexpr( std::is_same::value ) + events.resize(N); + auto it = events.begin(); + while( it != events.end() ) { - if( cut != nullptr ) FATAL("This shouldn't happen..."); - list.resize(N); -// #pragma omp parallel for - for( unsigned int i = 0 ; i != N; ++i ) - { - list[i] = m_gps.makeEvent(); - list[i].setWeight( 1 ); - list[i].setIndex(i); - } - } - else { - list.reserve( N ); - while( list.size() < N ){ - Event newEvent = m_gps.makeEvent(); - newEvent.setWeight( 1 ); - if ( cut( newEvent ) ){ - newEvent.setIndex( list.size() ); - list.push_back( newEvent ); - } + *it = m_gps.makeEvent(); + if constexpr( ! std::is_same::value ) + { + if( ! 
cut(*it ) ) continue; } + if constexpr( std::is_same::value ) it->setIndex( it - events.begin() ); + ++it; } } - - template void fillEventList( PDF& pdf, EventList& list, const size_t& N ) + template void fillEventList( pdf_t& pdf, eventList_t& list, const size_t& N ) { fillEventList( pdf, list, N, nullptr); } - - template - void fillEventList( PDF& pdf, EventList& list, const size_t& N, HARD_CUT cut ) + template double getMax(const pdf_t& pdf, const eventListInternal_t& events) const + { + auto max = 0.; + for ( const auto& evt : events ) { - if ( m_rnd == nullptr ) { - ERROR( "Random generator not set!" ); - return; - } - double maxProb = m_normalise ? 0 : 1; - auto size0 = list.size(); - auto tStartTotal = std::chrono::high_resolution_clock::now(); - pdf.reset( true ); - ProgressBar pb(60, detail::trimmedString(__PRETTY_FUNCTION__) ); - ProfileClock t_phsp, t_eval, t_acceptReject, t_gather; - std::vector efficiencyReport(m_generatorBlock,false); - - while ( list.size() - size0 < N ) { - EventList mc( m_eventType ); - t_phsp.start(); - fillEventListPhaseSpace(mc, m_generatorBlock, cut); - t_phsp.stop(); - t_gather.start(); - pdf.setEvents( mc ); - t_gather.stop(); - t_eval.start(); - pdf.prepare(); - t_eval.stop(); - if ( maxProb == 0 ) { - double max = 0; - for ( auto& evt : mc ) { - double value = pdf(evt) / evt.genPdf(); - if ( value > max ) max = value; - } - maxProb = max * 1.5; - INFO( "Setting normalisation constant = " << maxProb ); - } - auto previousSize = list.size(); - t_acceptReject.start(); - #ifdef _OPENMP - #pragma omp parallel for - #endif - for ( size_t i=0; i < mc.size(); ++i ) - mc[i].setGenPdf(pdf(mc[i]) / mc[i].genPdf()); + auto value = pdf(evt) / evt.genPdf(); + if ( value > max ) max = value; + } + DEBUG( "Returning normalisation constant = " << max * 1.5 ); + return max; + } - for( size_t i=0; i != mc.size(); ++i ) - { - auto& evt = mc[i]; - if ( evt.genPdf() > maxProb ) { - std::cout << std::endl; - WARNING( "PDF value exceeds norm 
value: " << evt.genPdf() << " > " << maxProb ); - } - if ( evt.genPdf() > maxProb * m_rnd->Rndm() ){ - list.push_back(evt); - efficiencyReport[i] = true; - } - else efficiencyReport[i] = false; - if ( list.size() - size0 == N ) break; + template void fillEventList( pdf_t& pdf, eventList_t& list, const size_t& N, cut_t cut ) + { + if ( m_rnd == nullptr ) + { + ERROR( "Random generator not set!" ); + return; + } + double maxProb = m_normalise ? 0 : 1; + auto size0 = list.size(); + double totalGenerated = 0; + pdf.reset( true ); + ProgressBar pb(60, detail::trimmedString(__PRETTY_FUNCTION__) ); + ProfileClock t_phsp, t_eval, t_acceptReject, t_total; + std::vector efficiencyReport(m_generatorBlock,false); + bool accept_all = NamedParameter("AcceptAll",false); + eventListInternal_t mc( m_eventType ); + while ( list.size() - size0 < N ) { + t_phsp.start(); + fillEventListPhaseSpace(mc, m_generatorBlock, cut); + t_phsp.stop(); + t_eval.start(); + pdf.setEvents( mc ); + pdf.prepare(); + maxProb = maxProb == 0 ? 1.5 * getMax(pdf, mc) : maxProb; + auto previousSize = list.size(); + #ifdef _OPENMP + #pragma omp parallel for + #endif + for ( size_t block=0; block != mc.nBlocks(); ++block ) + mc.setWeight(block, 1.0, pdf(mc.block(block), block) / mc.genPDF(block)); + t_eval.stop(); + t_acceptReject.start(); + totalGenerated += mc.size(); + for(const auto& event : mc) + { + if ( event.genPdf() > maxProb ) { + std::cout << std::endl; + WARNING( "PDF value exceeds norm value: " << event.genPdf() << " > " << maxProb ); } - m_gps.provideEfficiencyReport( efficiencyReport ); - t_acceptReject.stop(); - double time = std::chrono::duration( std::chrono::high_resolution_clock::now() - tStartTotal ).count(); - double efficiency = 100. 
* ( list.size() - previousSize ) / (double)m_generatorBlock; - pb.print( double(list.size()) / double(N), " ε[gen] = " + mysprintf("%.4f",efficiency) + "% , " + std::to_string(int(time/1000.)) + " seconds" ); - if ( list.size() == previousSize ) { - ERROR( "No events generated, PDF: " << typeof() << " is likely to be malformed" ); - break; + if ( accept_all || event.genPdf() > maxProb * m_rnd->Rndm() ){ + list.push_back(event); + efficiencyReport[event.index()] = true; } -// maxProb = 0; - } - pb.finish(); - double time = std::chrono::duration( std::chrono::high_resolution_clock::now() - tStartTotal ).count(); - INFO("Generated " << N << " events in " << time << " ms"); - INFO("Generating phase space : " << t_phsp << " ms"); - INFO("Evaluating PDF : " << t_eval << " ms"); - INFO("Doing accept/reject : " << t_acceptReject << " ms"); - INFO("Gathering : " << t_gather << " ms"); - } - template ::value>::type> - EventList generate(PDF& pdf, const size_t& nEvents ) + else efficiencyReport[event.index()] = false; + if ( list.size() - size0 == N ) break; + } + t_acceptReject.stop(); + + // m_gps.provideEfficiencyReport( efficiencyReport ); + double efficiency = 100. * ( list.size() - previousSize ) / (double)m_generatorBlock; + pb.print( double(list.size()) / double(N), " ε[gen] = " + mysprintf("%.4f",efficiency) + "% , " + std::to_string(int(t_total.count()/1000.)) + " seconds" ); + if ( list.size() == previousSize ) { + ERROR( "No events generated, PDF: " << type_string() << " is likely to be malformed" ); + break; + } + } + pb.finish(); + t_total.stop(); + INFO("Generated " << N << " events in " << t_total << " ms"); + INFO("Generating phase space : " << t_phsp << " ms"); + INFO("Evaluating PDF : " << t_eval << " ms"); + INFO("Accept/reject : " << t_acceptReject << " ms"); + INFO("Efficiency = " << double(N) * 100. 
/ totalGenerated << " %"); + } + template ::value>::type> + EventList generate(pdf_t& pdf, const size_t& nEvents ) { EventList evts( m_eventType ); fillEventList( pdf, evts, nEvents ); @@ -178,7 +169,7 @@ namespace AmpGen /** @function PyGenerate Wrapper around the a phase space generator from a stringy event type to be used with python / numpy. - */ + */ extern "C" void PyGenerate( const char* eventType, double* out, const unsigned int size ); } // namespace AmpGen #endif diff --git a/AmpGen/MetaUtils.h b/AmpGen/MetaUtils.h index 11b33f7ca4f..3c985be4daa 100644 --- a/AmpGen/MetaUtils.h +++ b/AmpGen/MetaUtils.h @@ -15,7 +15,7 @@ namespace AmpGen and identifying if a class can be constructed in different ways. */ - template std::string typeof() + template std::string type_string() { int status = 0; std::string name = abi::__cxa_demangle( typeid( TYPE ).name(), nullptr, nullptr, &status ); @@ -25,7 +25,7 @@ namespace AmpGen return name; } - template std::string typeof( const TYPE& t ) { return typeof(); } + template std::string type_string( const TYPE& t ) { return type_string(); } namespace detail { template struct zeroType { typedef T type; }; @@ -94,8 +94,8 @@ namespace AmpGen template std::vector typelist() { std::vector< std::string > rt; - if( typeof() != "void" ) { - rt.emplace_back( typeof() ); + if( type_string() != "void" ) { + rt.emplace_back( type_string() ); auto rtp = typelist(); std::copy( rtp.begin(), rtp.end(), std::back_inserter(rt) ); } @@ -106,6 +106,16 @@ namespace AmpGen template struct isTuple>: std::true_type {}; template struct isVector : std::false_type {}; template struct isVector> : std::true_type {}; + + + #define def_has_function(function_name) \ + template \ + struct has_##function_name { \ + template static auto test(int) -> decltype(std::declval().function_name() == 1, std::true_type()); \ + template static std::false_type test(...); \ + static constexpr bool value = std::is_same(0)), std::true_type>::value; \ + } + } // namespace 
AmpGen #endif diff --git a/AmpGen/Minimiser.h b/AmpGen/Minimiser.h index c32c45921b9..736380d6588 100644 --- a/AmpGen/Minimiser.h +++ b/AmpGen/Minimiser.h @@ -9,6 +9,7 @@ #include #include "TMatrixTSym.h" +#include "AmpGen/MetaUtils.h" /** @cond PRIVATE */ namespace ROOT @@ -31,24 +32,13 @@ namespace AmpGen class Minimiser { private: - template - struct HasGetVal - { - typedef char YesType[1]; - typedef char NoType[2]; - template static YesType& test( decltype(&C::getVal) ) ; - template static NoType& test(...); - enum { value = sizeof(test(0)) == sizeof(YesType) }; - }; - + def_has_function(getVal); + public: - template typename std::enable_if_t::value, void> setFunction( TYPE& fcn ) - { - m_theFunction = [&fcn]() { return fcn.getVal(); }; - } - template typename std::enable_if_t::value, void> setFunction(TYPE& fcn) + template void setFunction( TYPE& fcn ) { - m_theFunction = [&fcn](){ return fcn() ; } ; + if constexpr( has_getVal::value ) m_theFunction = [&fcn]() { return fcn.getVal(); }; + else m_theFunction = fcn; } template diff --git a/AmpGen/NamedParameter.h b/AmpGen/NamedParameter.h index 461b33c4bd3..40774b215fb 100644 --- a/AmpGen/NamedParameter.h +++ b/AmpGen/NamedParameter.h @@ -76,7 +76,7 @@ namespace AmpGen } // ~NamedParameter(){ INFO("Deconstructing: " << m_name ); } void help(const T& def){ - std::string type = typeof(); + std::string type = type_string(); if( type == "std::__cxx11::basic_string, std::allocator >" ) type = "string"; std::cout << " " << bold_on << std::left << std::setw(27) << m_name << bold_off << std::setw(20) << "[" + type + "]" ; diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index a2d0a555e52..987a090dd6d 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -50,8 +50,9 @@ namespace AmpGen #if ENABLE_AVX void setEvents(EventList& evts){ m_ownEvents = true; setEvents( *new EventList_type(evts)) ; }; void setMC(EventList& evts){ setMC( *new EventList_type(evts)) ; }; + double operator()(const double*, 
const unsigned) const; #endif - float_v operator()( const float_v*, const unsigned) const; + float_v operator()(const float_v*, const unsigned) const; real_t operator()(const Event& evt) const; void reset(const bool& = false); void debug(const Event&); diff --git a/AmpGen/ProfileClock.h b/AmpGen/ProfileClock.h index 6da6bccbaa9..8810c325663 100644 --- a/AmpGen/ProfileClock.h +++ b/AmpGen/ProfileClock.h @@ -17,6 +17,11 @@ namespace AmpGen{ t_end = std::chrono::high_resolution_clock::now() ; t_duration += std::chrono::duration( t_end - t_start ).count(); } + double count() const + { + auto now = std::chrono::high_resolution_clock::now() ; + return std::chrono::duration(now - t_start ).count(); + } void start(){ t_start = std::chrono::high_resolution_clock::now() ; } operator double() const { return t_duration; } ; }; @@ -26,7 +31,7 @@ namespace AmpGen{ ProfileClock t; for( size_t i = 0 ; i < N; ++i ) fcn(); t.stop(); - INFO( (name == "" ? typeof() : name ) << " " << t/double(N) << "[ms] per iteration" ); + INFO( (name == "" ? type_string() : name ) << " " << t/double(N) << "[ms] per iteration" ); return t; } template @@ -46,7 +51,7 @@ namespace AmpGen{ } t /= double(N); t2 = std::sqrt( t2 / double(N) - t*t); - INFO( (name == "" ? typeof() : name ) << " " << t << " ± " << t2 << "[ms] per iteration << [" << tmin << ", " << tmax << "]" ); + INFO( (name == "" ? 
type_string() : name ) << " " << t << " ± " << t2 << "[ms] per iteration << [" << tmin << ", " << tmax << "]" ); return t; } @@ -56,7 +61,7 @@ namespace AmpGen{ auto z = 0 ; for( size_t i = 0 ; i < N; ++i ) z += fcn(); t.stop(); - INFO( typeof() << " " << t/double(N) << "[ms] per iteration; " << z ); + INFO( type_string() << " " << t/double(N) << "[ms] per iteration; " << z ); return t; } } diff --git a/AmpGen/Projection.h b/AmpGen/Projection.h index d1866b373c7..ce6c926ea53 100644 --- a/AmpGen/Projection.h +++ b/AmpGen/Projection.h @@ -48,7 +48,12 @@ namespace AmpGen TH1D* plot(const std::string& prefix="") const; std::function binFunctor() const; - void setRange( const double& min, const double& max ){ m_min = (min); m_max = (max) ; } + void setRange( const double& min, const double& max ) + { + m_min = min; + m_max = max; + m_width = (m_max-m_min)/double(m_nBins); + } friend class Projection2D; private: diff --git a/AmpGen/Spline.h b/AmpGen/Spline.h index 7ceadd03b6d..3556e80b823 100644 --- a/AmpGen/Spline.h +++ b/AmpGen/Spline.h @@ -26,7 +26,7 @@ namespace AmpGen{ public: SplineTransfer(); SplineTransfer( const SplineTransfer& other ); - SplineTransfer( const size_t& address, const unsigned int& N, const double& min, const double& max ); + SplineTransfer( const size_t& address, const std::string& name, const unsigned int& N, const double& min, const double& max ); void transfer( CompiledExpressionBase* destination ) override; bool isConfigured(); void set( const unsigned int& N, AmpGen::MinuitParameter* f ); diff --git a/AmpGen/Store.h b/AmpGen/Store.h index da17cc18911..8686a91d333 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -13,7 +13,7 @@ namespace AmpGen { SoA, AoS }; - template class Store + template class Store { public: Store( const size_t& nEntries=0, const size_t& nFields=0) : @@ -22,34 +22,46 @@ namespace AmpGen { m_nFields(nFields), m_store(m_nBlocks * m_nFields) {} - template + template void addFunctor( const functor_type& functor, unsigned 
fieldsPerFunctor=0 ) { - auto vsize = fieldsPerFunctor == 0 ? functor.returnTypeSize() / sizeof(stored_type) : fieldsPerFunctor; - DEBUG("Registering: " << functor.name() << " field = " << m_nFields ); - m_index[ functor.name() ] = std::make_pair(m_nFields, vsize); - m_nFields += vsize; + if( m_index.count(functor.name()) == 0 ) + { + auto vsize = fieldsPerFunctor == 0 ? functor.returnTypeSize() / sizeof(stored_type) : fieldsPerFunctor; + DEBUG("Registering: " << functor.name() << " field = " << m_nFields ); + m_index[ functor.name() ] = std::make_pair(m_nFields, vsize); + m_nFields += vsize; + } } - - template Store( const size_t& nEntries, const std::vector& functors, const size_t& fieldsPerFunctor = 0) + template void allocate( const size_t& nEntries, const std::vector& functors, const size_t& fieldsPerFunctor = 0) { for(const auto& functor : functors) addFunctor( functor, fieldsPerFunctor); m_nEntries = nEntries; m_nBlocks = utils::aligned_size(nEntries)/utils::size::value; m_store.resize(m_nBlocks * m_nFields); } - template ::value>::type> - Store( const size_t& nEntries, const functor_type& functor, const size_t& fieldsPerFunctor=0 ) + template ::value>::type > + void allocate( const size_t& nEntries, const functor_type& functor, const size_t& fieldsPerFunctor = 0) { - addFunctor(functor); + addFunctor(functor, fieldsPerFunctor); m_nEntries = nEntries; m_nBlocks = utils::aligned_size(nEntries)/utils::size::value; m_store.resize(m_nBlocks * m_nFields); } + + template Store( const size_t& nEntries, const std::vector& functors, const size_t& fieldsPerFunctor = 0) + { + allocate(nEntries, functors, fieldsPerFunctor); + } + template ::value>::type> + Store( const size_t& nEntries, const functor_type& functor, const size_t& fieldsPerFunctor=0 ) + { + allocate( nEntries, {functor}, fieldsPerFunctor); + } inline stored_type operator[]( const size_t& index ) const { return m_store[index]; } inline stored_type& operator[]( const size_t& index ) { return m_store[index]; 
} - template unsigned find( const T& t ) const { return m_index.find( t.name() )->second.first; } + template unsigned find( const T& t ) const { return m_index.find( t.name() )->second.first; } inline size_t size() const { return m_nEntries; } inline size_t nBlocks() const { return m_nBlocks; } @@ -60,12 +72,13 @@ namespace AmpGen { if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; else return m_store[index*m_nFields+field]; } - template + template inline const return_type get(const size_t& index, const size_t& field ) const { return utils::at( operator()( index / utils::size::value, field ), index % utils::size::value ); } inline const stored_type* data() const { return m_store.data(); } + inline stored_type* data() { return m_store.data() ;} inline stored_type& operator()(const size_t& index, const size_t& field) { if constexpr( align == Alignment::SoA ) return m_store[ field * m_nBlocks + index] ; @@ -91,7 +104,7 @@ namespace AmpGen { } } - template void update(const Store& is, const functor_type& fcn) + template void update(const Store& is, const functor_type& fcn) { auto f = m_index.find( fcn.name() ); if( f == m_index.end() ) FATAL("Expression: " << fcn.name() << " is not registed"); @@ -112,7 +125,7 @@ namespace AmpGen { fcn.batch(aligned_size(), is.nFields(), 1 , m_store.data() + p0*m_nBlocks , fcn.externBuffer().data(), is.data() ); } } - template void update( const EventList& events, const functor_type& fcn ) + template void update( const EventList& events, const functor_type& fcn ) { auto f = m_index.find( fcn.name() ); if( f == m_index.end() ) FATAL("Expression: " << fcn.name() << " is not registed"); diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index dd096cbffca..830c7d4413c 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -124,7 +124,7 @@ namespace AmpGen KeyedFunctors view; for_each( this->m_pdfs, [&view, &events]( const auto& pdf) mutable { auto eval = pdf.evaluator(events); - view.add([eval](const auto& 
event){ return eval(event) ; } , typeof(pdf), "" ); + view.add([eval](const auto& event){ return eval(event) ; } , type_string(pdf), "" ); } ); return view; } diff --git a/AmpGen/TreePhaseSpace.h b/AmpGen/TreePhaseSpace.h index bca2cd1da5c..3c708882f5d 100644 --- a/AmpGen/TreePhaseSpace.h +++ b/AmpGen/TreePhaseSpace.h @@ -36,17 +36,16 @@ namespace AmpGen */ class TreePhaseSpace { + public: struct Vertex { + enum Type { BW, Flat, Stable, QuasiStable}; Vertex() = default; Vertex(const Particle& particle, const double& min); - Vertex(const Particle& particle, const double& min, const double& max, const bool& isStable); + Vertex(const Particle& particle, const double& min, const double& max); double p() const; - double genBW() const; - double BW(const double& si) const; double weight() const; - double maxWeight() const; double genPdf(const Event& event) const; void generate(); void print(const unsigned& offset = 0) const; @@ -59,22 +58,20 @@ namespace AmpGen Particle particle; double min = {0}; double max = {0}; - double rhoMax = {0}; - double s = {0}; - bool isStable = {false}; - bool isBW = {true}; + double phiMin = {0}; + double phiMax = {0}; + Type type = {Type::BW}; unsigned index = {999}; double bwMass = {0}; double bwWidth = {0}; - double phiMin = {0}; - double phiMax = {0}; - double w = {0}; - double weightMax= {0}; + double s = {0}; std::shared_ptr left = {nullptr}; std::shared_ptr right = {nullptr}; TRandom3* rand = {nullptr}; std::vector indices; - TLorentzVector mom; + TLorentzVector mom; + bool isMultiBody = {false}; + PhaseSpace phsp; /// multibody phase to resort to for non two-body decomposition; }; explicit TreePhaseSpace(const EventType& type); diff --git a/AmpGen/TreeReader.h b/AmpGen/TreeReader.h index d808025d423..86bea94c1e7 100644 --- a/AmpGen/TreeReader.h +++ b/AmpGen/TreeReader.h @@ -49,6 +49,7 @@ namespace AmpGen bool operator==( const Iterator& rhs ) const { return m_position == rhs.m_position; } bool operator!=( const Iterator& rhs ) const { 
return m_position != rhs.m_position; } size_t operator*() const { return m_position; } + unsigned pos() const { return m_position; } }; TTree* m_tree = {nullptr}; bool m_ready = {false}; @@ -90,7 +91,7 @@ namespace AmpGen ERROR( "Branch type:" << branchType << " not recognised" ); return; } - DEBUG("Making branch with properties: [name = " << name << ", input type = " << branchType << " output type = " << typeof() << "]" ); + DEBUG("Making branch with properties: [name = " << name << ", input type = " << branchType << " output type = " << type_string() << "]" ); m_ready = false; m_branches.push_back( new_branch ); } diff --git a/AmpGen/Units.h b/AmpGen/Units.h index c7e88da4570..00b40c2ccf5 100644 --- a/AmpGen/Units.h +++ b/AmpGen/Units.h @@ -10,7 +10,16 @@ namespace AmpGen { static const double KeV = 0.001*0.001; static const double eV = 0.001*0.001*0.001; - declare_enum( Units, TeV, GeV, MeV, KeV, eV ) + static const double ms = 1000*1000; + static const double us = 1000; + static const double ns = 1; + static const double ps = 0.001; + static const double fs = 0.001*0.001; + + static const double mm = 1.0; + static const double um = 0.001; + static const double nm = 0.001*0.001; + declare_enum( Units, TeV, GeV, MeV, KeV, eV, ms, us, ns, ps, fs ) double to_double(const Units& unit ); } #endif diff --git a/AmpGen/Utilities.h b/AmpGen/Utilities.h index 791ae437a37..07ef1580995 100644 --- a/AmpGen/Utilities.h +++ b/AmpGen/Utilities.h @@ -82,7 +82,7 @@ namespace AmpGen { template RETURN_TYPE lexical_cast( const std::string& word, bool& status ) { - WARNING( "Only use specialised versions of this template (word = " << word << ", type = " << AmpGen::typeof() + WARNING( "Only use specialised versions of this template (word = " << word << ", type = " << AmpGen::type_string() << ") " ); status = 0; return RETURN_TYPE(); diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index f478f365d6f..88ab0bfdd4b 100644 --- a/AmpGen/simd/avx2d_types.h +++ 
b/AmpGen/simd/avx2d_types.h @@ -6,12 +6,20 @@ #include #include #include +#include + +extern "C" __m256d _ZGVcN4v_cos(__m256d x); +extern "C" __m256d _ZGVcN4v_sin(__m256d x); +extern "C" __m256d _ZGVcN4v_exp(__m256d x); +extern "C" __m256d _ZGVcN4v_log(__m256d x); +extern "C" void _ZGVdN4vvv_sincos(__m256d x, __m256i ptrs, __m256i ptrc); namespace AmpGen { namespace AVX2d { - #define stl_fallback( x ) \ - inline real_v x( const real_v& v ){ auto a = v.to_array(); return real_v( std::x(a[0]), std::x(a[1]), std::x(a[2]), std::x(a[3]) ) ; } - + + #define libmvec_alias( function_name, avx_function_name ) \ + inline real_v function_name( const real_v& v ){ return avx_function_name (v) ; } + struct real_v { __m256d data; static constexpr unsigned size = 4; @@ -25,6 +33,7 @@ namespace AmpGen { data = _mm256_loadu_pd(tmp); } real_v(const double* f ) : data( _mm256_loadu_pd( f ) ) {} + real_v(const std::array f ) : data( _mm256_loadu_pd( f.data() ) ) {} void store( double* ptr ) const { _mm256_storeu_pd( ptr, data ); } std::array to_array() const { std::array b; store( &b[0] ); return b; } double at(const unsigned i) const { return to_array()[i] ; } @@ -51,10 +60,25 @@ namespace AmpGen { inline real_v operator==( const real_v& lhs, const real_v& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); } inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_pd(v); } inline real_v abs ( const real_v& v ) { return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); } - // inline real_v sin( const real_v& v ) { return sin256_pd(v) ; } - // inline real_v cos( const real_v& v ) { return cos256_pd(v) ; } - // inline real_v tan( const real_v& v ) { real_v s; real_v c; sincos256_pd(v, (__m256*)&s, (__m256*)&c) ; return s/c; } - // inline real_v exp( const real_v& v ) { return exp256_ps(v) ; } + + libmvec_alias( sin, _ZGVcN4v_sin ) + libmvec_alias( cos, _ZGVcN4v_cos ) + libmvec_alias( exp, _ZGVcN4v_exp ) + libmvec_alias( log, _ZGVcN4v_log ) + inline void sincos( const real_v& v, real_v& s, 
real_v& c ) + { + __m256i sp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&s),_mm256_set_epi64x(24,16,8,0)); + __m256i cp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&c),_mm256_set_epi64x(24,16,8,0)); + _ZGVdN4vvv_sincos(v,sp,cp); + } + + inline real_v tan( const real_v& v ) + { + real_v s, c; + sincos( v, s, c ); + return s / c ; + } + inline real_v select(const real_v& mask, const real_v& a, const real_v& b ) { return _mm256_blendv_pd( b, a, mask ); } inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? a : b; } inline real_v sign ( const real_v& v){ return select( v > 0., +1., -1. ); } @@ -78,50 +102,10 @@ namespace AmpGen { return _mm256_i64gather_pd(base_addr, udouble_to_uint(offsets),sizeof(double)); } - inline void frexp(const real_v& value, real_v& mant, real_v& exponent) - { - auto arg_as_int = _mm256_castpd_si256(value); - static const real_v offset(4503599627370496.0 + 1022.0); // 2^52 + 1022.0 - static const __m256i pow2_52_i = _mm256_set1_epi64x(0x4330000000000000); // *reinterpret_cast(&pow2_52_d); - auto b = _mm256_srl_epi64(arg_as_int, _mm_cvtsi32_si128(52)); - auto c = _mm256_or_si256( b , pow2_52_i); - exponent = real_v( _mm256_castsi256_pd(c) ) - offset; - mant = _mm256_castsi256_pd(_mm256_or_si256(_mm256_and_si256 (arg_as_int, _mm256_set1_epi64x(0x000FFFFFFFFFFFFFll) ), _mm256_set1_epi64x(0x3FE0000000000000ll))); - } - inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c ) { return _mm256_fmadd_pd(a, b, c); } - inline real_v log(const real_v& arg) - { - static const real_v corr = 0.693147180559945286226764; - static const real_v CL15 = 0.148197055177935105296783; - static const real_v CL13 = 0.153108178020442575739679; - static const real_v CL11 = 0.181837339521549679055568; - static const real_v CL9 = 0.22222194152736701733275; - static const real_v CL7 = 0.285714288030134544449368; - static const real_v CL5 = 0.399999999989941956712869; - static const real_v CL3 = 
0.666666666666685503450651; - static const real_v CL1 = 2.0; - real_v mant, exponent; - frexp(arg, mant, exponent); - auto x = (mant - 1.) / (mant + 1.); - auto x2 = x * x; - auto p = fmadd(CL15, x2, CL13); - p = fmadd(p, x2, CL11); - p = fmadd(p, x2, CL9); - p = fmadd(p, x2, CL7); - p = fmadd(p, x2, CL5); - p = fmadd(p, x2, CL3); - p = fmadd(p, x2, CL1); - p = fmadd(p, x, corr * exponent); - return p; - } - stl_fallback( exp ) - stl_fallback( tan ) - stl_fallback( sin ) - stl_fallback( cos ) inline real_v remainder( const real_v& a, const real_v& b ){ return a - real_v(_mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF)) * b; } inline real_v fmod( const real_v& a, const real_v& b ) { @@ -189,7 +173,9 @@ namespace AmpGen { inline complex_v select(const real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( select(mask, a.re, b ) , select(mask, a.im, 0.f) ); } inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? a : b; } inline complex_v exp( const complex_v& v ){ - return exp( v.re) * complex_v( cos( v.im ), sin( v.im ) ); + real_v c, s; + sincos( v.im, s, c ); + return exp(v.re) * complex_v(c, s); } inline complex_v sqrt( const complex_v& v ) { diff --git a/AmpGen/simd/utils.h b/AmpGen/simd/utils.h index b10b3580c6c..d4e4cbb45b7 100644 --- a/AmpGen/simd/utils.h +++ b/AmpGen/simd/utils.h @@ -2,6 +2,7 @@ #define AMPGEN_SIMD_UTILS_H #include +#include #if ENABLE_AVX512 #include "AmpGen/simd/avx512d_types.h" @@ -11,6 +12,8 @@ #include "AmpGen/simd/avx2f_types.h" #endif + + namespace AmpGen { #if ENABLE_AVX512 namespace AVX = AVX512d; @@ -66,11 +69,12 @@ namespace AmpGen { template bool all_of( const simd_type& obj, const value_type& v ) { if constexpr( ! 
is_vector_type::value ) return obj == v; - else { - auto arr = obj.to_array(); - for( unsigned i = 0 ; i != size::value; ++i ) if( arr[i] != v ) return false; - return true; - } + else return _mm256_movemask_pd( obj == v ) == 0xF; + } + template bool all_of( const simd_type& obj) + { + if constexpr( ! is_vector_type::value ) return obj; + else return _mm256_movemask_pd( obj ) == 0xF; } template auto get( vtype v ) { diff --git a/CMakeLists.txt b/CMakeLists.txt index 91f484b9100..04680a05846 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,8 @@ if(COMMAND gaudi_subdir) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -Wno-unused-parameter -ldl") - set(AmpGen_VERSION_MAJOR "1") - set(AmpGen_VERSION_MINOR "1") + set(AmpGen_VERSION_MAJOR "2") + set(AmpGen_VERSION_MINOR "0") set(AMPGEN_CXX ${CMAKE_CXX_COMPILER} CACHE FILEPATH "This should be the path to compiler (use which c++ for macOS)" ) configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/AmpGen/Version.h.in" "${CMAKE_CURRENT_SOURCE_DIR}/AmpGen/Version.h") gaudi_add_library(AmpGen diff --git a/Standalone.cmake b/Standalone.cmake index 254d52f89af..a476fb87bd9 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -64,7 +64,7 @@ target_include_directories(AmpGen PUBLIC $ idBranches = NamedParameter("IdBranches" , std::vector() ).getVector(); bool usePIDCalib = NamedParameter("usePIDCalib" , false); bool rejectMultipleCandidates = NamedParameter("rejectMultipleCandidates", true ); - std::string cuts = vectorToString( NamedParameter("Cut","").getVector() , " && "); + std::string cuts = NamedParameter("Cut",""); EventType evtType( NamedParameter( "EventType" ).getVector() ); std::vector branches; @@ -141,14 +141,14 @@ int main( int argc, char* argv[] ) INFO("Constructing eventList"); if ( motherID != "" ) { + bool neg = motherID[0] == '-'; INFO( "Converting " << evtType.mother() << " " << eventsToTake.size() << " " << evts.size() ); - in_tree->SetBranchStatus( "*", 
0 ); + TreeReader tr( in_tree ); int id = 0; - in_tree->SetBranchStatus( motherID.c_str() ); - in_tree->SetBranchAddress( motherID.c_str(), &id ); + tr.setBranch( neg ? motherID.substr(1, motherID.size() -1 ) : motherID, & id ); for ( unsigned int i = 0; i < eventsToTake.size(); ++i ) { - in_tree->GetEntry( eventsToTake[i] ); - if ( id < 0 ) invertParity( evts[i] , evtType.size() ); + tr.getEntry(eventsToTake[i] ); + if ( neg ? id > 0 : id < 0 ) invertParity( evts[i] , evtType.size() ); } } diff --git a/apps/Fitter.cpp b/apps/Fitter.cpp index 5159a789cd7..52ef74f1ad1 100644 --- a/apps/Fitter.cpp +++ b/apps/Fitter.cpp @@ -87,7 +87,7 @@ void addExtendedTerms( Minimiser& mini, SIGPDF& pdf, MinuitParameterSet& mps ) template FitResult* doFit( PDF&& pdf, EventList& data, EventList& mc, MinuitParameterSet& MPS ) { - INFO( "Type = " << typeof() ); + INFO( "Type = " << type_string() ); auto time_wall = std::chrono::high_resolution_clock::now(); auto time = std::clock(); pdf.setEvents( data ); diff --git a/apps/Generator.cpp b/apps/Generator.cpp index 3abf40b7bb6..a8f77720b72 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -31,39 +31,98 @@ #include "AmpGen/ParticlePropertiesList.h" #include "AmpGen/AddCPConjugate.h" +#if ENABLE_AVX + #include "AmpGen/EventListSIMD.h" + using EventList_t = AmpGen::EventListSIMD; +#else + #include "AmpGen/EventList.h" + using EventList_t = AmpGen::EventList; +#endif + using namespace AmpGen; -namespace AmpGen { make_enum(generatorType, CoherentSum, PolarisedSum, FixedLib, RGenerator, TreePhaseSpace) } +namespace AmpGen { + make_enum(pdfTypes, CoherentSum, PolarisedSum, FixedLib) + make_enum(phspTypes, PhaseSpace, RecursivePhaseSpace, TreePhaseSpace) +} -struct FixedLibPDF { +struct FixedLibPDF +{ void* lib = {nullptr}; - AmpGen::DynamicFCN PDF; - + DynamicFCN PDF; + void debug( const Event& event) {}; void prepare(){}; void setEvents( AmpGen::EventList& evts ){}; double operator()( const AmpGen::Event& evt ) const { return PDF( 
evt, 1 ); } + double operator()( const double* evt, const unsigned& index ) + { + return PDF(evt, 1 ); + } FixedLibPDF( const std::string& lib ) { void* handle = dlopen( lib.c_str(), RTLD_NOW ); if ( handle == nullptr ) ERROR( dlerror() ); - PDF = AmpGen::DynamicFCN( handle, "FCN" ); + PDF = DynamicFCN( handle, "FCN" ); } size_t size() { return 0; } void reset( const bool& flag = false ){}; }; -template - void GenerateEvents( EventList& events - , PDF_TYPE& pdf - , PRIOR_TYPE& prior + +template Particle getTopology(const pdf_t& pdf) +{ + if constexpr( std::is_same::value ) + { + FATAL("Cannot deduce decay topology from a compiled library, check generator options"); + } + else return pdf.matrixElements()[0].decayTree.quasiStableTree(); +} + +template std::vector getDecayChains( const pdf_t& pdf ) +{ + if constexpr( std::is_same::value ) + { + FATAL("Cannot deduce decay topology from a compiled library, check generator options"); + } + else { + std::vector channels; + for( auto& chain : pdf.matrixElements() ) channels.push_back( chain.decayTree ); + return channels; + } +} + +template void generateEvents( EventList& events + , pdf_t& pdf + , const phspTypes& phsp_type , const size_t& nEvents , const size_t& blockSize - , TRandom* rndm ) + , TRandom* rndm + , const bool& normalise = true ) { - Generator signalGenerator( prior ); - signalGenerator.setRandom( rndm); - signalGenerator.setBlockSize( blockSize ); - signalGenerator.fillEventList( pdf, events, nEvents ); + if( phsp_type == phspTypes::PhaseSpace ) + { + Generator signalGenerator(events.eventType(), rndm); + signalGenerator.setBlockSize(blockSize); +// signalGenerator.setNormFlag(normalise); + signalGenerator.fillEventList(pdf, events, nEvents ); + } + else if( phsp_type == phspTypes::RecursivePhaseSpace ) + { + Generator signalGenerator( getTopology(pdf), events.eventType(), rndm ); + signalGenerator.setBlockSize(blockSize); +// signalGenerator.setNormFlag(normalise); + signalGenerator.fillEventList(pdf, 
events, nEvents); + } + else if( phsp_type == phspTypes::TreePhaseSpace ) + { + Generator signalGenerator(getDecayChains(pdf), events.eventType(), rndm); + signalGenerator.setBlockSize(blockSize); +// signalGenerator.setNormFlag(normalise); + signalGenerator.fillEventList(pdf, events, nEvents ); + } + else { + FATAL("Phase space configuration: " << phsp_type << " is not supported"); + } } @@ -72,16 +131,17 @@ int main( int argc, char** argv ) OptionsParser::setArgs( argc, argv ); size_t nEvents = NamedParameter ("nEvents" , 1, "Total number of events to generate" ); - size_t blockSize = NamedParameter ("BlockSize", 100000, "Number of events to generate per block" ); + size_t blockSize = NamedParameter ("BlockSize", 5000000, "Number of events to generate per block" ); int seed = NamedParameter ("Seed" , 0, "Random seed used in event Generation" ); std::string outfile = NamedParameter("Output" , "Generate_Output.root" , "Name of output file" ); - auto genType = NamedParameter( "Type", generatorType::CoherentSum, optionalHelpString("Generator configuration to use:", + auto pdfType = NamedParameter( "Type", pdfTypes::CoherentSum, optionalHelpString("Generator configuration to use:", { {"CoherentSum" , "Full phase-space generator with (pseudo)scalar amplitude"} , {"PolarisedSum" , "Full phase-space generator with particles carrying spin in the initial/final states"} - , {"FixedLib" , "Full phase-space generator with an amplitude from a precompiled library"} - , {"RGenerator" , "Recursive phase-space generator for intermediate (quasi)stable states such as the D-mesons"} - , {"TreePhaseSpace" , "Recursive phase-space generator with generic handling of intermediate states."} } ) ); - + , {"FixedLib" , "Full phase-space generator with an amplitude from a precompiled library"}} ) ); + auto phspType = NamedParameter( "PhaseSpace", phspTypes::PhaseSpace, optionalHelpString("Phase-space generator to use:", + { {"CoherentSum" , "Full phase-space generator with (pseudo)scalar 
amplitude"} + , {"PolarisedSum" , "Full phase-space generator with particles carrying spin in the initial/final states"} + , {"FixedLib" , "Full phase-space generator with an amplitude from a precompiled library"}} ) ); std::string lib = NamedParameter("Library","","Name of library to use for a fixed library generation"); size_t nBins = NamedParameter ("nBins" ,100, "Number of bins for monitoring plots." ); @@ -122,40 +182,20 @@ int main( int argc, char** argv ) INFO("Generating events with type = " << eventType ); - if ( genType == generatorType::CoherentSum ) { - CoherentSum sig( eventType, MPS ); - PhaseSpace phsp(eventType,&rand); - GenerateEvents( accepted, sig, phsp , nEvents, blockSize, &rand ); - } - else if ( genType == generatorType::PolarisedSum ){ - PolarisedSum sig( eventType, MPS ); - RecursivePhaseSpace phsp( sig.matrixElements()[0].decayTree.quasiStableTree() , eventType, &rand ); - GenerateEvents( accepted, sig, phsp, nEvents, blockSize, &rand ); - } - else if ( genType == generatorType::RGenerator ) { - CoherentSum sig( eventType, MPS, "" ); - Generator signalGenerator( sig[0].decayTree.quasiStableTree(), eventType ); - signalGenerator.setRandom( &rand ); - signalGenerator.fillEventList( sig, accepted, nEvents ); + if ( pdfType == pdfTypes::CoherentSum ){ + CoherentSum pdf( eventType, MPS); + generateEvents(accepted, pdf, phspType , nEvents, blockSize, &rand ); } - else if ( genType == generatorType::TreePhaseSpace ) { - PolarisedSum sig( eventType, MPS); - std::vector channels; - for( auto& chain : sig.matrixElements() ) channels.push_back( chain.decayTree ); - Generator signalGenerator(channels, eventType, &rand); - signalGenerator.setRandom( &rand ); - signalGenerator.fillEventList( sig, accepted, nEvents ); + else if ( pdfType == pdfTypes::PolarisedSum ){ + PolarisedSum pdf(eventType, MPS); + generateEvents( accepted, pdf, phspType, nEvents, blockSize, &rand ); } - else if ( genType == generatorType::FixedLib ) { - Generator<> signalGenerator( 
eventType ); - signalGenerator.setRandom( &rand ); - signalGenerator.setBlockSize( blockSize ); - signalGenerator.setNormFlag( false ); - FixedLibPDF pdf( lib ); - signalGenerator.fillEventList( pdf, accepted, nEvents ); - } + // else if ( pdfType == pdfTypes::FixedLib ){ + // FixedLibPDF pdf(lib); + // generateEvents( accepted, pdf, phspType, nEvents, blockSize, &rand, false ); + // } else { - FATAL("Did not recognise configuration: " << genType ); + FATAL("Did not recognise configuration: " << pdfType ); } if( accepted.size() == 0 ) return -1; TFile* f = TFile::Open( outfile.c_str(), "RECREATE" ); diff --git a/examples/QcGenerator.cpp b/examples/QcGenerator.cpp index f40a90331e7..7a410e5ab45 100644 --- a/examples/QcGenerator.cpp +++ b/examples/QcGenerator.cpp @@ -102,7 +102,7 @@ template struct normalised_pdf { norm = sqrt(yc.bf(type)/n); if( it != nullptr ) norm *= exp( 1i * it->mean() * M_PI/180. ); pc.stop(); - INFO(type << " Time to construct: " << pc << "[ms], norm = " << norm << " " << typeof() ); + INFO(type << " Time to construct: " << pc << "[ms], norm = " << norm << " " << type_string() ); } complex_t operator()(const Event& event){ return norm * pdf.getValNoCache(event); } }; diff --git a/src/ASTResolver.cpp b/src/ASTResolver.cpp index 2b6785aba3f..6118fb62795 100644 --- a/src/ASTResolver.cpp +++ b/src/ASTResolver.cpp @@ -19,7 +19,6 @@ ASTResolver::ASTResolver(const std::map& evtMap, m_nParameters(0) { m_enable_cuda = NamedParameter("UseCUDA",false); - m_enable_avx = NamedParameter("UseAVX",false); m_enable_compileTimeConstants = NamedParameter("ASTResolver::CompileTimeConstants", false); } @@ -69,7 +68,7 @@ template <> void ASTResolver::resolve( const SubTree& subTree ) template <> void ASTResolver::resolve( const Spline& spline ) { if( m_resolvedParameters.count( &spline) != 0 ) return ; - auto address = addCacheFunction(spline.m_name,spline.m_nKnots,spline.m_min,spline.m_max); + auto address = addCacheFunction(spline.m_name, spline.m_nKnots, 
spline.m_min, spline.m_max); addResolvedParameter( &spline, address ); addResolvedParameter( spline.m_points.top().get(), address ); auto splineTransfer = dynamic_cast( m_cacheFunctions[spline.m_name].get() ); diff --git a/src/CacheTransfer.cpp b/src/CacheTransfer.cpp index 0c8aa6166a2..0963b0d305b 100644 --- a/src/CacheTransfer.cpp +++ b/src/CacheTransfer.cpp @@ -11,10 +11,11 @@ using namespace AmpGen; CacheTransfer::CacheTransfer() = default; -CacheTransfer::CacheTransfer( const size_t& address, const double& value, const size_t& size ) : +CacheTransfer::CacheTransfer( const size_t& address, const std::string& name, const double& value, const size_t& size ) : m_address(address), m_size(size), - m_value(value) + m_value(value), + m_name(name) { } @@ -25,7 +26,7 @@ void CacheTransfer::transfer( CompiledExpressionBase* destination ) void CacheTransfer::print() const { - INFO( m_address << " " << m_value ) ; + INFO( m_address << " " << m_value << " " << m_name ) ; } void ParameterTransfer::transfer( CompiledExpressionBase* destination ) @@ -33,14 +34,13 @@ void ParameterTransfer::transfer( CompiledExpressionBase* destination ) destination->setExternal( m_source->mean(), m_address ); } -ParameterTransfer::ParameterTransfer( const size_t& address, MinuitParameter* source ) - : CacheTransfer(address, source->mean(), 1), +ParameterTransfer::ParameterTransfer(const size_t& address, const std::string& name, MinuitParameter* source ) + : CacheTransfer(address, name, source->mean(), 1), m_source( source ) { } void ParameterTransfer::print() const { - std::cout << this << " " << m_source->name() << " " << m_address << std::endl; - INFO( "Source: " << m_source->name() << " address = " << m_address ); + INFO( "Source: " << m_source->name() << " address = " << m_address << " value = " << m_source->mean() ); } diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index e3240a318d1..2139023116d 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -263,7 +263,7 @@ void 
CoherentSum::setEvents( const EventList_type& list ) for( auto& me : m_matrixElements ){ DEBUG("Registering: " << me.name() ) ; } if( m_ownEvents && m_events != nullptr ) delete m_events; m_events = &list; - m_cache = Store( m_events->size(), m_matrixElements ); + m_cache . allocate( m_events->size(), m_matrixElements ); } @@ -341,6 +341,13 @@ float_v CoherentSum::operator()( const float_v* /*evt*/, const unsigned block ) return (m_weight/m_norm ) * utils::norm(value); } +#if ENABLE_AVX +double CoherentSum::operator()( const double* /*evt*/, const unsigned block ) const +{ + return operator()((const float_v*)nullptr, block / utils::size::value ).at( block % utils::size::value ); +} +#endif + std::function CoherentSum::evaluator(const EventList_type* ievents) const { auto events = ievents == nullptr ? m_integrator.events() : ievents; diff --git a/src/CompiledExpressionBase.cpp b/src/CompiledExpressionBase.cpp index d9c10a323ff..864f13f3389 100644 --- a/src/CompiledExpressionBase.cpp +++ b/src/CompiledExpressionBase.cpp @@ -144,7 +144,7 @@ void CompiledExpressionBase::compile(const std::string& fname) void CompiledExpressionBase::addDebug( std::ostream& stream ) const { stream << "#include\n"; - stream << "extern \"C\" std::vector() << " >> " + stream << "extern \"C\" std::vector() << " >> " << m_progName << "_DB(" << fcnSignature() << "){\n"; for ( auto& dep : m_debugSubexpressions ) { std::string rt = "auto v" + std::to_string(dep.first) + " = " + dep.second.to_string(m_resolver.get()) +";"; @@ -156,8 +156,8 @@ void CompiledExpressionBase::addDebug( std::ostream& stream ) const const auto expression = m_db[i].second; stream << std::endl << "{\"" << m_db[i].first << "\","; if ( expression.to_string(m_resolver.get()) != "NULL" ) - stream << typeof() << "("<< expression.to_string(m_resolver.get()) << ")}" << comma; - else stream << typeof() << "(-999.,0.)}" << comma ; + stream << type_string() << "("<< expression.to_string(m_resolver.get()) << ")}" << comma; + else 
stream << type_string() << "(-999.,0.)}" << comma ; } } diff --git a/src/EventList.cpp b/src/EventList.cpp index 86727861ad8..8fe00dcb738 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -84,6 +84,7 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) tree->SetBranchStatus( "*", 0 ); TreeReader tr( tree ); bool hasEnergy = branches.size() == 0 || branches.size() == 4 * m_eventType.size(); // if the energy of the particle has been explicitly specified // + std::vector ids( m_eventType.size() ); if( branches.size() != 0 ) { DEBUG("Branches = [" << vectorToString(branches, ", ") << "]" ); @@ -93,15 +94,17 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) DEBUG("Setting branch: " << branches[p] << " pos: " << pos << " fmt = " << inv_map( eventFormat, pos, "NOT FOUND" ) << " has energy? " << hasEnergy ); tr.setBranch( branches[p], &(temp[pos]) ); } - auto pos = eventFormat.size(); - for( const auto& branch : extraBranches ) m_extensions[branch] = pos++; + if( idBranches.size() != 0 ) + { + if( idBranches.size() != m_eventType.size() ) FATAL("Number of ID branches should be number of final state particles"); + for( int i = 0; i != ids.size(); ++i ) tr.setBranch( idBranches[i], ids.data() + i); + } } else for ( auto& branch : eventFormat ) tr.setBranch( branch.first, &(temp[branch.second]) ); - std::vector ids( m_eventType.size() ); - if( idBranches.size() != 0 ) - { - if( idBranches.size() != m_eventType.size() ){ FATAL("Number of ID branches should be number of final state particles"); } - for( int i = 0; i != ids.size(); ++i ) tr.setBranch( idBranches[i], ids.data() + i); + auto pos = eventFormat.size(); + for( const auto& branch : extraBranches ){ + tr.setBranch( branch, &(temp[pos]) ); + m_extensions[branch] = pos++; } if( getGenPdf ) tr.setBranch( "genPdf", temp.pGenPdf() ); if( weightBranch != "" ) tr.setBranch( weightBranch, temp.pWeight() ); @@ -121,14 +124,14 @@ void EventList::loadFromTree( TTree* tree, const 
ArgumentPack& args ) auto automaticOrdering = m_eventType.automaticOrdering(); for (const auto& evt : tr) { if( inputUnits != Units::GeV ) for( unsigned k = 0; k != eventFormat.size(); ++k ) temp[k] *= to_double(inputUnits); - if( idBranches.size() != 0 ) automaticOrdering(temp, ids); + if( idBranches.size() != 0 && !automaticOrdering(temp, ids) ) + WARNING("Failed to order event: " << evt ); if( applySym ) symmetriser(temp); if( ! hasEnergy ){ for( unsigned int k = 0 ; k != m_eventType.size(); ++k ) temp[4*k + 3] = sqrt( m_eventType.mass(k) * m_eventType.mass(k) + temp[4*k+0]*temp[4*k+0] + temp[4*k+1]*temp[4*k+1] + temp[4*k+2]*temp[4*k+2] ); } - temp.setIndex( m_data.size() ); - m_data.push_back( temp ); + push_back( temp ); } read_time.stop(); INFO("Time to read tree = " << read_time << "[ms]; nEntries = " << size() ); @@ -205,7 +208,7 @@ double EventList::integral() const void EventList::add( const EventList& evts ) { - for ( auto& evt : evts ) m_data.push_back( evt ); + for ( auto& evt : evts ) push_back( evt ); } void EventList::clear() @@ -218,3 +221,26 @@ void EventList::erase(const std::vector::iterator& begin, { m_data.erase( begin, end ); } + +void EventList::reserve( const size_t& size ) +{ + m_data.reserve( size ); +} + +void EventList::resize ( const size_t& size ) +{ + m_data.resize(size); + for( unsigned int i = 0 ; i != size; ++i ) m_data[i].setIndex(i) ; +} + +void EventList::push_back( const Event& evt ) +{ + m_data.push_back( evt ); + m_data.rbegin()->setIndex(m_data.size()-1); +} + +void EventList::emplace_back( const Event& evt) +{ + m_data.emplace_back(evt) ; + m_data.rbegin()->setIndex(m_data.size()-1); +} diff --git a/src/EventListSIMD.cpp b/src/EventListSIMD.cpp index 2ba7040bcb4..8a1178a2ad6 100644 --- a/src/EventListSIMD.cpp +++ b/src/EventListSIMD.cpp @@ -35,7 +35,9 @@ using namespace AmpGen; // ENABLE_DEBUG(EventListSIMD) -EventListSIMD::EventListSIMD( const EventType& type ) : m_eventType( type ) {} +EventListSIMD::EventListSIMD( const 
EventType& type ) : + m_data(0, type.eventSize() ), + m_eventType( type ) {} void EventListSIMD::loadFromFile( const std::string& fname, const ArgumentPack& args ) { @@ -108,10 +110,8 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) bool hasEventList = entryList.size() != 0; size_t nEvents = hasEventList ? entryList.size() : tree->GetEntries(); std::array buffer; - - m_data = Store(nEvents, m_eventType.eventSize() ); - m_weights.resize( m_data.nBlocks() ); - m_genPDF.resize( m_data.nBlocks() ); + + resize( nEvents ); auto symmetriser = m_eventType.symmetriser(); for ( unsigned int block = 0; block < m_data.nBlocks(); ++block ) { @@ -135,10 +135,7 @@ void EventListSIMD::loadFromTree( TTree* tree, const ArgumentPack& args ) EventListSIMD::EventListSIMD( const EventList& other ) : EventListSIMD( other.eventType() ) { - m_data = Store(other.size(), m_eventType.eventSize() ); - DEBUG("Converting EventList -> EventListSIMD, allocate: " << m_data.size() << " events in " << m_data.nBlocks() << " with: " << m_data.nFields() << " fields"); - m_weights.resize( m_data.nBlocks() ); - m_genPDF.resize ( m_data.nBlocks() ); + resize( other.size() ); for( unsigned block = 0 ; block != m_data.nBlocks(); block++ ) { for( unsigned j = 0 ; j != m_data.nFields(); ++j ) diff --git a/src/EventType.cpp b/src/EventType.cpp index 9adc9b788a8..1bace03da37 100644 --- a/src/EventType.cpp +++ b/src/EventType.cpp @@ -105,7 +105,7 @@ void EventType::extendEventType( const std::string& branch ) { m_eventTypeExtensions.push_back(branch); } -std::pair EventType::minmax( const std::vector& indices, bool isGeV ) const +std::pair EventType::minmax( const std::vector& indices) const { std::vector ivec( size() ); std::iota( ivec.begin(), ivec.end(), 0 ); @@ -114,7 +114,7 @@ std::pair EventType::minmax( const std::vector& indice double max = motherMass(); for ( auto& x : ivec ) if ( std::find( indices.begin(), indices.end(), x ) == indices.end() ) max -= mass( x ); - return 
std::pair(min*min, max*max); + return std::pair(min, max); } std::pair EventType::count(const unsigned& index) const { @@ -180,19 +180,19 @@ std::vector EventType::defaultProjections(const unsigned& nBins) con Projection EventType::projection(const unsigned& nBins, const std::vector& indices, const std::string& observable) const { bool useRootLabelling = NamedParameter("EventType::UseRootTEX", false ); - auto mm = minmax(indices, true); + auto mm = minmax(indices); std::string gevcccc = useRootLabelling ? "GeV^{2}/c^{4}" : "\\mathrm{GeV}^{2}/c^{4}"; std::string gevcc = useRootLabelling ? "GeV/c^{2}" : "\\mathrm{GeV}/c^{2}"; if( observable == "mass2" ) return Projection( [indices]( const Event& evt ) { return evt.s( indices ); }, "s" + vectorToString( indices ), "s_{" + label( indices ) + "}", nBins, - ( mm.first - 0.05 ) , ( mm.second + 0.05 ) , gevcccc ); + ( mm.first * mm.first - 0.05 ) , ( mm.second * mm.second + 0.05 ) , gevcccc ); else if( observable == "mass" ){ return Projection( [indices]( const Event& evt ) { return sqrt( evt.s( indices ) ); }, "m" + vectorToString( indices ), "m_{" + label( indices ) + "}", nBins, - mm.first > 0.05 ? sqrt(mm.first - 0.05) :0 , sqrt( mm.second + 0.05 ) , gevcc ); + mm.first > 0.05 ? 
mm.first - 0.05 :0 , mm.second + 0.05, gevcc ); } return Projection(); } @@ -242,7 +242,7 @@ std::function EventType::symmetriser() const }; } -std::function&)> EventType::automaticOrdering() const +std::function&)> EventType::automaticOrdering() const { std::vector ids; for( unsigned i = 0 ; i != m_particleNames.size(); ++i ) ids.push_back( ParticleProperties::get(m_particleNames[i])->pdgID() ); @@ -259,12 +259,14 @@ std::function&)> EventType::automaticOrderin return std::all_of( std::begin(used), std::end(used), [](auto b) { return b; } ) ; }; - return [ids, matches](auto& event, const auto& actual_ids) -> void { + return [ids, matches](auto& event, const auto& actual_ids) -> bool { std::vector new_addresses( ids.size(), 999 ); int sgn = +1; if( matches(ids, actual_ids ) ) sgn = +1; else if( matches(ids, actual_ids, -1 ) ) sgn = -1; - else { FATAL("Ids: " << vectorToString(actual_ids, " ") << " do not match either particle or antiparticle ["<< vectorToString(ids, " ") << "]" );} + else { ERROR("Ids: " << vectorToString(actual_ids, " ") << " do not match either particle or antiparticle ["<< vectorToString(ids, " ") << "]" ); + return false; + } for( unsigned i = 0 ; i != ids.size(); ++i ) { @@ -274,6 +276,7 @@ std::function&)> EventType::automaticOrderin } } event.reorder( new_addresses ); + return true; }; } diff --git a/src/Expression.cpp b/src/Expression.cpp index 1584bcb9621..f0ec105523c 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -56,12 +56,12 @@ std::string Constant::to_string(const ASTResolver* resolver) const { str.erase ( str.find_last_not_of('0') + 1, std::string::npos ); return str; }; - std::string complex_type = typeof(); + std::string complex_type = type_string(); std::string literalSuffix = ""; if( resolver != nullptr && (resolver->enableCuda() || resolver->enableAVX()) ) { literalSuffix = "f"; - complex_type = typeof(); + complex_type = type_string(); } return std::imag(m_value) == 0 ? 
"(" + rounded_string(std::real(m_value)) +literalSuffix + ")" : complex_type +"("+rounded_string(std::real(m_value))+literalSuffix+","+rounded_string(std::imag(m_value))+literalSuffix+")"; @@ -326,8 +326,8 @@ ComplexParameter::ComplexParameter( const Parameter& real, const Parameter& imag std::string ComplexParameter::to_string(const ASTResolver* resolver) const { - std::string complex_type = typeof(); - if( resolver != nullptr && (resolver->enableCuda() || resolver->enableAVX()) ) complex_type = typeof(); + std::string complex_type = type_string(); + if( resolver != nullptr && (resolver->enableCuda() || resolver->enableAVX()) ) complex_type = type_string(); return complex_type + "(" + m_real.to_string(resolver) + ", " + m_imag.to_string(resolver) +")"; } diff --git a/src/ExpressionParser.cpp b/src/ExpressionParser.cpp index a214c9a6af1..5b230478dbb 100644 --- a/src/ExpressionParser.cpp +++ b/src/ExpressionParser.cpp @@ -154,11 +154,9 @@ Expression ExpressionParser::processEndPoint( const std::string& name, const Min bool status = true; double value = lexical_cast( name, status ); if ( status == true ) return value; - if ( name == "PI" ) return M_PI; - if ( name == "pi" ) return M_PI; + if ( name == "PI" || name == "pi" || name == "M_PI" ) return M_PI; if ( name == "e" ) return std::exp(1); - if ( name == "I" ) return complex_t( 0, 1 ); - if ( name == "i" ) return complex_t( 0, 1 ); + if ( name == "I" || name == "i" ) return complex_t( 0, 1 ); if ( mps != nullptr ) { auto it = mps->find(name); if ( it != nullptr ) return MinuitParameterLink( it ); @@ -174,9 +172,13 @@ Expression ExpressionParser::processEndPoint( const std::string& name, const Min } MinuitParameterLink::MinuitParameterLink( MinuitParameter* param ) : m_parameter( param ) {} + std::string MinuitParameterLink::to_string(const ASTResolver* resolver) const { - return resolver == nullptr ? 
m_parameter->name() : resolver->resolvedParameter(this); + if( resolver == nullptr ) return m_parameter->name(); + if( resolver->enableCompileConstants() && m_parameter != nullptr && m_parameter->flag () == Flag::CompileTimeConstant ) + return std::to_string( m_parameter->mean() ); + return resolver->resolvedParameter(this); } std::string MinuitParameterLink::name() const { @@ -185,7 +187,7 @@ std::string MinuitParameterLink::name() const { void MinuitParameterLink::resolve( ASTResolver& resolver ) const { - resolver.resolve(*this); + if( m_parameter->flag() != Flag::CompileTimeConstant ) resolver.resolve(*this); } complex_t MinuitParameterLink::operator()() const diff --git a/src/Kinematics.cpp b/src/Kinematics.cpp index 937969f1097..8446b000063 100644 --- a/src/Kinematics.cpp +++ b/src/Kinematics.cpp @@ -155,9 +155,7 @@ void AmpGen::boost( Event& evt, const std::tuple& n, con void AmpGen::rotate( Event& evt, const std::tuple& n, const double& v ) { - double nx = std::get<0>( n ); - double ny = std::get<1>( n ); - double nz = std::get<2>( n ); + auto& [nx,ny,nz ] = n; double cv = cos(v); double sv = sin(v); double norm = sqrt( nx * nx + ny * ny + nz * nz ); diff --git a/src/Lineshapes/BW.cpp b/src/Lineshapes/BW.cpp index 14fe2e6ea72..3b2468100b6 100644 --- a/src/Lineshapes/BW.cpp +++ b/src/Lineshapes/BW.cpp @@ -29,7 +29,7 @@ DEFINE_LINESHAPE( FormFactor ) if ( lineshapeModifier == "BL" ) FormFactor = sqrt( BlattWeisskopf( q2 * radius * radius, Lp ) ); if ( lineshapeModifier == "NFF" ) FormFactor = 1; if ( lineshapeModifier == "BELLE2018" ) FormFactor = sqrt( BlattWeisskopf_Norm( q2 * radius * radius, q20 * radius * radius, Lp ) ); - + if( L != 0 ){ ADD_DEBUG( q2 , dbexpressions ); ADD_DEBUG( radius , dbexpressions ); @@ -69,9 +69,13 @@ DEFINE_LINESHAPE( BW ) Expression FormFactor = sqrt( BlattWeisskopf_Norm( q2 * radius * radius, 0, L ) ); if ( lineshapeModifier == "BL" ) FormFactor = sqrt( BlattWeisskopf( q2 * radius * radius, L ) ); if ( lineshapeModifier == 
"BELLE2018" ) FormFactor = sqrt( BlattWeisskopf_Norm( q2 * radius * radius, q20 * radius * radius, L ) ); + if ( lineshapeModifier == "NFF") FormFactor = 1; Expression runningWidth = width( s_cse, s1, s2, mass, width0, radius, L, dbexpressions ); const Expression BW = FormFactor / ( mass * mass - s_cse -1i * mass * runningWidth ); const Expression kf = kFactor( mass, width0, dbexpressions ); + ADD_DEBUG( s_cse, dbexpressions ); + ADD_DEBUG( s1, dbexpressions ); + ADD_DEBUG( s2, dbexpressions ); ADD_DEBUG( FormFactor, dbexpressions ); ADD_DEBUG( runningWidth, dbexpressions ); ADD_DEBUG( BW, dbexpressions ); diff --git a/src/PhaseSpace.cpp b/src/PhaseSpace.cpp index c63be198602..0722b162d2c 100644 --- a/src/PhaseSpace.cpp +++ b/src/PhaseSpace.cpp @@ -63,8 +63,10 @@ Event PhaseSpace::makeEvent() } } while ( wt < m_rand->Rndm() ); - rt.set(0, { 0, pd[0], 0, sqrt( pd[0] * pd[0] + m_mass[0] * m_mass[0] )} ); - + rt[0] = 0; + rt[1] = pd[0]; + rt[2] = 0; + rt[3] = sqrt( pd[0] * pd[0] + m_mass[0] * m_mass[0] ); for(size_t i = 1 ; i != m_nt ; ++i ){ rt.set( i, { 0, -pd[i-1], 0, sqrt( pd[i-1] * pd[i-1] + m_mass[i] * m_mass[i] ) } ); double cZ = 2 * m_rand->Rndm() - 1; @@ -72,24 +74,17 @@ Event PhaseSpace::makeEvent() double angY = 2 * M_PI * m_rand->Rndm(); double cY = cos(angY); double sY = sin(angY); + double beta = (i == m_nt-1) ? 0 : pd[i] / sqrt( pd[i] * pd[i] + invMas[i] * invMas[i] ); + double gamma = (i == m_nt-1) ? 
1 : 1./sqrt( 1 - beta*beta); for (size_t j = 0; j <= i; j++ ) { double x = rt[4*j+0]; double y = rt[4*j+1]; double z = rt[4*j+2]; - rt[4*j+0] = cZ * x - sZ * y; - rt[4*j+1] = sZ * x + cZ * y; - x = rt[4*j+0]; - rt[4*j+0] = cY * x - sY * z; - rt[4*j+2] = sY * x + cY * z; - } - if ( i == ( m_nt - 1 ) ) break; - double beta = pd[i] / sqrt( pd[i] * pd[i] + invMas[i] * invMas[i] ); - double gamma = 1./sqrt( 1 - beta*beta); - for (size_t j = 0; j <= i; j++ ){ - double E = rt[4*j+3]; - double py = rt[4*j+1]; - rt[4*j+1] = gamma*( py + beta * E ); - rt[4*j+3] = gamma*( E + beta * py ); + double E = rt[4*j+3]; + rt[4*j+0] = cY * (cZ * x - sZ * y ) - sY * z; + rt[4*j+1] = gamma*( sZ * x + cZ * y + beta * E ); + rt[4*j+2] = sY * (cZ * x - sZ * y ) + cY * z; + rt[4*j+3] = gamma*( E + beta * (sZ *x + cZ*y) ); } } rt.setGenPdf( 1 ); diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index bc2211757ed..3796544f40b 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -76,7 +76,7 @@ PolarisedSum::PolarisedSum(const EventType& type, Tensor thisExpression( Tensor::dim(polStates.size()) ); auto& [p, coupling] = protoAmps[i]; DebugSymbols syms; - for(unsigned j = 0; j != polStates.size(); ++j) thisExpression[j] = make_cse( p.getExpression(&syms, polStates[j] ) ); + for(unsigned j = 0; j != polStates.size(); ++j) thisExpression[j] = make_cse( p.getExpression(j == 0 ? &syms: nullptr, polStates[j] ) ); m_matrixElements[i] = TransitionMatrix( p, coupling, @@ -85,7 +85,9 @@ PolarisedSum::PolarisedSum(const EventType& type, p.decayDescriptor(), this->m_eventType.getEventFormat(), this->m_debug ? 
syms : DebugSymbols() ,this->m_mps ) ); CompilerWrapper().compile( m_matrixElements[i] ); - }); + m_matrixElements[i].size = thisExpression.size(); + } + ); } } if ( stype == spaceType::flavour ) @@ -201,8 +203,8 @@ void PolarisedSum::prepare() if( m_integrator.isReady() ) updateNorms(); std::for_each( m_matrixElements.begin(), m_matrixElements.end(), resetFlags ); // if( m_nCalls % 10000 == 0 ) debug_norm(); - DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << (m_weight/m_norm) * getValNoCache(m_events->at(0))); m_pdfCache.update(m_cache, m_probExpression); + DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << (m_weight/m_norm) * getValNoCache(m_events->at(0))); m_nCalls++; } @@ -211,6 +213,14 @@ float_v PolarisedSum::operator()( const float_v*, const unsigned index ) const return ( m_weight / m_norm ) * m_pdfCache[index]; } +#if ENABLE_AVX +double PolarisedSum::operator()( const double*, const unsigned index ) const +{ + return operator()((const float_v*)nullptr, index / utils::size::value ).at( index % utils::size::value ); +} +#endif + + void PolarisedSum::debug_norm() { double norm_slow = 0; @@ -225,8 +235,8 @@ void PolarisedSum::setEvents( EventList_type& events ) reset(); if( m_events != nullptr && m_ownEvents ) delete m_events; m_events = &events; - m_cache = Store(m_events->size(), m_matrixElements, m_dim.first * m_dim.second ); - m_pdfCache = Store(m_events->size(), m_probExpression ); + m_cache . allocate( m_events->size(), m_matrixElements, m_dim.first * m_dim.second ); + m_pdfCache . 
allocate( m_events->size(), m_probExpression); } void PolarisedSum::setMC( EventList_type& events ) @@ -268,6 +278,7 @@ real_t PolarisedSum::operator()(const Event& evt) const return utils::at( m_pdfCache[ evt.index() / utils::size::value ], evt.index() % utils::size::value ); } + double PolarisedSum::norm() const { return m_norm; @@ -317,8 +328,9 @@ void PolarisedSum::debug(const Event& evt) std::vector this_cache; for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( m_cache(evt.index() / utils::size::value, j*tsize + i) ); INFO( m_matrixElements[j].decayDescriptor() << " " << vectorToString( this_cache, " ") ); + if( m_debug ) m_matrixElements[0].debug( evt ); } - INFO("P(x) = " << getValNoCache(evt) << " " << operator()(nullptr, evt.index() / utils::size::value ) ); + INFO("P(x) = " << getValNoCache(evt) << " " << operator()((const float_v*)nullptr, evt.index() / utils::size::value ) ); INFO("Prod = [" << vectorToString(m_pVector , ", ") <<"]"); } diff --git a/src/Projection.cpp b/src/Projection.cpp index 6d14b4b9a2f..0baec9f479c 100644 --- a/src/Projection.cpp +++ b/src/Projection.cpp @@ -45,6 +45,11 @@ TH1D* Projection::plot(const std::string& prefix) const { plot->GetYaxis()->SetTitleOffset(1.35); plot->SetMarkerSize(0); plot->SetMinimum(0); + + DEBUG("Returning plot: [" << m_min << " " << m_max << "] " << m_name << " " << + plot->GetXaxis()->GetBinLowEdge(1) << " " << + plot->GetXaxis()->GetBinLowEdge(1 + m_nBins) + ); return plot; } std::function Projection::binFunctor() const { @@ -73,7 +78,20 @@ std::pair Projection2D::operator()( const Event& evt ) const template <> TH1D* Projection::projInternal( const EventList& events, const ArgumentPack& args) const { - return events.makeProjection(*this, args); + auto selection = args.getArg().val; + auto weightFunction = args.getArg().val; + std::string prefix = args.getArg(std::string("")); + auto axis = plot(prefix); + axis->SetLineColor(args.getArg(kBlack).val); + axis->SetMarkerSize(0); + for( auto& 
evt : events ) + { + if( selection != nullptr && !selection(evt) ) continue; + auto pos = operator()(evt); + axis->Fill( pos, evt.weight() * ( weightFunction == nullptr ? 1 : weightFunction(evt) / evt.genPdf() ) ); + } + if( selection != nullptr ) INFO("Filter efficiency = " << axis->GetEntries() << " / " << events.size() ); + return axis; } template <> std::tuple, THStack*> Projection::projInternal(const EventList& events, const Projection::keyedFunctors& weightFunction, const ArgumentPack& args) const diff --git a/src/Spline.cpp b/src/Spline.cpp index c7c3de27547..bcdc41bf3ce 100644 --- a/src/Spline.cpp +++ b/src/Spline.cpp @@ -85,7 +85,7 @@ void SplineTransfer::print() const { INFO( "Source: " << m_parameters[0]->name() SplineTransfer::SplineTransfer() = default; SplineTransfer::SplineTransfer( const SplineTransfer& other ) - : CacheTransfer(other.m_address, other.m_value, other.m_size) + : CacheTransfer(other.m_address, other.m_name, other.m_value, other.m_size) , m_transferMatrix( other.m_transferMatrix ) , m_parameters( other.m_parameters ) , m_nKnots( other.m_nKnots ) @@ -94,8 +94,8 @@ SplineTransfer::SplineTransfer( const SplineTransfer& other ) { } -SplineTransfer::SplineTransfer( const size_t& address, const unsigned int& N, const double& min, const double& max ) - : CacheTransfer(address) +SplineTransfer::SplineTransfer( const size_t& address, const std::string& name, const unsigned int& N, const double& min, const double& max ) + : CacheTransfer(address, name) , m_transferMatrix( TMatrixD( N - 2, N - 2 ) ) , m_parameters( N, nullptr ) , m_nKnots(N) diff --git a/src/TreePhaseSpace.cpp b/src/TreePhaseSpace.cpp index 18ea846740a..d7cf82ec4f5 100644 --- a/src/TreePhaseSpace.cpp +++ b/src/TreePhaseSpace.cpp @@ -41,18 +41,22 @@ TreePhaseSpace::TreePhaseSpace(const std::vector& decayChains, const E m_top.push_back( Vertex::make(p) ); m_weights.push_back(1); } - setRandom(rndm); } - for( auto& w : m_weights ) w /= double(m_weights.size()); + setRandom(rndm); + 
double sum_of_weights = std::accumulate( m_weights.begin(), m_weights.end(), 0.0 ); /* floating-point init: an int init would accumulate (and truncate) the sum as int */ + for( auto& w : m_weights ) w /= sum_of_weights; m_dice = std::discrete_distribution<>(m_weights.begin(), m_weights.end()); } Event TreePhaseSpace::makeEvent() { unsigned j = m_dice(m_gen); + // INFO("Producing event from tree: " << j ); + if( j >= m_top.size() ) ERROR("Out of bounds: " << j << " / " << m_top.size() ); m_top[j].generate(); auto event = m_top[j].event(m_type.size()); - event.setGenPdf(genPdf(event) / m_top[j].weight()); + double w = m_top[j].weight(); + event.setGenPdf( w == 0 ? 0 : genPdf(event) / w ); m_generatorRecord.push_back(j); return event; } @@ -87,56 +91,46 @@ double rho( const double& s, const double& s1, const double& s2) return sqrt( 1 - 2 * (s1+s2)/s + (s1-s2)*(s1-s2) /(s*s) ); } -TreePhaseSpace::Vertex::Vertex(const Particle& particle, const double& min) +TreePhaseSpace::Vertex::Vertex(const Particle& particle, const double& mass) : particle(particle) - , min(min) - , max(min) - , isStable(true) - , isBW(particle.lineshape() == "BW") + , min(mass) + , max(mass) + , type( Type::Stable ) , index(particle.index()) , bwMass(particle.props()->mass()) , bwWidth(particle.props()->width()) + , s(bwMass*bwMass) { if( index != 999 ) indices = {index}; - phiMin = atan((min*min - bwMass*bwMass)/(bwMass*bwWidth)); - phiMax = atan((min*min - bwMass*bwMass)/(bwMass*bwWidth)); -// if( isBW ) INFO( min << " ΔΦ= " << phiMax - phiMin << " " << bwMass << " " << bwWidth << " E= [" << max <<", " << min*min << "]" ); } -TreePhaseSpace::Vertex::Vertex(const Particle& particle, const double& min, const double& max, const bool& isStable ) +TreePhaseSpace::Vertex::Vertex(const Particle& particle, const double& min, const double& max ) : particle(particle) - , min(min) - , max(max) - , isStable(isStable) - , isBW(particle.lineshape() == "BW") , index(particle.index()) , bwMass(particle.props()->mass()) , bwWidth(particle.props()->width()) + , min(min) + , max(max) + , 
s(bwMass*bwMass) { + if( particle.isStable() ) type = Type::Stable; + else if( particle.isQuasiStable() ) type = Type::QuasiStable; + else if( particle.lineshape().find("BW") != std::string::npos ){ + type = Type::BW; + phiMin = atan((min*min - bwMass*bwMass)/(bwMass*bwWidth)); + phiMax = atan((max*max - bwMass*bwMass)/(bwMass*bwWidth)); + INFO( particle << " [" << min << ", " << max << "] Φ = " << phiMin << ", " << phiMax ); + } + else type = Type::Flat; if( index != 999 ) indices = {index}; - phiMin = atan((min*min - bwMass*bwMass)/(bwMass*bwWidth)); - phiMax = atan((max*max - bwMass*bwMass)/(bwMass*bwWidth)); } double TreePhaseSpace::Vertex::p() const { return 0.5 * sqrt( s - 2 * (left->s+right->s) + (left->s-right->s)*(left->s-right->s)/s ); } - -double TreePhaseSpace::Vertex::genBW() const -{ - return isBW ? bwMass * bwMass + bwMass * bwWidth * tan( ( phiMax - phiMin ) * rand->Rndm() + phiMin ) : (max*max-min*min)*rand->Rndm() + min*min; -} - -double TreePhaseSpace::Vertex::BW(const double& si) const -{ - double m = bwMass; - double w = bwWidth; - double N = m * w / (phiMax - phiMin); - return isBW ? N / ( (si - m*m)*(si-m*m) + m*m*w*w) : 1./(max*max - min*min); -} - + double TreePhaseSpace::Vertex::weight() const { if( left == nullptr || right == nullptr ) return 1.0; @@ -148,37 +142,44 @@ double TreePhaseSpace::Vertex::weight() const return w; } -double TreePhaseSpace::Vertex::maxWeight() const -{ - double w = 1.0; - if( left == nullptr || right == nullptr ) return 1.0; - w *= rhoMax; - w *= left -> maxWeight(); - w *= right -> maxWeight(); - return w; -} - double TreePhaseSpace::Vertex::genPdf(const Event& event) const { if( left == nullptr || right == nullptr ) return 1; - double dp = left->genPdf(event) * right->genPdf(event); - return dp * ( isStable ? 
1 : BW( event.s(indices) ) ); + double dp = left->genPdf(event) * right->genPdf(event); + auto st = event.s(indices); + switch( type ) { + case Type::BW : + dp *= ( bwMass* bwWidth ) /( (phiMax-phiMin) * ( (st - bwMass*bwMass)*(st-bwMass*bwMass) + bwMass*bwMass*bwWidth*bwWidth) ); + break; + case Type::Flat : + dp *= 1/(max*max -min*min); + break; + }; + return dp; } void TreePhaseSpace::Vertex::generate() { - s = isStable ? min*min : genBW(); + switch( type ) { + case Type::BW : + s = bwMass * bwMass + bwMass * bwWidth * tan( (phiMax - phiMin ) * rand->Rndm() + phiMin ); + break; + case Type::Flat : + s = (max*max-min*min)*rand->Rndm() + min*min; /* s is a mass squared: uniform on [min^2, max^2], consistent with the 1/(max^2-min^2) density used in genPdf */ + break; + }; if( left != nullptr ) left->generate(); if( right != nullptr ) right->generate(); } void TreePhaseSpace::Vertex::print(const unsigned& offset) const { - double rhoc = ( left != nullptr && right != nullptr ? rho(s,left->s,right->s) : rhoMax ); - if( isStable ) - INFO( std::string(offset,' ') << particle.name() << " [" << vectorToString(indices, ", ") << "] → [" << min << "], ϱ' = " << rhoMax << ", s = " << s << ", ϱ = " << rhoc << " w = " << maxWeight() ); - else - INFO( std::string(offset,' ') << particle.name() << " [" << vectorToString(indices, ", ") << "] → [" << min << ", " << max << "] ϱ' =" << rhoMax << ", s = " << s << ", ϱ = " << rhoc << " w = " << maxWeight() ); + std::array vtxTypeStrings = {"BW", "Flat", "Stable", "QuasiStable"}; + INFO( std::string(offset,' ') << particle.name() << " [" << vectorToString(indices, ", ") << "] type = " << vtxTypeStrings[ type ] << " → [" << min << ", " << max << "] " << sqrt(s) ); + if( type == Type::BW ) + INFO( "phi-range : " << phiMin << " " << phiMax + << " s(min) = " << bwMass * bwMass + bwMass * bwWidth * tan(phiMin) + << " s(max) = " << bwMass * bwMass + bwMass * bwWidth * tan(phiMax) ); if( left != nullptr ) left -> print( offset + 4 ); if( right != nullptr ) right -> print( offset + 4 ); } @@ -197,6 +198,7 @@ void TreePhaseSpace::Vertex::place(Event& event) 
Event TreePhaseSpace::Vertex::event(const unsigned& eventSize) { + if( isMultiBody ) return phsp.makeEvent(); Event output(4 * eventSize); mom.SetXYZT(0,0,0,sqrt(s)); generateFullEvent(); @@ -213,6 +215,10 @@ void TreePhaseSpace::Vertex::generateFullEvent() double cosPhi = cos(angY); double sinPhi = sin(angY); double pf = p(); + if( std::isnan(pf) || std::isnan(s) ) + { + ERROR("Generating nan: " << pf << " " << s << " " << min << " " << max ); + } left -> mom.SetXYZT( pf*sinTheta*cosPhi, pf*sinTheta*sinPhi, pf*cosTheta, sqrt(left->s + pf*pf) ); left -> mom.Boost( mom.BoostVector() ); left -> generateFullEvent(); @@ -222,12 +228,6 @@ void TreePhaseSpace::Vertex::generateFullEvent() right -> generateFullEvent(); } -void TreePhaseSpace::Vertex::setRhoMax() -{ - rhoMax = rho(max*max, left->min * left->min, right->min * right->min ); - weightMax = maxWeight(); -} - TreePhaseSpace::Vertex TreePhaseSpace::Vertex::make(const Particle& particle, TreePhaseSpace::Vertex* parent) { auto decayProducts = particle.daughters(); @@ -241,25 +241,17 @@ TreePhaseSpace::Vertex TreePhaseSpace::Vertex::make(const Particle& particle, Tr if( decayProducts.size() == 2 ) { double G = particle.isQuasiStable() ? 0 : particle.props()->width() * 10; - TreePhaseSpace::Vertex vtx = (parent == nullptr) ? TreePhaseSpace::Vertex(particle, particle.mass() - G , particle.mass() + G, particle.isQuasiStable() ) : TreePhaseSpace::Vertex(); + TreePhaseSpace::Vertex vtx = (parent == nullptr) ? TreePhaseSpace::Vertex(particle, particle.mass() - G , particle.mass() + G) : TreePhaseSpace::Vertex(); parent = ( parent == nullptr ) ? &vtx : parent; - auto min_mass_1 = threshold(*decayProducts[0]); auto min_mass_2 = threshold(*decayProducts[1]); - auto max_mass_1 = decayProducts[0]->isStable() ? min_mass_1 : parent->max - min_mass_2; - auto max_mass_2 = decayProducts[1]->isStable() ? 
min_mass_2 : parent->max - min_mass_1; - parent->left = std::make_shared(*decayProducts[0], min_mass_1, max_mass_1, decayProducts[0]->isStable() ); - parent->right = std::make_shared(*decayProducts[1], min_mass_2, max_mass_2, decayProducts[1]->isStable() ); + parent->left = std::make_shared(*decayProducts[0], min_mass_1, parent->max - min_mass_2); + parent->right = std::make_shared(*decayProducts[1], min_mass_2, parent->max - min_mass_1); TreePhaseSpace::Vertex::make(*decayProducts[0], parent->left.get()); TreePhaseSpace::Vertex::make(*decayProducts[1], parent->right.get()); for( auto& index : parent->left ->indices ) parent->indices.push_back(index); for( auto& index : parent->right->indices ) parent->indices.push_back(index); - parent->setRhoMax(); - return *parent; - } - if( decayProducts.size() > 2 ) - { - ERROR("Decomposition only implemented for quasi two-body decays, vertex: " << particle << " does not result in valid phase space"); + return *parent; } return TreePhaseSpace::Vertex(); } @@ -286,7 +278,7 @@ double TreePhaseSpace::genPdf(const Event& event) const { double genPdf = 0; for( unsigned i = 0; i != m_top.size(); ++i ) - genPdf += m_weights[i] * m_top[i].genPdf(event); // / channel.weightMax; // / channel->w; + genPdf += m_weights[i] * m_top[i].genPdf(event); return genPdf; } diff --git a/src/Units.cpp b/src/Units.cpp index 9ae73ad916d..a252032892c 100644 --- a/src/Units.cpp +++ b/src/Units.cpp @@ -3,11 +3,11 @@ #include "AmpGen/Utilities.h" namespace AmpGen { - complete_enum(Units, TeV, GeV, MeV, KeV, eV) + complete_enum(Units, TeV, GeV, MeV, KeV, eV, ms, us, ns, ps, fs) } double AmpGen::to_double(const AmpGen::Units& unit) { - static const double value_table[5] = {TeV, GeV, MeV, KeV, eV}; + static const double value_table[10] = {TeV, GeV, MeV, KeV, eV, ms, us, ns, ps, fs}; return value_table[unsigned(unit)]; } diff --git a/test/test_avx2d.cpp b/test/test_avx2d.cpp index d1ce7d48435..965b8426abd 100644 --- a/test/test_avx2d.cpp +++ 
b/test/test_avx2d.cpp @@ -7,7 +7,7 @@ namespace utf = boost::unit_test; -#if ENABLE_AVX2 +#if ENABLE_AVX #include "AmpGen/simd/utils.h" using namespace AmpGen; @@ -17,10 +17,10 @@ BOOST_AUTO_TEST_CASE( test_log ) { AVX2d::real_v p(0.3, 0.5, 10.0, 7.0); auto logged = AVX2d::log( p ).to_array() ; - BOOST_TEST( logged[0] == std::log(0.3), boost::test_tools::tolerance(5e-10 ) ); - BOOST_TEST( logged[1] == std::log(0.5), boost::test_tools::tolerance(5e-10 ) ); - BOOST_TEST( logged[2] == std::log(10.0), boost::test_tools::tolerance(5e-10 ) ); - BOOST_TEST( logged[3] == std::log(7.0), boost::test_tools::tolerance(5e-10 ) ); + BOOST_TEST( logged[0] == std::log(0.3), boost::test_tools::tolerance(1e-12 ) ); + BOOST_TEST( logged[1] == std::log(0.5), boost::test_tools::tolerance(1e-12 ) ); + BOOST_TEST( logged[2] == std::log(10.0), boost::test_tools::tolerance(1e-12 ) ); + BOOST_TEST( logged[3] == std::log(7.0), boost::test_tools::tolerance(1e-12 ) ); } BOOST_AUTO_TEST_CASE( test_fmod ) @@ -65,6 +65,29 @@ BOOST_AUTO_TEST_CASE( test_gather ) BOOST_TEST( v[3] == data[4] ); } +BOOST_AUTO_TEST_CASE( test_trig ) +{ + auto data = AVX2d::real_v(0.1,0.4,-2.0,5.0); + auto cos = AVX2d::cos(data).to_array(); + BOOST_TEST( cos[0] == std::cos( data.at(0 )) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( cos[1] == std::cos( data.at(1 )) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( cos[2] == std::cos( data.at(2)) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( cos[3] == std::cos( data.at(3 )) , boost::test_tools::tolerance(1e-15) ); + + auto sin = AVX2d::sin(data).to_array(); + BOOST_TEST( sin[0] == std::sin( data.at(0 )) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( sin[1] == std::sin( data.at(1 )) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( sin[2] == std::sin( data.at(2)) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( sin[3] == std::sin( data.at(3 )) , boost::test_tools::tolerance(1e-15) ); + + auto tan = AVX2d::tan(data).to_array(); + + 
BOOST_TEST( tan[0] == std::tan( data.at(0 )) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( tan[1] == std::tan( data.at(1 )) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( tan[2] == std::tan( data.at(2)) , boost::test_tools::tolerance(1e-15) ); + BOOST_TEST( tan[3] == std::tan( data.at(3 )) , boost::test_tools::tolerance(1e-15) ); +} + #else BOOST_AUTO_TEST_CASE( test_dummy ) From fb6a9a0599657f4f3ab4c9acd5e0ca16668bf977 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sat, 16 May 2020 14:24:55 +0200 Subject: [PATCH 38/67] Fix OpenMP predicates --- AmpGen/Generator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AmpGen/Generator.h b/AmpGen/Generator.h index 4ab6e0676a1..b864c166d1e 100644 --- a/AmpGen/Generator.h +++ b/AmpGen/Generator.h @@ -99,7 +99,7 @@ namespace AmpGen #ifdef _OPENMP #pragma omp parallel for #endif - for ( size_t block=0; block != mc.nBlocks(); ++block ) + for ( size_t block=0; block < mc.nBlocks(); ++block ) mc.setWeight(block, 1.0, pdf(mc.block(block), block) / mc.genPDF(block)); t_eval.stop(); t_acceptReject.start(); From 958c379d882db03e7ed7e8694b8508904ffa1969 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 17 May 2020 11:31:40 +0200 Subject: [PATCH 39/67] Options to switch of mvec --- AmpGen/simd/avx2d_types.h | 41 ++++++++++++++++++++++++++------------- Standalone.cmake | 20 ++++++++++++++++--- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 88ab0bfdd4b..bdd949c88bc 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -8,18 +8,24 @@ #include #include +#if USE_MVEC extern "C" __m256d _ZGVcN4v_cos(__m256d x); extern "C" __m256d _ZGVcN4v_sin(__m256d x); extern "C" __m256d _ZGVcN4v_exp(__m256d x); extern "C" __m256d _ZGVcN4v_log(__m256d x); extern "C" void _ZGVdN4vvv_sincos(__m256d x, __m256i ptrs, __m256i ptrc); +#endif namespace AmpGen { namespace AVX2d { - - #define libmvec_alias( function_name, 
avx_function_name ) \ - inline real_v function_name( const real_v& v ){ return avx_function_name (v) ; } - + #if USE_MVEC + #define libmvec_alias( function_name) \ + inline real_v function_name( const real_v& v ){ return _ZGVcN4v_##function_name (v) ; } + #else + #define libmvec_alias( F ) \ + inline real_v F( const real_v& v ){ auto arr = v.to_array(); return real_v( std::F(arr[0]), std::F(arr[1]), std::F(arr[2]), std::F(arr[3])) ; } + #endif + struct real_v { __m256d data; static constexpr unsigned size = 4; @@ -29,8 +35,7 @@ namespace AmpGen { real_v(const double& f ) : data( _mm256_set1_pd( f )) {} real_v(const double& x0, const double& x1, const double& x2, const double& x3 ) { - double tmp[4] = {x0,x1,x2,x3}; - data = _mm256_loadu_pd(tmp); + data = _mm256_set_pd(x3,x2,x1,x0); /* _mm256_set_pd takes the highest lane first; x0 must land in lane 0, as it did with the loadu this replaces */ } real_v(const double* f ) : data( _mm256_loadu_pd( f ) ) {} real_v(const std::array f ) : data( _mm256_loadu_pd( f.data() ) ) {} @@ -60,22 +65,30 @@ namespace AmpGen { inline real_v operator==( const real_v& lhs, const real_v& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); } inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_pd(v); } inline real_v abs ( const real_v& v ) { return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); } - - libmvec_alias( sin, _ZGVcN4v_sin ) - libmvec_alias( cos, _ZGVcN4v_cos ) - libmvec_alias( exp, _ZGVcN4v_exp ) - libmvec_alias( log, _ZGVcN4v_log ) + libmvec_alias( sin ) + libmvec_alias( cos ) + libmvec_alias( exp ) + libmvec_alias( log ) inline void sincos( const real_v& v, real_v& s, real_v& c ) { +#if USE_MVEC __m256i sp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&s),_mm256_set_epi64x(24,16,8,0)); __m256i cp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&c),_mm256_set_epi64x(24,16,8,0)); _ZGVdN4vvv_sincos(v,sp,cp); +#else + s = sin(v); + c = cos(v); +#endif + } + inline std::pair sincos( const real_v& v ) + { + std::pair rt; + sincos( v, rt.first, rt.second ); + return rt; } - inline real_v tan( const real_v& v ) { - real_v s, c; - sincos( 
v, s, c ); + auto [s,c] = sincos( v ); return s / c ; } diff --git a/Standalone.cmake b/Standalone.cmake index a476fb87bd9..c2284d0e92e 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -12,8 +12,9 @@ if( NOT "${CMAKE_CXX_STANDARD}" ) set(CMAKE_CXX_STANDARD 17) endif() -SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") -SET(USE_SIMD "AVX2d" CACHE STRING "USE_SIMD") +SET(USE_OPENMP TRUE CACHE BOOL "USE_OPENMP") # flag to use openmp for threading +SET(USE_SIMD "AVX2d" CACHE STRING "USE_SIMD") # AVX instruction set + precision to use +SET(USE_MVEC TRUE CACHE BOOL "USE_MVEC") # flag to use vector math library mvec set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -31,6 +32,7 @@ include(GNUInstallDirs) option(AMPGEN_DEBUG "AmpGen Debug printout") option(AMPGEN_TRACE "AmpGen Trace printout") + configure_file ("${PROJECT_SOURCE_DIR}/AmpGen/Version.h.in" "${CMAKE_BINARY_DIR}/AmpGenVersion.h") add_library(${PROJECT_NAME} SHARED ${AMPGEN_SRC} ${AMPGEN_HDR}) @@ -64,7 +66,19 @@ target_include_directories(AmpGen PUBLIC $ Date: Sun, 17 May 2020 11:58:31 +0200 Subject: [PATCH 40/67] cleanup mvec code --- AmpGen/simd/avx2d_types.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index bdd949c88bc..7293f30118d 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -9,22 +9,24 @@ #include #if USE_MVEC -extern "C" __m256d _ZGVcN4v_cos(__m256d x); -extern "C" __m256d _ZGVcN4v_sin(__m256d x); -extern "C" __m256d _ZGVcN4v_exp(__m256d x); -extern "C" __m256d _ZGVcN4v_log(__m256d x); +// extern "C" __m256d _ZGVcN4v_cos(__m256d x); +// extern "C" __m256d _ZGVcN4v_sin(__m256d x); +// extern "C" __m256d _ZGVcN4v_exp(__m256d x); +// extern "C" __m256d _ZGVcN4v_log(__m256d x); extern "C" void _ZGVdN4vvv_sincos(__m256d x, __m256i ptrs, __m256i ptrc); #endif +#if USE_MVEC +#define libmvec_alias( function_name) \ + extern "C" __m256d 
_ZGVcN4v_##function_name(__m256d x); \ + inline real_v function_name( const real_v& v ){ return _ZGVcN4v_##function_name (v) ; } +#else +#define libmvec_alias( F ) \ + inline real_v F( const real_v& v ){ auto arr = v.to_array(); return real_v( std::F(arr[0]), std::F(arr[1]), std::F(arr[2]), std::F(arr[3])) ; } +#endif + namespace AmpGen { namespace AVX2d { - #if USE_MVEC - #define libmvec_alias( function_name) \ - inline real_v function_name( const real_v& v ){ return _ZGVcN4v_##function_name (v) ; } - #else - #define libmvec_alias( F ) \ - inline real_v F( const real_v& v ){ auto arr = v.to_array(); return real_v( std::F(arr[0]), std::F(arr[1]), std::F(arr[2]), std::F(arr[3])) ; } - #endif struct real_v { __m256d data; From 064296da3bbf14d12e9cdc87c4ef83d559fa742f Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 17 May 2020 15:27:09 +0200 Subject: [PATCH 41/67] fix scalar build --- AmpGen/AmplitudeRules.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/AmpGen/AmplitudeRules.h b/AmpGen/AmplitudeRules.h index ed44420896f..57e30519d34 100644 --- a/AmpGen/AmplitudeRules.h +++ b/AmpGen/AmplitudeRules.h @@ -187,7 +187,9 @@ namespace AmpGen } template auto operator()(arg_types... args ) const { return amp_type::operator()(args...) 
; } #if ENABLE_AVX - void debug( const Event& event ) const { amp_type::debug(EventListSIMD::makeEvent(event).data() ) ; } + void debug( const Event& event ) const { amp_type::debug(EventListSIMD::makeEvent(event).data() ) ; } + #else + void debug( const Event& event ) const { amp_type::debug(event.address()) ; } #endif const std::string decayDescriptor() const { return decayTree.decayDescriptor() ; } From 0ddc4eb3f8bc6d70aff707165dd8d440c8d34871 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 19 May 2020 18:15:10 +0200 Subject: [PATCH 42/67] Fixes for canonical formulation with deeper decay topologies --- AmpGen/ASTResolver.h | 3 +- AmpGen/Expression.h | 1 + AmpGen/Projection.h | 2 +- AmpGen/Transform.h | 16 +++-- AmpGen/Wigner.h | 10 +++- CMakeLists.txt | 4 +- apps/Debugger.cpp | 3 + apps/Generator.cpp | 1 + examples/FitterWithPolarisation.cpp | 10 +++- src/ASTResolver.cpp | 32 ++++++---- src/EventType.cpp | 2 +- src/Expression.cpp | 4 +- src/OptionsParser.cpp | 3 + src/Particle.cpp | 6 +- src/PolarisedSum.cpp | 2 +- src/RecursivePhaseSpace.cpp | 2 +- src/Transform.cpp | 90 +++++++++++++++++++++-------- src/Wigner.cpp | 57 ++++++++++++------ 18 files changed, 173 insertions(+), 75 deletions(-) diff --git a/AmpGen/ASTResolver.h b/AmpGen/ASTResolver.h index bbad189fb1c..1a29a215a47 100644 --- a/AmpGen/ASTResolver.h +++ b/AmpGen/ASTResolver.h @@ -60,11 +60,12 @@ namespace AmpGen { std::map m_evtMap; /// Event specification std::map m_parameterMapping; /// Mapping of parameters to compile parameters const MinuitParameterSet* m_mps; /// Set of MinuitParameters - std::map m_tempTrees; /// temporary store of sub-trees for performing cse reduction + std::map m_tempTrees; /// temporary store of sub-trees for performing cse reduction unsigned int m_nParameters; /// Number of parameters bool m_enable_cuda {false}; /// flag to generate CUDA code <> bool m_enable_compileTimeConstants {false}; /// flag to enable compile time constants <> bool m_enable_avx {false}; /// 
flag to generate code using AVX instructions <> + bool m_check_hashes {false}; /// flag to check that hashes are unique }; template <> void ASTResolver::resolve( const Parameter& obj ); diff --git a/AmpGen/Expression.h b/AmpGen/Expression.h index 49fe25acc3f..d40596edf48 100644 --- a/AmpGen/Expression.h +++ b/AmpGen/Expression.h @@ -233,6 +233,7 @@ namespace AmpGen complex_t operator()() const override { return m_expression(); } uint64_t key() const; void setKey( const size_t& new_key ); + Expression expression() const { return m_expression; } Expression m_expression; uint64_t m_key; }; diff --git a/AmpGen/Projection.h b/AmpGen/Projection.h index ce6c926ea53..8192ef58c25 100644 --- a/AmpGen/Projection.h +++ b/AmpGen/Projection.h @@ -56,7 +56,7 @@ namespace AmpGen } friend class Projection2D; - private: + /// private: template TH1D* projInternal(const eventlist_type&, const ArgumentPack&) const; template diff --git a/AmpGen/Transform.h b/AmpGen/Transform.h index 0bf87a5edfb..1c703a7b056 100644 --- a/AmpGen/Transform.h +++ b/AmpGen/Transform.h @@ -41,13 +41,16 @@ namespace AmpGen { class TransformSequence { public: - TransformSequence() = default; + TransformSequence(); + TransformSequence( const Transform& transform ); + TransformSequence( const TransformSequence& t1, const TransformSequence& t2); + TransformSequence( const Transform& t1, const Transform& t2); TransformSequence inverse() const; - Tensor operator()( const Transform::Representation& repr ); + Tensor operator()( const Transform::Representation& repr ) const ; Tensor operator()( const Tensor& tensor, - const Transform::Representation& repr=Transform::Representation::Vector ); - void add( const Transform& transform ); - void add( const TransformSequence& transform ); + const Transform::Representation& repr=Transform::Representation::Vector ) const; + void push_back( const Transform& transform ); + void push_back( const TransformSequence& transform ); void stepThrough( const Tensor& tensor, const 
Transform::Representation& repr = Transform::Representation::Vector ); @@ -58,7 +61,10 @@ namespace AmpGen { std::vector::const_iterator end() const { return m_transforms.cend(); } std::vector::iterator begin() { return m_transforms.begin(); } std::vector::iterator end() { return m_transforms.end(); } + unsigned size() const { return m_transforms.size(); } + private: + void buildCache(); std::vector m_transforms; std::array m_cache; }; diff --git a/AmpGen/Wigner.h b/AmpGen/Wigner.h index 9abdcf1f372..01a3792ec63 100644 --- a/AmpGen/Wigner.h +++ b/AmpGen/Wigner.h @@ -7,6 +7,7 @@ namespace AmpGen { class Particle; + using TransformCache = std::map; Expression wigner_d( const Expression& cb, const double& j, const double& m, const double& n ); Expression wigner_D( const std::pair& P, const double& J, const double& lA, const double& lB, DebugSymbols* db); @@ -27,9 +28,14 @@ namespace AmpGen { In the case where ve =-1, a second rotation is applied about the x-axis that aligns P to the +ve z-axis. This ensures that singly and doubly primed helicity frames remain orthonormal. 
*/ - TransformSequence wickTransform(const Tensor& P, const Expression& M, const int& ve =1, const bool& handleZeroCase = false); + TransformSequence wickTransform(const Tensor& P, const Particle& p, const int& ve =1, DebugSymbols* db = nullptr ); - Expression helicityAmplitude(const Particle& particle, TransformSequence& parentFrame, const double& Mz, DebugSymbols* db , const int sgn=1, std::map* cacheptr = nullptr); + Expression helicityAmplitude(const Particle& particle, + const TransformSequence& parentFrame, + const double& Mz, + DebugSymbols* db , + const int sgn=1, + TransformCache* cacheptr = nullptr); Tensor basisSpinor(const int& polState, const int& id); Tensor basisVector(const int& polState); diff --git a/CMakeLists.txt b/CMakeLists.txt index 04680a05846..ad75fc9b283 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # Support standalone build if(COMMAND gaudi_subdir) - gaudi_subdir(AmpGen v1r2) + gaudi_subdir(AmpGen v2r0) find_package(ROOT COMPONENTS RIO Hist Matrix Graf Minuit2 Tree MathMore MathCore Physics) find_package(TBB REQUIRED) @@ -29,6 +29,6 @@ if(COMMAND gaudi_subdir) endforeach() else() cmake_minimum_required(VERSION 3.12) - project(AmpGen LANGUAGES CXX VERSION 1.2) + project(AmpGen LANGUAGES CXX VERSION 2.0) include(Standalone.cmake) endif() diff --git a/apps/Debugger.cpp b/apps/Debugger.cpp index a19f3f83563..98308235d4a 100644 --- a/apps/Debugger.cpp +++ b/apps/Debugger.cpp @@ -98,6 +98,9 @@ int main( int argc, char** argv ) accepted.push_back(evt); } } + std::vector event = NamedParameter("Event",0).getVector(); + if( event.size() != 1 ) accepted[0].set( event.data() ); + std::string type = NamedParameter("Type","CoherentSum"); if( type == "PolarisedSum") diff --git a/apps/Generator.cpp b/apps/Generator.cpp index a8f77720b72..00ea2e10643 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -153,6 +153,7 @@ int main( int argc, char** argv ) omp_set_dynamic( 0 ); #endif + INFO("Writing output: " << outfile ); 
TRandom3 rand; rand.SetSeed( seed + 934534 ); diff --git a/examples/FitterWithPolarisation.cpp b/examples/FitterWithPolarisation.cpp index 79620f5d706..bc36c2ac700 100644 --- a/examples/FitterWithPolarisation.cpp +++ b/examples/FitterWithPolarisation.cpp @@ -9,6 +9,7 @@ #include #include "AmpGen/Chi2Estimator.h" +#include "AmpGen/RecursivePhaseSpace.h" #include "AmpGen/EventList.h" #include "AmpGen/EventType.h" #include "AmpGen/CoherentSum.h" @@ -42,6 +43,11 @@ using namespace AmpGen; +template Particle getTopology(const pdf_t& pdf) +{ + return pdf.matrixElements()[0].decayTree.quasiStableTree(); +} + template FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitParameterSet& MPS ); @@ -108,7 +114,9 @@ int main( int argc, char* argv[] ) /* Generate events to normalise the PDF with. This can also be loaded from a file, which will be the case when efficiency variations are included. Default number of normalisation events is 2 million. */ - EventList_type eventsMC = simFile == "" ? EventList_type(Generator<>(evtType, &rndm).generate(int(3365617)) ) : EventList_type(simFile, evtType); + Generator signalGenerator( getTopology(sig), events.eventType(), &rndm ); + auto events_l = signalGenerator.generate(1e6); + EventList_type eventsMC = simFile == "" ? 
EventList_type(events_l) : EventList_type(simFile, evtType); sig.setMC( eventsMC ); diff --git a/src/ASTResolver.cpp b/src/ASTResolver.cpp index 6118fb62795..3b2b5bfe0b7 100644 --- a/src/ASTResolver.cpp +++ b/src/ASTResolver.cpp @@ -20,6 +20,7 @@ ASTResolver::ASTResolver(const std::map& evtMap, { m_enable_cuda = NamedParameter("UseCUDA",false); m_enable_compileTimeConstants = NamedParameter("ASTResolver::CompileTimeConstants", false); + m_check_hashes = NamedParameter("ASTResolver::CheckHashes", false ); } std::vector> ASTResolver::getOrderedSubExpressions( const Expression& expression ) @@ -30,19 +31,22 @@ std::vector> ASTResolver::getOrderedSubExpression std::map subTrees; do { subTrees.clear(); - for( auto& t : m_tempTrees ) + for( auto& [t, s] : m_tempTrees ) { - auto expr = t.first->m_expression; - uint64_t key = t.first->key(); - if( subTrees.count( key ) == 0 ) subTrees[ key ] = t.first->m_expression ; + uint64_t key = s->key(); + if( subTrees.count( key ) == 0 ) subTrees[ key ] = s->expression(); + // else if( m_check_hashes && t.first->m_expression.to_string() != subTrees[key].to_string() ) + // { + // WARNING("Hash collision between in key = " << key << " other key = " << FNV1a_hash( subTrees[key].to_string() ) ); + // } } m_tempTrees.clear(); - for( auto& st : subTrees ){ - st.second.resolve( *this ); - auto stack_pos = used_functions.find( st.first ); + for( auto& [key,expression] : subTrees ){ + expression.resolve( *this ); + auto stack_pos = used_functions.find(key); if ( stack_pos == used_functions.end() ) { - subexpressions.emplace_back( st.first , st.second ); - used_functions[st.first] = subexpressions.size() - 1; + subexpressions.emplace_back(key , expression ); + used_functions[key] = subexpressions.size() - 1; continue; } auto oldPos = stack_pos->second; @@ -52,7 +56,7 @@ std::vector> ASTResolver::getOrderedSubExpression for ( auto uf = used_functions.begin(); uf != used_functions.end(); ++uf ) { if ( uf->second >= oldPos ) uf->second = 
uf->second - 1; } - used_functions[st.first] = subexpressions.size() - 1; + used_functions[key] = subexpressions.size() - 1; } } while ( subTrees.size() !=0 ); std::reverse( subexpressions.begin(), subexpressions.end() ); @@ -61,8 +65,12 @@ std::vector> ASTResolver::getOrderedSubExpression template <> void ASTResolver::resolve( const SubTree& subTree ) { - if( m_tempTrees.count( &subTree ) != 0 ) return; - m_tempTrees[&subTree] = 1; + auto ptr = subTree.expression().get(); + if( m_tempTrees.count(ptr) != 0 ) return; + else { + ptr->resolve( *this ); + m_tempTrees[ptr] = &subTree; + } } template <> void ASTResolver::resolve( const Spline& spline ) diff --git a/src/EventType.cpp b/src/EventType.cpp index 1bace03da37..c111e0ced4d 100644 --- a/src/EventType.cpp +++ b/src/EventType.cpp @@ -54,7 +54,7 @@ EventType::EventType( const std::vector& particleNames, const bool& if ( prop != nullptr ) m_particleMasses.push_back( prop->mass() ); else { - ERROR( "Particle not found: " << *m_particleNames.rbegin() ); + FATAL( "Particle not found: " << *m_particleNames.rbegin() ); return; } if(m_alt_part_names) diff --git a/src/Expression.cpp b/src/Expression.cpp index f0ec105523c..64661e26c85 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -157,7 +157,7 @@ Expression AmpGen::operator/( const Expression& A, const Expression& B ) if( is( as_prod.r() ) ) return ( Constant( 1./as_prod.r()() ) * A )/ as_prod.l(); } else if( is(B) ) return ( A * cast(B).r() ) / cast(B).l(); - else if( is(B) ) return ( A * fcn::isqrt( cast(B).arg() ) ); +// else if( is(B) ) return ( A / fcn::sqrt( cast(B).arg() ) ); return Expression( Divide( A, B ) ); } Expression AmpGen::operator&&( const Expression& A, const Expression& B ) { return Expression( And( A, B ) ); } @@ -249,7 +249,7 @@ std::string SubTree::to_string(const ASTResolver* /*resolver*/) const void SubTree::resolve( ASTResolver& resolver ) const { resolver.resolve( *this ); - m_expression.resolve( resolver ); + // 
m_expression.resolve( resolver ); } Expression AmpGen::make_cse( const Expression& A , bool simplify ) diff --git a/src/OptionsParser.cpp b/src/OptionsParser.cpp index 74cfab93f20..469794aed18 100644 --- a/src/OptionsParser.cpp +++ b/src/OptionsParser.cpp @@ -110,6 +110,9 @@ void OptionsParser::addArg( const std::string& arg ) int bc = 0 ; auto tokens = makeParsedStrings( arg, bc ); auto name = tokens[0]; + if ( name == "ParticlePropertiesList::Alias" && tokens.size() == 3 ) { + ParticlePropertiesList::getMutable()->makeAlias( tokens[1], tokens[2] ); + } m_parsedLines[name] = tokens; } diff --git a/src/Particle.cpp b/src/Particle.cpp index caa73e54681..35d2ac2e2a7 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -194,9 +194,9 @@ Tensor Particle::P() const Tensor rt( std::vector( { Parameter(index + "_Px"), Parameter(index + "_Py"), - Parameter(index + "_Pz"), 0 }) , Tensor::dim(4) ); - rt[3] = make_cse( fcn::sqrt( mass()*mass() + rt[0]*rt[0] + rt[1]*rt[1] + rt[2]*rt[2] ) ); - // Parameter( index + "_E" , 0, false, 1 )} ), + Parameter(index + "_Pz"), + Parameter(index + "_E") }) , Tensor::dim(4) ); +// rt[3] = fcn::sqrt( mass()*mass() + rt[0]*rt[0] + rt[1]*rt[1] + rt[2]*rt[2] ) ; return rt; } else ERROR( "Stable particle " << m_index << "is unindexed!" 
); } diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 3796544f40b..0684979664a 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -69,7 +69,7 @@ PolarisedSum::PolarisedSum(const EventType& type, auto protoAmps = m_rules.getMatchingRules(m_eventType); for(const auto& m : protoAmps ) INFO( m.first.uniqueString() ); m_matrixElements.resize( protoAmps.size() ); - ThreadPool tp( std::thread::hardware_concurrency() ); + ThreadPool tp(std::thread::hardware_concurrency() ); for(unsigned i = 0; i < m_matrixElements.size(); ++i) { tp.enqueue( [i, &protoAmps, &polStates, this]{ diff --git a/src/RecursivePhaseSpace.cpp b/src/RecursivePhaseSpace.cpp index c8f67908071..dad48e3fbde 100644 --- a/src/RecursivePhaseSpace.cpp +++ b/src/RecursivePhaseSpace.cpp @@ -58,7 +58,7 @@ AmpGen::Event RecursivePhaseSpace::makeEvent() rt[4*segment.sink + 3] = pE; } } else { - auto evtTmp = segment.decayProds->makeEvent(); + auto evtTmp = segment.decayProds->makeEvent(); double v = sqrt( px * px + py * py + pz * pz ) / pE; boost( evtTmp, std::tuple(px, py, pz), v ); for(size_t j = 0; j < rt.size(); ++j) rt[j] += evtTmp[j]; diff --git a/src/Transform.cpp b/src/Transform.cpp index cd504e38634..812a6bef05e 100644 --- a/src/Transform.cpp +++ b/src/Transform.cpp @@ -64,7 +64,7 @@ Tensor Transform::boost_spinor() const return Identity(2) * fcn::sqrt(0.5*(m_arg+1)) + sigma_dot_p(m_k)*fcn::sqrt(0.5*(m_arg-1)); } -Tensor Transform::operator()(const Representation& repr) const +Tensor Transform::operator()(const Representation& repr)const { Tensor::Index m,j,k; Tensor I2 = Identity(2); @@ -100,35 +100,40 @@ TransformSequence TransformSequence::inverse() const { TransformSequence rt; for( auto i = m_transforms.rbegin(); i != m_transforms.rend(); ++i ) - rt.add( i->inverse() ); + rt.push_back( i->inverse() ); return rt; } -Tensor TransformSequence::operator()( const Transform::Representation& repr ) +Tensor TransformSequence::operator()( const Transform::Representation& repr ) 
const { - if( m_cache[repr].nElements() != 1 ) - { - return m_cache[repr]; - } - if( m_transforms.size() == 0 ){ - if( repr == Transform::Representation::Spinor ) return Identity(2); - else return Identity(4); - } - Tensor::Index a,b,c; - Tensor rt = m_transforms[0](repr); - rt.st(); - for( size_t i = 1 ; i < m_transforms.size(); ++i ) + return m_cache[repr]; +} + +void TransformSequence::buildCache() +{ + for( auto repr : { Transform::Representation::Spinor, Transform::Representation::Bispinor, Transform::Representation::Vector } ) { - Tensor rti = m_transforms[i](repr); - rti.st(true); - rt = rti(a,b) * rt(b,c); + if( m_transforms.size() == 0 ){ + if( repr == Transform::Representation::Spinor ) m_cache[repr] = Identity(2); + m_cache[repr] = Identity(4); + } + else { + Tensor::Index a,b,c; + Tensor rt = m_transforms[0](repr); + rt.st(); + for( size_t i = 1 ; i < m_transforms.size(); ++i ) + { + Tensor rti = m_transforms[i](repr); + rti.st(true); + rt = rti(a,b) * rt(b,c); + } + m_cache[repr] = rt; + } } - m_cache[repr] = rt; - return rt; } Tensor TransformSequence::operator()( const Tensor& tensor, - const Transform::Representation& repr ) + const Transform::Representation& repr ) const { Tensor::Index a,b,c; auto seq = this->operator()(repr); @@ -136,26 +141,29 @@ Tensor TransformSequence::operator()( const Tensor& tensor, } Tensor Transform::operator()( const Tensor& tensor, - const Transform::Representation& repr ) const + const Transform::Representation& repr ) const { Tensor::Index a,b; auto seq = this->operator()(repr); return seq(a,b)*tensor(b); } -void TransformSequence::add( const Transform& transform ) +void TransformSequence::push_back( const Transform& transform ) { m_transforms.emplace_back( transform ); + buildCache(); } -void TransformSequence::add( const TransformSequence& transform ) +void TransformSequence::push_back( const TransformSequence& transform ) { for( auto& t : transform ) m_transforms.emplace_back(t); + buildCache(); } void 
TransformSequence::clear() { m_transforms.clear(); + buildCache(); } void TransformSequence::stepThrough(const Tensor& tensor, @@ -170,4 +178,38 @@ void TransformSequence::stepThrough(const Tensor& tensor, } } +TransformSequence::TransformSequence( const Transform& transform ) : m_transforms{transform} { + buildCache(); +} + +TransformSequence::TransformSequence( const TransformSequence& t1, const TransformSequence& t2) { + for( const auto& t : t1 ) m_transforms.push_back(t); + for( const auto& t : t2 ) m_transforms.push_back(t); + Tensor::Index a,b,c; + + for( auto repr : { Transform::Representation::Spinor, Transform::Representation::Bispinor, Transform::Representation::Vector } ) + { + auto r1 = t1( repr ); + auto r2 = t2( repr ); + m_cache[repr] = r2(a,b) * r1(b,c); + m_cache[repr].st(); + } +} + +TransformSequence::TransformSequence( const Transform& t1, const Transform& t2) : m_transforms{t1,t2} { + Tensor::Index a,b,c; + + for( auto repr : { Transform::Representation::Spinor, Transform::Representation::Bispinor, Transform::Representation::Vector } ) + { + auto r1 = t1(repr); + auto r2 = t2(repr); + m_cache[repr] = r2(a,b) * r1(b,c); + m_cache[repr].st(); + } +} + +TransformSequence::TransformSequence() +{ + buildCache(); +} diff --git a/src/Wigner.cpp b/src/Wigner.cpp index ecbc091ddfe..e098175b9c6 100644 --- a/src/Wigner.cpp +++ b/src/Wigner.cpp @@ -97,9 +97,9 @@ double AmpGen::CG( } TransformSequence AmpGen::wickTransform( const Tensor& P, - const Expression& mass, + const Particle& particle, const int& ve, - const bool& handleZero ) + DebugSymbols* db ) { Tensor x({1,0,0}, Tensor::dim(3)); Tensor y({0,1,0}, Tensor::dim(3)); @@ -111,17 +111,19 @@ TransformSequence AmpGen::wickTransform( const Tensor& P, Transform rot = ve == + 1 ? 
Transform( cos_theta, sin_phi*x - cos_phi*y, Transform::Type::Rotate) : Transform(-cos_theta, -sin_phi*x + cos_phi*y, Transform::Type::Rotate) ; - TransformSequence sequence; - sequence.add(rot); - if( ve == -1 ) sequence.add( Transform( -1, x, Transform::Type::Rotate ) ); - if( std::real(mass()) != 0. ){ + TransformSequence sequence(rot); + if( ve == -1 ) sequence.push_back( Transform( -1, x, Transform::Type::Rotate ) ); + + if( !particle.isStable() || !( particle.props()->isPhoton() || particle.props()->isNeutrino() ) ) + { + Expression mass = fcn::sqrt( particle.massSq() ); Transform boost( P[3]/mass, z, Transform::Type::Boost ); - sequence.add(boost); + return TransformSequence(sequence, boost); } - return sequence; + return TransformSequence(sequence); } -std::pair angCoordinates(const Tensor& P) +std::pair angCoordinates(const Tensor& P, DebugSymbols* db) { Expression pz = make_cse( P[2] / sqrt( P[0]*P[0] + P[1] * P[1] + P[2]*P[2] ) ); Expression pt2 = make_cse( P[0]*P[0] + P[1]*P[1] ); @@ -137,6 +139,9 @@ Expression AmpGen::wigner_D(const std::pair& P, { auto little_d = make_cse ( wigner_d( P.first, J, lA, lB ) ); if( J != 0 && db != nullptr ){ + db->emplace_back("cos(θ)", P.first ); + db->emplace_back("Ω", atan2( Imag(P.second), Real(P.second) ) ); + db->emplace_back("d[" + std::to_string(J) +", " + std::to_string(lA) +", " + std::to_string(lB) +"](θ)", little_d ); @@ -206,27 +211,40 @@ std::vector userHelicityCouplings( const std::string& key ){ return couplings; } +std::string index_string(const Particle& particle) +{ + if( particle.isStable() ) return std::to_string(particle.index()); + std::string f = "{"; + for( const auto& i : particle.daughters() ) f += index_string( *i ); + return f+ "}"; +} + Expression AmpGen::helicityAmplitude(const Particle& particle, - TransformSequence& parentFrame, + const TransformSequence& parentFrame, const double& Mz, DebugSymbols* db, int sgn, - std::map* cachePtr ) + TransformCache* cachePtr ) { - if( cachePtr == 
nullptr ) cachePtr = new std::map(); + if( cachePtr == nullptr ) cachePtr = new TransformCache(); if( particle.daughters().size() > 2 ) return 1; if( particle.daughters().size() == 1 ) return helicityAmplitude( *particle.daughter(0), parentFrame, Mz, db, sgn, cachePtr); Tensor::Index a,b,c; - auto myFrame = parentFrame; - if( particle.spin() == 0 ) myFrame.clear(); + // if( particle.props()->twoSpin() == 0 ) myFrame.clear(); Tensor pInParentFrame = parentFrame(particle.P()); pInParentFrame.st(); - auto my_sequence = wickTransform(pInParentFrame, fcn::sqrt(particle.massSq()), sgn, true); - if( cachePtr->count(&particle) != 0 ) my_sequence = (*cachePtr)[&particle]; - else (*cachePtr)[&particle] = my_sequence; + auto key = index_string(particle); + if( cachePtr->count(key) == 0 ) + { + if( ! particle.isHead() || NamedParameter("helicityAmplitude::MovingParent", false) ) + { + (*cachePtr)[key] = TransformSequence(parentFrame, wickTransform(pInParentFrame, particle, sgn, db) ); + } + else (*cachePtr)[key] = TransformSequence(); + } + const TransformSequence& myFrame = (*cachePtr)[key]; - if( ! 
particle.isHead() ) myFrame.add( my_sequence ); if( particle.isStable() ) { if( particle.props()->twoSpin() == 0 ) return Mz==0; // a scalar @@ -272,7 +290,8 @@ Expression AmpGen::helicityAmplitude(const Particle& particle, vectorToString( particle_couplings, ", ", []( auto& ls ){ return "("+std::to_string(int(ls.first)) + ", " + std::to_string(ls.second) +")";} ) ); } Expression total = 0; - std::pair hco = angCoordinates( myFrame(d1.P()) ); + + std::pair hco = angCoordinates( myFrame(d1.P()) , db); for( auto& coupling : recoupling_constants ) { auto dm = coupling.m1 - coupling.m2; From c3e827ebbfc7b8d3d834be1aa9a371a7552b6c39 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Thu, 28 May 2020 14:48:54 +0200 Subject: [PATCH 43/67] fix fitting in scalar build --- AmpGen/CompiledExpressionBase.h | 2 +- AmpGen/EventListSIMD.h | 6 +- AmpGen/MinuitParameter.h | 4 +- AmpGen/OptionsParser.h | 1 - AmpGen/Store.h | 15 +- AmpGen/simd/avx2d_types.h | 7 +- AmpGen/simd/avx2f_types.h | 95 ++++- AmpGen/simd/avx_mathfun.h | 727 -------------------------------- AmpGen/simd/utils.h | 6 +- README.md | 13 +- README.tex.md | 19 +- apps/Generator.cpp | 33 +- src/CompilerWrapper.cpp | 2 +- src/EventList.cpp | 4 +- src/EventType.cpp | 3 + src/Integrator.cpp | 2 +- src/MinuitParameter.cpp | 7 +- src/Particle.cpp | 17 +- src/PolarisedSum.cpp | 13 +- test/test_avx2d.cpp | 2 +- 20 files changed, 183 insertions(+), 795 deletions(-) delete mode 100644 AmpGen/simd/avx_mathfun.h diff --git a/AmpGen/CompiledExpressionBase.h b/AmpGen/CompiledExpressionBase.h index 691f1f94685..ab81672d567 100644 --- a/AmpGen/CompiledExpressionBase.h +++ b/AmpGen/CompiledExpressionBase.h @@ -72,7 +72,7 @@ namespace AmpGen std::vector> m_cacheTransfers; std::shared_ptr m_resolver; std::vector m_additionalHeaders; - bool m_enableBatch = {false}; + bool m_enableBatch = {true}; private: void addDebug( std::ostream& stream ) const; void addDependentExpressions( std::ostream& stream, size_t& sizeOfStream ) const; diff --git 
a/AmpGen/EventListSIMD.h b/AmpGen/EventListSIMD.h index 9c7900dd507..487f7898cef 100644 --- a/AmpGen/EventListSIMD.h +++ b/AmpGen/EventListSIMD.h @@ -67,7 +67,7 @@ namespace AmpGen float_v weight(const unsigned& p) const { return m_weights[p]; } float_v genPDF(const unsigned& p) const { return m_genPDF[p]; } - void setWeight( const unsigned& block, const float_v& w, const float_v& g=1) + void setWeight( const unsigned& block, const float_v& w, const float_v& g=1.f) { m_weights[block] = w; m_genPDF[block] = g; @@ -75,8 +75,8 @@ namespace AmpGen void resize( const unsigned nEvents ) { m_data = Store( nEvents, m_eventType.eventSize() ); - m_weights.resize( aligned_size(), 1); - m_genPDF.resize( aligned_size(), 1 ); + m_weights.resize( aligned_size(), 1.f); + m_genPDF.resize( aligned_size(), 1.f ); } const Event operator[]( const size_t&) const; std::array::value> scatter(unsigned) const; diff --git a/AmpGen/MinuitParameter.h b/AmpGen/MinuitParameter.h index 355e6fb3b65..6dec82882e1 100644 --- a/AmpGen/MinuitParameter.h +++ b/AmpGen/MinuitParameter.h @@ -36,7 +36,7 @@ namespace AmpGen double errNeg() const; double* vp() { return &m_meanResult ; } - void setInit( const double& init ); + void setInit( const double& init, const double& step=-1 ); void setStepInit( const double& si ); void setFree() ; void scaleStep( const double& sf ); @@ -73,7 +73,7 @@ namespace AmpGen MinuitProxy(MinuitParameter* param = nullptr, const double& value=0) : m_parameter(param), m_value(value) { update(); } MinuitParameter* operator->() { return m_parameter; } const MinuitParameter* operator->() const { return m_parameter; } - private: +src/EventType.cpp private: MinuitParameter* m_parameter{nullptr}; double m_value; }; diff --git a/AmpGen/OptionsParser.h b/AmpGen/OptionsParser.h index 8bf2a2ca12a..2350fa03141 100644 --- a/AmpGen/OptionsParser.h +++ b/AmpGen/OptionsParser.h @@ -28,7 +28,6 @@ namespace AmpGen iterator end(); const_iterator begin() const; const_iterator end() const; - private: 
std::map> m_parsedLines; bool m_printHelp = {false}; diff --git a/AmpGen/Store.h b/AmpGen/Store.h index 8686a91d333..425b29433fd 100644 --- a/AmpGen/Store.h +++ b/AmpGen/Store.h @@ -132,6 +132,8 @@ namespace AmpGen { //auto& [p0, s] = f->second; /// bug in the C++ standard. Such fun. auto p0 = f->second.first; auto s = f->second.second; + DEBUG("Updating: " << fcn.name() << " index = " << p0 << " size_of = " << s << " on store: " << size() << " blocks = " << nBlocks() << " fields = " << nFields () ); + if constexpr( std::is_same< typename functor_type::return_type, void >::value ) { @@ -165,9 +167,12 @@ namespace AmpGen { std::map> m_index; }; } -// using aos_store = AmpGen::Store; -// using soa_store = AmpGen::Store; -// -// ENABLE_DEBUG(aos_store) -// ENABLE_DEBUG(soa_store) +#if DEBUG_LEVEL ==1 +using aos_store = AmpGen::Store; +using soa_store = AmpGen::Store; + +ENABLE_DEBUG(aos_store) +ENABLE_DEBUG(soa_store) +#endif + #endif diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 7293f30118d..0e07026d45c 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -9,10 +9,6 @@ #include #if USE_MVEC -// extern "C" __m256d _ZGVcN4v_cos(__m256d x); -// extern "C" __m256d _ZGVcN4v_sin(__m256d x); -// extern "C" __m256d _ZGVcN4v_exp(__m256d x); -// extern "C" __m256d _ZGVcN4v_log(__m256d x); extern "C" void _ZGVdN4vvv_sincos(__m256d x, __m256i ptrs, __m256i ptrc); #endif @@ -188,8 +184,7 @@ namespace AmpGen { inline complex_v select(const real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( select(mask, a.re, b ) , select(mask, a.im, 0.f) ); } inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? 
a : b; } inline complex_v exp( const complex_v& v ){ - real_v c, s; - sincos( v.im, c, s ); + auto [s,c] = sincos( v.im); return exp(v.re) * complex_v(c, s); } inline complex_v sqrt( const complex_v& v ) diff --git a/AmpGen/simd/avx2f_types.h b/AmpGen/simd/avx2f_types.h index 6ddd78327fd..096f2058cdc 100644 --- a/AmpGen/simd/avx2f_types.h +++ b/AmpGen/simd/avx2f_types.h @@ -5,11 +5,25 @@ #include #include #include -#include "AmpGen/simd/avx_mathfun.h" #include +#if USE_MVEC +extern "C" void _ZGVdN8vvv_sincos(__m256 x, __m256i ptrs, __m256i ptrc); +#endif + +#if USE_MVEC +#define libmvec_alias( function_name) \ + extern "C" __m256 _ZGVcN8v_##function_name(__m256 x); \ + inline real_v function_name( const real_v& v ){ return _ZGVcN8v_##function_name (v) ; } +#else +#define libmvec_alias( F ) \ + inline real_v F( const real_v& v ){ auto arr = v.to_array(); return real_v( \ + std::F(arr[0]), std::F(arr[1]), std::F(arr[2]), std::F(arr[3]), \ + std::F(arr[4]), std::F(arr[5]), std::F(arr[6]), std::F(arr[7]) ) ; } +#endif + namespace AmpGen { - namespace AVX2 { + namespace AVX2f { struct real_v { __m256 data; static constexpr unsigned size = 8 ; @@ -19,6 +33,12 @@ namespace AmpGen { real_v(const float& f ) : data( _mm256_set1_ps(f) ) {} real_v(const double& f ) : data( _mm256_set1_ps( float(f) )) {} real_v(const float* f ) : data( _mm256_loadu_ps( f ) ) {} + real_v(const float& x0, const float& x1, const float& x2, const float& x3, + const float& x4, const float& x5, const float& x6, const float& x7) + { + data = _mm256_set_ps(x0,x1,x2,x3,x4,x5,x6,x7); + } + void store( float* ptr ) const { _mm256_storeu_ps( ptr, data ); } std::array to_array() const { std::array b; store( &b[0] ); return b; } float at(const unsigned i) const { return to_array()[i] ; } @@ -44,11 +64,36 @@ namespace AmpGen { inline real_v operator>( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OS ); } inline real_v operator==( const real_v& lhs, const real_v& rhs ){ 
return _mm256_cmp_ps( lhs, rhs, _CMP_EQ_OS ); } inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_ps(v); } - inline real_v sin( const real_v& v ) { return sin256_ps(v) ; } - inline real_v cos( const real_v& v ) { return cos256_ps(v) ; } - inline real_v tan( const real_v& v ) { real_v s; real_v c; sincos256_ps(v, (__m256*)&s, (__m256*)&c) ; return s/c; } - inline real_v log( const real_v& v ) { return log256_ps(v) ; } - inline real_v exp( const real_v& v ) { return exp256_ps(v) ; } + libmvec_alias(sin) + libmvec_alias(cos) + libmvec_alias(exp) + libmvec_alias(log) + + inline void sincos( const real_v& v, real_v& s, real_v& c ) + { +#if USE_MVEC + __m256i sp = _mm256_add_epi64(_mm256_set1_epi32x((uint64_t)&s),_mm256_set_epi32x(28,24,20,16,12,8,4,0)); + __m256i cp = _mm256_add_epi64(_mm256_set1_epi32x((uint64_t)&c),_mm256_set_epi32x(28,24,20,16,12,8,4,0)); + _ZGVdN8vvv_sincos(v,sp,cp); +#else + s = sin(v); + c = cos(v); +#endif + } + inline std::pair sincos( const real_v& v ) + { + std::pair rt; + sincos( v, rt.first, rt.second ); + return rt; + } + inline real_v tan( const real_v& v ) + { + auto [s,c] = sincos( v ); + return s / c ; + } + + + inline real_v abs ( const real_v& v ) { return v & _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); } inline real_v select(const real_v& mask, const real_v& a, const real_v& b ) { return _mm256_blendv_ps( b, a, mask ); } inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? 
a : b; } @@ -57,9 +102,27 @@ namespace AmpGen { for( unsigned i = 0 ; i != 8 ; ++i ) rt[i] = std::atan2( by[i] , bx[i] ); return real_v (rt.data() ); } +// inline real_v gather( const double* base_addr, const real_v& offsets) +// { +// auto addr_avx =_mm256_cvtps_epi32(offsets); +// std::array addr; +// _mm256_storeu_ps( addr.data(), _mm256_cvtps_epi32(offsets) ); +// return real_v( +// base_addr[addr[0]], base_addr[addr[1]],base_addr[addr[2]],base_addr[addr[3]], +// base_addr[addr[4]], base_addr[addr[5]],base_addr[addr[6]],base_addr[addr[7]] ); +// +// //return _mm256_i32gather_ps(base_addr, _mm256_cvtps_epi32(offsets),sizeof(double)); +// } + inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c ) { - return _mm256_fmadd_ps(a, b, c ); + return _mm256_fmadd_ps(a, b, c); + } + inline real_v remainder( const real_v& a, const real_v& b ){ return a - real_v(_mm256_round_ps(a/b, _MM_FROUND_TO_NEG_INF)) * b; } + inline real_v fmod( const real_v& a, const real_v& b ) + { + auto r = remainder( abs(a), abs(b) ); + return select( a > 0., r, -r ); } struct complex_v { real_v re; @@ -76,12 +139,18 @@ namespace AmpGen { complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } - void store( float* sre, float* sim ){ re.store(sre); im.store(sim); } - void store( std::complex* r ){ + void store( float* sre, float* sim ) const { re.store(sre); im.store(sim); } + void store( std::complex* r ) const { auto re_arr = re.to_array(); auto im_arr = im.to_array(); for( unsigned i = 0 ; i != re_arr.size(); ++i ) r[i] = std::complex( re_arr[i], im_arr[i] ); } + auto to_array() const + { + std::array rt; + store( rt.data() ); + return rt; + } }; inline std::ostream& operator<<( std::ostream& os, const real_v& obj ) { @@ -113,9 +182,13 @@ namespace AmpGen { inline complex_v select(const 
real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( _mm256_blendv_ps( b, a.re, mask ), _mm256_blendv_ps( real_v(0.f), a.im, mask ) ); } inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? a : b; } inline complex_v exp( const complex_v& v ){ - real_v s; real_v c; sincos256_ps(v.im, (__m256*)&s, (__m256*)&c) ; + auto [s,c] = sincos( v.im ); return exp( v.re ) * complex_v(c, s); } + inline complex_v log( const complex_v& v ) + { + return complex_v( log( v.re ) , atan2(v.im, v.re) ); + } inline std::ostream& operator<<( std::ostream& os, const complex_v& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } #pragma omp declare reduction(+: real_v: \ omp_out = omp_out + omp_in) diff --git a/AmpGen/simd/avx_mathfun.h b/AmpGen/simd/avx_mathfun.h deleted file mode 100644 index 918a7a35f42..00000000000 --- a/AmpGen/simd/avx_mathfun.h +++ /dev/null @@ -1,727 +0,0 @@ -#ifndef AMPGEN_AVX_MATHFUN_H -#define AMPGEN_AVX_MATHFUN_H 1 - -/* - AVX implementation of sin, cos, sincos, exp and log - - Based on "sse_mathfun.h", by Julien Pommier - http://gruntthepeon.free.fr/ssemath/ - - Copyright (C) 2012 Giovanni Garberoglio - Interdisciplinary Laboratory for Computational Science (LISC) - Fondazione Bruno Kessler and University of Trento - via Sommarive, 18 - I-38123 Trento (Italy) - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. 
Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - (this is the zlib license) - - modified 8/4/2020: inline all functions, as they end up wrapped anyway -*/ - -#include - -/* yes I know, the top of this file is quite ugly */ -# define ALIGN32_BEG -# define ALIGN32_END __attribute__((aligned(32))) - -/* __m128 is ugly to write */ -typedef __m256 v8sf; // vector of 8 float (avx) -typedef __m256i v8si; // vector of 8 int (avx) -typedef __m128i v4si; // vector of 8 int (avx) - -#define _PI32AVX_CONST(Name, Val) \ - static const ALIGN32_BEG int _pi32avx_##Name[4] ALIGN32_END = { Val, Val, Val, Val } - -_PI32AVX_CONST(1, 1); -_PI32AVX_CONST(inv1, ~1); -_PI32AVX_CONST(2, 2); -_PI32AVX_CONST(4, 4); - - -/* declare some AVX constants -- why can't I figure a better way to do that? */ -#define _PS256_CONST(Name, Val) \ - static const ALIGN32_BEG float _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } -#define _PI32_CONST256(Name, Val) \ - static const ALIGN32_BEG int _pi32_256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } -#define _PS256_CONST_TYPE(Name, Type, Val) \ - static const ALIGN32_BEG Type _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } - -_PS256_CONST(1 , 1.0f); -_PS256_CONST(0p5, 0.5f); -/* the smallest non denormalized float number */ -_PS256_CONST_TYPE(min_norm_pos, int, 0x00800000); -_PS256_CONST_TYPE(mant_mask, int, 0x7f800000); -_PS256_CONST_TYPE(inv_mant_mask, int, ~0x7f800000); - -_PS256_CONST_TYPE(sign_mask, int, (int)0x80000000); -_PS256_CONST_TYPE(inv_sign_mask, int, ~0x80000000); - -_PI32_CONST256(0, 0); -_PI32_CONST256(1, 1); -_PI32_CONST256(inv1, ~1); -_PI32_CONST256(2, 2); -_PI32_CONST256(4, 4); -_PI32_CONST256(0x7f, 0x7f); - -_PS256_CONST(cephes_SQRTHF, 0.707106781186547524); -_PS256_CONST(cephes_log_p0, 7.0376836292E-2); 
-_PS256_CONST(cephes_log_p1, - 1.1514610310E-1); -_PS256_CONST(cephes_log_p2, 1.1676998740E-1); -_PS256_CONST(cephes_log_p3, - 1.2420140846E-1); -_PS256_CONST(cephes_log_p4, + 1.4249322787E-1); -_PS256_CONST(cephes_log_p5, - 1.6668057665E-1); -_PS256_CONST(cephes_log_p6, + 2.0000714765E-1); -_PS256_CONST(cephes_log_p7, - 2.4999993993E-1); -_PS256_CONST(cephes_log_p8, + 3.3333331174E-1); -_PS256_CONST(cephes_log_q1, -2.12194440e-4); -_PS256_CONST(cephes_log_q2, 0.693359375); - -#ifndef __AVX2__ - -typedef union imm_xmm_union { - v8si imm; - v4si xmm[2]; -} imm_xmm_union; - -#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \ - imm_xmm_union u __attribute__((aligned(32))); \ - u.imm = imm_; \ - xmm0_ = u.xmm[0]; \ - xmm1_ = u.xmm[1]; \ -} - -#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \ - imm_xmm_union u __attribute__((aligned(32))); \ - u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \ - } - - -#define AVX2_BITOP_USING_SSE2(fn) \ -static inline v8si avx2_mm256_##fn(v8si x, int a) \ -{ \ - /* use SSE2 instruction to perform the bitop AVX2 */ \ - v4si x1, x2; \ - v8si ret; \ - COPY_IMM_TO_XMM(x, x1, x2); \ - x1 = _mm_##fn(x1,a); \ - x2 = _mm_##fn(x2,a); \ - COPY_XMM_TO_IMM(x1, x2, ret); \ - return(ret); \ -} - -//#warning "Using SSE2 to perform AVX2 bitshift ops" -AVX2_BITOP_USING_SSE2(slli_epi32) -AVX2_BITOP_USING_SSE2(srli_epi32) - -#define AVX2_INTOP_USING_SSE2(fn) \ -static inline v8si avx2_mm256_##fn(v8si x, v8si y) \ -{ \ - /* use SSE2 instructions to perform the AVX2 integer operation */ \ - v4si x1, x2; \ - v4si y1, y2; \ - v8si ret; \ - COPY_IMM_TO_XMM(x, x1, x2); \ - COPY_IMM_TO_XMM(y, y1, y2); \ - x1 = _mm_##fn(x1,y1); \ - x2 = _mm_##fn(x2,y2); \ - COPY_XMM_TO_IMM(x1, x2, ret); \ - return(ret); \ -} - -//#warning "Using SSE2 to perform AVX2 integer ops" -AVX2_INTOP_USING_SSE2(and_si128) -AVX2_INTOP_USING_SSE2(andnot_si128) -AVX2_INTOP_USING_SSE2(cmpeq_epi32) -AVX2_INTOP_USING_SSE2(sub_epi32) -AVX2_INTOP_USING_SSE2(add_epi32) -#define avx2_mm256_and_si256 
avx2_mm256_and_si128 -#define avx2_mm256_andnot_si256 avx2_mm256_andnot_si128 -#else -#define avx2_mm256_slli_epi32 _mm256_slli_epi32 -#define avx2_mm256_srli_epi32 _mm256_srli_epi32 -#define avx2_mm256_and_si256 _mm256_and_si256 -#define avx2_mm256_andnot_si256 _mm256_andnot_si256 -#define avx2_mm256_cmpeq_epi32 _mm256_cmpeq_epi32 -#define avx2_mm256_sub_epi32 _mm256_sub_epi32 -#define avx2_mm256_add_epi32 _mm256_add_epi32 -#endif /* __AVX2__ */ - - -/* natural logarithm computed for 8 simultaneous float - return NaN for x <= 0 -*/ -inline v8sf log256_ps(v8sf x) { - v8si imm0; - v8sf one = *(v8sf*)_ps256_1; - - //v8sf invalid_mask = _mm256_cmple_ps(x, _mm256_setzero_ps()); - v8sf invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_LE_OS); - - x = _mm256_max_ps(x, *(v8sf*)_ps256_min_norm_pos); /* cut off denormalized stuff */ - - // can be done with AVX2 - imm0 = avx2_mm256_srli_epi32(_mm256_castps_si256(x), 23); - - /* keep only the fractional part */ - x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_mant_mask); - x = _mm256_or_ps(x, *(v8sf*)_ps256_0p5); - - // this is again another AVX2 instruction - imm0 = avx2_mm256_sub_epi32(imm0, *(v8si*)_pi32_256_0x7f); - v8sf e = _mm256_cvtepi32_ps(imm0); - - e = _mm256_add_ps(e, one); - - /* part2: - if( x < SQRTHF ) { - e -= 1; - x = x + x - 1.0; - } else { x = x - 1.0; } - */ - //v8sf mask = _mm256_cmplt_ps(x, *(v8sf*)_ps256_cephes_SQRTHF); - v8sf mask = _mm256_cmp_ps(x, *(v8sf*)_ps256_cephes_SQRTHF, _CMP_LT_OS); - v8sf tmp = _mm256_and_ps(x, mask); - x = _mm256_sub_ps(x, one); - e = _mm256_sub_ps(e, _mm256_and_ps(one, mask)); - x = _mm256_add_ps(x, tmp); - - v8sf z = _mm256_mul_ps(x,x); - - v8sf y = *(v8sf*)_ps256_cephes_log_p0; - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p1); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p2); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p3); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, 
*(v8sf*)_ps256_cephes_log_p4); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p5); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p6); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p7); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p8); - y = _mm256_mul_ps(y, x); - - y = _mm256_mul_ps(y, z); - - tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q1); - y = _mm256_add_ps(y, tmp); - - - tmp = _mm256_mul_ps(z, *(v8sf*)_ps256_0p5); - y = _mm256_sub_ps(y, tmp); - - tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q2); - x = _mm256_add_ps(x, y); - x = _mm256_add_ps(x, tmp); - x = _mm256_or_ps(x, invalid_mask); // negative arg will be NAN - return x; -} - -_PS256_CONST(exp_hi, 88.3762626647949f); -_PS256_CONST(exp_lo, -88.3762626647949f); - -_PS256_CONST(cephes_LOG2EF, 1.44269504088896341); -_PS256_CONST(cephes_exp_C1, 0.693359375); -_PS256_CONST(cephes_exp_C2, -2.12194440e-4); - -_PS256_CONST(cephes_exp_p0, 1.9875691500E-4); -_PS256_CONST(cephes_exp_p1, 1.3981999507E-3); -_PS256_CONST(cephes_exp_p2, 8.3334519073E-3); -_PS256_CONST(cephes_exp_p3, 4.1665795894E-2); -_PS256_CONST(cephes_exp_p4, 1.6666665459E-1); -_PS256_CONST(cephes_exp_p5, 5.0000001201E-1); - -inline v8sf exp256_ps(v8sf x) { - v8sf tmp = _mm256_setzero_ps(), fx; - v8si imm0; - v8sf one = *(v8sf*)_ps256_1; - - x = _mm256_min_ps(x, *(v8sf*)_ps256_exp_hi); - x = _mm256_max_ps(x, *(v8sf*)_ps256_exp_lo); - - /* express exp(x) as exp(g + n*log(2)) */ - fx = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_LOG2EF); - fx = _mm256_add_ps(fx, *(v8sf*)_ps256_0p5); - - /* how to perform a floorf with SSE: just below */ - //imm0 = _mm256_cvttps_epi32(fx); - //tmp = _mm256_cvtepi32_ps(imm0); - - tmp = _mm256_floor_ps(fx); - - /* if greater, substract 1 */ - //v8sf mask = _mm256_cmpgt_ps(tmp, fx); - v8sf mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS); - mask = _mm256_and_ps(mask, one); - fx = _mm256_sub_ps(tmp, mask); - - 
tmp = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C1); - v8sf z = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C2); - x = _mm256_sub_ps(x, tmp); - x = _mm256_sub_ps(x, z); - - z = _mm256_mul_ps(x,x); - - v8sf y = *(v8sf*)_ps256_cephes_exp_p0; - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p1); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p2); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p3); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p4); - y = _mm256_mul_ps(y, x); - y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p5); - y = _mm256_mul_ps(y, z); - y = _mm256_add_ps(y, x); - y = _mm256_add_ps(y, one); - - /* build 2^n */ - imm0 = _mm256_cvttps_epi32(fx); - // another two AVX2 instructions - imm0 = avx2_mm256_add_epi32(imm0, *(v8si*)_pi32_256_0x7f); - imm0 = avx2_mm256_slli_epi32(imm0, 23); - v8sf pow2n = _mm256_castsi256_ps(imm0); - y = _mm256_mul_ps(y, pow2n); - return y; -} - -_PS256_CONST(minus_cephes_DP1, -0.78515625); -_PS256_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); -_PS256_CONST(minus_cephes_DP3, -3.77489497744594108e-8); -_PS256_CONST(sincof_p0, -1.9515295891E-4); -_PS256_CONST(sincof_p1, 8.3321608736E-3); -_PS256_CONST(sincof_p2, -1.6666654611E-1); -_PS256_CONST(coscof_p0, 2.443315711809948E-005); -_PS256_CONST(coscof_p1, -1.388731625493765E-003); -_PS256_CONST(coscof_p2, 4.166664568298827E-002); -_PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI - - -/* evaluation of 8 sines at onces using AVX intrisics - - The code is the exact rewriting of the cephes sinf function. - Precision is excellent as long as x < 8192 (I did not bother to - take into account the special handling they have for greater values - -- it does not return garbage for arguments over 8192, though, but - the extra precision is missing). - - Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the - surprising but correct result. 
- -*/ -inline v8sf sin256_ps(v8sf x) { // any x - v8sf xmm1, xmm2 = _mm256_setzero_ps(), xmm3, sign_bit, y; - v8si imm0, imm2; - -#ifndef __AVX2__ - v4si imm0_1, imm0_2; - v4si imm2_1, imm2_2; -#endif - - sign_bit = x; - /* take the absolute value */ - x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_sign_mask); - /* extract the sign bit (upper one) */ - sign_bit = _mm256_and_ps(sign_bit, *(v8sf*)_ps256_sign_mask); - - /* scale by 4/Pi */ - y = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_FOPI); - - /* - Here we start a series of integer operations, which are in the - realm of AVX2. - If we don't have AVX, let's perform them using SSE2 directives - */ - -#ifdef __AVX2__ - /* store the integer part of y in mm0 */ - imm2 = _mm256_cvttps_epi32(y); - /* j=(j+1) & (~1) (see the cephes sources) */ - // another two AVX2 instruction - imm2 = avx2_mm256_add_epi32(imm2, *(v8si*)_pi32_256_1); - imm2 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_inv1); - y = _mm256_cvtepi32_ps(imm2); - - /* get the swap sign flag */ - imm0 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_4); - imm0 = avx2_mm256_slli_epi32(imm0, 29); - /* get the polynom selection mask - there is one polynom for 0 <= x <= Pi/4 - and another one for Pi/4 bool all_of( const simd_type& obj, const value_type& v ) { - if constexpr( ! 
is_vector_type::value ) return obj == v; - else return _mm256_movemask_pd( obj == v ) == 0xF; + if constexpr( size::value == 1 ) return obj == v; + if constexpr( size::value == 4 ) return _mm256_movemask_pd( obj == v ) == 0xF; + if constexpr( size::value == 8 ) return _mm256_movemask_ps( obj == v ) == 0xFF; + return false; } template bool all_of( const simd_type& obj) { diff --git a/README.md b/README.md index a862c53b3d2..7b00577c317 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,6 @@ There is at the time of writing only a master branch (FIXME) ##### Build requirements: * cmake >= 3.11.0 * C++ compiler with CXX standard >= 17 (gcc >= 8) - Defaults to Cxx17 * ROOT >= 6 with MathMore To (re)configure root with these options, use the additional command line options `-Dcxx14 -Dmathmore=ON` when configuring the installation of ROOT. @@ -39,6 +38,7 @@ There is at the time of writing only a master branch (FIXME) * OpenMP for multithreading * ROOT >= 6 with MathMore and Minuit2 enabled. The external version of Minuit2 provided as an external package of GooFit is used if the ROOT version is not unavailable. To (re)configure root with these options, use the additional command line options `-Dminuit2=ON -Dmathmore=ON` when configuring the installation of ROOT. +* libmvec for vectorised math operations #### Building The configuration of the AmpGen build is performed by cmake. @@ -81,7 +81,16 @@ In order to build stand-alone on LXPLUS, you will need a valid development envir source /cvmfs/sft.cern.ch/lcg/views/setupViews.sh LCG_94python3 x86_64-centos7-gcc8-opt ``` The LCG versions and CMTCONFIG may need to be updated over time. -The ROOT versions installed on cvmfs generally require C++17 support when building, i.e. when running cmake add the option `-DCMAKE_CXX_STANDARD=17`. + +#### Using AVX + +AmpGen (v > 2.0) can be setup to generate vectorised code for faster evaluation of amplitudes / computation of integrals on compatible hardware. 
+Such extensions can be enabled by setting the flag +``` +-DUSE_SIMD=AVX2d +``` +Which indicates that code should be generated using the AVX2 instruction set using double precision. +Single precision is also supported, but not recommended for fitting. ### Options files and decay descriptors diff --git a/README.tex.md b/README.tex.md index 1b5023c912c..46878dc36b6 100644 --- a/README.tex.md +++ b/README.tex.md @@ -84,6 +84,17 @@ All standalone programs can accept both options files and command line arguments They also support `--help` to print help for key arguments to the program. This will also run the program, as arguments can be defined throughout each of the programs rather than all defined at the beginning. +#### Using SIMD instructions. + +AmpGen (v > 2.0) can be set up to generate vectorised code for faster evaluation of amplitudes / computation of integrals on compatible hardware. +Such extensions can be enabled by setting the flag +``` +-DUSE_SIMD=AVX2d +``` +Which indicates that code should be generated using the AVX2 instruction set using double precision. +Single precision is also supported, but not recommended for fitting. + + ### Options files and decay descriptors Options files will generally contain the description of one or more particle decays, @@ -167,6 +178,12 @@ Several models for different @f$D^0@f$ decays published by the LHCb collaboratio ./Generator options/D02Kpipipi.opt --EventType "D0 K- pi+ pi+ pi-" --nEvents 1000000 ``` +#### Phase space generators + +Generating events consists of two phases. Firstly, the kinematics of candidates are generated according to some distribution, typically flat in the phase space. +The target distribution is then obtained using the accept-reject method, that is, for a candidate at position @f$x@f$ in the phase space, the candidate is accepted with a probability proportional to the value of the target distribution at @f$x@f$. + + ### Debugger The debugger application is a tool for producing verbose information debugging for amplitudes and models. 
It is used on an options file as @@ -277,7 +294,7 @@ AmpGen implements both the covariant tensor (or Rarita-Schwinger) and canonical Both formalisms refer to states of well-defined orbital angular momentum, as opposed to the helicity states, as the states with well-defined orbital angular momentum have a straightforward parity and momentum dependences. The default formalism is the covariant tensor formalism, but this can be switched to the canonical formalism changing the flag ``` -Particle::SpinFormalism Canonical ## default = Covariant +Particle::SpinFormalism Canonical ``` in the options file. The spin formalism for an individual decay chain can be specified by changing the attribute SpinFormalism in the decay descriptor. For example, diff --git a/apps/Generator.cpp b/apps/Generator.cpp index 00ea2e10643..1bc01fe1a8b 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -103,21 +103,21 @@ template void generateEvents( EventList& events { Generator signalGenerator(events.eventType(), rndm); signalGenerator.setBlockSize(blockSize); -// signalGenerator.setNormFlag(normalise); + signalGenerator.setNormFlag(normalise); signalGenerator.fillEventList(pdf, events, nEvents ); } else if( phsp_type == phspTypes::RecursivePhaseSpace ) { Generator signalGenerator( getTopology(pdf), events.eventType(), rndm ); signalGenerator.setBlockSize(blockSize); -// signalGenerator.setNormFlag(normalise); + signalGenerator.setNormFlag(normalise); signalGenerator.fillEventList(pdf, events, nEvents); } else if( phsp_type == phspTypes::TreePhaseSpace ) { Generator signalGenerator(getDecayChains(pdf), events.eventType(), rndm); signalGenerator.setBlockSize(blockSize); -// signalGenerator.setNormFlag(normalise); + signalGenerator.setNormFlag(normalise); signalGenerator.fillEventList(pdf, events, nEvents ); } else { @@ -134,26 +134,24 @@ int main( int argc, char** argv ) size_t blockSize = NamedParameter ("BlockSize", 5000000, "Number of events to generate per block" ); int seed = 
NamedParameter ("Seed" , 0, "Random seed used in event Generation" ); std::string outfile = NamedParameter("Output" , "Generate_Output.root" , "Name of output file" ); - auto pdfType = NamedParameter( "Type", pdfTypes::CoherentSum, optionalHelpString("Generator configuration to use:", - { {"CoherentSum" , "Full phase-space generator with (pseudo)scalar amplitude"} - , {"PolarisedSum" , "Full phase-space generator with particles carrying spin in the initial/final states"} - , {"FixedLib" , "Full phase-space generator with an amplitude from a precompiled library"}} ) ); + auto pdfType = NamedParameter( "Type", pdfTypes::CoherentSum, optionalHelpString("Type of PDF to use:", + { {"CoherentSum" , "Describes decays of a (pseudo)scalar particle to N pseudoscalars"} + , {"PolarisedSum" , "Describes the decay of a particle with spin to N particles carrying spin."} + , {"FixedLib" , "PDF to describe a decay from a precompiled library, such as those provided to GAUSS."}} ) ); auto phspType = NamedParameter( "PhaseSpace", phspTypes::PhaseSpace, optionalHelpString("Phase-space generator to use:", - { {"CoherentSum" , "Full phase-space generator with (pseudo)scalar amplitude"} - , {"PolarisedSum" , "Full phase-space generator with particles carrying spin in the initial/final states"} - , {"FixedLib" , "Full phase-space generator with an amplitude from a precompiled library"}} ) ); + { {"PhaseSpace" , "Phase space generation based on Raubold-Lynchi algorithm (recommended)."} + , {"TreePhaseSpace" , "Divides the phase-space into a series of quasi two-body phase-spaces for efficiently generating narrow states."} + , {"RecursivePhaseSpace", "Includes possible quasi-stable particles and the phase spaces of their decay products, such as Λ baryons."}} ) ); std::string lib = NamedParameter("Library","","Name of library to use for a fixed library generation"); size_t nBins = NamedParameter ("nBins" ,100, "Number of bins for monitoring plots." 
); #ifdef _OPENMP unsigned int concurentThreadsSupported = std::thread::hardware_concurrency(); unsigned int nCores = NamedParameter( "nCores", concurentThreadsSupported, "Number of cores to use (OpenMP only)" ); - INFO("Using: " << nCores << " / " << concurentThreadsSupported << " threads" ); omp_set_num_threads( nCores ); omp_set_dynamic( 0 ); #endif - INFO("Writing output: " << outfile ); TRandom3 rand; rand.SetSeed( seed + 934534 ); @@ -171,12 +169,19 @@ int main( int argc, char** argv ) } else eventType = EventType( NamedParameter( "EventType" , "", "EventType to generate, in the format: \033[3m parent daughter1 daughter2 ... \033[0m" ).getVector(), NamedParameter( "GenerateTimeDependent", false , "Flag to include possible time dependence of the amplitude") ); - - if ( NamedParameter( "conj", false ) == true ) { + + bool conj = NamedParameter("Conj",false, "Flag to generate the CP conjugate amplitude under the assumption of CP conservation"); + if ( conj == true ) { eventType = eventType.conj(); INFO( eventType ); AddCPConjugate(MPS); } + if( OptionsParser::printHelp() ) return 0; + + INFO("Writing output: " << outfile ); + #ifdef _OPENMP + INFO("Using: " << nCores << " / " << concurentThreadsSupported << " threads" ); + #endif INFO("Generating time-dependence? 
" << eventType.isTimeDependent() ); EventList accepted( eventType ); diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index 1cc55061d28..80962299085 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -54,7 +54,7 @@ void CompilerWrapper::generateSource( const CompiledExpressionBase& expression, std::ofstream output( filename ); for ( auto& include : m_includes ) output << "#include <" << include << ">\n"; if( expression.fcnSignature().find("AVX2d") != std::string::npos ) output << "#include \"AmpGen/simd/avx2d_types.h\"\n; using namespace AmpGen::AVX2d;\n" ; - else if( expression.fcnSignature().find("AVX2") != std::string::npos ) output << "#include \"AmpGen/simd/avx2_types.h\"\n; using namespace AmpGen::AVX2;\n;" ; + else if( expression.fcnSignature().find("AVX2f") != std::string::npos ) output << "#include \"AmpGen/simd/avx2f_types.h\"\n; using namespace AmpGen::AVX2f;\n;" ; else if( expression.fcnSignature().find("AVX512d") != std::string::npos ) output << "#include \"AmpGen/simd/avx512d_types.h\"\n; using namespace AmpGen::AVX512d;\n;" ; else if( expression.fcnSignature().find("AVX512") != std::string::npos ) output << "#include \"AmpGen/simd/avx512_types.h\"\n; using namespace AmpGen::AVX512;\n;" ; output << expression << std::endl; diff --git a/src/EventList.cpp b/src/EventList.cpp index 8fe00dcb738..db09d143e5d 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -124,8 +124,8 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) auto automaticOrdering = m_eventType.automaticOrdering(); for (const auto& evt : tr) { if( inputUnits != Units::GeV ) for( unsigned k = 0; k != eventFormat.size(); ++k ) temp[k] *= to_double(inputUnits); - if( idBranches.size() != 0 && !automaticOrdering(temp, ids) ) - WARNING("Failed to order event: " << evt ); + // if( idBranches.size() != 0 && !automaticOrdering(temp, ids) ) + // WARNING("Failed to order event: " << evt ); if( applySym ) symmetriser(temp); if( ! 
hasEnergy ){ for( unsigned int k = 0 ; k != m_eventType.size(); ++k ) diff --git a/src/EventType.cpp b/src/EventType.cpp index c111e0ced4d..06e3e1f33f6 100644 --- a/src/EventType.cpp +++ b/src/EventType.cpp @@ -20,12 +20,15 @@ #include "AmpGen/Utilities.h" #include "AmpGen/Units.h" #include "AmpGen/Event.h" +#include "AmpGen/OptionsParser.h" using namespace AmpGen; std::string convertTeXtoROOT(std::string input); EventType::EventType( const std::vector& particleNames, const bool& isTD ) : m_timeDependent( isTD ) { + if ( OptionsParser::printHelp() ) return; + if ( particleNames.size() < 3 ) { // Mother plus two daughters minimum required ERROR( "Not enough particles in event type: " << particleNames[0] << " size = " << particleNames.size() ); throw std::runtime_error( "Not enough particles listed in particle names! Was it defined?" ); diff --git a/src/Integrator.cpp b/src/Integrator.cpp index 70da64875db..3736ef37a5d 100644 --- a/src/Integrator.cpp +++ b/src/Integrator.cpp @@ -52,7 +52,7 @@ void Integrator::integrateBlock() im = im + m_weight[i] * imag(c); #endif } - *m_integrals[roll].result = utils::sum_elements( complex_v(re, im) ) / m_norm; + *m_integrals[roll].result = complex_t(utils::sum_elements( complex_v(re, im) )) / m_norm; } m_counter = 0; } diff --git a/src/MinuitParameter.cpp b/src/MinuitParameter.cpp index 3c2d5941e97..896dc3a038d 100644 --- a/src/MinuitParameter.cpp +++ b/src/MinuitParameter.cpp @@ -57,7 +57,12 @@ void MinuitParameter::setFree() { INFO("Setting parameter: " << m_name << " free void MinuitParameter::setCurrentFitVal( double cfv ) { m_meanResult = cfv; } -void MinuitParameter::setInit( const double& val ) { m_meanInit = val; } +void MinuitParameter::setInit( const double& val, const double& step ) +{ + m_meanInit = val; + m_meanResult = val; + if( step != -1 ) m_stepInit = step; +} void MinuitParameter::setResult( double fitMean, double fitErr, double fitErrPos, double fitErrNeg ) { diff --git a/src/Particle.cpp b/src/Particle.cpp 
index 35d2ac2e2a7..784011c2748 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -411,8 +411,7 @@ Expression Particle::getExpression( DebugSymbols* db, const std::vector& st } } if ( includeSpin && m_spinFormalism == spinFormalism::Canonical ){ - TransformSequence t = TransformSequence(); - spinFactor = helicityAmplitude(*this, t, m_props->isBoson() ? polState() : double(polState())/2.0, db); + spinFactor = helicityAmplitude(*this, TransformSequence(), m_props->isBoson() ? polState() : double(polState())/2.0, db); } if( db != nullptr ){ std::string finalStateString=""; @@ -549,18 +548,18 @@ Tensor Particle::externalSpinTensor(const int& polState, DebugSymbols* db ) cons std::pair Particle::orbitalRange( const bool& conserveParity ) const { if( m_daughters.size() == 0 ) return {0, 0}; - if( m_daughters.size() == 1 ) return {0,0}; + if( m_daughters.size() == 1 ) return {0, 0}; if( m_daughters.size() != 2 ) { ERROR( "L not well defined for nDaughters == " << m_daughters.size() ); return {999, 998}; } const int S = m_props->twoSpin(); - const int s1 = daughter( 0 )->props()->twoSpin(); - const int s2 = daughter( 1 )->props()->twoSpin(); - int min = std::abs( S - s1 - s2 ); - if ( std::abs( S + s1 - s2 ) < min ) min = std::abs( S + s1 - s2 ); - if ( std::abs( S - s1 + s2 ) < min ) min = std::abs( S - s1 + s2 ); - int max = S + s1 + s2; + const int s1 = daughter(0)->props()->twoSpin(); + const int s2 = daughter(1)->props()->twoSpin(); + int min = std::abs( S - s1 - s2 ); + min = std::min(min, std::abs( S + s1 - s2 )); + min = std::min(min, std::abs( S - s1 + s2 )); + int max = S + s1 + s2; min /= 2; max /= 2; DEBUG( "Range = " << min << " -> " << max << " conserving parity ? 
" << conserveParity << " J = " << S << " s1= " << s1 << " s2= " << s2 ); diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 0684979664a..1db1caeb531 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -35,7 +35,9 @@ using namespace AmpGen; using namespace std::complex_literals; -// ENABLE_DEBUG( PolarisedSum ) +#if DEBUGLEVEL == 1 + ENABLE_DEBUG( PolarisedSum ) +#endif namespace AmpGen { make_enum(spaceType, spin, flavour) } @@ -202,9 +204,10 @@ void PolarisedSum::prepare() for_each_sequence(m_matrixElements.begin(), m_matrixElements.end(), flagUpdate, updateData, updateInteg); if( m_integrator.isReady() ) updateNorms(); std::for_each( m_matrixElements.begin(), m_matrixElements.end(), resetFlags ); -// if( m_nCalls % 10000 == 0 ) debug_norm(); + if constexpr( detail::debug_type::value ) + if( m_nCalls % 10000 == 0 ) debug_norm(); m_pdfCache.update(m_cache, m_probExpression); - DEBUG( "m_pdfCache[0] = " << m_pdfCache[0] << " w/o caching = " << (m_weight/m_norm) * getValNoCache(m_events->at(0))); + DEBUG( "m_pdfCache[0] = " << utils::at(m_pdfCache[0],0) << " w/o caching = " << getValNoCache(m_events->at(0)) << " w = " << m_weight << " N = " << m_norm ); m_nCalls++; } @@ -223,6 +226,7 @@ double PolarisedSum::operator()( const double*, const unsigned index ) const void PolarisedSum::debug_norm() { + if( !m_integrator.isReady() ) return; double norm_slow = 0; for( auto& evt : *m_integrator.events() ) norm_slow += evt.weight() * getValNoCache(evt) / evt.genPdf(); @@ -275,10 +279,9 @@ Tensor PolarisedSum::transitionMatrix() real_t PolarisedSum::operator()(const Event& evt) const { - return utils::at( m_pdfCache[ evt.index() / utils::size::value ], evt.index() % utils::size::value ); + return (m_weight/m_norm) * utils::at( m_pdfCache[ evt.index() / utils::size::value ], evt.index() % utils::size::value ); } - double PolarisedSum::norm() const { return m_norm; diff --git a/test/test_avx2d.cpp b/test/test_avx2d.cpp index 965b8426abd..a458fa294d9 
100644 --- a/test/test_avx2d.cpp +++ b/test/test_avx2d.cpp @@ -7,7 +7,7 @@ namespace utf = boost::unit_test; -#if ENABLE_AVX +#if ENABLE_AVX2d #include "AmpGen/simd/utils.h" using namespace AmpGen; From 87d38388430a260f0ceaeda8f2773cb7ecf6e789 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Thu, 28 May 2020 15:23:42 +0200 Subject: [PATCH 44/67] fix fitting in scalar build --- AmpGen/MinuitParameter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AmpGen/MinuitParameter.h b/AmpGen/MinuitParameter.h index 6dec82882e1..dbdaadf10bf 100644 --- a/AmpGen/MinuitParameter.h +++ b/AmpGen/MinuitParameter.h @@ -73,7 +73,7 @@ namespace AmpGen MinuitProxy(MinuitParameter* param = nullptr, const double& value=0) : m_parameter(param), m_value(value) { update(); } MinuitParameter* operator->() { return m_parameter; } const MinuitParameter* operator->() const { return m_parameter; } -src/EventType.cpp private: + private: MinuitParameter* m_parameter{nullptr}; double m_value; }; From 07c4773c03e0c981e8ddcbae8eff7b447cbdd7e8 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Sun, 7 Jun 2020 23:43:11 +0200 Subject: [PATCH 45/67] improve customised particles, improve compiler threading --- AmpGen/CompiledExpression.h | 58 ++++++++++++++------------- AmpGen/EventList.h | 8 +++- AmpGen/FitResult.h | 4 +- AmpGen/OptionsParser.h | 5 ++- AmpGen/ParticleProperties.h | 49 +++++++++++------------ AmpGen/ParticlePropertiesList.h | 1 + apps/DataConverter.cpp | 12 ++---- src/CoherentSum.cpp | 2 +- src/EventList.cpp | 3 +- src/OptionsParser.cpp | 56 ++++++++++++-------------- src/Particle.cpp | 8 ++-- src/ParticleProperties.cpp | 59 +++++++++++++++++++++------ src/ParticlePropertiesList.cpp | 22 +++++++++++ src/PolarisedSum.cpp | 70 ++++++++++++++++----------------- src/ThreeBodyCalculators.cpp | 4 +- 15 files changed, 211 insertions(+), 150 deletions(-) diff --git a/AmpGen/CompiledExpression.h b/AmpGen/CompiledExpression.h index 6b3812564b6..1c08c59bd78 100644 --- 
a/AmpGen/CompiledExpression.h +++ b/AmpGen/CompiledExpression.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include "ArgumentPack.h" namespace AmpGen { @@ -21,11 +23,17 @@ namespace AmpGen usually this is a std::complex, but in principal support also exists for computing coupled channel propagators (i.e. returning array types) */ + + + namespace detail { template struct size_of { static constexpr unsigned value = sizeof(T); }; template <> struct size_of { static constexpr unsigned value = 0; } ; } + DECLARE_ARGUMENT(disableBatch, bool); + template class CompiledExpression; + template class CompiledExpression : public CompiledExpressionBase @@ -37,18 +45,26 @@ namespace AmpGen DynamicFCN>(ARGS...)> m_fdb; std::vector m_externals = {}; bool m_hasExternalsChanged = {false}; - + public: typedef RETURN_TYPE return_type; - - CompiledExpression( const Expression& expression, - const std::string& name, - const std::map& evtMapping = - std::map(), - const DebugSymbols& db = {}, - const MinuitParameterSet* mps = nullptr ) - : CompiledExpressionBase( expression, name, db, evtMapping ) + + template + CompiledExpression( const Expression& expression, const std::string& name, const namedArgs&... args ) : CompiledExpressionBase(expression, name) { + const MinuitParameterSet* mps = nullptr; + auto process_argument = [&]( const auto& arg ) + { + if constexpr( std::is_convertible::value ) this->m_db = arg; + else if constexpr( std::is_convertible>::value ) this->m_evtMap = arg; + else if constexpr( std::is_convertible::value ) this->m_enableBatch = false; + else if constexpr( std::is_convertible::value ) mps = arg; + else if constexpr( std::is_convertible::value ) mps = arg; + else if constexpr( std::is_convertible::value ) mps = &arg; + else ERROR("Unrecognised argument: " << type_string(arg) ); + }; + for_each( std::tuple(args...), process_argument); + DEBUG("Made expression: " << m_name << " " << progName() << " " << mps << " batch enabled ? 
" << this->m_enableBatch ); resolve(mps); } @@ -145,7 +161,8 @@ namespace AmpGen DEBUG( "Compiling " << name() << " = " << hash() ); stream << "extern \"C\" " << returnTypename() << " " << progName() << "_wParams" << "( const double*__restrict__ E ){" << std::endl; - stream << " double externalParameters [] = {" << (m_externals.size() == 0 ? "0" : vectorToString(m_externals,", ") ) <<"};\n" ; + stream << " double externalParameters [] = {" << + (m_externals.size() == 0 ? "0" : vectorToString(m_externals,", ", [](auto& line){ return std::to_string(line); }) ) <<"};\n" ; stream << " return " << progName() << "( externalParameters, E ); // E is P \n}\n"; } @@ -213,8 +230,7 @@ namespace AmpGen return rt; } - template - CompiledExpression + template CompiledExpression make_expression( const Expression& expression, const std::string& name , const bool& verbose=false) { CompiledExpression rt(expression,name); @@ -222,25 +238,13 @@ namespace AmpGen rt.prepare(); return rt; } - template + template CompiledExpression make_expression( const Expression& expression, const std::string& name, - const MinuitParameterSet& mps ) - { - CompiledExpression rt(expression,name,{},{},&mps); - rt.compile(); - rt.prepare(); - return rt; - } - template - CompiledExpression - make_expression( const Expression& expression, - const std::string& name, - const std::map & evtMap, - const MinuitParameterSet& mps ) + const arg_types&... 
args) { - CompiledExpression rt(expression,name,evtMap,{},&mps); + CompiledExpression rt(expression,name,args...); rt.compile(); rt.prepare(); return rt; diff --git a/AmpGen/EventList.h b/AmpGen/EventList.h index 8d3f742cf6c..8495845e2dc 100644 --- a/AmpGen/EventList.h +++ b/AmpGen/EventList.h @@ -75,6 +75,12 @@ namespace AmpGen const double* block(const unsigned pos) const { return m_data[pos].address(); } real_t weight( const size_t& pos) const { return m_data[pos].weight(); } real_t genPDF( const size_t& pos) const { return m_data[pos].genPdf(); } + unsigned key(const std::string& key) const + { + auto it = m_extensions.find(key); + if( it == m_extensions.end() ) return m_data[0].size() - 1; + return it->second; + } void reserve( const size_t& size ); void resize ( const size_t& size ); void push_back( const Event& evt ); @@ -134,7 +140,7 @@ namespace AmpGen { unsigned currentSize = size(); m_data.erase( std::remove_if( m_data.begin(), m_data.end(), fcn ) , m_data.end() ); - INFO("Filter removes: " << currentSize - size() << " / " << currentSize << " events"); + INFO("Filter retains " << size() << " / " << currentSize << " events"); } template unsigned count( functor&& fcn ) const diff --git a/AmpGen/FitResult.h b/AmpGen/FitResult.h index 01c09fd83e2..932eab0f979 100644 --- a/AmpGen/FitResult.h +++ b/AmpGen/FitResult.h @@ -2,6 +2,7 @@ #define AMPGEN_FITRESULT_H #include "TMatrixD.h" +#include "TClass.h" #include "AmpGen/FitFraction.h" #include "AmpGen/MinuitParameter.h" @@ -15,6 +16,7 @@ namespace AmpGen class FitResult { public: + ~FitResult(){}; FitResult(); explicit FitResult( const FitResult& other ); explicit FitResult( const std::string& filename ); @@ -56,7 +58,7 @@ namespace AmpGen LinearErrorPropagator getErrorPropagator( const bool& extended = false ) const; private: - MinuitParameterSet* m_mps; + MinuitParameterSet* m_mps = {nullptr}; double m_chi2 = {0}; double m_LL = {-999}; double m_nBins = {0}; diff --git a/AmpGen/OptionsParser.h 
b/AmpGen/OptionsParser.h index 2350fa03141..ec9dc04832d 100644 --- a/AmpGen/OptionsParser.h +++ b/AmpGen/OptionsParser.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace AmpGen { @@ -30,14 +31,16 @@ namespace AmpGen const_iterator end() const; private: std::map> m_parsedLines; + std::map)> > m_keywords; bool m_printHelp = {false}; bool m_quiet = {false}; static OptionsParser* gOptionsParser; - OptionsParser() = default; + OptionsParser(); bool ignoreThisLine( const std::string& line ); void readStream( std::istream& is ); std::vector makeParsedStrings( const std::string& line, int& braceDepth ) const; + void addArg(const std::vector& tokens ); }; } // namespace AmpGen #endif diff --git a/AmpGen/ParticleProperties.h b/AmpGen/ParticleProperties.h index fc16668483f..74adeab6dd3 100644 --- a/AmpGen/ParticleProperties.h +++ b/AmpGen/ParticleProperties.h @@ -44,8 +44,8 @@ namespace AmpGen int twoSpin() const { return m_twoSpin; } ///< Returns twice the spin of the particle int charge() const { return m_charge; } ///< Returns the (electrical) charge of the particle char S() const { return m_status; } ///< Returns the existence status of the particle, i.e. whether it is confirmed by multiple experiments - std::string I() const { return m_isospin; } ///< Returns the isospin of the particle as a string. - std::string J() const { return m_JtotalSpin; } ///< Returns the total angular momentum of the particle + std::string I() const { return m_isospin; } ///< Returns the isospin of the particle as a string. + std::string J() const; std::string label() const { return m_texName; } ///< Returns the LaTeX formatted label for the particle std::string name() const; ///< Returns the particle name std::string spinName() const; ///< Returns the name of the particles spin. 
@@ -57,8 +57,8 @@ namespace AmpGen bool isNeutrino() const; ///< Check if the particle is a neutrino bool isPhoton() const; ///< Check if the particle is a photon - - const QuarkContent& netQuarkContent() const { return m_netQuarkContent; } ///< Returns the particle's quark content + void setProperty(const std::string& key, const std::string& value); ///< set a propery of a particle by key + const QuarkContent& quarkContent() const { return m_quarkContent; } ///< Returns the particle's quark content void setLabel( const std::string& label ) { m_texName = label; } ///< Set the LaTeX label of the particle void setName( const std::string& name ) { @@ -79,29 +79,28 @@ namespace AmpGen static const ParticleProperties* get( const std::string& name, const bool& quiet=false ); private: - double m_mass; ///< mass [GeV] - double m_mErrPlus; ///< +ve mass error [GeV] - double m_mErrMinus; ///< -ve mass error [GeV] - double m_width; ///< width [GeV] - double m_wErrPlus; ///< +ve width error [GeV] - double m_wErrMinus; ///< -ve width error [GeV] - double m_radius; ///< hadronic radius - int m_Gparity; ///< G-parity - int m_parity; ///< Parity - int m_Cparity; ///< Charge 'parity' - int m_pdgID; ///< PDG id - int m_Rexist; ///< likelihood of existence, baryons only - int m_charge; ///< electrical charge - int m_twoSpin; ///< twice the spin - std::string m_isospin; ///< isospin - std::string m_JtotalSpin; ///< total spin - std::string m_name; ///< particle name - std::string m_quarks; ///< quark string - std::string m_texName; ///< latex label of particle - std::string m_chargeString; ///< string for particle charge + double m_mass{0}; ///< mass [GeV] + double m_mErrPlus{0}; ///< +ve mass error [GeV] + double m_mErrMinus{0}; ///< -ve mass error [GeV] + double m_width{0}; ///< width [GeV] + double m_wErrPlus{0}; ///< +ve width error [GeV] + double m_wErrMinus{0}; ///< -ve width error [GeV] + double m_radius{0}; ///< hadronic radius + int m_Gparity{0}; ///< G-parity + int 
m_parity{0}; ///< Parity + int m_Cparity{0}; ///< Charge 'parity' + int m_pdgID{0}; ///< PDG id + int m_Rexist{0}; ///< likelihood of existence, baryons only + int m_charge{0}; ///< electrical charge + int m_twoSpin{0}; ///< twice the spin + std::string m_isospin{""}; ///< isospin + //std::string m_JtotalSpin{""}; ///< total spin + std::string m_name{""}; ///< particle name + std::string m_texName{""}; ///< latex label of particle + std::string m_chargeString{""}; ///< string for particle charge char m_Aformat; ///< anti-particle format character char m_status; ///< status (estalished or not etc) - QuarkContent m_netQuarkContent; ///< The quark content of the state (uD, uud etc.) + QuarkContent m_quarkContent; ///< The quark content of the state (uD, uud etc.) bool m_isValid; ///< Flag to check whether the ParticleProperties have configured correctly bool m_customName = {false}; ///< Flag to make custom name void antiQuarks(); diff --git a/AmpGen/ParticlePropertiesList.h b/AmpGen/ParticlePropertiesList.h index 43806094b46..289ccd8343d 100644 --- a/AmpGen/ParticlePropertiesList.h +++ b/AmpGen/ParticlePropertiesList.h @@ -48,6 +48,7 @@ namespace AmpGen void print( std::ostream& out = std::cout ) const; bool readLatexLabels( const std::string& name ); void makeMappings(); + void addParticle(const std::vector& properties ); }; std::ostream& operator<<( std::ostream& out, const ParticlePropertiesList& ppl ); } // namespace AmpGen diff --git a/apps/DataConverter.cpp b/apps/DataConverter.cpp index 2d8a2dc022c..23d84679183 100644 --- a/apps/DataConverter.cpp +++ b/apps/DataConverter.cpp @@ -208,14 +208,10 @@ int main( int argc, char* argv[] ) INFO("Closing file..."); outputFile->Close(); TFile* outputPlotFile = TFile::Open( plotsName.c_str(), "RECREATE" ); - auto plots = evts.makeDefaultProjections(); - auto proj = Projection([](auto& event){ return sqrt(event.s({0,1,2,3})) ; }, "m_D", "m_D",100, 1.8, 1.9 ); - for ( auto& plot : plots ) { - INFO( "Writing plot " << 
plot->GetName() << " to file" ); - plot->Write(); + auto projections = evtType.defaultProjections(); + for ( auto& p : projections ) { + p( evts ) -> Write(); + p( evts, WeightFunction([](auto& evt){ return 1; }), PlotOptions::Prefix("noweight") )->Write(); } - proj( evts )->Write(); - for( int i = 0 ;i != 4; ++i ) - Projection([i](auto& event){ return sqrt(event.s(i)) ; }, "m_"+std::to_string(i), "m_D",100, 0, 1.0 )(evts)->Write(); outputPlotFile->Close(); } diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 2139023116d..7a579ad0dac 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -60,7 +60,7 @@ CoherentSum::CoherentSum( const EventType& type, const MinuitParameterSet& mps, m_matrixElements[i] = TransitionMatrix(p, c, CompiledExpression( p.getExpression(m_dbThis ? &db : nullptr), p.decayDescriptor(), - this->m_evtType.getEventFormat(), db, this->m_mps ) ); + this->m_evtType.getEventFormat(), db, &mps ) ); CompilerWrapper().compile( m_matrixElements[i], this->m_objCache); } ); } diff --git a/src/EventList.cpp b/src/EventList.cpp index db09d143e5d..891537d84a8 100644 --- a/src/EventList.cpp +++ b/src/EventList.cpp @@ -124,8 +124,7 @@ void EventList::loadFromTree( TTree* tree, const ArgumentPack& args ) auto automaticOrdering = m_eventType.automaticOrdering(); for (const auto& evt : tr) { if( inputUnits != Units::GeV ) for( unsigned k = 0; k != eventFormat.size(); ++k ) temp[k] *= to_double(inputUnits); - // if( idBranches.size() != 0 && !automaticOrdering(temp, ids) ) - // WARNING("Failed to order event: " << evt ); + if( idBranches.size() != 0 && !automaticOrdering(temp, ids) ) WARNING("Failed to order event: " << evt ); if( applySym ) symmetriser(temp); if( ! 
hasEnergy ){ for( unsigned int k = 0 ; k != m_eventType.size(); ++k ) diff --git a/src/OptionsParser.cpp b/src/OptionsParser.cpp index 469794aed18..8702a80102d 100644 --- a/src/OptionsParser.cpp +++ b/src/OptionsParser.cpp @@ -12,6 +12,13 @@ using namespace AmpGen; OptionsParser* OptionsParser::gOptionsParser = nullptr; +OptionsParser::OptionsParser() +{ + m_keywords["Import"] = [this](const auto& tokens){ if( tokens.size() != 2 ) return; this->import(expandGlobals(tokens[1] ) ) ; }; + m_keywords["ParticleProperties"] = [](const auto& tokens){ ParticlePropertiesList::getMutable()->addParticle( tokens ); }; + m_keywords["ParticlePropertiesList::Alias"] = [](const auto& tokens){ if( tokens.size() !=3 ) return; ParticlePropertiesList::getMutable()->makeAlias( tokens[1], tokens[2] ); }; +} + OptionsParser* OptionsParser::getMe() { if( gOptionsParser == nullptr ) gOptionsParser = new OptionsParser(); @@ -25,9 +32,9 @@ void OptionsParser::setQuiet(){ bool OptionsParser::ignoreThisLine( const std::string& line ) { if ( line.empty() ) return true; - const char _ignoreLinesStartingWith[] = {'*', '#', '\0'}; - for ( int i = 0; _ignoreLinesStartingWith[i] != '\0'; i++ ) { - if ( line[0] == _ignoreLinesStartingWith[i] ) return true; + const char ignoreLinesStartingWith[] = {'*', '#', '\0'}; + for ( int i = 0; ignoreLinesStartingWith[i] != '\0'; i++ ) { + if ( line[0] == ignoreLinesStartingWith[i] ) return true; } return false; } @@ -55,9 +62,8 @@ void OptionsParser::setCommandLineArgs( int argc, char** argv, const std::string x++; } } - int depth = 0 ; if( key == "help" ) m_printHelp = true; - m_parsedLines[key] = makeParsedStrings( key + " " + val, depth ); + addArg(key +" " + val); } if( m_printHelp ){ std::cout << bold_on << "Usage: " << bold_off << argv[0] << italic_on << " options_file1.opt options_file2.opt --key1=value1 --key2=value2 ..." 
<< italic_off << std::endl; @@ -80,40 +86,30 @@ void OptionsParser::import( const std::string& fName ) auto tokens = this->makeParsedStrings( line, braceDepth ); for ( auto& token : tokens ) currentTokens.push_back( token ); if ( tokens.size() == 0 ) return; - std::string name = currentTokens[0]; - if ( name == "Import" && currentTokens.size() == 2 ) { - this->import( expandGlobals( tokens[1] ) ); - currentTokens.clear(); - return; - } - if ( name == "ParticlePropertiesList::Alias" && currentTokens.size() == 3 ) { - ParticlePropertiesList::getMutable()->makeAlias( tokens[1], tokens[2] ); - currentTokens.clear(); - return; - } + std::string key = currentTokens[0]; if ( braceDepth != 0 ) return; - if ( this->m_parsedLines.find( name ) != this->m_parsedLines.end() ) { - WARNING( "Overwriting parameter: " << name ); + if ( this->m_parsedLines.find( key ) != this->m_parsedLines.end() ) { + WARNING( "Overwriting parameter: " << key ); } - currentTokens.erase( std::remove_if( currentTokens.begin(), currentTokens.end(), - []( const std::string& o ) { return o == "{" || o == "}"; } ), - currentTokens.end() ); - this->m_parsedLines[name] = currentTokens; + auto isCurlyBrace = []( const std::string& o ) { return o == "{" || o == "}"; }; + currentTokens.erase( std::remove_if( currentTokens.begin(), currentTokens.end(), isCurlyBrace), currentTokens.end() ); + this->addArg( currentTokens ); currentTokens.clear(); } ); - - } void OptionsParser::addArg( const std::string& arg ) { int bc = 0 ; - auto tokens = makeParsedStrings( arg, bc ); - auto name = tokens[0]; - if ( name == "ParticlePropertiesList::Alias" && tokens.size() == 3 ) { - ParticlePropertiesList::getMutable()->makeAlias( tokens[1], tokens[2] ); - } - m_parsedLines[name] = tokens; + addArg( makeParsedStrings( arg, bc ) ); +} + +void OptionsParser::addArg( const std::vector& tokens ) +{ + auto& key = tokens[0]; + DEBUG("Adding arg with key: " << key ); + if( m_keywords.count(key) != 0 ) m_keywords[key]( tokens ); + else 
m_parsedLines[key] = tokens; } std::vector OptionsParser::makeParsedStrings( const std::string& line, int& braceDepth ) const diff --git a/src/Particle.cpp b/src/Particle.cpp index 784011c2748..00e31034e6f 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -162,12 +162,11 @@ void Particle::pdgLookup() } else m_usesDefaultLineshape = false; m_parity = m_props->P(); - if ( !isdigit( m_props->J()[0] ) ) ERROR( "Spin not recognised! : " << m_name << " J = " << m_props->J() ); + // if ( !isdigit( m_props->J()[0] ) ) ERROR( "Spin not recognised! : " << m_name << " J = " << m_props->J() ); if( m_defaultModifier != "" && m_lineshape.find(".") == std::string::npos ){ m_lineshape = m_lineshape + "." + m_defaultModifier.getVal(); } bool isStrong = quarks() == daughterQuarks(); - DEBUG( m_name << " is decaying via " << ( isStrong ? "strong" : "electroweak" ) << " interactions; P = " << props()->P() ); if ( m_name.find( "NonRes" ) != std::string::npos ) isStrong = true; m_minL = m_daughters.size() == 2 ? orbitalRange( isStrong ).first : 0; if ( m_daughters.size() == 2 ) { @@ -176,6 +175,9 @@ void Particle::pdgLookup() << " d1 = " << m_daughters[1]->name() ); } if ( m_orbital == 0 ) m_orbital = m_minL; // define in ground state unless specified + if( m_daughters.size() != 0 ){ + DEBUG( m_name << " is decaying via " << ( isStrong ? "strong" : "electroweak" ) << " interactions; P = " << props()->P() << "l = " << m_orbital ); + } int charge = 0; for ( auto& d : m_daughters ){ d->setParent(this); @@ -642,7 +644,7 @@ Particle Particle::conj( bool invertHead , bool reorder ) QuarkContent Particle::quarks() const { - return m_name.find("NonRes") != std::string::npos ? daughterQuarks() : m_props->netQuarkContent(); + return m_name.find("NonRes") != std::string::npos ? 
daughterQuarks() : m_props->quarkContent(); } QuarkContent Particle::daughterQuarks() const diff --git a/src/ParticleProperties.cpp b/src/ParticleProperties.cpp index e9e512fe95e..a3471531777 100644 --- a/src/ParticleProperties.cpp +++ b/src/ParticleProperties.cpp @@ -19,7 +19,7 @@ void ParticleProperties::print( std::ostream& out ) const out << "Mass " << mass() << " +" << mErrPlus() << " -" << mErrMinus() << "\nWidth " << width() << " +" << wErrPlus() << " -" << wErrMinus() << "\n I=" << I() << ", G=" << G() << "\n J=" << J() << ", C=" << C() << ", P=" << P() << "\n Q = " << charge() << "\n pdgID " << pdgID() << "\n name " << name() - << "\n net-quark-content " << netQuarkContent() << "\n is its own antiparticle? " + << "\n quark-content " << quarkContent() << "\n is its own antiparticle? " << ( hasDistinctAnti() ? "no" : "yes" ) << "\n radius " << radius() * GeV << " /GeV" << "\n"; } @@ -36,7 +36,15 @@ int ParticleProperties::chargeFromString( const std::string& ch, bool& status ) return 0; } -ParticleProperties::ParticleProperties( const std::string& pdg_string ) : m_netQuarkContent() +std::string ParticleProperties::J() const +{ + if( m_twoSpin == -1 ) return "?"; + if( m_twoSpin % 2 == 0 ) return std::to_string( int(m_twoSpin/2) ); + if( m_twoSpin % 2 == 1 ) return std::to_string( int(m_twoSpin/2) ) + "/2"; + return ""; +} + +ParticleProperties::ParticleProperties( const std::string& pdg_string ) { m_isValid = false; if ( pdg_string == "" || pdg_string.empty() ) return; @@ -61,21 +69,19 @@ ParticleProperties::ParticleProperties( const std::string& pdg_string ) : m_netQ m_Cparity = chargeFromString( s[10], status ); m_charge = chargeFromString( s[13], status ); m_isospin = s[6]; - m_JtotalSpin = s[8]; m_status = s[15][0]; m_name = s[16]; - m_quarks = s[17]; m_Aformat = s[11].size() == 1 ? 
s[11][0] : ' '; m_chargeString = s[13]; - m_netQuarkContent = QuarkContent( m_quarks ); + m_quarkContent = QuarkContent( s[17] ); bool spin_status = 1; - if( m_JtotalSpin == "?" ) m_twoSpin = 0; - else if( m_JtotalSpin.find("/") != std::string::npos ){ - m_twoSpin = lexical_cast( m_JtotalSpin.substr(0, m_JtotalSpin.find("/") ) , spin_status ); + if( s[8] == "?" ) m_twoSpin = -1; + else if( s[8].find("/") != std::string::npos ){ + m_twoSpin = lexical_cast( s[8].substr(0, s[8].find("/") ) , spin_status ); } - else m_twoSpin = 2 * lexical_cast( m_JtotalSpin, spin_status ); + else m_twoSpin = 2 * lexical_cast( s[8], spin_status ); if( spin_status == 0 ){ - DEBUG("Spin of particle: " << name() << " could not be interpretted (J=" << m_JtotalSpin << ")" ); + DEBUG("Spin of particle: " << name() << " could not be interpretted (J=" << s[8] << ")" ); } m_radius = 1.5 / GeV; bool isCharm = ( abs(pdgID()) == 421 || @@ -94,6 +100,7 @@ bool ParticleProperties::antiThis() swapChars( m_chargeString, '+', '-'); m_charge *= -1; if( isFermion() ) m_parity *= -1; + /* if ( !m_quarks.empty() ){ swapChars(m_quarks, 'U', 'u'); swapChars(m_quarks, 'D', 'd'); @@ -106,7 +113,8 @@ bool ParticleProperties::antiThis() m_quarks.replace( pos, 4, "sqrt" ); } } - m_netQuarkContent.antiThis(); + */ + m_quarkContent.antiThis(); m_pdgID *= -1; return true; } @@ -176,7 +184,7 @@ std::string ParticleProperties::spinName() const { if( m_twoSpin == 5 ) return "e"; if( m_twoSpin == 7 ) return "c"; } - WARNING("Spin name not implemented for " << m_JtotalSpin ); + WARNING("Spin name not implemented for " << m_twoSpin ); return "?"; } @@ -205,3 +213,30 @@ bool ParticleProperties::operator>( const ParticleProperties& rhs ) const { retu bool ParticleProperties::operator<=( const ParticleProperties& rhs ) const { return ( *this < rhs || *this == rhs ); } bool ParticleProperties::operator>=( const ParticleProperties& rhs ) const { return ( *this > rhs || *this == rhs ); } + +void 
ParticleProperties::setProperty(const std::string& key, const std::string& value) +{ + DEBUG("Setting property: " << key << " " << value ); + bool status = true; + if( key == "mass" ) m_mass = stod(value) / MeV; + else if( key == "name" ) m_name = value; + else if( key == "width" ) m_width = stod(value) / MeV; + else if( key == "spin" ) + { + bool spin_status = 1; + if( value == "?" ) m_twoSpin = 0; + else if( value.find("/") != std::string::npos ){ + m_twoSpin = lexical_cast( value.substr(0, value.find("/") ) , spin_status ); + } + else m_twoSpin = 2 * lexical_cast( value, spin_status ); + if( spin_status == 0 ){ + ERROR("Spin of particle: " << name() << " could not be interpretted (J=" << value << ")" ); + } + } + else if( key == "parity" ) m_parity = chargeFromString( value, status ); + else if( key == "charge" ) m_charge = chargeFromString( value, status ); + else if( key == "quarks" ) m_quarkContent = QuarkContent(value); + else { + ERROR("Unrecognised key: " << key ); + } +} diff --git a/src/ParticlePropertiesList.cpp b/src/ParticlePropertiesList.cpp index 9745d94f59d..95c9d6877fc 100644 --- a/src/ParticlePropertiesList.cpp +++ b/src/ParticlePropertiesList.cpp @@ -202,4 +202,26 @@ void ParticlePropertiesList::makeAlias( const std::string& name, const std::stri m_byName[alias] = pp; } +void ParticlePropertiesList::addParticle( const std::vector& tokens ) +{ + INFO( vectorToString(tokens, " ") ); + auto name = tokens[1]; + if( tokens.size() % 2 != 0 ) ERROR("Expecting properties as set of key value pairs"); + ParticleProperties* pp = nullptr; + if( m_byName.find( name ) != m_byName.end() ) + { + WARNING("Overwriting properties of existing particle"); + pp = m_byName[name]; + } + else { + pp = new ParticleProperties(); + pp->setProperty("name", name ); + } + for( int i = 2; i != tokens.size(); i+=2 ) + { + pp->setProperty( tokens[i], tokens[i+1] ); + } + m_byName[name] = pp; +} + double ParticlePropertiesList::quasiStableThreshold() const { return 
m_quasiStableThreshold; } diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 1db1caeb531..3e9febabcef 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -61,35 +61,34 @@ PolarisedSum::PolarisedSum(const EventType& type, { std::string objCache = NamedParameter("PolarisedSum::ObjectCache", "" ); spaceType stype = NamedParameter( "PolarisedSum::SpaceType" , spaceType::spin); + ThreadPool tp(std::thread::hardware_concurrency() ); if( stype == spaceType::spin ) { auto prodPols = polarisations(m_eventType.mother()); std::vector> polStates; for(const auto& pol : prodPols ) polStates.push_back({pol}); - for(unsigned i = 0 ; i != type.size(); ++i ) polStates = indexProduct(polStates, polarisations( type[i] ) ); - + for(unsigned i = 0 ; i != type.size(); ++i ) polStates = indexProduct(polStates, polarisations( type[i] ) ); auto protoAmps = m_rules.getMatchingRules(m_eventType); for(const auto& m : protoAmps ) INFO( m.first.uniqueString() ); m_matrixElements.resize( protoAmps.size() ); - ThreadPool tp(std::thread::hardware_concurrency() ); for(unsigned i = 0; i < m_matrixElements.size(); ++i) { - tp.enqueue( [i, &protoAmps, &polStates, this]{ - Tensor thisExpression( Tensor::dim(polStates.size()) ); - auto& [p, coupling] = protoAmps[i]; - DebugSymbols syms; - for(unsigned j = 0; j != polStates.size(); ++j) thisExpression[j] = make_cse( p.getExpression(j == 0 ? &syms: nullptr, polStates[j] ) ); - m_matrixElements[i] = TransitionMatrix( - p, - coupling, - CompiledExpression( - TensorExpression(thisExpression), - p.decayDescriptor(), - this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); + auto [p, c] = protoAmps[i]; + tp.enqueue( [i, p, c, polStates, &mps, this] () mutable { + Tensor thisExpression( Tensor::dim(polStates.size()) ); + DebugSymbols syms; + for(unsigned j = 0; j != polStates.size(); ++j) + thisExpression[j] = make_cse( p.getExpression(j == 0 ? 
&syms: nullptr, polStates[j] ) ); + + this->m_matrixElements[i] = TransitionMatrix( + p, c, + CompiledExpression( + TensorExpression(thisExpression), p.decayDescriptor(), &mps, + this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ) ); + CompilerWrapper().compile( m_matrixElements[i] ); - m_matrixElements[i].size = thisExpression.size(); - } - ); + m_matrixElements[i].size = thisExpression.size(); + }); } } if ( stype == spaceType::flavour ) @@ -98,24 +97,21 @@ PolarisedSum::PolarisedSum(const EventType& type, auto r1 = m_rules.getMatchingRules(m_eventType, m_prefix); auto r2 = m_rules.getMatchingRules(m_eventType.conj(true), m_prefix); m_matrixElements.resize( r1.size() + r2.size() ); - ThreadPool tp(8); for(unsigned i = 0 ; i != m_matrixElements.size(); ++i) { - tp.enqueue( [i, this, &r1, &r2]{ - Tensor thisExpression( Tensor::dim(2) ); - DebugSymbols syms; - auto& tm = i < r1.size() ? r1[i] : r2[i-r1.size()]; - thisExpression[0] = i < r1.size() ? make_cse( tm.first.getExpression(&syms) ) : 0; - thisExpression[1] = i < r1.size() ? 0 : make_cse( tm.first.getExpression(&syms) ); - m_matrixElements[i] = TransitionMatrix( - tm.first, - tm.second, - CompiledExpression( - TensorExpression(thisExpression), - tm.first.decayDescriptor(), - this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ,this->m_mps ) ); - CompilerWrapper().compile( m_matrixElements[i] ); - }); + tp.enqueue( [i, this, &r1, &r2] () mutable { + Tensor thisExpression( Tensor::dim(2) ); + DebugSymbols syms; + auto& [p,coupling] = i < r1.size() ? r1[i] : r2[i-r1.size()]; + thisExpression[0] = i < r1.size() ? make_cse( p.getExpression(&syms) ) : 0; + thisExpression[1] = i < r1.size() ? 0 : make_cse( p.getExpression(&syms) ); + this->m_matrixElements[i] = TransitionMatrix( + p, coupling, + CompiledExpression( + TensorExpression(thisExpression), p.decayDescriptor(), this->m_mps, + this->m_eventType.getEventFormat(), this->m_debug ? 
syms : DebugSymbols() ) ); + CompilerWrapper().compile( m_matrixElements[i] ); + }); } } if( m_pVector.size() == 0 ) @@ -129,7 +125,7 @@ PolarisedSum::PolarisedSum(const EventType& type, DebugSymbols db; auto prob = probExpression(transitionMatrix(), convertProxies(m_pVector,[](auto& p){ return Parameter(p->name());} ), m_debug ? &db : nullptr); - m_probExpression = make_expression( prob, "prob_unnormalised", *m_mps ); + m_probExpression = make_expression( prob, "prob_unnormalised", m_mps ); } std::vector PolarisedSum::polarisations( const std::string& name ) const @@ -363,14 +359,14 @@ void PolarisedSum::generateSourceCode(const std::string& fname, const double& no auto amp_extPol = probExpression(T_matrix, {Parameter("x2",0,true), Parameter("x3",0,true), Parameter("x4",0,true)}); stream << CompiledExpression( amp / normalisation, "FCN",{},{}, m_mps ) << std::endl ; + const int&)>( amp / normalisation, "FCN", m_mps, disableBatch() ) << std::endl ; stream << CompiledExpression( amp_extPol / normalisation, "FCN_extPol",{},{},m_mps ) << std::endl; + const double&)>(amp_extPol / normalisation, "FCN_extPol", m_mps, disableBatch() ) << std::endl; stream.close(); } diff --git a/src/ThreeBodyCalculators.cpp b/src/ThreeBodyCalculators.cpp index d6351163d76..20c9eb69447 100644 --- a/src/ThreeBodyCalculators.cpp +++ b/src/ThreeBodyCalculators.cpp @@ -240,9 +240,9 @@ ThreeBodyCalculator::PartialWidth::PartialWidth( const EventType& evt, MinuitPar auto evtFormat = evt.getEventFormat(); for ( auto& p : unpacked ) { name += p.first.decayDescriptor(); - partialWidths.emplace_back( spinAverageMatrixElement( {p}, &msym ), p.first.decayDescriptor(), evtFormat, DebugSymbols(), &mps ); + partialWidths.emplace_back( spinAverageMatrixElement( {p}, &msym ), p.first.decayDescriptor(), &mps, evtFormat); } - totalWidth = CompiledExpression< complex_t(const real_t*, const real_t*) > ( matrixElementTotal, "width", evtFormat, {} , &mps ); + totalWidth = CompiledExpression< complex_t(const 
real_t*, const real_t*) > ( matrixElementTotal, "width", &mps, evtFormat); CompilerWrapper(true).compile( totalWidth, ""); } From 9a16468951b64a47f057ca71471ff1c9d1c21f90 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 8 Jun 2020 13:06:53 +0200 Subject: [PATCH 46/67] fix polarisedSum issue for clang --- src/PolarisedSum.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 3e9febabcef..b5e582ea8b0 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -73,8 +73,8 @@ PolarisedSum::PolarisedSum(const EventType& type, m_matrixElements.resize( protoAmps.size() ); for(unsigned i = 0; i < m_matrixElements.size(); ++i) { - auto [p, c] = protoAmps[i]; - tp.enqueue( [i, p, c, polStates, &mps, this] () mutable { + auto [lp, lc] = protoAmps[i]; + tp.enqueue( [i, p=lp, c=lc, polStates, &mps, this] () mutable { Tensor thisExpression( Tensor::dim(polStates.size()) ); DebugSymbols syms; for(unsigned j = 0; j != polStates.size(); ++j) From 614a5a1cdead60815ed19bc25165ddaf0443f8e7 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 9 Jun 2020 11:58:07 +0200 Subject: [PATCH 47/67] fix isPhoton flag --- AmpGen/Generator.h | 1 + AmpGen/ParticleProperties.h | 1 - src/CoherentSum.cpp | 4 +++- src/ParticleProperties.cpp | 2 +- src/TreePhaseSpace.cpp | 19 ++++++++++++------- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/AmpGen/Generator.h b/AmpGen/Generator.h index b864c166d1e..5fafd74a841 100644 --- a/AmpGen/Generator.h +++ b/AmpGen/Generator.h @@ -109,6 +109,7 @@ namespace AmpGen if ( event.genPdf() > maxProb ) { std::cout << std::endl; WARNING( "PDF value exceeds norm value: " << event.genPdf() << " > " << maxProb ); +// pdf.debug( event ); } if ( accept_all || event.genPdf() > maxProb * m_rnd->Rndm() ){ list.push_back(event); diff --git a/AmpGen/ParticleProperties.h b/AmpGen/ParticleProperties.h index 74adeab6dd3..df8e000bfe3 100644 --- a/AmpGen/ParticleProperties.h +++ 
b/AmpGen/ParticleProperties.h @@ -94,7 +94,6 @@ namespace AmpGen int m_charge{0}; ///< electrical charge int m_twoSpin{0}; ///< twice the spin std::string m_isospin{""}; ///< isospin - //std::string m_JtotalSpin{""}; ///< total spin std::string m_name{""}; ///< particle name std::string m_texName{""}; ///< latex label of particle std::string m_chargeString{""}; ///< string for particle charge diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 7a579ad0dac..980e224e060 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -112,6 +112,8 @@ void CoherentSum::updateNorms() void CoherentSum::debug( const Event& evt, const std::string& nameMustContain ) { prepare(); + INFO("Weight = " << evt.weight() << " genPDF = " << evt.genPdf() ); + for ( auto& me : m_matrixElements ) { auto A = me(evt); INFO( std::setw(70) << me.decayTree.uniqueString() @@ -120,7 +122,7 @@ void CoherentSum::debug( const Event& evt, const std::string& nameMustContain ) << m_cache( evt.index(), std::distance(&m_matrixElements[0], &me ) ) << me.decayTree.CP() ); } - if( m_dbThis ) for ( auto& me : m_matrixElements ) me.debug( m_events->block(0) ); + if( m_dbThis ) for ( auto& me : m_matrixElements ) me.debug( evt) ; INFO( "A(x) = " << getVal(evt) << " without cache: " << getValNoCache(evt) ); } diff --git a/src/ParticleProperties.cpp b/src/ParticleProperties.cpp index a3471531777..bba59dd93d7 100644 --- a/src/ParticleProperties.cpp +++ b/src/ParticleProperties.cpp @@ -153,7 +153,7 @@ bool ParticleProperties::isFermion() const { bool ParticleProperties::isPhoton() const { - return m_name == "gamma0"; + return m_pdgID == 22; } bool ParticleProperties::isNeutrino() const diff --git a/src/TreePhaseSpace.cpp b/src/TreePhaseSpace.cpp index d7cf82ec4f5..e1447a1ea16 100644 --- a/src/TreePhaseSpace.cpp +++ b/src/TreePhaseSpace.cpp @@ -50,13 +50,18 @@ TreePhaseSpace::TreePhaseSpace(const std::vector& decayChains, const E Event TreePhaseSpace::makeEvent() { - unsigned j = m_dice(m_gen); - // 
INFO("Producing event from tree: " << j ); - if( j >= m_top.size() ) ERROR("Out of bounds: " << j << " / " << m_top.size() ); - m_top[j].generate(); - auto event = m_top[j].event(m_type.size()); - double w = m_top[j].weight(); - event.setGenPdf( w == 0 ? 0 : genPdf(event) / w ); + unsigned j = 0; + double w = 0; + Event event; + do { + // INFO("Producing event from tree: " << j ); + j = m_dice(m_gen); + if( j >= m_top.size() ) ERROR("Out of bounds: " << j << " / " << m_top.size() ); + m_top[j].generate(); + event = m_top[j].event(m_type.size()); + w = m_top[j].weight(); + event.setGenPdf( w == 0 ? 0 : genPdf(event) / w ); + } while ( w == 0 ); m_generatorRecord.push_back(j); return event; } From b4863e3abfa931d613907685eda4603ecd59f43d Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 9 Jun 2020 13:17:54 +0200 Subject: [PATCH 48/67] add some sanity checks for photons --- src/PolarisedSum.cpp | 11 +++++------ src/Wigner.cpp | 11 +++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index b5e582ea8b0..5994eb171c3 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -131,12 +131,11 @@ PolarisedSum::PolarisedSum(const EventType& type, std::vector PolarisedSum::polarisations( const std::string& name ) const { auto props = *ParticlePropertiesList::get( name ); - if( props.twoSpin() == 0 ) return {0}; - if( props.twoSpin() == 1 ) return {1,-1}; - if( props.twoSpin() == 4 ) return {-2,1,0,1,2}; - if( name == "gamma0" && props.twoSpin() == 2 ) return {1,-1}; - if( name != "gamma0" && props.twoSpin() == 2 ) return {1,0,-1}; - + if( props.twoSpin() == 0 ) return {0}; // scalar + if( props.isPhoton() ) return {1,-1}; // photon + if( props.twoSpin() == 1 ) return {1,-1}; // fermion + if( props.twoSpin() == 4 ) return {-2,1,0,1,2};// tensor + if( props.twoSpin() == 2 ) return {1,0,-1}; // vector else { WARNING("Particle with spin: " << props.twoSpin() << "/2" << " not implemented in initial/final 
state"); return {0}; diff --git a/src/Wigner.cpp b/src/Wigner.cpp index e098175b9c6..9a676900334 100644 --- a/src/Wigner.cpp +++ b/src/Wigner.cpp @@ -130,7 +130,8 @@ std::pair angCoordinates(const Tensor& P, DebugSymbols* Expression px = P[0] / sqrt(pt2); Expression py = P[1] / sqrt(pt2); return {pz, make_cse(px + 1i*py)}; -} +} /// spherical coordinates are paramterised as {z=cos(theta), e^(iphi)}, as this avoids any trigonometric functions + Expression AmpGen::wigner_D(const std::pair& P, const double& J, const double& lA, @@ -249,6 +250,8 @@ Expression AmpGen::helicityAmplitude(const Particle& particle, { if( particle.props()->twoSpin() == 0 ) return Mz==0; // a scalar // polarisation spinor / vector etc. in the quantisation of the lab (i.e. along the z-axis or lab particle momentum) + if( particle.props()->isPhoton() && Mz == 0. ) ERROR("Photon polarisation state is wrong"); + if( particle.polState() == 0 ) ERROR("Photon external state is wrong..."); auto labPol = particle.externalSpinTensor(particle.polState(), db); auto inverseMyTransform = myFrame.inverse(); if( particle.props()->twoSpin() == 1 ) // so a fermion @@ -291,12 +294,12 @@ Expression AmpGen::helicityAmplitude(const Particle& particle, } Expression total = 0; - std::pair hco = angCoordinates( myFrame(d1.P()) , db); + auto hco = angCoordinates( myFrame(d1.P()) , db); for( auto& coupling : recoupling_constants ) { auto dm = coupling.m1 - coupling.m2; - if( (d1.name() == "gamma0" && coupling.m1 == 0) || - (d2.name() == "gamma0" && coupling.m2 == 0) ) continue; + if( (d1.props()->isPhoton() && coupling.m1 == 0.) || + (d2.props()->isPhoton() && coupling.m2 == 0.) 
) continue; auto term = wigner_D(hco, particle.spin(), Mz, dm, db); auto h1 = helicityAmplitude(d1, myFrame, coupling.m1, db, +1, cachePtr); auto h2 = helicityAmplitude(d2, myFrame, coupling.m2, db, -1, cachePtr); From d5e981b0bfb65f750b10d3e5c33f60d3127950f2 Mon Sep 17 00:00:00 2001 From: Timothy David Evans Date: Mon, 29 Jun 2020 09:09:57 +0200 Subject: [PATCH 49/67] fix cuts in DataConvertor --- Standalone.cmake | 2 +- apps/DataConverter.cpp | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Standalone.cmake b/Standalone.cmake index c2284d0e92e..9dc1819d549 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -222,7 +222,7 @@ add_subdirectory(test) include(CMakePackageConfigHelpers) write_basic_package_version_file(AmpGenVersion.cmake VERSION ${PACKAGE_VERSION} COMPATIBILITY AnyNewerVersion) -configure_file(AmpGenConfig.cmake.in AmpGenConfig.cmake) # @ONLY) +configure_file(AmpGenConfig.cmake.in AmpGenConfig.cmake) export( TARGETS AmpGen NAMESPACE AmpGen:: FILE AmpGenTargets.cmake ) set(CMAKE_EXPORT_PACKAGE_REGISTRY ON) diff --git a/apps/DataConverter.cpp b/apps/DataConverter.cpp index 23d84679183..8a41a628a0e 100644 --- a/apps/DataConverter.cpp +++ b/apps/DataConverter.cpp @@ -52,7 +52,7 @@ int main( int argc, char* argv[] ) std::vector idBranches = NamedParameter("IdBranches" , std::vector() ).getVector(); bool usePIDCalib = NamedParameter("usePIDCalib" , false); bool rejectMultipleCandidates = NamedParameter("rejectMultipleCandidates", true ); - std::string cuts = NamedParameter("Cut",""); + auto cuts = NamedParameter("Cut","").getVector(); EventType evtType( NamedParameter( "EventType" ).getVector() ); std::vector branches; @@ -70,7 +70,6 @@ int main( int argc, char* argv[] ) in_tree->AddFriend( tokens[1].c_str(), tokens[0].c_str() ); } - INFO( "Using cut = " << cuts ); if(inputFilename == "") FATAL("No input specified in options" ); if(treeName == "") FATAL("No tree specified in options" ); @@ -79,8 +78,11 @@ 
int main( int argc, char* argv[] ) if(in_tree == nullptr ) FATAL(treeName + " not found" ); INFO( "Got tree " << inputFilename << ":" << treeName ); - - in_tree->Draw( ">>elist", cuts.c_str() ); + std::string cut = ""; + for( auto& i : cuts ) cut += i; + INFO( "Using cut = " << cut ); + + in_tree->Draw( ">>elist", cut.c_str() ); TEventList* elist = (TEventList*)gDirectory->Get( "elist" ); INFO( "Total efficiency = " << elist->GetN() / (double)in_tree->GetEntries() ); @@ -187,14 +189,9 @@ int main( int argc, char* argv[] ) } if ( pdfLibrary != "" ) { INFO( "Setting generator level PDF from " << pdfLibrary ); - void* handle = dlopen( pdfLibrary.c_str(), RTLD_NOW ); - if ( handle == nullptr ) dlerror(); - - DynamicFCN fcn( handle, "FCN" ); + DynamicFCN fcn( pdfLibrary, "FCN" ); for ( unsigned int i = 0; i < evts.size(); ++i ) { - if ( i % 500000 == 0 ) { - INFO( "Set for " << i << " events" ); - } + if ( i % 500000 == 0 ) INFO( "Set for " << i << " events" ); evts[i].setGenPdf( fcn( (const real_t*)(evts[i]), 1 ) ); } } @@ -211,7 +208,12 @@ int main( int argc, char* argv[] ) auto projections = evtType.defaultProjections(); for ( auto& p : projections ) { p( evts ) -> Write(); - p( evts, WeightFunction([](auto& evt){ return 1; }), PlotOptions::Prefix("noweight") )->Write(); + // p( evts, WeightFunction([](auto& evt){ return 1; }), PlotOptions::Prefix("noweight") )->Write(); + } + for( int i = 0 ; i != evtType.size(); ++i ) + { + Projection p( [i](auto& event){ return sqrt( event.s(i) ); }, "m_"+std::to_string(i), "m_"+std::to_string(i), 100, 0, 2.5 ); + p(evts)->Write(); } outputPlotFile->Close(); } From e527eb0db399328532ed8c7af4450e310f292bcb Mon Sep 17 00:00:00 2001 From: Timothy David Evans Date: Mon, 27 Jul 2020 14:54:24 +0200 Subject: [PATCH 50/67] Fix complex logarithm for AVX, Weyl spinors for neutrinos --- AmpGen/DiracMatrices.h | 1 + AmpGen/PolarisedSum.h | 2 +- AmpGen/simd/avx2d_types.h | 8 ++++++-- AmpGen/simd/avx2f_types.h | 5 +++-- apps/Debugger.cpp | 4 
+++- apps/Generator.cpp | 19 ++++++++++++++---- src/DiracMatrices.cpp | 14 +++++++++++++ src/Particle.cpp | 26 ++++++++++++------------- src/PolarisedSum.cpp | 36 +++++++++++++++++++++------------- src/Spline.cpp | 2 +- src/Tensor.cpp | 3 --- src/Vertex.cpp | 4 ++++ test/test_avx2d.cpp | 41 ++++++++++++++------------------------- 13 files changed, 99 insertions(+), 66 deletions(-) diff --git a/AmpGen/DiracMatrices.h b/AmpGen/DiracMatrices.h index 4ccd1330830..7e646e323b4 100644 --- a/AmpGen/DiracMatrices.h +++ b/AmpGen/DiracMatrices.h @@ -12,6 +12,7 @@ namespace AmpGen extern const Expression Z; extern const std::array Gamma; extern const std::array Sigma; + extern const std::array Sigma4; extern const std::array S03; extern const std::array SU3; } // namespace AmpGen diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index 987a090dd6d..70221e910e0 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -69,7 +69,7 @@ namespace AmpGen std::vector fitFractions(const LinearErrorPropagator&); std::vector> matrixElements() const; void transferParameters(); - Tensor transitionMatrix(); + Tensor transitionMatrix() const; const TransitionMatrix& operator[](const size_t& i) const { return m_matrixElements[i] ; } std::function evaluator(const EventList_type* = nullptr) const; KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; diff --git a/AmpGen/simd/avx2d_types.h b/AmpGen/simd/avx2d_types.h index 0e07026d45c..c232663eaee 100644 --- a/AmpGen/simd/avx2d_types.h +++ b/AmpGen/simd/avx2d_types.h @@ -33,7 +33,7 @@ namespace AmpGen { real_v(const double& f ) : data( _mm256_set1_pd( f )) {} real_v(const double& x0, const double& x1, const double& x2, const double& x3 ) { - data = _mm256_set_pd(x0,x1,x2,x3); + data = _mm256_set_pd(x3,x2,x1,x0); } real_v(const double* f ) : data( _mm256_loadu_pd( f ) ) {} real_v(const std::array f ) : data( _mm256_loadu_pd( f.data() ) ) {} @@ -67,6 +67,7 @@ namespace AmpGen { libmvec_alias( cos ) 
libmvec_alias( exp ) libmvec_alias( log ) + //inline real_v log( const real_v& v ){ auto arr = v.to_array(); return real_v( std::log(arr[0]), std::log(arr[1]), std::log(arr[2]), std::log(arr[3])) ; } inline void sincos( const real_v& v, real_v& s, real_v& c ) { #if USE_MVEC @@ -138,6 +139,9 @@ namespace AmpGen { complex_v( const float& re, const float& im) : re(re), im(im) {} complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} + complex_v( const std::complex* arr ) : + re ( arr[0].real(), arr[1].real(), arr[2].real(), arr[3].real() ), + im ( arr[0].imag(), arr[1].imag(), arr[2].imag(), arr[3].imag() ){} explicit complex_v( const real_v& arg ) : re(arg) {}; explicit complex_v( const double& arg ) : re(arg) {}; const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } @@ -194,7 +198,7 @@ namespace AmpGen { } inline complex_v log( const complex_v& v ) { - return complex_v( log( v.re ) , atan2(v.im, v.re) ); + return complex_v( 0.5 * log( v.norm() ) , atan2(v.im, v.re) ); } inline std::ostream& operator<<( std::ostream& os, const complex_v& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } diff --git a/AmpGen/simd/avx2f_types.h b/AmpGen/simd/avx2f_types.h index 096f2058cdc..d5fc784b3ba 100644 --- a/AmpGen/simd/avx2f_types.h +++ b/AmpGen/simd/avx2f_types.h @@ -36,7 +36,7 @@ namespace AmpGen { real_v(const float& x0, const float& x1, const float& x2, const float& x3, const float& x4, const float& x5, const float& x6, const float& x7) { - data = _mm256_set_ps(x0,x1,x2,x3,x4,x5,x6,x7); + data = _mm256_set_ps(x7,x6,x5,x4,x3,x2,x1,x0); } void store( float* ptr ) const { _mm256_storeu_ps( ptr, data ); } @@ -138,6 +138,7 @@ namespace AmpGen { complex_v( const float& re, const float& im) : re(re), im(im) {} complex_v( const std::complex& f ) : re( f.real() ), im( f.imag() ) {} complex_v( const std::complex& f ) : re( f.real() 
), im( f.imag() ) {} + const std::complex at(const unsigned i) const { return std::complex(re.to_array()[i], im.to_array()[i]) ; } void store( float* sre, float* sim ) const { re.store(sre); im.store(sim); } void store( std::complex* r ) const { @@ -187,7 +188,7 @@ namespace AmpGen { } inline complex_v log( const complex_v& v ) { - return complex_v( log( v.re ) , atan2(v.im, v.re) ); + return complex_v( 0.5 * log( v.norm() ) , atan2(v.im, v.re) ); } inline std::ostream& operator<<( std::ostream& os, const complex_v& obj ) { return os << "( "<< obj.re << ") (" << obj.im << ")"; } #pragma omp declare reduction(+: real_v: \ diff --git a/apps/Debugger.cpp b/apps/Debugger.cpp index 98308235d4a..35f65cccd0e 100644 --- a/apps/Debugger.cpp +++ b/apps/Debugger.cpp @@ -55,9 +55,11 @@ template < class FCN > void debug( FCN& sig, EventList& accepted, bool verbose, sig.debug( accepted[eventToDebug] ); accepted[eventToDebug].print(); // if( verbose ) print( accepted[0], sig.matrixElements(), verbose ); - invertParity(accepted[eventToDebug], accepted.eventType().size() ); + for( unsigned int i = 0 ; i != accepted.size(); ++i ) + invertParity(accepted[i], accepted.eventType().size() ); accepted[eventToDebug].print(); sig.reset(); + sig.setEvents(accepted); sig.prepare(); sig.debug( accepted[eventToDebug] ); } diff --git a/apps/Generator.cpp b/apps/Generator.cpp index 1bc01fe1a8b..612cbfed422 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -53,6 +53,7 @@ struct FixedLibPDF void debug( const Event& event) {}; void prepare(){}; void setEvents( AmpGen::EventList& evts ){}; + void setEvents( AmpGen::EventListSIMD& evts ){}; double operator()( const AmpGen::Event& evt ) const { return PDF( evt, 1 ); } double operator()( const double* evt, const unsigned& index ) { @@ -99,6 +100,14 @@ template void generateEvents( EventList& events , TRandom* rndm , const bool& normalise = true ) { + if constexpr( std::is_same::value ) + { + Generator signalGenerator(events.eventType(), 
rndm); + signalGenerator.setBlockSize(blockSize); + signalGenerator.setNormFlag(normalise); + signalGenerator.fillEventList(pdf, events, nEvents ); + } + else { if( phsp_type == phspTypes::PhaseSpace ) { Generator signalGenerator(events.eventType(), rndm); @@ -123,9 +132,11 @@ template void generateEvents( EventList& events else { FATAL("Phase space configuration: " << phsp_type << " is not supported"); } + } } + int main( int argc, char** argv ) { OptionsParser::setArgs( argc, argv ); @@ -196,10 +207,10 @@ int main( int argc, char** argv ) PolarisedSum pdf(eventType, MPS); generateEvents( accepted, pdf, phspType, nEvents, blockSize, &rand ); } - // else if ( pdfType == pdfTypes::FixedLib ){ - // FixedLibPDF pdf(lib); - // generateEvents( accepted, pdf, phspType, nEvents, blockSize, &rand, false ); - // } + else if ( pdfType == pdfTypes::FixedLib ){ + FixedLibPDF pdf(lib); + generateEvents( accepted, pdf, phspType, nEvents, blockSize, &rand, false ); + } else { FATAL("Did not recognise configuration: " << pdfType ); } diff --git a/src/DiracMatrices.cpp b/src/DiracMatrices.cpp index 938c41de246..a17c6d64ebe 100644 --- a/src/DiracMatrices.cpp +++ b/src/DiracMatrices.cpp @@ -23,6 +23,20 @@ extern const std::array AmpGen::Sigma( { Tensor({ Z,-I, I, Z}, Tensor::dim(2,2)), Tensor({ 1, 0, 0,-1}, Tensor::dim(2,2))} ); +extern const std::array AmpGen::Sigma4( { + Tensor({ 0, 1, 0, 0, + 1, 0, 0, 0, + 0, 0, 0, 1, + 0, 0, 1, 0 }, Tensor::dim(4,4)), + Tensor({ Z,-I, Z, Z, + I, Z, Z, Z, + Z, Z, Z, -I, + Z, Z, I, Z }, Tensor::dim(4,4)), + Tensor({ 1, 0, 0, 0, + 0, -1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, -1 }, Tensor::dim(4,4))} ); + extern const std::array AmpGen::S03 ( { Tensor({ 0, 0, 0, 0, 0, 0,-1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, Tensor::dim(4,4) ), Tensor({ 0, 0, 1, 0, 0, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0 }, Tensor::dim(4,4) ), diff --git a/src/Particle.cpp b/src/Particle.cpp index 00e31034e6f..e625687e659 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -167,6 +167,7 @@ void 
Particle::pdgLookup() m_lineshape = m_lineshape + "." + m_defaultModifier.getVal(); } bool isStrong = quarks() == daughterQuarks(); + if( abs(m_props->pdgID()) == 24 || abs(m_props->pdgID()) == 23 ) isStrong = false; if ( m_name.find( "NonRes" ) != std::string::npos ) isStrong = true; m_minL = m_daughters.size() == 2 ? orbitalRange( isStrong ).first : 0; if ( m_daughters.size() == 2 ) { @@ -519,19 +520,18 @@ Tensor Particle::externalSpinTensor(const int& polState, DebugSymbols* db ) cons { if( m_spinBasis == spinBasis::Weyl ) { + std::array xi; Expression n = fcn::sqrt( 2 * pP*(pP+pZ) ); - double isqrt_two = 1./sqrt(2); - Expression fa = m_props->isNeutrino() ? 2 : fcn::sqrt(pE/m + 1); - Expression fb = m_props->isNeutrino() ? 0 : fcn::sqrt(pE/m - 1); - Expression aligned = make_cse( Abs(pP + pZ) < 10e-6 ) ; - Expression xi10 = make_cse(Ternary( aligned, 1, (pP+pZ)/n )); - Expression xi11 = make_cse(Ternary( aligned, 0, z/n )); - Expression xi00 = make_cse(Ternary( aligned, 0, -zb/n )); - Expression xi01 = make_cse(Ternary( aligned, 1, (pP+pZ)/n )); - if(id > 0 && polState == 1) return isqrt_two * Tensor({ fa*xi10, fa*xi11, fb*xi10, fb*xi11 } ); - if(id > 0 && polState == -1) return isqrt_two * Tensor({ fa*xi00, fa*xi01, -fb*xi00, -fb*xi01 } ); - if(id < 0 && polState == 1) return isqrt_two * Tensor({ fb*xi00, fb*xi01, -fa*xi00, -fa*xi01 } ); - if(id < 0 && polState == -1) return isqrt_two * Tensor({ fb*xi10, fb*xi11, -fa*xi01, -fa*xi11 } ); + + xi[0] = Tensor( {-zb/n , (pP+pZ)/n}); + xi[1] = Tensor( {(pP+pZ)/n, z/n }); + + Expression fa = m_props->isNeutrino() ? polState * fcn::sqrt(pE) : polState * fcn::sqrt( pE/m- 1 ); + Expression fb = m_props->isNeutrino() ? fcn::sqrt(pE) : fcn::sqrt( pE/m + 1 ); + int ind = (id>0 ? polState : -polState) == -1 ? 0 : 1; + + return id > 0 ? 
Tensor({ - fa * xi[ind][0], - fa * xi[ind][1], fb * xi[ind][0], fb * xi[ind][1] }) + : -polState * Tensor({ fb * xi[ind][0], fb * xi[ind][1], fa * xi[ind][0], fa * xi[ind][1] }); } if ( m_spinBasis == spinBasis::Dirac ) { @@ -716,7 +716,7 @@ std::string Particle::topologicalString() const const ParticleProperties* Particle::props() const { return m_props; } bool Particle::isHead() const { return m_parent == nullptr; } -bool Particle::isWeakDecay() const { return quarks() == daughterQuarks(); } +bool Particle::isWeakDecay() const { return quarks() != daughterQuarks(); } bool Particle::isStateGood() const { return m_isStateGood; } bool Particle::isStable() const { return m_daughters.size() == 0; } diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 5994eb171c3..45b0a1bd755 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -61,6 +61,7 @@ PolarisedSum::PolarisedSum(const EventType& type, { std::string objCache = NamedParameter("PolarisedSum::ObjectCache", "" ); spaceType stype = NamedParameter( "PolarisedSum::SpaceType" , spaceType::spin); + { ThreadPool tp(std::thread::hardware_concurrency() ); if( stype == spaceType::spin ) { @@ -74,20 +75,23 @@ PolarisedSum::PolarisedSum(const EventType& type, for(unsigned i = 0; i < m_matrixElements.size(); ++i) { auto [lp, lc] = protoAmps[i]; - tp.enqueue( [i, p=lp, c=lc, polStates, &mps, this] () mutable { + auto & p = lp; + auto & c = lc; + PolarisedSum* ptr = this; + tp.enqueue( [i, p=lp, c=lc, polStates, &mps, ptr] () mutable { Tensor thisExpression( Tensor::dim(polStates.size()) ); DebugSymbols syms; for(unsigned j = 0; j != polStates.size(); ++j) - thisExpression[j] = make_cse( p.getExpression(j == 0 ? &syms: nullptr, polStates[j] ) ); - - this->m_matrixElements[i] = TransitionMatrix( + //thisExpression[j] = make_cse( p.getExpression(j == 0 ? 
&syms: nullptr, polStates[j] ) ); + thisExpression[j] = make_cse( p.getExpression(&syms, polStates[j] ) ); + ptr->m_matrixElements[i] = TransitionMatrix( p, c, CompiledExpression( TensorExpression(thisExpression), p.decayDescriptor(), &mps, - this->m_eventType.getEventFormat(), this->m_debug ? syms : DebugSymbols() ) ); + ptr->m_eventType.getEventFormat(), ptr->m_debug ? syms : DebugSymbols() ) ); - CompilerWrapper().compile( m_matrixElements[i] ); - m_matrixElements[i].size = thisExpression.size(); + CompilerWrapper().compile( ptr->m_matrixElements[i] ); + ptr->m_matrixElements[i].size = thisExpression.size(); }); } } @@ -114,6 +118,7 @@ PolarisedSum::PolarisedSum(const EventType& type, }); } } + } if( m_pVector.size() == 0 ) { auto p = [this](const std::string& name){ return this->m_mps->addOrGet(name, Flag::Fix, 0, 0); }; @@ -125,7 +130,7 @@ PolarisedSum::PolarisedSum(const EventType& type, DebugSymbols db; auto prob = probExpression(transitionMatrix(), convertProxies(m_pVector,[](auto& p){ return Parameter(p->name());} ), m_debug ? &db : nullptr); - m_probExpression = make_expression( prob, "prob_unnormalised", m_mps ); + m_probExpression = make_expression( prob, "prob_unnormalised", m_mps, this->m_debug ? 
db : DebugSymbols() ); } std::vector PolarisedSum::polarisations( const std::string& name ) const @@ -254,13 +259,14 @@ size_t PolarisedSum::size() const void PolarisedSum::reset( const bool& flag ){ m_nCalls = 0 ; } -Tensor PolarisedSum::transitionMatrix() +Tensor PolarisedSum::transitionMatrix() const { auto size = m_dim.first * m_dim.second; std::vector expressions(size, 0); unsigned totalSize = 0 ; - for( auto& me : m_matrixElements ){ - auto coupling = me.coupling.to_expression() ; + for( const auto& me : m_matrixElements ){ + auto coupling = me.coupling.to_expression(); + INFO( me.decayDescriptor() << " " << coupling ); auto cacheIndex = totalSize; for( size_t i = 0 ; i < size ; ++i ){ expressions[i] = expressions[i] + coupling * Parameter( "x1["+std::to_string(cacheIndex+i)+"]",0,true); @@ -325,8 +331,11 @@ void PolarisedSum::debug(const Event& evt) { std::vector this_cache; for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( m_cache(evt.index() / utils::size::value, j*tsize + i) ); - INFO( m_matrixElements[j].decayDescriptor() << " " << vectorToString( this_cache, " ") ); - if( m_debug ) m_matrixElements[0].debug( evt ); + INFO( m_matrixElements[j].decayDescriptor() << " " << vectorToString(this_cache, " ") ); + if( m_debug ){ + m_matrixElements[0].debug( evt ); + m_probExpression.debug(this_cache.data() ); + } } INFO("P(x) = " << getValNoCache(evt) << " " << operator()((const float_v*)nullptr, evt.index() / utils::size::value ) ); INFO("Prod = [" << vectorToString(m_pVector , ", ") <<"]"); @@ -398,6 +407,7 @@ Expression PolarisedSum::probExpression(const Tensor& T_matrix, const std::vecto rho(2,2) = 1. 
- 1.5*pz + sqrt(1.5)*Tzz; } ADD_DEBUG_TENSOR(T_matrix, db); + ADD_DEBUG_TENSOR(T_conj, db); ADD_DEBUG_TENSOR(rho, db); ADD_DEBUG_TENSOR(TT , db); Expression rt = rho(a,b) * TT(b,a); diff --git a/src/Spline.cpp b/src/Spline.cpp index bcdc41bf3ce..ed4fcf38530 100644 --- a/src/Spline.cpp +++ b/src/Spline.cpp @@ -63,7 +63,7 @@ Expression Spline::eval(DebugSymbols* db) const double spacing = ( m_max - m_min ) / ( (double)m_nKnots - 1. ); Expression dx = Fmod( x - m_min, spacing ); Expression bin = ( x - m_min ) / spacing; - Expression continuedValue = 0; + Expression continuedValue = m_points[m_nKnots-1]; Expression returnValue = Ternary( x > m_min && x < m_max, m_points[bin] + ( ( m_points[bin + 1] - m_points[bin] ) / spacing diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 5195f2299bf..90ae8a46863 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -413,9 +413,6 @@ TensorProxy AmpGen::operator*( const TensorProxy& t1, const TensorProxy& t2 ) Tensor value( finalTensorRank ); unsigned nElem = value.nElements(); - //DEBUG("Got " << t1_tensor.dims().size() << " x " << t2_tensor.dims().size() << " with " << contractions.size() << " contractions " << nElementsInSum); - //DEBUG(t1_tensor.dimString() << " x " << t2_tensor.dimString() << " -> " << value.dimString()); - //DEBUG("Contraction matrix = " << "[" << vectorToString(contractionMatrix, ", ") << "]"); for( unsigned elem = 0; elem < nElem; ++elem ) { auto coords = Tensor::index_to_coordinates( elem, finalTensorRank ); diff --git a/src/Vertex.cpp b/src/Vertex.cpp index 2a3029e7f21..f86045f0604 100644 --- a/src/Vertex.cpp +++ b/src/Vertex.cpp @@ -426,11 +426,15 @@ DEFINE_VERTEX( V_ff_S1 ){ return Bar(V2)(a) * Gamma[4](a,b) * Gamma4Vec()(mu,b,c DEFINE_VERTEX( V_ff_PL ) { Tensor proj = Spin1Projector(P); + auto pl = 0.5 * ( Identity(4) - Gamma[4] ); + ADD_DEBUG_TENSOR( pl, db); + ADD_DEBUG_TENSOR( pl(b,c)* V2(c), db ); return proj(mu, nu) * Bar(V1)(a) * Gamma4Vec()(-nu,a,b) * ( Identity(4) - Gamma[4] )(b,c)* V2(c); } 
DEFINE_VERTEX( V_ff_PR ) { Tensor proj = Spin1Projector(P); + ADD_DEBUG_TENSOR( (Identity(4) + Gamma[4] )(b,c)* V2(c), db ); return proj(mu, nu) * Bar(V1)(a) * Gamma4Vec()(-nu,a,b) * ( Identity(4) + Gamma[4] )(b,c)* V2(c); } diff --git a/test/test_avx2d.cpp b/test/test_avx2d.cpp index a458fa294d9..049dc34b186 100644 --- a/test/test_avx2d.cpp +++ b/test/test_avx2d.cpp @@ -12,15 +12,21 @@ namespace utf = boost::unit_test; using namespace AmpGen; using namespace AmpGen::AVX2d; +using namespace std::complex_literals; + +#define test_simd( avx_function, scalar_function, data, tv) \ +{ auto r = avx_function( data ).to_array(); auto vals = data.to_array(); \ + for(int i =0;i!=4;++i) BOOST_TEST( r[i] == scalar_function(vals[i]), boost::test_tools::tolerance(tv) ); } BOOST_AUTO_TEST_CASE( test_log ) { - AVX2d::real_v p(0.3, 0.5, 10.0, 7.0); - auto logged = AVX2d::log( p ).to_array() ; - BOOST_TEST( logged[0] == std::log(0.3), boost::test_tools::tolerance(1e-12 ) ); - BOOST_TEST( logged[1] == std::log(0.5), boost::test_tools::tolerance(1e-12 ) ); - BOOST_TEST( logged[2] == std::log(10.0), boost::test_tools::tolerance(1e-12 ) ); - BOOST_TEST( logged[3] == std::log(7.0), boost::test_tools::tolerance(1e-12 ) ); + test_simd( AVX2d::log, std::log, AVX2d::real_v(0.3, 0.5, 10, 7.0), 1e-12 ); +} + +BOOST_AUTO_TEST_CASE( test_complex_log ) +{ + std::array, 4> pr = {0.3 - 3.0*1i, 0.5 - 4.0*1i, 10.+3.*1i, -4.0 + 1.0*1i}; + test_simd( AVX2d::log, std::log, AVX2d::complex_v( pr.data() ), 1e-8 ); } BOOST_AUTO_TEST_CASE( test_fmod ) @@ -32,7 +38,6 @@ BOOST_AUTO_TEST_CASE( test_fmod ) AVX2d::real_v bv( b.data() ); auto modv = AVX2d::fmod(av,bv); - BOOST_TEST_MESSAGE( "fmod = " << modv ); auto mod = modv.to_array(); BOOST_TEST( mod[0] == 2.1 , boost::test_tools::tolerance(1e-15)); @@ -68,27 +73,11 @@ BOOST_AUTO_TEST_CASE( test_gather ) BOOST_AUTO_TEST_CASE( test_trig ) { auto data = AVX2d::real_v(0.1,0.4,-2.0,5.0); - auto cos = AVX2d::cos(data).to_array(); - BOOST_TEST( cos[0] == std::cos( 
data.at(0 )) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( cos[1] == std::cos( data.at(1 )) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( cos[2] == std::cos( data.at(2)) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( cos[3] == std::cos( data.at(3 )) , boost::test_tools::tolerance(1e-15) ); - - auto sin = AVX2d::sin(data).to_array(); - BOOST_TEST( sin[0] == std::sin( data.at(0 )) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( sin[1] == std::sin( data.at(1 )) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( sin[2] == std::sin( data.at(2)) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( sin[3] == std::sin( data.at(3 )) , boost::test_tools::tolerance(1e-15) ); - - auto tan = AVX2d::tan(data).to_array(); - - BOOST_TEST( tan[0] == std::tan( data.at(0 )) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( tan[1] == std::tan( data.at(1 )) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( tan[2] == std::tan( data.at(2)) , boost::test_tools::tolerance(1e-15) ); - BOOST_TEST( tan[3] == std::tan( data.at(3 )) , boost::test_tools::tolerance(1e-15) ); + test_simd( AVX2d::cos, std::cos, data, 1e-15); + test_simd( AVX2d::sin, std::sin, data, 1e-15); + test_simd( AVX2d::tan, std::tan, data, 1e-15); } - #else BOOST_AUTO_TEST_CASE( test_dummy ) { From 08d32d095f1bca088943512158d2473c2f92e85c Mon Sep 17 00:00:00 2001 From: Timothy David Evans Date: Wed, 29 Jul 2020 18:01:35 +0200 Subject: [PATCH 51/67] fix debugging of |T|^2 in PolarisedSum --- src/PolarisedSum.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 45b0a1bd755..1f3f4407dfa 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -82,8 +82,8 @@ PolarisedSum::PolarisedSum(const EventType& type, Tensor thisExpression( Tensor::dim(polStates.size()) ); DebugSymbols syms; for(unsigned j = 0; j != polStates.size(); ++j) - //thisExpression[j] = make_cse( p.getExpression(j == 0 ? 
&syms: nullptr, polStates[j] ) ); - thisExpression[j] = make_cse( p.getExpression(&syms, polStates[j] ) ); + thisExpression[j] = make_cse( p.getExpression(j == 0 ? &syms: nullptr, polStates[j] ) ); + //thisExpression[j] = make_cse( p.getExpression(&syms, polStates[j] ) ); ptr->m_matrixElements[i] = TransitionMatrix( p, c, CompiledExpression( @@ -327,16 +327,14 @@ void PolarisedSum::updateNorms() void PolarisedSum::debug(const Event& evt) { auto tsize = m_dim.first * m_dim.second; + std::vector this_cache; for(unsigned j = 0; j != m_matrixElements.size(); ++j) { - std::vector this_cache; for(unsigned i = 0 ; i != tsize; ++i ) this_cache.emplace_back( m_cache(evt.index() / utils::size::value, j*tsize + i) ); INFO( m_matrixElements[j].decayDescriptor() << " " << vectorToString(this_cache, " ") ); - if( m_debug ){ - m_matrixElements[0].debug( evt ); - m_probExpression.debug(this_cache.data() ); - } - } + if( m_debug ) m_matrixElements[j].debug( evt ); + } + if( m_debug ) m_probExpression.debug(this_cache.data() ); INFO("P(x) = " << getValNoCache(evt) << " " << operator()((const float_v*)nullptr, evt.index() / utils::size::value ) ); INFO("Prod = [" << vectorToString(m_pVector , ", ") <<"]"); } From f88f7f66f474faf782bbf5ea9eedd98008cdc8b5 Mon Sep 17 00:00:00 2001 From: tevans Date: Thu, 30 Jul 2020 10:25:14 +0200 Subject: [PATCH 52/67] hopefully scalar builds don't accidentally include some AVX instructions --- CMakeLists.txt | 2 +- Standalone.cmake | 24 +++++++++++++----------- options/MintDalitzSpecialParticles.csv | 1 + src/Utilities.cpp | 1 + 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad75fc9b283..f05bc3f941a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ if(COMMAND gaudi_subdir) LINK_LIBRARIES AmpGen) endforeach() else() - cmake_minimum_required(VERSION 3.12) + cmake_minimum_required(VERSION 3.11.1) project(AmpGen LANGUAGES CXX VERSION 2.0) include(Standalone.cmake) endif() diff --git 
a/Standalone.cmake b/Standalone.cmake index c2284d0e92e..03804d5cbef 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -166,26 +166,28 @@ target_compile_options(AmpGen -Wno-unknown-pragmas $<$:-O3>) -# if( NOT ${USE_SIMD} MATCHES "" ) - if ( ${USE_SIMD} MATCHES "AVX2d" ) +if ( ${USE_SIMD} MATCHES "AVX2d" ) message(STATUS "Enabling AVX2 [double precision]") target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=1" "ENABLE_AVX2d=1") target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -ffast-math -DHAVE_AVX2_INSTRUCTIONS) - elseif ( ${USE_SIMD} MATCHES "AVX2f" ) +elseif ( ${USE_SIMD} MATCHES "AVX2f" ) message(STATUS "Enabling AVX2 [single precision]") target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=1" "ENABLE_AVX2f=1") target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx2 -ffast-math -DHAVE_AVX2_INSTRUCTIONS) - elseif ( ${USE_SIMD} MATCHES "AVX512d" ) +elseif ( ${USE_SIMD} MATCHES "AVX512d" ) message(STATUS "Enabling AVX2 [double precision]") target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=1" "ENABLE_AVX512=1") target_compile_options(AmpGen PUBLIC -march=native -ftree-vectorize -mavx512f -ffast-math -DHAVE_AVX512_INSTRUCTIONS) - endif() - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) - target_compile_options(AmpGen PUBLIC -mfma) - endif() - # else() - # message("SIMD disabled, resorting to scalar build : ${USE_SIMD}") - # endif() +else() + target_compile_definitions(AmpGen PUBLIC "ENABLE_AVX=0") + target_compile_options(AmpGen PUBLIC -march=x86-64) + message("SIMD disabled, resorting to scalar build : ${USE_SIMD}") +endif() + +if(${CMAKE_CXX_COMPILER_ID} MATCHES "AppleClang" OR + ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang" ) + target_compile_options(AmpGen PUBLIC -mfma) +endif() if("${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" ) target_link_libraries(AmpGen PUBLIC stdc++ ) diff --git a/options/MintDalitzSpecialParticles.csv 
b/options/MintDalitzSpecialParticles.csv index 00c3a6774b6..9ed8bdb1b0f 100644 --- a/options/MintDalitzSpecialParticles.csv +++ b/options/MintDalitzSpecialParticles.csv @@ -48,4 +48,5 @@ 1.86484E+03 ,1.7E-01,1.7E-01,1.605E-09 ,6.0E-12,6.0E-12,1/2, ,0 ,-, ,F, 998421, 0, ,R,D~ ,cU 3.8948E+03 ,1.1E-02,1.1E-02,2.96E+00 ,2.1E-03,2.1E-03,1 ,+,1 ,+,-, , 999443, 0, ,R,Z(c)(3900) ,cCuU 3.8948E+03 ,1.1E-02,1.1E-02,2.96E+00 ,2.1E-03,2.1E-03,1 ,+,1 ,+,-, , 999444, +, ,R,Z(c)(3900) ,cCuD +9.990E+03 ,1.0E+01,1.0E+01,9.9E+09 ,3.0E+01,3.0E+01,1/2,+,0 ,+,+,F, 998140, 0, ,R,XiPi0 ,?? *** diff --git a/src/Utilities.cpp b/src/Utilities.cpp index a03553b20fc..60ba141d84e 100644 --- a/src/Utilities.cpp +++ b/src/Utilities.cpp @@ -331,6 +331,7 @@ std::string AmpGen::expandGlobals( std::string path ) } else { end_pos = find_next_of( path, {".", "/"}, pos ); variable_name = path.substr( pos + 1, end_pos - pos - 1 ); + end_pos--; } const char* global_var = getenv( variable_name.c_str() ); if ( global_var == nullptr ) { From 76fa62ab0e5deb5d692158547884a8499837c33a Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 5 Aug 2020 19:39:37 +0200 Subject: [PATCH 53/67] change the way constants are encoded into strings to allow very large (or small) constants --- src/Expression.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Expression.cpp b/src/Expression.cpp index 64661e26c85..3ea94ad33de 100644 --- a/src/Expression.cpp +++ b/src/Expression.cpp @@ -52,9 +52,8 @@ bool isZero( const complex_t& A ){ std::string Constant::to_string(const ASTResolver* resolver) const { auto rounded_string = [](const double& val ){ - std::string str = std::to_string (val); - str.erase ( str.find_last_not_of('0') + 1, std::string::npos ); - return str; + std::string str = mysprintf("%g", val); + return str.find(".") != std::string::npos or str.find("e") != std::string::npos ? 
str : str + "."; }; std::string complex_type = type_string(); std::string literalSuffix = ""; From 5365228afabe1a34789946c6db848da784e58da1 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 5 Aug 2020 20:26:26 +0200 Subject: [PATCH 54/67] possible fix in CompilerWrapper --- src/CompilerWrapper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index 80962299085..64d9460f38f 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -166,9 +166,9 @@ void CompilerWrapper::compileSource( const std::string& fname, const std::string #if __APPLE__ argp.push_back("-lstdc++"); #endif - #ifdef _OPENMP - argp.push_back("-fopenmp=libiomp5"); - #endif +// #ifdef _OPENMP +// argp.push_back("-fopenmp=libiomp5"); +// #endif } argp.push_back( fname.c_str() ); From e83679b917729c9060bba553aafdfe6f184c4044 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 5 Aug 2020 20:51:07 +0200 Subject: [PATCH 55/67] disable openmp for osx by default --- src/CompilerWrapper.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/CompilerWrapper.cpp b/src/CompilerWrapper.cpp index 64d9460f38f..da79239729a 100644 --- a/src/CompilerWrapper.cpp +++ b/src/CompilerWrapper.cpp @@ -131,12 +131,14 @@ bool CompilerWrapper::isClang() const return m_cxx.find("clang") != std::string::npos || m_cxx.find("llvm-g++") != std::string::npos; } -std::string get_cpp_version(){ +std::string get_cpp_version() +{ if( __cplusplus >= 201703L ) return "c++17"; if( __cplusplus >= 201402L ) return "c++14"; if( __cplusplus >= 201103L ) return "c++11"; else return ""; } + void CompilerWrapper::compileSource( const std::string& fname, const std::string& oname ) { using namespace std::chrono_literals; @@ -151,7 +153,7 @@ void CompilerWrapper::compileSource( const std::string& fname, const std::string compile_flags.push_back("-mavx2"); compile_flags.push_back("-DHAVE_AVX2_INSTRUCTIONS"); #endif - #ifdef 
_OPENMP + #if USE_OPENMP compile_flags.push_back("-fopenmp"); #endif @@ -166,9 +168,9 @@ void CompilerWrapper::compileSource( const std::string& fname, const std::string #if __APPLE__ argp.push_back("-lstdc++"); #endif -// #ifdef _OPENMP -// argp.push_back("-fopenmp=libiomp5"); -// #endif + #if USE_OPENMP + argp.push_back("-fopenmp=libiomp5"); + #endif } argp.push_back( fname.c_str() ); From 559e8ea98812b2d7c22b5f5c428893b8d1061772 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 5 Aug 2020 21:16:58 +0200 Subject: [PATCH 56/67] fix openmp silliness --- Standalone.cmake | 4 +++- apps/DataConverter.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Standalone.cmake b/Standalone.cmake index 19c4076325c..8784ca4f74f 100644 --- a/Standalone.cmake +++ b/Standalone.cmake @@ -100,6 +100,7 @@ endif() target_link_libraries(AmpGen PUBLIC ROOT::Minuit2 ) if( USE_OPENMP ) + target_compile_definitions(AmpGen PUBLIC "USE_OPENMP=1") if(OpenMP_FOUND OR OpenMP_CXX_FOUND) if(NOT TARGET OpenMP::OpenMP_CXX) add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE) @@ -116,6 +117,8 @@ if( USE_OPENMP ) else() message(STATUS "OpenMP not found for CXX, you might have forgotten lb-run ROOT bash or CXX=`which g++` in CERN stack") endif() +else() + target_compile_definitions(AmpGen PUBLIC "USE_OPENMP=0") endif() set(RAPIDSIM_DATA "") @@ -154,7 +157,6 @@ target_compile_definitions(AmpGen PRIVATE "AMPGENROOT_CMAKE=\"${CMAKE_BINARY_DIR}/bin\"" "AMPGENROOT=\"${PROJECT_SOURCE_DIR}\"" "AMPGEN_CXX=\"${AMPGEN_CXX}\"" - "USE_OPENMP=\"${USE_OPENMP}\"" $<$:DEBUGLEVEL=1> $<$:TRACELEVEL=1>) diff --git a/apps/DataConverter.cpp b/apps/DataConverter.cpp index 8a41a628a0e..bde9ca91297 100644 --- a/apps/DataConverter.cpp +++ b/apps/DataConverter.cpp @@ -210,7 +210,7 @@ int main( int argc, char* argv[] ) p( evts ) -> Write(); // p( evts, WeightFunction([](auto& evt){ return 1; }), PlotOptions::Prefix("noweight") )->Write(); } - for( int i = 0 ; i != evtType.size(); ++i ) + for( unsigned i = 0 
; i != evtType.size(); ++i ) { Projection p( [i](auto& event){ return sqrt( event.s(i) ); }, "m_"+std::to_string(i), "m_"+std::to_string(i), 100, 0, 2.5 ); p(evts)->Write(); From d39508a1a2cf43f08e76192a5f9fbada8dc74967 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 5 Aug 2020 21:32:27 +0200 Subject: [PATCH 57/67] disable openmp in the travis job --- .ci/travis_osx.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.ci/travis_osx.sh b/.ci/travis_osx.sh index 2ca3917298e..2c68f74053a 100644 --- a/.ci/travis_osx.sh +++ b/.ci/travis_osx.sh @@ -10,16 +10,9 @@ echo "Building under OS: $TRAVIS_OS_NAME" mkdir -p build cd build echo "CMake-ing, CXX = $CXX" -cmake .. -DCMAKE_CXX_COMPILER=clang -DUSE_SIMD="" +cmake .. -DCMAKE_CXX_COMPILER=clang -DUSE_SIMD="" -DUSE_OPENMP=0 echo "Building ..." cmake --build . -- -j2 cd .. echo "Running test job ..." ./build/bin/Generator options/example_b2kstarll.opt --CompilerWrapper::Verbose --nEvents 10000 -# echo -e 'travis_fold:end:script.build\\r' -# echo -en 'travis_fold:start:script.test\\r' -# echo "Testing..." 
-# set -evx - -# ctest --output-on-failure - From 754298aabb96b2dccff2df716b91354ca9e1f54d Mon Sep 17 00:00:00 2001 From: mstahl Date: Mon, 10 Aug 2020 12:53:43 +0200 Subject: [PATCH 58/67] [Lineshapes] Add production (P-)vector approach to GenericKmatrix and use it by default --- src/Lineshapes/GenericKmatrix.cpp | 115 +++++++++++++++++++----------- 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/src/Lineshapes/GenericKmatrix.cpp b/src/Lineshapes/GenericKmatrix.cpp index a6276f4156b..b66cbe9cda7 100644 --- a/src/Lineshapes/GenericKmatrix.cpp +++ b/src/Lineshapes/GenericKmatrix.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "AmpGen/Expression.h" #include "AmpGen/Factory.h" @@ -16,88 +17,118 @@ #include "AmpGen/Utilities.h" #include "AmpGen/kMatrix.h" #include "AmpGen/CoupledChannel.h" +#include "AmpGen/enum.h" using namespace AmpGen; -using namespace AmpGen::fcn; +using namespace AmpGen::fcn; +namespace AmpGen{ make_enum(PA_TYPE, PVec, QVec); } DEFINE_LINESHAPE(GenericKmatrix) { - auto props = ParticlePropertiesList::get( particleName ); - Expression mass = Parameter( particleName + "_mass", props->mass() ); - INFO( "kMatrix modifier " << lineshapeModifier << " particle = " << particleName ); - auto tokens = split(lineshapeModifier, '.' 
); - DEBUG("kMatrix modifier = " << lineshapeModifier << " nTokens = " << tokens.size() ); - unsigned nPoles = NamedParameter( lineshapeModifier + "::kMatrix::nPoles"); - auto channels = NamedParameter(lineshapeModifier + "::kMatrix::channels").getVector(); - unsigned nChannels = channels.size(); - std::vector phsps; - std::vector bw_phase_space; - auto s0 = mass*mass; + auto props = ParticlePropertiesList::get( particleName ); + Expression mass = Parameter( particleName + "_mass", props->mass() ); + unsigned nPoles = NamedParameter(particleName+"::kMatrix::nPoles"); + auto channels = NamedParameter(particleName+"::kMatrix::channels").getVector(); + auto const pa_type = NamedParameter(particleName+"::kMatrix::production_amplitude",PA_TYPE::PVec); + auto nChannels = channels.size(); + auto s0 = mass*mass; + std::vector phsps, bw_phase_space; ADD_DEBUG(s, dbexpressions ); ADD_DEBUG(s0, dbexpressions ); - INFO("Initialising K-matrix with [nChannels = " << nChannels << ", nPoles = " << nPoles << "]"); + INFO("Initialising K-matrix with [nChannels = " << nChannels << ", nPoles = " << nPoles << "]"); + //phase-space for( unsigned i = 0 ; i < channels.size(); i+=1 ){ - Particle p( channels[i] ); + Particle p( channels[i] ); INFO( p.decayDescriptor() ); phsps.emplace_back( phaseSpace(s, p, p.L() ) ); bw_phase_space.emplace_back( phaseSpace(s0, p, p.L() ) ); if( dbexpressions != nullptr ) dbexpressions->emplace_back("phsp_"+p.decayDescriptor(), *phsps.rbegin() ); //ADD_DEBUG( *phsps.rbegin(), dbexpressions); -// ADD_DEBUG( phaseSpace(s0,p,p.L()), dbexpressions ); +// ADD_DEBUG( phaseSpace(s0,p,p.L()), dbexpressions ); } - Tensor non_resonant( Tensor::dim(nChannels, nChannels) ); + //pole configuration for kMatrix (see e.g. eq. 
(48.25) in http://pdg.lbl.gov/2019/reviews/rpp2019-rev-resonances.pdf) std::vector poleConfigs; for (unsigned pole = 1; pole <= nPoles; ++pole ){ - std::string stub = lineshapeModifier + "::pole::" + std::to_string(pole); + std::string stub = particleName+"::pole::" + std::to_string(pole); Expression mass = Parameter(stub + "::mass"); - INFO( "Will link to parameter: " << stub + "::mass"); + DEBUG( "Will link to parameter: " << stub + "::mass"); poleConfig thisPole(mass*mass); if( dbexpressions != nullptr ) dbexpressions->emplace_back(stub+"::mass", mass); Expression bw_width = 0; Expression bw_width0 = 0; - for (unsigned channel = 1; channel <= nChannels; ++channel ) - { + for (unsigned channel = 1; channel <= nChannels; ++channel ){ Expression g = Parameter(stub+"::g::"+std::to_string(channel)); - INFO("Will link to parameter: " << stub+"::g::"+std::to_string(channel) ); + DEBUG("Will link to parameter: " << stub+"::g::"+std::to_string(channel) ); thisPole.add(g, 1); - if( dbexpressions != nullptr ){ - dbexpressions->emplace_back( stub+"::g::"+std::to_string(channel), g); - } + if(dbexpressions != nullptr) dbexpressions->emplace_back( stub+"::g::"+std::to_string(channel), g); bw_width = bw_width + g*g*phsps[channel-1] / mass; bw_width0 = bw_width0 + g*g*bw_phase_space[channel-1] / mass; } - for( unsigned channel = 1 ; channel <= nChannels; ++channel ){ - Expression g = Parameter(stub+"::g::"+std::to_string(channel)); - Expression BR = g*g*bw_phase_space[channel-1] / ( mass * bw_width0 ); - ADD_DEBUG( BR, dbexpressions ); + if( dbexpressions != nullptr ){ + for( unsigned channel = 1 ; channel <= nChannels; ++channel ){ + Expression g = Parameter(stub+"::g::"+std::to_string(channel)); + Expression BR = g*g*bw_phase_space[channel-1] / ( mass * bw_width0 ); + ADD_DEBUG( BR, dbexpressions ); + } + ADD_DEBUG(bw_width, dbexpressions); + ADD_DEBUG(bw_width0, dbexpressions); } - ADD_DEBUG(bw_width, dbexpressions); - ADD_DEBUG(bw_width0, dbexpressions); 
poleConfigs.push_back(thisPole); } + //add non-resonant term to kMatrix (eq. (48.25) in http://pdg.lbl.gov/2019/reviews/rpp2019-rev-resonances.pdf) + Tensor non_resonant( Tensor::dim(nChannels, nChannels) ); for(unsigned ch1 = 1; ch1 <= nChannels; ++ch1){ for( unsigned ch2 = 1; ch2 <= nChannels; ++ch2 ){ auto c1 = std::to_string(ch1); - auto c2 = std::to_string(ch2); + auto c2 = std::to_string(ch2); if( ch1 > ch2 ) std::swap(c1,c2); - std::string nrShape = NamedParameter(lineshapeModifier +"::"+c1+"::"+c2+"::nrShape", "flat"); - Expression f1 = Parameter(lineshapeModifier+"::f1::"+c1+"::"+c2, 0); + std::string nrShape = NamedParameter(particleName+"::"+c1+"::"+c2+"::nrShape", "flat"); + Expression f1 = Parameter(particleName+"::f1::"+c1+"::"+c2, 0); if( nrShape == "flat") non_resonant[{ch1-1,ch2-1}] = f1; - else if( nrShape == "pole"){ - Expression f2 = Parameter(lineshapeModifier+"::f2::"+c1+"::"+c2, 0); - Expression s0 = Parameter(lineshapeModifier+"::s0::"+c1+"::"+c2, 0); + else if( nrShape == "pole"){ + Expression f2 = Parameter(particleName+"::f2::"+c1+"::"+c2, 0); + Expression s0 = Parameter(particleName+"::s0::"+c1+"::"+c2, 0); non_resonant[{ch1-1,ch2-1}] = (f1 + f2*sqrt(s)) / (s-s0); } - else WARNING("Unknown shape: " << nrShape); + else WARNING("Unknown shape: " << nrShape); } } + Tensor kMatrix = constructKMatrix(s, nChannels, poleConfigs); - ADD_DEBUG_TENSOR(kMatrix , dbexpressions); - kMatrix = kMatrix + non_resonant; + ADD_DEBUG_TENSOR(kMatrix, dbexpressions); + kMatrix = kMatrix + non_resonant; Tensor propagator = getPropagator(kMatrix, phsps); ADD_DEBUG_TENSOR(non_resonant, dbexpressions); - Expression M; - for(unsigned i = 0 ; i < nChannels; ++i) M = M + kMatrix[{i,0}] * propagator[{0,i}]; - return M ; // * phsps[0]; + + //we have all ingredients to build the production amplitude now + //follow https://doi.org/10.1007/s1010502a0002 eqns (9)-(13) modulo the Adler zero term (which is argued away in a fuzzy manner by citing a private communication) + 
if(pa_type==PA_TYPE::PVec){ + std::vector P(nChannels,0), a(nChannels,0), phi(nChannels,0);//the P-vector, a and phi coefficients + Expression s_0 = Parameter(particleName+"::s0"); + Expression F_0 = 0;//the object we'll return later: the production amplitude in the 0th channel + //get the coefficients first + for(unsigned k = 0 ; k < nChannels; ++k) a[k] = Parameter(particleName+"::a::"+std::to_string(k+1)); + //now start loop to calculate the production amplitude + for(unsigned k = 0 ; k < nChannels; ++k){ + for(unsigned alpha = 0; alpha < nPoles; ++alpha){ + Expression beta = 0; + for(unsigned q = 0 ; q < nChannels; ++q){ + beta += a[q] * poleConfigs[alpha].couplings[q]; + phi[k] += a[q] * non_resonant[{k,q}]; + } + P[k] += (beta * poleConfigs[alpha].couplings[k])/(poleConfigs[alpha].s - s) + phi[k] * (1.+s_0)/(s-s_0); + } + F_0 += propagator[{0,k}] * P[k]; + } + //TODO: implement higher orbital angular momentum + return F_0; + } + else if(pa_type==PA_TYPE::QVec){ + INFO("Using Q-vector approach to build the production amplitude"); + Expression M; + for(unsigned i = 0 ; i < nChannels; ++i) M = M + kMatrix[{i,0}] * propagator[{0,i}]; + return M ; // * phsps[0]; + } + else throw std::runtime_error("This shouldn't happen. 
Currently supported types: P-vector approach (PA_TYPE::PVec) and Q-vector approach (PA_TYPE::QVec)"); } From dde16712aea45ad71b7f3d4228b6ba12e881be55 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Mon, 10 Aug 2020 17:37:54 +0200 Subject: [PATCH 59/67] fix custom helicity couplings + cleanup of evaluation of PDFs to arbitrary datasets --- AmpGen/CoherentSum.h | 4 +-- AmpGen/KeyedFunctors.h | 31 ++++++++++++++++++++ AmpGen/LiteSpan.h | 42 --------------------------- AmpGen/MinuitParameter.h | 3 +- AmpGen/PolarisedSum.h | 2 +- AmpGen/Projection.h | 4 +-- AmpGen/SumPDF.h | 6 ++-- apps/ConvertToSourceCode.cpp | 55 ++++++++++++++++++------------------ src/CoherentSum.cpp | 23 ++++++++++----- src/MinuitParameter.cpp | 2 +- src/MinuitParameterSet.cpp | 12 ++------ src/Particle.cpp | 6 ++-- src/PolarisedSum.cpp | 52 +++++++++++++++++++--------------- 13 files changed, 119 insertions(+), 123 deletions(-) create mode 100644 AmpGen/KeyedFunctors.h delete mode 100644 AmpGen/LiteSpan.h diff --git a/AmpGen/CoherentSum.h b/AmpGen/CoherentSum.h index 9b0bf903f7c..297f4eb65c2 100644 --- a/AmpGen/CoherentSum.h +++ b/AmpGen/CoherentSum.h @@ -21,7 +21,7 @@ #include "AmpGen/Projection.h" #include "AmpGen/MinuitParameter.h" #include "AmpGen/Store.h" -#include "AmpGen/LiteSpan.h" +#include "AmpGen/KeyedFunctors.h" namespace AmpGen { class LinearErrorPropagator; @@ -99,7 +99,7 @@ namespace AmpGen Bilinears norms() const { return m_normalisations ; } std::function evaluator(const EventList_type* = nullptr) const; - KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; + KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; protected: std::vector> m_matrixElements; ///< Vector of matrix elements diff --git a/AmpGen/KeyedFunctors.h b/AmpGen/KeyedFunctors.h new file mode 100644 index 00000000000..c1c9fa7654a --- /dev/null +++ b/AmpGen/KeyedFunctors.h @@ -0,0 +1,31 @@ +#ifndef AMPGEN_LITESPAN_H +#define AMPGEN_LITESPAN_H + +#include "AmpGen/MsgService.h" 
+#include "AmpGen/Utilities.h" + +namespace AmpGen { + template struct KeyedFunctors; + + template struct KeyedFunctors + { + std::vector > functors; + std::vector keys; + std::vector titles; + template void add(const functor_type& functor, const std::string& key, const std::string& title="") + { + functors.push_back(functor); + keys.push_back(key); + titles.push_back(title); + } + std::vector operator()( arg_types... arg ) const + { + std::vector rt; + for( auto& f : functors ) rt.push_back( f(arg...) ); + return rt; + } + }; + +} + +#endif diff --git a/AmpGen/LiteSpan.h b/AmpGen/LiteSpan.h deleted file mode 100644 index e947665a4ef..00000000000 --- a/AmpGen/LiteSpan.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef AMPGEN_LITESPAN_H -#define AMPGEN_LITESPAN_H - -#include "AmpGen/MsgService.h" -#include "AmpGen/Utilities.h" - -namespace AmpGen { - // replace with std::span when c++20 becomes widespread - template class LiteSpan - { - public: - LiteSpan( const type* data, unsigned size =1) : m_data(data), m_size(size) {} - const type& operator[](const unsigned index) const { return m_data[index]; } - operator type() const { return m_data[0] ; } - unsigned size() const { return m_size; } - private: - const type* m_data = {nullptr}; - unsigned m_size = {0}; - }; - - template struct KeyedFunctors - { - std::vector > functors; - std::vector keys; - std::vector titles; - template - void add(const functor_type& functor, const std::string& key, const std::string& title="") - { - functors.push_back(functor); - keys.push_back(key); - titles.push_back(title); - } - std::vector operator()( const arg_type& arg ) const { - - std::vector rt; - for( auto& f : functors ) rt.push_back( f(arg) ); - return rt; } - }; - -} - -#endif diff --git a/AmpGen/MinuitParameter.h b/AmpGen/MinuitParameter.h index dbdaadf10bf..197f660ffb2 100644 --- a/AmpGen/MinuitParameter.h +++ b/AmpGen/MinuitParameter.h @@ -10,8 +10,7 @@ namespace AmpGen { class MinuitParameterSet; - declare_enum( Flag, Free, Hide, 
Fix, CompileTimeConstant ) - + declare_enum( Flag, Free, Hide, Fix, CompileTimeConstant, Invalid ) class MinuitParameter { public: diff --git a/AmpGen/PolarisedSum.h b/AmpGen/PolarisedSum.h index 70221e910e0..83ec58ccda2 100644 --- a/AmpGen/PolarisedSum.h +++ b/AmpGen/PolarisedSum.h @@ -72,7 +72,7 @@ namespace AmpGen Tensor transitionMatrix() const; const TransitionMatrix& operator[](const size_t& i) const { return m_matrixElements[i] ; } std::function evaluator(const EventList_type* = nullptr) const; - KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; + KeyedFunctors componentEvaluator(const EventList_type* = nullptr) const; private: size_t m_nCalls = {0}; real_t m_norm = {1}; diff --git a/AmpGen/Projection.h b/AmpGen/Projection.h index 8192ef58c25..ce91ffb9e1e 100644 --- a/AmpGen/Projection.h +++ b/AmpGen/Projection.h @@ -12,7 +12,7 @@ #include "AmpGen/ArgumentPack.h" #include "AmpGen/Types.h" -#include "AmpGen/LiteSpan.h" +#include "AmpGen/KeyedFunctors.h" namespace AmpGen { @@ -22,7 +22,7 @@ namespace AmpGen class Projection { - using keyedFunctors = KeyedFunctors; + using keyedFunctors = KeyedFunctors; public: Projection(); template diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index 830c7d4413c..e9663c5f127 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -5,7 +5,7 @@ #include "AmpGen/MetaUtils.h" #include "AmpGen/MsgService.h" #include "AmpGen/ProfileClock.h" -#include "AmpGen/LiteSpan.h" +#include "AmpGen/KeyedFunctors.h" #include #if ENABLE_AVX @@ -119,9 +119,9 @@ namespace AmpGen } ); return arrayToFunctor(values); } - KeyedFunctors componentEvaluator(const eventListType* events) const + KeyedFunctors componentEvaluator(const eventListType* events) const { - KeyedFunctors view; + KeyedFunctors view; for_each( this->m_pdfs, [&view, &events]( const auto& pdf) mutable { auto eval = pdf.evaluator(events); view.add([eval](const auto& event){ return eval(event) ; } , type_string(pdf), "" ); diff --git a/apps/ConvertToSourceCode.cpp 
b/apps/ConvertToSourceCode.cpp index 94ffd3f8567..e1695ff454c 100644 --- a/apps/ConvertToSourceCode.cpp +++ b/apps/ConvertToSourceCode.cpp @@ -71,31 +71,44 @@ void create_integration_tests(T& pdf, } */ -template void generate_source(T& pdf, EventList& normEvents, const std::string& sourceFile, MinuitParameterSet& mps, const double& sf) +template void generate_source(T& pdf, const std::string& sourceFile, MinuitParameterSet& mps, const double& sf) { bool normalise = NamedParameter("Normalise",true); - std::string type = NamedParameter("Type", "CoherentSum" ); + double safetyFactor = NamedParameter( "SafetyFactor", 3 ); + int seed = NamedParameter("Seed", 1); + size_t nEvents = NamedParameter( "NormEvents", 1000000 ); + auto oEventType = NamedParameter("EventType").getVector(); + + TRandom3 rnd(seed); + INFO("First random number = " << rnd.Uniform() ); + + EventType eventType( oEventType ); + Generator phsp(eventType); + phsp.setRandom(&rnd); + EventList normEvents = phsp.generate(nEvents); + if constexpr( std::is_same::value ) pdf.prepare(); double norm = 1; if( normalise ){ - double pMax = 0 ; - pdf.setEvents( normEvents ); - pdf.prepare(); - pdf.debug( normEvents[0] ); - for ( auto& evt : normEvents ) { - if( type == "PolarisedSum" ){ + double pMax = 0; + for ( auto& evt : normEvents ) + { + if constexpr ( std::is_same::value ) + { double px, py, pz; - gRandom->Sphere(px,py,pz, gRandom->Uniform(0,1)); + rnd.Sphere(px,py,pz, rnd.Uniform(0,1)); mps["Px"]->setCurrentFitVal(px); mps["Py"]->setCurrentFitVal(py); mps["Pz"]->setCurrentFitVal(pz); pdf.transferParameters(); } - double n = pdf(evt); + double n = 0; + if constexpr ( std::is_same::value ) n = std::norm( pdf.getValNoCache(evt) ); + if constexpr ( std::is_same::value ) n = pdf.getValNoCache(evt); if ( n > pMax ) pMax = n; } norm = pMax * sf ; - INFO( "Making binary with " << pMax << " x safety factor = " << sf ); + INFO( "Making binary with " << pMax << " x safety factor = " << sf ); } mps.resetToInit(); 
pdf.generateSourceCode( sourceFile, norm, true ); @@ -110,7 +123,7 @@ int main( int argc, char** argv ) std::string outputPS = NamedParameter( "OutputEvents", "" ); unsigned int NormEvents = NamedParameter( "NormEvents", 1000000 ); double safetyFactor = NamedParameter( "SafefyFactor", 3 ); - + unsigned seed = NamedParameter("Seed", 0); EventType eventType( oEventType ); AmpGen::MinuitParameterSet MPS; // @@ -122,28 +135,16 @@ int main( int argc, char** argv ) } Generator phsp( eventType ); TRandom3 rnd; - + rnd.SetSeed( seed ); gRandom = &rnd; phsp.setRandom( &rnd ); - EventList phspEvents( oEventType ); - phsp.fillEventListPhaseSpace( phspEvents, NormEvents ); - if( type == "CoherentSum" ){ CoherentSum sig( eventType, MPS, "" ); - generate_source( sig, phspEvents, sourceFile, MPS, safetyFactor ); - //create_integration_tests(sig, eventType, MPS, {phspEvents[15]}, sourceFile ); + generate_source( sig, sourceFile, MPS, safetyFactor ); } if( type == "PolarisedSum" ){ PolarisedSum sig( eventType, MPS ); - generate_source( sig, phspEvents, sourceFile, MPS, safetyFactor ); - } - if ( outputPS != "" ) { - std::ofstream ofile( outputPS ); - ofile << "0x,0y,0z,0t,1x,1y,1z,1t,2x,2y,2z,2t,3x,3y,3z,3t\n"; - for ( auto& event : phspEvents ) { - for ( size_t i = 0; i < event.size(); i++ ) ofile << ( i == 0 ? "" : "," ) << event[i]; - ofile << "\n"; - } + generate_source( sig, sourceFile, MPS, safetyFactor ); } } diff --git a/src/CoherentSum.cpp b/src/CoherentSum.cpp index 980e224e060..378f50f2029 100644 --- a/src/CoherentSum.cpp +++ b/src/CoherentSum.cpp @@ -370,24 +370,33 @@ std::function CoherentSum::evaluator(const EventList_type* return arrayToFunctor(values); } -KeyedFunctors CoherentSum::componentEvaluator(const EventList_type* ievents) const +KeyedFunctors CoherentSum::componentEvaluator(const EventList_type* ievents) const { - auto& cache = m_integrator.cache(); - KeyedFunctors rt; + using store_t = Store; + auto events = ievents == nullptr ? 
m_integrator.events() : ievents; + KeyedFunctors rt; + std::shared_ptr cache; + if( events != m_integrator.events() ) + { + cache = std::make_shared(events->size(), m_matrixElements); + for( auto& me : m_matrixElements ) const_cast(cache.get())->update(events->store(), me); + } + else cache = std::shared_ptr( & m_integrator.cache(), [](const store_t* t){} ); + /// this little slice of weirdness allows either a new cache to be instantiated, or one to just get a pointer to the one used for the integration. + for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) { - for( unsigned j = i ; j != m_matrixElements.size(); ++j ){ + for( unsigned j = i ; j != m_matrixElements.size(); ++j ) + { auto mi = m_matrixElements[i]; auto mj = m_matrixElements[j]; auto ci = this->m_matrixElements[i].coefficient; auto cj = this->m_matrixElements[j].coefficient; double s = (i==j) ? 1 : 2 ; auto name = programatic_name(mi.decayTree.decayDescriptor()) + "_" + programatic_name( mj.decayTree.decayDescriptor() ); - INFO("Adding evaluator for: " << name ); - auto functor = [ci,cj,i,j,s, &cache](const Event& event){ return s * std::real( ci * cache.get( event.index(), i ) * std::conj( cj * cache.get( event.index(), j ) ) ) ;}; + auto functor = [ci,cj,i,j,s, cache](const Event& event){ return s * std::real( ci * cache->get( event.index(), i ) * std::conj( cj * cache->get( event.index(), j ) ) ) ;}; rt.add(functor, name, ""); } } - INFO(" Returning: " << rt.keys.size() << " functors" ); return rt; } diff --git a/src/MinuitParameter.cpp b/src/MinuitParameter.cpp index 896dc3a038d..b0913ea5ae3 100644 --- a/src/MinuitParameter.cpp +++ b/src/MinuitParameter.cpp @@ -10,7 +10,7 @@ using namespace AmpGen; namespace AmpGen { - complete_enum( Flag, Free, Hide, Fix, CompileTimeConstant ) + complete_enum( Flag, Free, Hide, Fix, CompileTimeConstant, Invalid ) } MinuitParameter::MinuitParameter( const std::string& name, const Flag& fix, const double& mean, const double& step, diff --git 
a/src/MinuitParameterSet.cpp b/src/MinuitParameterSet.cpp index 1bf0031846b..39ed23b5433 100644 --- a/src/MinuitParameterSet.cpp +++ b/src/MinuitParameterSet.cpp @@ -26,16 +26,6 @@ MinuitParameterSet::MinuitParameterSet(const std::vector& para for( auto& param : params ) add(param); } -// MinuitParameterSet MinuitParameterSet::getFloating() -// { -// MinuitParameterSet floating; -// for ( auto& param : *this ) { -// if ( param->isFree() && dynamic_cast(param) != nullptr ) -// floating.add(param); -// } -// return floating; -// } - bool MinuitParameterSet::addToEnd( MinuitParameter* parPtr ) { bool success = true; @@ -132,6 +122,7 @@ void MinuitParameterSet::tryParameter( const std::vector& line ) double max = hasLimits ? lexical_cast( line[5], status ) : 0; if( !status ) return; auto flag = parse( line[1] ); + if( flag == Flag::Invalid ) return; if ( OptionsParser::printHelp() ) INFO( "MINUIT: Registered " << line[0] << " ( " << to_string(flag) << ") = " << mean << ", step=" << step << " ("<< min << "," << max << ")" ); add( new MinuitParameter( line[0], flag, mean, step, min, max ) ); @@ -150,6 +141,7 @@ void MinuitParameterSet::tryParameter( const std::vector& line ) if ( !status ) return; auto flag_re = parse(line[1]); auto flag_im = parse(line[4 + 2*hasLimits]); + if( flag_re == Flag::Invalid || flag_im == Flag::Invalid ) return; if ( OptionsParser::printHelp() ) { INFO( "MINUIT: Complex " << line[0] << "_Re ( " << to_string(flag_re) << ") = " << mean_re << ", step=" << step_re << " (" << min_re << "," << max_re << ")" ); INFO( "MINUIT: Complex " << line[0] << "_Im ( " << to_string(flag_im) << ") = " << mean_im << ", step=" << step_im << " (" << min_im << "," << max_im << ")" ); diff --git a/src/Particle.cpp b/src/Particle.cpp index e625687e659..b03103ec569 100644 --- a/src/Particle.cpp +++ b/src/Particle.cpp @@ -522,9 +522,9 @@ Tensor Particle::externalSpinTensor(const int& polState, DebugSymbols* db ) cons { std::array xi; Expression n = fcn::sqrt( 2 * 
pP*(pP+pZ) ); - - xi[0] = Tensor( {-zb/n , (pP+pZ)/n}); - xi[1] = Tensor( {(pP+pZ)/n, z/n }); + Expression aligned = make_cse( Abs(pP + pZ) < 10e-6 ) ; + xi[0] = Tensor( {make_cse( Ternary(aligned, 1, -zb/n)) , make_cse( Ternary(aligned, 0, (pP+pZ)/n ) ) }); + xi[1] = Tensor( {make_cse( Ternary(aligned, 0, (pP+pZ)/n)), make_cse(Ternary(aligned,1, z/n) ) }); Expression fa = m_props->isNeutrino() ? polState * fcn::sqrt(pE) : polState * fcn::sqrt( pE/m- 1 ); Expression fb = m_props->isNeutrino() ? fcn::sqrt(pE) : fcn::sqrt( pE/m + 1 ); diff --git a/src/PolarisedSum.cpp b/src/PolarisedSum.cpp index 1f3f4407dfa..75f6c2a42a0 100644 --- a/src/PolarisedSum.cpp +++ b/src/PolarisedSum.cpp @@ -266,7 +266,7 @@ Tensor PolarisedSum::transitionMatrix() const unsigned totalSize = 0 ; for( const auto& me : m_matrixElements ){ auto coupling = me.coupling.to_expression(); - INFO( me.decayDescriptor() << " " << coupling ); + // INFO( me.decayDescriptor() << " " << coupling ); auto cacheIndex = totalSize; for( size_t i = 0 ; i < size ; ++i ){ expressions[i] = expressions[i] + coupling * Parameter( "x1["+std::to_string(cacheIndex+i)+"]",0,true); @@ -498,15 +498,23 @@ std::function PolarisedSum::evaluator(const EventList_type { utils::store(values.data() + utils::size::value * block, (m_weight/m_norm) * m_probExpression(&store(block,0)) ); } - for( unsigned int i = 0 ; i != 10; ++i ) - DEBUG(values[i] << " " << getValNoCache( events->at(i) ) * ( m_weight / m_norm ) ); return arrayToFunctor(values); } -KeyedFunctors PolarisedSum::componentEvaluator(const EventList_type* events) const + +KeyedFunctors PolarisedSum::componentEvaluator(const EventList_type* ievents) const { - auto& cache = m_integrator.cache(); - KeyedFunctors rt; + using store_t = Store; + auto events = ievents == nullptr ? 
m_integrator.events() : ievents; + std::shared_ptr cache; + if( events != m_integrator.events() ) + { + cache = std::make_shared(events->size(), m_matrixElements, m_dim.first*m_dim.second); + for( auto& me : m_matrixElements ) const_cast(cache.get())->update(events->store(), me); + } + else cache = std::shared_ptr( & m_integrator.cache(), [](const store_t* t){} ); + + KeyedFunctors rt; for( unsigned i = 0 ; i != m_matrixElements.size(); ++i ) { for( unsigned j = i ; j != m_matrixElements.size(); ++j ){ @@ -516,23 +524,21 @@ KeyedFunctors PolarisedSum::componentEvaluator(const EventList_ty auto cj = this->m_matrixElements[j].coefficient; double s = (i==j) ? 1 : 2 ; auto name = programatic_name(mi.decayTree.decayDescriptor()) + "_" + programatic_name( mj.decayTree.decayDescriptor() ); - INFO("Adding evaluator for: " << name ); - auto functor = [ci,cj,i,j,s, &cache, this](const Event& event){ - auto [s1,s2] = this->m_dim; - auto R = s1 * s2; - complex_t total = 0; - for( unsigned x = 0; x != this->m_norms.size(); ++x ) - { - auto f = x % s2; - auto psiIndex = (x-f) / s2; - auto m2 = psiIndex % s1; - auto m1 = (psiIndex-m2)/s1; - total += this->m_rho[psiIndex] * ci * cache.get(event.index(),R * i + m1 * s2 + f) - * std::conj( cj * cache.get(event.index(),R * j + m2 * s2 + f) ); - } - return s * std::real(total); - }; - rt.add(functor, name, ""); + rt.add( [ci,cj,i,j,s, cache, this](const Event& event){ + auto [s1,s2] = this->m_dim; + auto R = s1 * s2; + complex_t total = 0; + for( unsigned x = 0; x != this->m_norms.size(); ++x ) + { + auto f = x % s2; + auto psiIndex = (x-f) / s2; + auto m2 = psiIndex % s1; + auto m1 = (psiIndex-m2)/s1; + total += this->m_rho[psiIndex] * ci * cache->get(event.index(),R * i + m1 * s2 + f) + * std::conj( cj * cache->get(event.index(),R * j + m2 * s2 + f) ); + } + return s * std::real(total); + }, name, ""); } } return rt; From fa24e34854fe35a8b126464b890f82df2f48e2ee Mon Sep 17 00:00:00 2001 From: mstahl Date: Mon, 10 Aug 2020 17:39:33 
+0200 Subject: [PATCH 60/67] [example apps] add BaryonFitter and fit_hyperon_parameters.cpp --- examples/BaryonFitter.cpp | 230 ++++++++++++++++++++++++++++ examples/fit_hyperon_parameters.cpp | 22 +++ 2 files changed, 252 insertions(+) create mode 100644 examples/BaryonFitter.cpp create mode 100644 examples/fit_hyperon_parameters.cpp diff --git a/examples/BaryonFitter.cpp b/examples/BaryonFitter.cpp new file mode 100644 index 00000000000..4b20e83952a --- /dev/null +++ b/examples/BaryonFitter.cpp @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "AmpGen/Chi2Estimator.h" +#include "AmpGen/ErrorPropagator.h" +#if ENABLE_AVX + #include "AmpGen/EventListSIMD.h" + using EventList_type = AmpGen::EventListSIMD; +#else + #include "AmpGen/EventList.h" + using EventList_type = AmpGen::EventList; +#endif +#include "AmpGen/EventType.h" +#include "AmpGen/Factory.h" +#include "AmpGen/RecursivePhaseSpace.h" +#include "AmpGen/FitResult.h" +#include "AmpGen/IExtendLikelihood.h" +#include "AmpGen/Minimiser.h" +#include "AmpGen/MinuitParameter.h" +#include "AmpGen/MinuitParameterSet.h" +#include "AmpGen/MsgService.h" +#include "AmpGen/NamedParameter.h" +#include "AmpGen/SumPDF.h" +#include "AmpGen/ThreeBodyCalculators.h" +#include "AmpGen/Utilities.h" +#include "AmpGen/Generator.h" +#include "AmpGen/PolarisedSum.h" +#include "AmpGen/Kinematics.h" + +#ifdef _OPENMP + #include + #include +#endif + +using namespace AmpGen; + +void randomizeStartingPoint( MinuitParameterSet& mps, TRandom3& rand) +{ + for (auto& param : mps) { + if ( ! 
param->isFree() || param->name() == "Px" || param->name() == "Py" || param->name() == "Pz" ) continue; + double min = param->minInit(); + double max = param->maxInit(); + double new_value = rand.Uniform(param->mean()-param->stepInit(),param->mean()+param->stepInit()); + if( min != 0 && max != 0 ) + new_value = rand.Uniform(min,max); + param->setInit( new_value ); + param->setCurrentFitVal( new_value ); + INFO( param->name() << " = " << param->mean() << " " << param->stepInit() ); + } +} + +template +FitResult* doFit( PDF&& pdf, EventList_type& data, EventList_type& mc, MinuitParameterSet& MPS ) +{ + auto time_wall = std::chrono::high_resolution_clock::now(); + auto time = std::clock(); + + pdf.setEvents( data ); + + /* Minimiser is a general interface to Minuit1/Minuit2, + that is constructed from an object that defines an operator() that returns a double + (i.e. the likielihood, and a set of MinuitParameters. */ + Minimiser mini( pdf, &MPS ); + mini.doFit(); + FitResult* fr = new FitResult(mini); + + /* Estimate the chi2 using an adaptive / decision tree based binning, + down to a minimum bin population of 15, and add it to the output.*/ + //if(data.eventType().size() < 5){ + // Chi2Estimator chi2( data, mc, pdf, 15 ); + // //chi2.writeBinningToFile("chi2_binning.txt"); + // fr->addChi2( chi2.chi2(), chi2.nBins() ); + //} + + auto twall_end = std::chrono::high_resolution_clock::now(); + double time_cpu = ( std::clock() - time ) / (double)CLOCKS_PER_SEC; + double tWall = std::chrono::duration( twall_end - time_wall ).count(); + INFO( "Wall time = " << tWall / 1000. ); + INFO( "CPU time = " << time_cpu ); + fr->print(); + + /* Save weighted data and norm MC for the different components in the PDF, i.e. the signal and backgrounds. + The structure assumed the PDF is some SumPDF. 
*/ + unsigned int counter = 1; + for_each(pdf.pdfs(), [&]( auto& f ){ + mc.transform([&f](auto& mcevt){mcevt.setWeight(f.getValNoCache(mcevt)*mcevt.weight()/mcevt.genPdf());}).tree(counter>1?"MCt"+std::to_string(counter):"MCt")->Write(); + data.tree(counter>1?"t"+std::to_string(counter):"t")->Write(); + counter++; + } ); + + return fr; +} + +void invertParity( Event& event, const size_t& nParticles=0) +{ + for( size_t i = 0 ; i < nParticles; ++i ) + { + event[4*i + 0] = -event[4*i+0]; + event[4*i + 1] = -event[4*i+1]; + event[4*i + 2] = -event[4*i+2]; + } +} + +int main( int argc, char* argv[] ) +{ + gErrorIgnoreLevel = 1001; + + OptionsParser::setArgs( argc, argv ); + + const std::vector dataFile = NamedParameter("DataSample","").getVector(); + const std::string simFile = NamedParameter("SimFile", "" , "Name of file containing simulated sample for using in MC integration"); + const std::string logFile = NamedParameter("LogFile","Fitter.log"); + const std::string plotFile = NamedParameter("Plots","plots.root"); + const std::string prefix = NamedParameter("PlotPrefix",""); + const std::string idbranch = NamedParameter("IDBranch",""); + const std::string mcidbranch = NamedParameter("MCIDBranch",""); + const std::string weight_branch = NamedParameter("WeightBranch","","Name of branch containing event weights."); + const std::string mc_weight_branch = NamedParameter("MCWeightBranch","","Name of branch containing event weights."); + + const auto nev_MC = NamedParameter("NEventsMC", 8e6, "Number of MC events for normalization."); + auto bNames = NamedParameter("Branches", std::vector(), + "List of branch names, assumed to be \033[3m daughter1_px ... daughter1_E, daughter2_px ... \033[0m" ).getVector(); + auto MCbNames = NamedParameter("MCBranches", std::vector(), + "List of branch names, assumed to be \033[3m daughter1_px ... daughter1_E, daughter2_px ... 
\033[0m" ).getVector(); + auto pNames = NamedParameter("EventType" , "" + , "EventType to fit, in the format: \033[3m parent daughter1 daughter2 ... \033[0m" ).getVector(); + +#ifdef _OPENMP + unsigned int concurentThreadsSupported = std::thread::hardware_concurrency(); + unsigned int nThreads = NamedParameter( "nCores", concurentThreadsSupported ); + omp_set_num_threads( nThreads ); + INFO( "Setting " << nThreads << " fixed threads for OpenMP" ); + omp_set_dynamic( 0 ); +#endif + + + /* A MinuitParameterSet is (unsurprisingly) a set of fit parameters, and can be loaded from + the parsed options. For historical reasons, this is referred to as loading it from a "Stream" */ + MinuitParameterSet MPS; + MPS.loadFromStream(); + TRandom3 rndm = TRandom3( NamedParameter("Seed", 1 ) ) ; + if( NamedParameter("RandomizeStartingPoint",false) ) randomizeStartingPoint(MPS,rndm ); + + /* An EventType specifies the initial and final state particles as a vector that will be described by the fit. + It is typically loaded from the interface parameter EventType. */ + EventType evtType(pNames); + + /* A CoherentSum is the typical amplitude to be used, that is some sum over quasi two-body contributions + weighted by an appropriate complex amplitude. The CoherentSum is generated from the couplings described + by a set of parameters (in a MinuitParameterSet), and an EventType, which matches these parameters + to a given final state and a set of data. A common set of rules can be matched to multiple final states, + i.e. to facilitate the analysis of coupled channels. */ + PolarisedSum sig(evtType, MPS); + + /* Events are read in from ROOT files. 
If only the filename and the event type are specified, + the file is assumed to be in the specific format that is defined by the event type, + unless the branches to load are specified in the user options */ + EventList_type events(dataFile, evtType, Branches(bNames), GetGenPdf(false), WeightBranch(weight_branch), ExtraBranches(std::vector{idbranch}) ); + + /* Generate events to normalise the PDF with. This can also be loaded from a file, + which will be the case when efficiency variations are included. */ + EventList_type eventsMC = simFile == "" + ? EventList_type(Generator(sig.matrixElements()[0].decayTree.quasiStableTree(), events.eventType(), &rndm).generate(nev_MC)) + : EventList_type(simFile, evtType, Branches(MCbNames), WeightBranch(mc_weight_branch), ExtraBranches(std::vector{mcidbranch})); + + /* Transform data if we have an ID branch. That branch indicates that we operate on a sample with particles+antiparticles mixed. + The transformation also includes boosting to the rest frame of the head of the decay chain. 
+ TODO: There might be situations where you want to separate both transformations */ + auto const n_final_state_particles = evtType.size(); + std::vector daughters_as_ints(n_final_state_particles); + std::iota (daughters_as_ints.begin(), daughters_as_ints.end(), 0u); + auto frame_transform = [&evtType, &daughters_as_ints, &n_final_state_particles](auto& event){ + TVector3 pBeam(0,0,1); + if( event[event.size()-1] < 0 ){ + invertParity( event, n_final_state_particles); + pBeam = -pBeam; + } + TLorentzVector pP = pFromEvent(event,daughters_as_ints); + //if( pP.P() < 10e-5) return; + TVector3 pZ = pP.Vect(); + rotateBasis( event, (pBeam.Cross(pZ) ).Cross(pZ), pBeam.Cross(pZ), pZ ); + boost( event, {0, 0, -1}, pP.P()/pP.E() ); + }; + + + for( auto& event : events ) + if( event[event.size()-1] < 0 ){ + event.print(); + break; + } + events.transform( frame_transform ); + for( auto& event : events ) + if( event[event.size()-1] < 0 ){//E if there's no ID branch + event.print(); + break; + } + for( auto& event : eventsMC ) + if( event[event.size()-1] < 0 ){ + event.print(); + break; + } + eventsMC.transform( frame_transform ); + for( auto& event : eventsMC ) + if( event[event.size()-1] < 0 ){//E if there's no ID branch + event.print(); + break; + } + sig.setMC(eventsMC); + sig.setEvents(events); + + TFile* output = TFile::Open( plotFile.c_str(), "RECREATE" ); + output->cd(); + auto fr = doFit(make_pdf(sig), events, eventsMC, MPS); + + auto ff = sig.fitFractions( fr->getErrorPropagator() ); + fr->addFractions(ff); + fr->writeToFile( logFile ); + output->Close(); + return 0; +} diff --git a/examples/fit_hyperon_parameters.cpp b/examples/fit_hyperon_parameters.cpp new file mode 100644 index 00000000000..ccb856cfe76 --- /dev/null +++ b/examples/fit_hyperon_parameters.cpp @@ -0,0 +1,22 @@ +#include "AmpGen/MinuitParameterSet.h" +#include "AmpGen/Minimiser.h" +#include "AmpGen/NamedParameter.h" + +using namespace AmpGen; + +int main( int argc, char* argv[] ){ + 
OptionsParser::setArgs( argc, argv ); + MinuitParameterSet mps; + mps.add("gRe",AmpGen::Flag::Free,0,0.1,-10,10); + mps.add("gIm",AmpGen::Flag::Free,0,0.1,-10,10); + const auto alpha_measured = NamedParameter("alpha", 0.75); + const auto phi_measured = NamedParameter("phi", -6.5); + const auto alpha_error = NamedParameter("alpha_error", 0.012); + const auto phi_error = NamedParameter("phi_error", 3.5); + std::function f = [&mps, &alpha_measured, &alpha_error, &phi_measured, &phi_error]() -> double { + auto alpha_pred = 2.*mps[0]->mean()/(1.+mps[0]->mean()*mps[0]->mean()+mps[1]->mean()*mps[1]->mean()); + auto phi_pred = 180.*atan(2.*mps[1]->mean()/(1.-(mps[0]->mean()*mps[0]->mean()+mps[1]->mean()*mps[1]->mean())))/M_PI; + return pow((alpha_measured-alpha_pred)/alpha_error,2.)+pow((phi_measured-phi_pred)/phi_error,2.); + }; + Minimiser(f, &mps).doFit(); +} From d50abb1af887fcab54e18aabd3fda3f8d8223f3a Mon Sep 17 00:00:00 2001 From: mstahl Date: Mon, 10 Aug 2020 17:42:38 +0200 Subject: [PATCH 61/67] [SumPDF] use event weight in fit --- AmpGen/SumPDF.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AmpGen/SumPDF.h b/AmpGen/SumPDF.h index 830c7d4413c..0255a41f0c3 100644 --- a/AmpGen/SumPDF.h +++ b/AmpGen/SumPDF.h @@ -64,7 +64,8 @@ namespace AmpGen #pragma omp parallel for reduction( +: LL ) for ( unsigned int i = 0; i < m_events->size(); ++i ) { auto prob = ((*this))(( *m_events)[i] ); - LL += log(prob); + auto w = (*m_events)[i].weight(); + LL += w*log(prob); } return -2 * LL; } From 8982ae888b4f65f8236556ff933c2744169969c5 Mon Sep 17 00:00:00 2001 From: mstahl Date: Mon, 10 Aug 2020 21:33:58 +0200 Subject: [PATCH 62/67] [BaryonFitter] throw error when adding branches and using AVX; [Minimiser] add option to run Minos --- examples/BaryonFitter.cpp | 7 +++++++ src/Minimiser.cpp | 38 +++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/examples/BaryonFitter.cpp b/examples/BaryonFitter.cpp index 
4b20e83952a..a4657e804bb 100644 --- a/examples/BaryonFitter.cpp +++ b/examples/BaryonFitter.cpp @@ -135,6 +135,13 @@ int main( int argc, char* argv[] ) auto pNames = NamedParameter("EventType" , "" , "EventType to fit, in the format: \033[3m parent daughter1 daughter2 ... \033[0m" ).getVector(); +#if ENABLE_AVX + if(!idbranch.empty() || !weight_branch.empty() || !mcidbranch.empty() || !mc_weight_branch.empty()){ + ERROR("Vectorized version currently not supported when adding extra branches"); + return 1; + } +#endif + #ifdef _OPENMP unsigned int concurentThreadsSupported = std::thread::hardware_concurrency(); unsigned int nThreads = NamedParameter( "nCores", concurentThreadsSupported ); diff --git a/src/Minimiser.cpp b/src/Minimiser.cpp index 0eaabb29f01..b28f69fcaa5 100644 --- a/src/Minimiser.cpp +++ b/src/Minimiser.cpp @@ -74,12 +74,12 @@ void Minimiser::prepare() m_minimiser->SetPrintLevel( m_printLevel ); m_mapping.clear(); m_covMatrix.clear(); - for(size_t i = 0 ; i < m_parSet->size(); ++i) + for(size_t i = 0 ; i < m_parSet->size(); ++i) { auto par = m_parSet->at(i); if ( ! 
par->isFree() ) continue; m_minimiser->SetVariable(m_mapping.size(), par->name(), par->mean(), par->stepInit()); - if ( par->minInit() != 0 || par->maxInit() != 0 ) + if ( par->minInit() != 0 || par->maxInit() != 0 ) m_minimiser->SetVariableLimits( m_mapping.size(), par->minInit(), par->maxInit() ); m_mapping.push_back(i); if ( m_printLevel != 0 ) INFO( *par ); @@ -107,23 +107,23 @@ bool Minimiser::doFit() for ( unsigned int j = 0; j < m_nParams; ++j ) { m_covMatrix[i + m_nParams * j] = m_minimiser->CovMatrix( i, j ); } - } - m_status = m_minimiser->Status(); - /* - for( unsigned i = 0 ; i != m_nParams; ++i ){ - double low = 0; - double high = 0; - int status = 0; - m_minimiser->GetMinosError(i, low, high, status); - auto param = m_parSet->at( m_mapping[i] ); - param->setResult( *param, param->err(), low, high ); } - for( unsigned i = 0 ; i != m_nParams; ++i ) - { - auto param = m_parSet->at( m_mapping[i] ); - INFO( param->name() << " " << param->mean() << " " << param->errPos() << " " << param->errNeg() ); + m_status = m_minimiser->Status(); + if(NamedParameter("Minimiser::RunMinos",false)){ + for( unsigned i = 0 ; i != m_nParams; ++i ){ + double low = 0; + double high = 0; + int status = 0; + m_minimiser->GetMinosError(i, low, high, status); + auto param = m_parSet->at( m_mapping[i] ); + param->setResult( *param, param->err(), low, high ); + } + for( unsigned i = 0 ; i != m_nParams; ++i ) + { + auto param = m_parSet->at( m_mapping[i] ); + INFO( param->name() << " " << param->mean() << " " << param->errPos() << " " << param->errNeg() ); + } } - */ return 1; } @@ -168,8 +168,8 @@ TMatrixTSym Minimiser::covMatrixFull() const MinuitParameterSet* Minimiser::parSet() const { return m_parSet; } void Minimiser::addExtendedTerm( IExtendLikelihood* m_term ) -{ - m_extendedTerms.push_back( m_term ); +{ + m_extendedTerms.push_back( m_term ); } ROOT::Minuit2::Minuit2Minimizer* Minimiser::minimiserInternal() { return m_minimiser; } From da730a7868c346b5bd4c426f42b9bf558054d340 
Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 11 Aug 2020 11:59:01 +0200 Subject: [PATCH 63/67] fix enum parser --- AmpGen/enum.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/AmpGen/enum.h b/AmpGen/enum.h index 22abf25ba28..62c745030d6 100644 --- a/AmpGen/enum.h +++ b/AmpGen/enum.h @@ -39,13 +39,14 @@ namespace AmpGen { for( size_t x = 0; x != nChar ; ++x) if( word[x] != otherWord[x] ) return false; return true; }; - for( ; args[begin] != '\0' ; begin++ ) + while( args[begin] != '\0' ) { while( args[begin] == ' ' ) begin++; for( end=begin; args[end] != '\0'; end++ ) if( args[end] == ',' ) break; - if( compare( word.c_str(), args + begin , end-begin ) ) break; - begin = end; - counter++; + if( compare( word.c_str(), args + begin , end-begin ) ) break; + begin = end+1; + counter++; + if( args[end] == '\0' ) break; } if( args[begin] == '\0' ) return T(counter-1); return T(counter); From 14a8c123025daf9bb64cbfa7151ec5564d038fc5 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Tue, 11 Aug 2020 12:15:48 +0200 Subject: [PATCH 64/67] fix enum --- AmpGen/enum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AmpGen/enum.h b/AmpGen/enum.h index 62c745030d6..9ffe2ad052f 100644 --- a/AmpGen/enum.h +++ b/AmpGen/enum.h @@ -48,7 +48,7 @@ namespace AmpGen { counter++; if( args[end] == '\0' ) break; } - if( args[begin] == '\0' ) return T(counter-1); + if( args[end] == '\0' ) return T(counter-1); return T(counter); } template std::string to_string(const T& enumItem, const char* args) From d26f957c25057de2de531cd81e596a040b5533a1 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Wed, 12 Aug 2020 09:47:06 +0200 Subject: [PATCH 65/67] fix enum... again ... 
and add unit tests this time --- AmpGen/MinuitParameter.h | 2 +- AmpGen/enum.h | 10 ++++++---- src/AmplitudeRules.cpp | 9 ++++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/AmpGen/MinuitParameter.h b/AmpGen/MinuitParameter.h index 197f660ffb2..7d79369bd0d 100644 --- a/AmpGen/MinuitParameter.h +++ b/AmpGen/MinuitParameter.h @@ -10,7 +10,7 @@ namespace AmpGen { class MinuitParameterSet; - declare_enum( Flag, Free, Hide, Fix, CompileTimeConstant, Invalid ) + declare_enum( Flag, Free, Hide, Fix, CompileTimeConstant) class MinuitParameter { public: diff --git a/AmpGen/enum.h b/AmpGen/enum.h index 9ffe2ad052f..ba526c714b1 100644 --- a/AmpGen/enum.h +++ b/AmpGen/enum.h @@ -1,9 +1,10 @@ #ifndef AMPGEN_ENUM_H #define AMPGEN_ENUM_H 1 #include "AmpGen/MsgService.h" +#include "AmpGen/Utilities.h" #define declare_enum(name, ...) \ -enum class name {__VA_ARGS__}; \ +enum class name {__VA_ARGS__, Invalid}; \ template <> name parse(const std::string& word); \ template <> std::string to_string( const name& enumItem ); \ std::ostream& operator<<( std::ostream& os, const name& np); @@ -15,7 +16,7 @@ template <> name lexical_cast(const std::string& word, bool& /*status*/){ return std::ostream& operator<<(std::ostream& os, const name& np){ return os << to_string(np);} #define make_enum(name, ...) 
\ -enum class name {__VA_ARGS__}; \ +enum class name {__VA_ARGS__, Invalid}; \ template <> name parse(const std::string& word){ constexpr auto args = #__VA_ARGS__; return AmpGen::detail::parse(word, args); } \ template <> std::string to_string( const name& enumItem ){ constexpr auto args = #__VA_ARGS__; return AmpGen::detail::to_string(enumItem, args) ; } \ template <> name lexical_cast(const std::string& word, bool& /*status*/){ return parse(word); } \ @@ -39,16 +40,17 @@ namespace AmpGen { for( size_t x = 0; x != nChar ; ++x) if( word[x] != otherWord[x] ) return false; return true; }; + bool found = false; while( args[begin] != '\0' ) { while( args[begin] == ' ' ) begin++; for( end=begin; args[end] != '\0'; end++ ) if( args[end] == ',' ) break; - if( compare( word.c_str(), args + begin , end-begin ) ) break; + if( compare( word.c_str(), args + begin , end-begin ) ) { found = true; break; } begin = end+1; counter++; if( args[end] == '\0' ) break; } - if( args[end] == '\0' ) return T(counter-1); + if(!found) return T::Invalid; return T(counter); } template std::string to_string(const T& enumItem, const char* args) diff --git a/src/AmplitudeRules.cpp b/src/AmplitudeRules.cpp index b7a3a10e452..cabb04e1b49 100644 --- a/src/AmplitudeRules.cpp +++ b/src/AmplitudeRules.cpp @@ -34,14 +34,17 @@ Coupling::Coupling(MinuitParameter* re, MinuitParameter* im) : } m_particle = Particle(m_name); coordinateType coord = NamedParameter("CouplingConstant::Coordinates", coordinateType::cartesian); - angType degOrRad = NamedParameter("CouplingConstant::AngularUnits", angType::rad); + angType degOrRad = NamedParameter("CouplingConstant::AngularUnits" , angType::rad); m_isCartesian = true; + if( coord == coordinateType::polar ) m_isCartesian = false; - else if ( coord != coordinateType::cartesian){ + + if ( coord == coordinateType::Invalid){ FATAL("Coordinates for coupling constants must be either cartesian or polar"); } if ( degOrRad == angType::deg) m_sf = M_PI / 180; - else if ( 
degOrRad != angType::rad){ + + if ( degOrRad == angType::Invalid ){ FATAL("TotalCoupling::AngularUnits must be either rad or deg"); } } From 24d7015d872cc4ebd0b74739f359db904bd97068 Mon Sep 17 00:00:00 2001 From: mstahl Date: Wed, 12 Aug 2020 10:45:31 +0200 Subject: [PATCH 66/67] [Particle tables] update particle tables to be compatible with Gauss --- options/MintDalitzSpecialParticles.csv | 4 +++- options/mass_width.csv | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/options/MintDalitzSpecialParticles.csv b/options/MintDalitzSpecialParticles.csv index 9ed8bdb1b0f..6565871935b 100644 --- a/options/MintDalitzSpecialParticles.csv +++ b/options/MintDalitzSpecialParticles.csv @@ -48,5 +48,7 @@ 1.86484E+03 ,1.7E-01,1.7E-01,1.605E-09 ,6.0E-12,6.0E-12,1/2, ,0 ,-, ,F, 998421, 0, ,R,D~ ,cU 3.8948E+03 ,1.1E-02,1.1E-02,2.96E+00 ,2.1E-03,2.1E-03,1 ,+,1 ,+,-, , 999443, 0, ,R,Z(c)(3900) ,cCuU 3.8948E+03 ,1.1E-02,1.1E-02,2.96E+00 ,2.1E-03,2.1E-03,1 ,+,1 ,+,-, , 999444, +, ,R,Z(c)(3900) ,cCuD -9.990E+03 ,1.0E+01,1.0E+01,9.9E+09 ,3.0E+01,3.0E+01,1/2,+,0 ,+,+,F, 998140, 0, ,R,XiPi0 ,?? 
+9.990E+03 ,1.0E+01,1.0E+01,9.9E+09 ,3.0E+01,3.0E+01,1 , ,1/2 ,-, ,F, , 0,3,R,LambdaEta ,uds +9.990E+03 ,1.0E+01,1.0E+01,9.9E+09 ,3.0E+01,3.0E+01,1/2, ,1/2 ,-, ,F, , -,3,R,XiPi ,dss +9.990E+03 ,1.0E+01,1.0E+01,9.9E+09 ,3.0E+01,3.0E+01,1/2, ,1/2 ,-, ,F, , 0,3,R,XiPi ,uss *** diff --git a/options/mass_width.csv b/options/mass_width.csv index 4833cbc1c7e..122d5fec4a0 100644 --- a/options/mass_width.csv +++ b/options/mass_width.csv @@ -666,16 +666,16 @@ 1.32171E+03 ,7.0E-02,7.0E-02,4.04E-12 ,4.0E-14,4.0E-14,1/2, ,1/2 ,+, ,F, 3312, -,4,R,Xi ,dss 1.53180E+03 ,3.2E-01,3.2E-01,9.1E+00 ,5.0E-01,5.0E-01,1/2, ,3/2 ,+, ,F, 3324, 0,4,R,Xi(1530) ,uss 1.5350E+03 ,6.0E-01,6.0E-01,9.9E+00 ,1.7E+00,1.9E+00,1/2, ,3/2 ,+, ,F, 3314, -,4,R,Xi(1530) ,dss -1.6240E+03 ,3.0E+00,3.0E+00,22.5 ,-1 ,-1 ,1/2, ,3/2 ,+, ,F, , 0,1,S,Xi(1620) ,uss -1.6240E+03 ,3.0E+00,3.0E+00,22.5 ,-1 ,-1 ,1/2, ,3/2 ,+, ,F, , -,1,S,Xi(1620) ,dss -1.6900E+03 ,1.0E+01,1.0E+01,-1 ,-1 ,-1 ,1/2, ,? ,?, ,F, , 0,3,D,Xi(1690) ,uss -1.6900E+03 ,1.0E+01,1.0E+01,-1 ,-1 ,-1 ,1/2, ,? ,?, ,F, , -,3,D,Xi(1690) ,dss +1.6240E+03 ,3.0E+00,3.0E+00,22.5 ,-1 ,-1 ,1/2, ,1/2 ,-, ,F, , 0,1,S,Xi(1620) ,uss +1.6240E+03 ,3.0E+00,3.0E+00,22.5 ,-1 ,-1 ,1/2, ,1/2 ,-, ,F, , -,1,S,Xi(1620) ,dss +1.6900E+03 ,1.0E+01,1.0E+01,-1 ,-1 ,-1 ,1/2, ,1/2 ,-, ,F, , 0,3,D,Xi(1690) ,uss +1.6900E+03 ,1.0E+01,1.0E+01,-1 ,-1 ,-1 ,1/2, ,1/2 ,-, ,F, , -,3,D,Xi(1690) ,dss 1.823E+03 ,5.0E+00,5.0E+00,2.40E+01 ,1.5E+01,1.0E+01,1/2, ,3/2 ,-, ,F, 13324, 0,3,D,Xi(1820) ,uss 1.823E+03 ,5.0E+00,5.0E+00,2.40E+01 ,1.5E+01,1.0E+01,1/2, ,3/2 ,-, ,F, 13314, -,3,D,Xi(1820) ,dss -1.950E+03 ,1.5E+01,1.5E+01,6.0E+01 ,2.0E+01,2.0E+01,1/2, ,? ,?, ,F, , 0,3,D,Xi(1950) ,uss -1.950E+03 ,1.5E+01,1.5E+01,6.0E+01 ,2.0E+01,2.0E+01,1/2, ,? 
,?, ,F, , -,3,D,Xi(1950) ,dss -2.025E+03 ,5.0E+00,5.0E+00,2.0E+01 ,1.5E+01,5.0E+00,1/2, ,>3/2,?, ,F, , 0,3,D,Xi(2030) ,uss -2.025E+03 ,5.0E+00,5.0E+00,2.0E+01 ,1.5E+01,5.0E+00,1/2, ,>3/2,?, ,F, , -,3,D,Xi(2030) ,dss +1.950E+03 ,1.5E+01,1.5E+01,6.0E+01 ,2.0E+01,2.0E+01,1/2, ,5/2 ,-, ,F, , 0,3,D,Xi(1950) ,uss +1.950E+03 ,1.5E+01,1.5E+01,6.0E+01 ,2.0E+01,2.0E+01,1/2, ,5/2 ,-, ,F, , -,3,D,Xi(1950) ,dss +2.025E+03 ,5.0E+00,5.0E+00,2.0E+01 ,1.5E+01,5.0E+00,1/2, ,3/2 ,+, ,F, , 0,3,D,Xi(2030) ,uss +2.025E+03 ,5.0E+00,5.0E+00,2.0E+01 ,1.5E+01,5.0E+00,1/2, ,3/2 ,+, ,F, , -,3,D,Xi(2030) ,dss 2.137E+03 ,4.0E+00,4.0E+00,20 ,-1 ,-1 ,1/2, ,? ,?, ,F, , 0,1,S,Xi(2120) ,uss 2.137E+03 ,4.0E+00,4.0E+00,20 ,-1 ,-1 ,1/2, ,? ,?, ,F, , -,1,S,Xi(2120) ,dss 2.189E+03 ,7.0E+00,7.0E+00,4.6E+01 ,2.7E+01,2.7E+01,1/2, ,? ,?, ,F, , 0,2,S,Xi(2250) ,uss From 7b5d15a89a19ed5cd9031dcb77fa3e12bb58e796 Mon Sep 17 00:00:00 2001 From: tevans1260 Date: Thu, 13 Aug 2020 15:04:17 +0200 Subject: [PATCH 67/67] add automated testing against released models --- AmpGen/NamedParameter.h | 17 +++-- AmpGen/Particle.h | 16 +++-- apps/ConvertToSourceCode.cpp | 1 - apps/Generator.cpp | 12 ++-- apps/lib_diff.cpp | 121 ++++++++++++++++++++++++++++++++++ doc/release.notes | 1 + options/run_lbAmpGen_tests.sh | 22 +++++++ src/NamedParameter.cpp | 9 --- test/test_enum.cpp | 22 +++++++ 9 files changed, 194 insertions(+), 27 deletions(-) create mode 100644 apps/lib_diff.cpp create mode 100755 options/run_lbAmpGen_tests.sh create mode 100644 test/test_enum.cpp diff --git a/AmpGen/NamedParameter.h b/AmpGen/NamedParameter.h index 40774b215fb..8c5a1264385 100644 --- a/AmpGen/NamedParameter.h +++ b/AmpGen/NamedParameter.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "AmpGen/MsgService.h" #include "AmpGen/OptionsParser.h" @@ -74,7 +75,6 @@ namespace AmpGen setFromOptionsParser(); if ( OptionsParser::printHelp() ) help( defVec.size() > 0 ? 
defVec[0] : T() ); } -// ~NamedParameter(){ INFO("Deconstructing: " << m_name ); } void help(const T& def){ std::string type = type_string(); if( type == "std::__cxx11::basic_string, std::allocator >" ) type = "string"; @@ -148,9 +148,8 @@ namespace AmpGen return return_container; } }; - template std::ostream& operator<<( std::ostream& os, const NamedParameter& np ); - - std::string optionalHelpString(const std::string& header, const std::vector>& args); + template std::ostream& operator<<( std::ostream& os, const NamedParameter& np ); + template std::string optionalHelpString(const std::string& header, const T&... args); } template @@ -165,4 +164,14 @@ std::ostream& AmpGen::operator<<( std::ostream& os, const AmpGen::NamedParameter return os; } +template std::string AmpGen::optionalHelpString(const std::string& header, const T&... args ) +{ + std::stringstream rt; + rt << header; + for_each( std::make_tuple(args...), [&rt](const auto& f) mutable { + rt << "\n\033[3m " << f.first << "\033[0m: " << f.second; + }); + return rt.str(); +} + #endif diff --git a/AmpGen/Particle.h b/AmpGen/Particle.h index 593fc2663b5..4b9fc065585 100644 --- a/AmpGen/Particle.h +++ b/AmpGen/Particle.h @@ -299,13 +299,15 @@ namespace AmpGen std::string modifierString() const; ///< Re-generate modifier string used to create particle void sortDaughters(); ///< Recursively order the particle's decay products. - NamedParameter m_spinFormalism = {"Particle::SpinFormalism" ,spinFormalism::Covariant, optionalHelpString("Formalism to use for spin calculations", { - {"Covariant", "[default] Covariant Tensor, based on Rarita-Schwinger constraints on the allowed covariant wavefunctions."} - , {"Canonical", "Canonical formulation, based on rotational properties of wavefunctions, i.e. 
Wigner D-matrices and Clebsch-Gordan for (L,S) expansion."} } ) }; - - NamedParameter m_spinBasis = {"Particle::SpinBasis", spinBasis::Dirac, optionalHelpString("Basis to use for calculating external polarisation tensors / spinors.", { - {"Dirac", "[default] Quantises along the z-axis"} - , {"Weyl" , "Quantises along the direction of motion"}} )}; + NamedParameter m_spinFormalism = {"Particle::SpinFormalism" ,spinFormalism::Covariant, + optionalHelpString("Formalism to use for spin calculations", + std::make_pair("Covariant", "[default] Covariant Tensor, based on Rarita-Schwinger constraints on the allowed covariant wavefunctions.") + , std::make_pair("Canonical", "Canonical formulation, based on rotational properties of wavefunctions, i.e. Wigner D-matrices and Clebsch-Gordan for (L,S) expansion.") ) }; + + NamedParameter m_spinBasis = {"Particle::SpinBasis", spinBasis::Dirac, + optionalHelpString("Basis to use for calculating external polarisation tensors / spinors.", + std::make_pair("Dirac", "[default] Quantises along the z-axis") + , std::make_pair("Weyl", "Quantises along the direction of motion") )}; NamedParameter m_defaultModifier = {"Particle::DefaultModifier","", "Default modifier to use for lineshapes, for example to use normalised vs unnormalised Blatt-Weisskopf factors."}; }; std::ostream& operator<<( std::ostream& os, const Particle& particle ); diff --git a/apps/ConvertToSourceCode.cpp b/apps/ConvertToSourceCode.cpp index e1695ff454c..43b77c0048c 100644 --- a/apps/ConvertToSourceCode.cpp +++ b/apps/ConvertToSourceCode.cpp @@ -80,7 +80,6 @@ template void generate_source(T& pdf, const std::string& sourceFile, M auto oEventType = NamedParameter("EventType").getVector(); TRandom3 rnd(seed); - INFO("First random number = " << rnd.Uniform() ); EventType eventType( oEventType ); Generator phsp(eventType); diff --git a/apps/Generator.cpp b/apps/Generator.cpp index 612cbfed422..3b79639481b 100644 --- a/apps/Generator.cpp +++ b/apps/Generator.cpp @@ -146,13 
+146,13 @@ int main( int argc, char** argv ) int seed = NamedParameter ("Seed" , 0, "Random seed used in event Generation" ); std::string outfile = NamedParameter("Output" , "Generate_Output.root" , "Name of output file" ); auto pdfType = NamedParameter( "Type", pdfTypes::CoherentSum, optionalHelpString("Type of PDF to use:", - { {"CoherentSum" , "Describes decays of a (pseudo)scalar particle to N pseudoscalars"} - , {"PolarisedSum" , "Describes the decay of a particle with spin to N particles carrying spin."} - , {"FixedLib" , "PDF to describe a decay from a precompiled library, such as those provided to GAUSS."}} ) ); + std::make_pair(pdfTypes::CoherentSum , "Describes decays of a (pseudo)scalar particle to N pseudoscalars") + , std::make_pair(pdfTypes::PolarisedSum, "Describes the decay of a particle with spin to N particles carrying spin.") + , std::make_pair(pdfTypes::FixedLib , "PDF to describe a decay from a precompiled library, such as those provided to GAUSS.") ) ); auto phspType = NamedParameter( "PhaseSpace", phspTypes::PhaseSpace, optionalHelpString("Phase-space generator to use:", - { {"PhaseSpace" , "Phase space generation based on Raubold-Lynchi algorithm (recommended)."} - , {"TreePhaseSpace" , "Divides the phase-space into a series of quasi two-body phase-spaces for efficiently generating narrow states."} - , {"RecursivePhaseSpace", "Includes possible quasi-stable particles and the phase spaces of their decay products, such as Λ baryons."}} ) ); + std::make_pair(phspTypes::PhaseSpace , "Phase space generation based on Raubold-Lynch algorithm (recommended).\0") + , std::make_pair(phspTypes::TreePhaseSpace , "Divides the phase-space into a series of quasi two-body phase-spaces for efficiently generating narrow states.\0") + , std::make_pair(phspTypes::RecursivePhaseSpace, "Includes possible quasi-stable particles and the phase spaces of their decay products, such as Λ baryons.\0") ) ); std::string lib = NamedParameter("Library","","Name of library to 
use for a fixed library generation"); size_t nBins = NamedParameter ("nBins" ,100, "Number of bins for monitoring plots." ); diff --git a/apps/lib_diff.cpp b/apps/lib_diff.cpp new file mode 100644 index 00000000000..344f87b20ea --- /dev/null +++ b/apps/lib_diff.cpp @@ -0,0 +1,121 @@ +#include "AmpGen/PhaseSpace.h" +#include "AmpGen/DynamicFCN.h" +#include "AmpGen/Utilities.h" +#include "AmpGen/OptionsParser.h" +#include "AmpGen/NamedParameter.h" + +using namespace AmpGen; + +std::vector cmd( const std::string& command ){ + std::vector output; + FILE* proc = popen(command.c_str(), "r"); + char buf[4096]; + while (!feof(proc) && fgets(buf, sizeof(buf), proc)) output.push_back(buf); + pclose(proc); + return output; +} + +struct Counter { + int pass {0}; + int total{0}; + Counter() = default; + Counter( const bool& pass ) : pass(pass), total(1) {} + Counter( const double& a, const double& b, const std::string& name, const double& tolerance ); + Counter( const complex_t& a, const complex_t& b, const std::string& name, const double& tolerance ); + Counter(const std::vector& a, const std::vector& b, const std::string& name, const double& tolerance); + void operator+=( const Counter& other ) + { + this->pass += other.pass; + this->total += other.total; + } + +}; + +Counter::Counter(const double& a, const double& b, const std::string& name, const double& tolerance) +{ + double diff = std::abs(a-b); + total = 1; + if( diff > std::abs(tolerance) ){ + ERROR( name << " (" << a << " " << b << ") = " << diff << " > " << tolerance); + pass = 0; + } + else pass =1; + DEBUG( name << " (" << a << " " << b << ") = " << diff << " > " << tolerance); +} + +Counter::Counter(const complex_t& a, const complex_t& b, const std::string& name, const double& tolerance) +{ + double diff_re = std::abs(std::real(a)-std::real(b)); + double diff_im = std::abs(std::imag(a)-std::imag(b)); + total =1; + if( diff_re > std::abs(tolerance) || diff_im > std::abs(tolerance) ){ + ERROR( name << " (" << a << " 
" << b << ") = " << diff_re << ", " << diff_im << " > " << tolerance); + pass =0; + } + else pass = 1; + DEBUG( name << " (" << a << " " << b << ") = " << diff_re << ", " << diff_im << " < " << tolerance); +} + +Counter::Counter(const std::vector& a, const std::vector& b, const std::string& name, const double& tolerance) +{ + total = a.size(); + for( size_t i = 0 ; i < a.size(); ++i ){ + double diff_re = std::abs(std::real(a[i])-std::real(b[i])); + double diff_im = std::abs(std::imag(a[i])-std::imag(b[i])); + if( diff_re > std::abs(tolerance) || diff_im > std::abs(tolerance) ){ + ERROR( name << " (" << a[i] << " " << b[i] << ") = " << diff_re << ", " << diff_im << " > " << tolerance); + } + else pass++; + DEBUG( name << " (" << a[i] << " " << b[i] << ") = " << diff_re << ", " << diff_im << " > " << tolerance); + } +} + +int main( int argc, char** argv ) +{ + std::string modelName = argv[1]; + OptionsParser::getMe()->setQuiet(); + OptionsParser::setArgs( argc, argv ); + PhaseSpace phsp( EventType(NamedParameter("EventType", "").getVector()) ); + auto event = phsp.makeEvent(); + auto event2 = phsp.makeEvent(); + std::string lib = NamedParameter("Lib",""); + std::string refLib = NamedParameter("RefLib",""); + std::string type = NamedParameter("Type","CoherentSum"); + Counter total; + + auto ftable = cmd("nm " + refLib + "| grep __wParams"); + if( type == "CoherentSum"){ + auto f1 = DynamicFCN(lib, "AMP"); + auto f2 = DynamicFCN(refLib, "AMP"); + total += Counter(f1(event,+1), f2(event,+1), modelName + " fcn(x)", 10e-6 ); + total += Counter(f1(event,-1), f2(event,-1), modelName + " fcn(Px)", 10e-6); + for( auto& line : ftable ) + { + auto i = trim( split(line,' ')[2] ); + auto f = DynamicFCN(lib, i ); + auto g = DynamicFCN(refLib, i ); + if( !f.isLinked() || !g.isLinked() ) total += false ; + else total += Counter( f(event), g(event), i, 10e-6); + } + } + if( type == "PolarisedSum"){ + auto f1 = DynamicFCN(lib , "FCN_extPol"); + auto f2 = DynamicFCN(refLib, 
"FCN_extPol"); + total += Counter(f1(event,+1,0,0,0), f2(event,+1,0,0,0), modelName + " fcn(x)" , 1e-6); + total += Counter(f1(event,-1,0,0,0), f2(event,-1,0,0,0), modelName + " fcn(Px)", 1e-6); + for( auto& line : ftable ){ + auto i = trim( split(line,' ')[2] ); + auto a1 = DynamicFCN(const double*)>(lib , i ); + auto a2 = DynamicFCN(const double*)>(refLib, i ); + if( ! a1.isLinked() || ! a2.isLinked() ) total += false; + else { + auto pass = Counter(a1(event), a2(event), i, 10e-6); + total += pass; + } + } + } + if(total.pass == total.total) + INFO("Library: " << modelName << " matches [passed = " << total.pass << " / " << total.total << "]" ); + else + ERROR("Library: " << modelName << " does not match [passed = " << total.pass << " / " << total.total << "]" ); +} diff --git a/doc/release.notes b/doc/release.notes index 6b1c47a5a5d..cc2cb56fdc8 100644 --- a/doc/release.notes +++ b/doc/release.notes @@ -8,6 +8,7 @@ - Support for AVX2 for amplitude evaluation / integration single/double precision. - Updated plotting for making component amplitude plots. - Better thread safety of integration should improve fit stability. + - Add GenericKmatrix lineshape for handling ... Generic K-matrices (L=0 only) !=================== AmpGen v1r2 2019-11-12 ================== - New phase space Generator TreePhaseSpace for producing narrow resonances. - Improved handling of CP conjugated amplitudes. 
diff --git a/options/run_lbAmpGen_tests.sh b/options/run_lbAmpGen_tests.sh new file mode 100755 index 00000000000..0e1ea585131 --- /dev/null +++ b/options/run_lbAmpGen_tests.sh @@ -0,0 +1,22 @@ +branch=mstahl_AmpGen + +tmp_dir=$(mktemp -d -t ci-XXXXXXXXXX) +echo "TMPDIR=$tmp_dir" +cd $tmp_dir + +wget https://gitlab.cern.ch/lhcb/Gauss/-/archive/$branch/Gauss-${branch}.zip?path=Gen/LbAmpGen -O LbAmpGen.zip >> /dev/null +unzip LbAmpGen.zip >> /dev/null +top=Gauss-$branch-Gen-LbAmpGen + +for model in $top/Gen/LbAmpGen/models/*.opt ; do + filename=$(basename $model) + without_ext=${filename%.*} + if [ $without_ext == "DtoKpipipi_v1" ] || + [ $without_ext == "DtopiKpipi_v1" ] || + [ $without_ext == "DtoKKpipi_v1" ] ; then continue ; fi # these are old models that are kept for backwards compatibility; don't expect to be able to reproduce them exactly + mkdir -p build/$without_ext + $AMPGENROOT/build/bin/ConvertToSourceCode $model --Output build/$without_ext/new.cpp >> /dev/null + g++ -Ofast -shared -rdynamic --std=c++11 -fPIC build/$without_ext/new.cpp -o build/$without_ext/new.so + g++ -Ofast -shared -rdynamic --std=c++11 -fPIC $top/Gen/LbAmpGen/src/${without_ext}.cpp -o build/$without_ext/gaussUpdate.so + $AMPGENROOT/build/bin/lib_diff $model --Lib=build/$without_ext/new.so --RefLib=build/$without_ext/gaussUpdate.so +done diff --git a/src/NamedParameter.cpp b/src/NamedParameter.cpp index a2b7ef86db8..282581e3b75 100644 --- a/src/NamedParameter.cpp +++ b/src/NamedParameter.cpp @@ -11,12 +11,3 @@ template class AmpGen::NamedParameter; template class AmpGen::NamedParameter; template class AmpGen::NamedParameter; -std::string AmpGen::optionalHelpString(const std::string& header, const std::vector>& args ) -{ - std::string rt=header +"\n"; - for( size_t i = 0 ; i < args.size(); ++i ){ - rt += "\033[3m " + args[i].first + std::string( 25 - args[i].first.size(), ' '); - rt += "\033[0m: " + args[i].second + (i==args.size()-1 ? 
"" : "\n" ); - } - return rt; -} diff --git a/test/test_enum.cpp b/test/test_enum.cpp new file mode 100644 index 00000000000..2a3c492c275 --- /dev/null +++ b/test/test_enum.cpp @@ -0,0 +1,22 @@ + +#define BOOST_TEST_DYN_LINK + +#define BOOST_TEST_MODULE "enum" + +#include +namespace utf = boost::unit_test; + +#include "AmpGen/enum.h" + +namespace AmpGen { + make_enum( test_enum, state1, state2, state3 ) +} +using namespace AmpGen; + +BOOST_AUTO_TEST_CASE( test_enums ) +{ + BOOST_CHECK( parse ("state1") == test_enum::state1 ); + BOOST_CHECK( parse ("state2") == test_enum::state2 ); + BOOST_CHECK( parse ("state3") == test_enum::state3 ); + BOOST_CHECK( parse ("blag") == test_enum::Invalid ); +}