1616#include < mutex>
1717#include < thread>
1818#include < future>
19+ #include < functional>
1920#include < cstdlib>
2021#include < pybind11/pybind11.h>
2122#include < pybind11/numpy.h>
2930#include < cereal/types/vector.hpp>
3031#include < cereal/types/unordered_map.hpp>
3132
33+ #include " parallel.h"
3234
3335
3436namespace py = pybind11;
@@ -69,12 +71,12 @@ class Uncopyable {
6971
7072class BB {
7173 public:
72- double xmin, xmax, ymin, ymax;
74+ float xmin, xmax, ymin, ymax;
7375 BB () {
7476 clear ();
7577 }
7678
77- BB (const double & _xmin, const double & _xmax, const double & _ymin, const double & _ymax){
79+ BB (const float & _xmin, const float & _xmax, const float & _ymin, const float & _ymax){
7880 if (unlikely (_xmin > _xmax || _ymin > _ymax)){
7981 throw std::runtime_error (" Impossible rectange was given. xmin < xmax and ymin < ymax must be satisfied." );
8082 }
@@ -103,7 +105,7 @@ class BB{
103105 return *this ;
104106 }
105107
106- inline void expand (const double & dx, const double & dy){
108+ inline void expand (const float & dx, const float & dy){
107109 xmin -= dx;
108110 xmax += dx;
109111 ymin -= dy;
@@ -116,7 +118,7 @@ class BB{
116118 return unlikely (c1 && c2);
117119 }
118120
119- inline double operator [](const int & i)const {
121+ inline float operator [](const int & i)const {
120122 if (i == 0 ){
121123 return xmin;
122124 } else if (i == 1 ){
@@ -388,25 +390,15 @@ class PseudoPRTree : Uncopyable{
388390 }
389391
390392 std::pair<DataType<int >*, DataType<int >*> as_X (void * placement, const int hint){
391- const size_t nthreads = std::max (1 , (int ) std::thread::hardware_concurrency ());
392393 DataType<int > *b, *e;
393394 auto children = get_all_leaves (hint);
394395 int total = children.size ();
395396 b = (DataType<int >*)placement;
396397 e = b + total;
397- {
398- vec<std::thread> threads (nthreads);
399- for (int t = 0 ; t < nthreads; t++){
400- threads[t] = std::thread (std::bind (
401- [&](const int bi, const int ei, const int t){
402- for (int i = bi; i < ei; i++){
403- new (b + i) DataType<int >{i, children[i]->mbb };
404- }
405- }, t * total/nthreads, unlikely ((t+1 )==nthreads)?total:(t+1 )*total/nthreads, t)
406- );
407- }
408- std::for_each (threads.begin (), threads.end (), [&](std::thread& x){x.join ();});
409- }
398+ parallel_for_each (b, e, [&](auto & p){
399+ int i = &p - b;
400+ new (b + i) DataType<int >{i, children[i]->mbb };
401+ });
410402 return {b, e};
411403 }
412404};
@@ -488,8 +480,7 @@ class PRTree : Uncopyable{
488480 }
489481 }
490482
491-
492- PRTree (const py::array_t <T>& idx, const py::array_t <double >& x){
483+ PRTree (const py::array_t <T>& idx, const py::array_t <float >& x){
493484 const auto &buff_info_idx = idx.request ();
494485 const auto &shape_idx = buff_info_idx.shape ;
495486 const auto &buff_info_x = x.request ();
@@ -505,22 +496,30 @@ class PRTree : Uncopyable{
505496 DataType<T> *b, *e;
506497 void *placement = std::malloc (std::max (sizeof (DataType<T>), sizeof (DataType<int >)) * length);
507498 b = (DataType<T>*)placement;
508- e = b;
509- for (size_t i = 0 ; i < length; i++){
510- auto bb = BB (*x.data (i, 0 ), *x.data (i, 1 ), *x.data (i, 2 ), *x.data (i, 3 ));
499+ e = b + length;
511500
512- new (e) DataType<T>{*idx.data (i), bb};
513- e++;
501+ parallel_for_each (b, e,
502+ [&](auto & it){
503+ int i = &it - b;
504+ auto bb = BB (*x.data (i, 0 ), *x.data (i, 1 ), *x.data (i, 2 ), *x.data (i, 3 ));
505+ new (b + i) DataType<T>{*idx.data (i), bb};
506+ });
514507
508+ auto build_umap = [&]{for (size_t i = 0 ; i < length; i++){
509+ auto bb = BB (*x.data (i, 0 ), *x.data (i, 1 ), *x.data (i, 2 ), *x.data (i, 3 ));
515510 umap.emplace_hint (umap.end (), *idx.data (i), std::move (bb));
516- }
511+ }};
512+
517513 // ProfilerStart("construct.prof");
518- build (b, e, placement);
514+ auto t1 = std::thread (build_umap);
515+ auto t2 = std::thread ([&]{build (b, e, placement);});
516+ t1.join ();
517+ t2.join ();
519518 // ProfilerStop();
520519 std::free (placement);
521520 }
522521
523- void insert (const T& idx, const py::array_t <double >& x){
522+ void insert (const T& idx, const py::array_t <float >& x){
524523 vec<Leaf<T, B>*> cands;
525524 std::queue<PRTreeNode<T, B>*, std::deque<PRTreeNode<T, B>*>> que;
526525 BB bb;
@@ -538,9 +537,9 @@ class PRTree : Uncopyable{
538537 }
539538
540539 bb = BB (*x.data (0 ), *x.data (1 ), *x.data (2 ), *x.data (3 ));
541- double dx = bb.xmax - bb.xmin + 0.000000001 ;
542- double dy = bb.ymax - bb.ymin + 0.000000001 ;
543- double c = 0.0 ;
540+ float dx = bb.xmax - bb.xmin + 0.000000001 ;
541+ float dy = bb.ymax - bb.ymin + 0.000000001 ;
542+ float c = 0.0 ;
544543 std::stack<PRTreeNode<T, B>*> sta;
545544 while (likely (cands.size () == 0 )){
546545 while (likely (!sta.empty ())){
@@ -600,16 +599,17 @@ class PRTree : Uncopyable{
600599
601600 template <class iterator >
602601 void build (iterator b, iterator e, void *placement){
603- const int nthreads = std::max (1 , (int ) std::thread::hardware_concurrency ());
604602 vec<Leaf<int , B>*> leaves;
605603 vec<std::unique_ptr<PRTreeNode<T, B>>> tmp_nodes, prev_nodes;
606604 std::unique_ptr<PRTreeNode<T, B>> p, q, r;
607605
608606 auto first_tree = PseudoPRTree<T, B>(b, e);
609- for (auto & leaf : first_tree.get_all_leaves (e - b)){
610- p = std::make_unique<PRTreeNode<T, B>>(leaf);
611- prev_nodes.emplace_back (std::move (p));
612- }
607+ auto first_leaves = first_tree.get_all_leaves (e - b);
608+ parallel_for_each (first_leaves.begin (), first_leaves.end (), prev_nodes,
609+ [&](auto & leaf, auto & o){
610+ auto pp = std::make_unique<PRTreeNode<T, B>>(leaf);
611+ o.emplace_back (std::move (pp));
612+ });
613613
614614 auto [bb, ee] = first_tree.as_X (placement, e - b);
615615 while (likely (prev_nodes.size () > 1 )){
@@ -619,18 +619,9 @@ class PRTree : Uncopyable{
619619 tmp_nodes.clear ();
620620 tmp_nodes.reserve (leaves_size);
621621
622- vec<decltype (tmp_nodes)> out_privates (nthreads);
623- {
624- vec<std::thread> threads (nthreads);
625- for (auto & o : out_privates){
626- o.reserve (leaves_size/nthreads*2 );
627- }
628- for (int t = 0 ; t < nthreads; t++){
629- threads[t] = std::thread (std::bind (
630- [&](const int bi, const int ei, const int t){
631- for (size_t k = bi; k < ei; k++){
622+ parallel_for_each (leaves.begin (), leaves.end (), tmp_nodes,
623+ [&](auto & leaf, auto & o){
632624 int idx, jdx;
633- auto & leaf = leaves[k];
634625 int len = leaf->data .size ();
635626 auto pp = std::make_unique<PRTreeNode<T, B>>(leaf->mbb );
636627 if (likely (!leaf->data .empty ())){
@@ -642,21 +633,11 @@ class PRTree : Uncopyable{
642633 idx = leaf->data [0 ].first ;
643634 pp->head = std::move (prev_nodes[idx]);
644635 if (!pp->head ){throw std::runtime_error (" ppp" );}
645- out_privates[t] .emplace_back (std::move (pp));
636+ o .emplace_back (std::move (pp));
646637 } else {
647638 throw std::runtime_error (" what????" );
648639 }
649- }
650- }, t*leaves_size/nthreads, unlikely ((t+1 )==nthreads)?leaves_size:(t+1 )*leaves_size/nthreads, t));
651- }
652- std::for_each (threads.begin (), threads.end (), [&](std::thread& x){x.join ();});
653-
654- for (int t = 0 ; t < nthreads; t++){
655- tmp_nodes.insert (tmp_nodes.end (),
656- std::make_move_iterator (out_privates[t].begin ()),
657- std::make_move_iterator (out_privates[t].end ()));
658- }
659- }
640+ });
660641
661642 {
662643 auto tmp = tree.as_X (placement, ee - bb);
@@ -687,7 +668,7 @@ class PRTree : Uncopyable{
687668 }
688669
689670
690- auto find_all (const py::array_t <double >& x){
671+ auto find_all (const py::array_t <float >& x){
691672 const auto &buff_info_x = x.request ();
692673 const auto &ndim= buff_info_x.ndim ;
693674 const auto &shape_x = buff_info_x.shape ;
@@ -711,35 +692,13 @@ class PRTree : Uncopyable{
711692 }
712693 }
713694 size_t length = X.size ();
714- const int nthreads = std::max (1 , (int ) std::thread::hardware_concurrency ());
715- vec<vec<vec<T>>> out_privates (nthreads);
716- {
717- vec<std::thread> threads (nthreads);
718- for (auto & o : out_privates){
719- o.reserve (length/nthreads+1 );
720- }
721- for (int t = 0 ; t < nthreads; t++){
722- threads[t] = std::thread (std::bind (
723- [&](const int bi, const int ei, const int t){
724- for (int i = bi; i < ei; i++){
725- out_privates[t].emplace_back (std::move (find (X[i])));
726- }
727- }, t*length/nthreads, unlikely ((t+1 )==nthreads)?length:(t+1 )*length/nthreads, t));
728- }
729- std::for_each (threads.begin (), threads.end (), [&](std::thread& x){x.join ();});
730- }
731695 vec<vec<T>> out;
732696 out.reserve (length);
733- for (int t = 0 ; t < nthreads; t++){
734- out.insert (out.end (),
735- std::make_move_iterator (out_privates[t].begin ()),
736- std::make_move_iterator (out_privates[t].end ()));
737- vec<vec<T>>().swap (out_privates[t]);
738- }
697+ parallel_for_each (X.begin (), X.end (), out, [&](const BB & x, auto & o){o.emplace_back (find (x));});
739698 return out;
740699 }
741700
742- vec<T> find_one (const py::array_t <double >& x){
701+ vec<T> find_one (const py::array_t <float >& x){
743702 const auto &buff_info_x = x.request ();
744703 const auto &ndim= buff_info_x.ndim ;
745704 const auto &shape_x = buff_info_x.shape ;
0 commit comments