C++ algorithm like python's 'groupby'
Are there any C++ transformations which are similar to itertools.groupby()?
Of course I could easily write my own, but I'd prefer to leverage the idiomatic behavior or compose one from the features provided by the STL or boost.
#include <cstdlib> #include <map> #include <algorithm> #include <string> #include <vector> struct foo { int x; std::string y; float z; }; bool lt_by_x(const foo &a, const foo &b) { return a.x < b.x; } void list_by_x(const std::vector<foo> &foos, std::map<int, std::vector<foo> > &foos_by_x) { /* ideas..? */ } int main(int argc, const char *argv[]) { std::vector<foo> foos; std::map<int, std::vector<foo> > foos_by_x; std::vector<foo> sorted_foos; std::sort(foos.begin(), foos.end(), lt_by_x); list_by_x(sorted_foos, foos_by_x); return EXIT_SUCCESS; }
Answers
What is the point of bloating standard C++ library with an algorithm that is one line of code?
for (const auto & foo : foos) foos_by_x[foo.x].push_back(foo);
Also, take a look at std::multimap, it might be just what you need.
UPDATE:
The one-liner I have provided is not well-optimized for the case when your vector is already sorted. A number of map lookups can be reduced if we remember the iterator of previously inserted object, so it the "key" of the next object and do a lookup only when the key is changing. For example:
#include <map> #include <vector> #include <string> #include <algorithm> #include <iostream> struct foo { int x; std::string y; float z; }; class optimized_inserter { public: typedef std::map<int, std::vector<foo> > map_type; optimized_inserter(map_type & map) : map(&map), it(map.end()) {} void operator()(const foo & obj) { typedef map_type::value_type value_type; if (it != map->end() && last_x == obj.x) { it->second.push_back(obj); return; } last_x = obj.x; it = map->insert(value_type(obj.x, std::vector<foo>({ obj }))).first; } private: map_type *map; map_type::iterator it; int last_x; }; int main() { std::vector<foo> foos; std::map<int, std::vector<foo>> foos_by_x; foos.push_back({ 1, "one", 1.0 }); foos.push_back({ 3, "third", 2.5 }); foos.push_back({ 1, "one.. but third", 1.5 }); foos.push_back({ 2, "second", 1.8 }); foos.push_back({ 1, "one.. but second", 1.5 }); std::sort(foos.begin(), foos.end(), [](const foo & lhs, const foo & rhs) { return lhs.x < rhs.x; }); std::for_each(foos.begin(), foos.end(), optimized_inserter(foos_by_x)); for (const auto & p : foos_by_x) { std::cout << "--- " << p.first << "---\n"; for (auto & f : p.second) { std::cout << '\t' << f.x << " '" << f.y << "' / " << f.z << '\n'; } } }
This doesn't really answer your question, but for the fun of it, I implemented a group_by iterator. Maybe someone will find it useful:
#include <assert.h> #include <iostream> #include <set> #include <sstream> #include <string> #include <vector> using std::cout; using std::cerr; using std::multiset; using std::ostringstream; using std::pair; using std::vector; struct Foo { int x; std::string y; float z; }; struct FooX { typedef int value_type; value_type operator()(const Foo &f) const { return f.x; } }; template <typename Iterator,typename KeyFunc> struct GroupBy { typedef typename KeyFunc::value_type KeyValue; struct Range { Range(Iterator begin,Iterator end) : iter_pair(begin,end) { } Iterator begin() const { return iter_pair.first; } Iterator end() const { return iter_pair.second; } private: pair<Iterator,Iterator> iter_pair; }; struct Group { KeyValue value; Range range; Group(KeyValue value,Range range) : value(value), range(range) { } }; struct GroupIterator { typedef Group value_type; GroupIterator(Iterator iter,Iterator end,KeyFunc key_func) : range_begin(iter), range_end(iter), end(end), key_func(key_func) { advance_range_end(); } bool operator==(const GroupIterator &that) const { return range_begin==that.range_begin; } bool operator!=(const GroupIterator &that) const { return !(*this==that); } GroupIterator operator++() { range_begin = range_end; advance_range_end(); return *this; } value_type operator*() const { return value_type(key_func(*range_begin),Range(range_begin,range_end)); } private: void advance_range_end() { if (range_end!=end) { typename KeyFunc::value_type value = key_func(*range_end++); while (range_end!=end && key_func(*range_end)==value) { ++range_end; } } } Iterator range_begin; Iterator range_end; Iterator end; KeyFunc key_func; }; GroupBy(Iterator begin_iter,Iterator end_iter,KeyFunc key_func) : begin_iter(begin_iter), end_iter(end_iter), key_func(key_func) { } GroupIterator begin() { return GroupIterator(begin_iter,end_iter,key_func); } GroupIterator end() { return GroupIterator(end_iter,end_iter,key_func); } private: Iterator begin_iter; Iterator end_iter; KeyFunc key_func; }; template <typename Iterator,typename KeyFunc> inline GroupBy<Iterator,KeyFunc> group_by( Iterator begin, Iterator end, const KeyFunc &key_func = KeyFunc() ) { return GroupBy<Iterator,KeyFunc>(begin,end,key_func); } static void test() { vector<Foo> foos; foos.push_back({5,"bill",2.1}); foos.push_back({5,"rick",3.7}); foos.push_back({3,"tom",2.5}); foos.push_back({7,"joe",3.4}); foos.push_back({5,"bob",7.2}); ostringstream out; for (auto group : group_by(foos.begin(),foos.end(),FooX())) { out << group.value << ":"; for (auto elem : group.range) { out << " " << elem.y; } out << "\n"; } assert(out.str()== "5: bill rick\n" "3: tom\n" "7: joe\n" "5: bob\n" ); } int main(int argc,char **argv) { test(); return 0; }