I have made a few modifications to make the tool more widget-like. It now reads the subset size from the command line and takes the data from an input file.

Code:
// file sort3_02.cpp based on code by razzle
// http://forums.codeguru.com/member.php?419897-razzle


#include <cstdlib>
#include <list>
#include <vector>
#include <string>
#include <bitset>
#include <iostream>
#include <algorithm>
#include <unistd.h>
#include <fstream>

// Width, in bits/characters, of every user set; used as the std::bitset size
// by to_set()/to_string() and as the set size N in main().
constexpr int strsize = 18;


// Gosper's hack (HAKMEM 175): returns the next larger integer that has the
// same number of set bits as x, i.e. the next K-subset of an N-bit set.
// Precondition: x != 0 (the lowest set bit is used as a divisor).
inline int next(int x) {
   const int lowest_bit = x & -x;          // isolate the rightmost set bit
   const int rippled = x + lowest_bit;     // carry through the rightmost run of ones
   const int changed = x ^ rippled;        // bits that differ between x and rippled
   return ((changed >> 2) / lowest_bit) | rippled;
}

// Returns the first (numerically smallest) k-subset: the k lowest bits set.
inline int start(int k) {
   const int one_past_top = 1 << k;
   return one_past_top - 1;
}

// Returns the smallest value that is NOT a subset of an n-bit set (bit n set);
// subset enumeration stops once it reaches this value.
inline int limit(int n) {
   int first_invalid = 1;
   first_invalid <<= n;
   return first_invalid;
}

int to_set(const std::string& s) { // string to bitset
   return int(std::bitset<strsize>(s).to_ulong());
}

std::string to_string(int x) { // bitset to string
   return std::bitset<strsize>(x).to_string();
}



// Groups the binary strings of an input file into "runs" that share a common
// K-bit subset, and prints the runs followed by any strings left over.
//
// Command line:
//   -i <file>   input file, one string of '0'/'1' characters per line
//               ('\r' and tab characters are stripped from each line)
//   -s <K>      subset size, an integer >= 1
//   -h          print a short help text and exit
//
// Procedure: every K-subset of the strsize-bit set is enumerated once. Then,
// repeatedly, the subset contained in the most remaining user sets is chosen
// (the first one wins on ties), the user sets containing it are printed from
// largest to smallest and removed, until no subset matches any remaining set.
//
// Exit status: 0 on success or -h, -1 if no input file was given, -2 if the
// subset size is missing or invalid, -3 if the file cannot be opened, -4 if an
// input line contains characters other than '0'/'1'.
int main(int argc, char **argv) {

// declarations for args-----------------------------------------------

   // size of user sets (string length)
   const int N = strsize;

   // subset size and the raw text it is parsed from
   int K = 0;
   std::string str_K;
   // input file holding the user sets
   std::string input_file;

   //  Parse command line arguments.
   //  "h" carries no trailing ':' so that a bare -h prints the help text.
   //  Unrecognised options are reported by getopt itself and otherwise ignored.
   int option;
   while ((option = getopt(argc, argv, "s:i:h")) != -1) {
      switch (option) {
         case 's':
            str_K = optarg;
            break;
         case 'i':
            input_file = optarg;
            break;
         case 'h':
            // print help blurb
            std::cout << std::endl;
            std::cout << "use argument -i to specify input file" << std::endl;
            std::cout << "use argument -s to specify subset size" << std::endl;
            std::cout << std::endl;
            return 0;
      }
   }

   // check args
   if (input_file.empty()) {
      std::cerr << "  -> no input file was specified, use -i" << std::endl;
      return -1;
   }
   if (str_K.empty()) {
      std::cerr << "  -> no subset size was specified, use -s" << std::endl;
      return -2;
   }

   // Parse K strictly: K < 1 would make next() divide by zero or start()
   // shift by a negative amount, so it is rejected up front.
   char *parse_end = nullptr;
   const long parsed_K = std::strtol(str_K.c_str(), &parse_end, 10);
   if (parse_end == str_K.c_str() || *parse_end != '\0' || parsed_K < 1) {
      std::cerr << "  -> subset size must be a positive integer, got '"
                << str_K << "'" << std::endl;
      return -2;
   }
   // Anything above N yields no subsets; clamp so the value fits in an int.
   K = (parsed_K > N) ? N + 1 : static_cast<int>(parsed_K);

//---------------------------------------------------------------------

   // open the input file
   std::ifstream input_file_ifstream(input_file.c_str());
   if (!input_file_ifstream.is_open()) {
      std::cerr << "file  " << input_file << "  could not be opened" << std::endl;
      return -3;
   }

   // read the user sets, one per line, converting each to a bitset value
   std::list<int> usersets;
   std::string line;
   std::size_t line_number = 0;
   while (std::getline(input_file_ifstream, line)) {
      ++line_number;
      // remove '\r' EOL chars and tab characters
      line.erase(std::remove(line.begin(), line.end(), '\r'), line.end());
      line.erase(std::remove(line.begin(), line.end(), '\t'), line.end());

      // std::bitset throws on anything but '0'/'1'; report it cleanly instead
      if (line.find_first_not_of("01") != std::string::npos) {
         std::cerr << "  -> line " << line_number << " of " << input_file
                   << " is not a binary string: " << line << std::endl;
         return -4;
      }
      // std::bitset only uses the first N characters of a longer string
      if (line.size() > static_cast<std::size_t>(N)) {
         std::cerr << "  -> warning: line " << line_number << " has more than "
                   << N << " characters, only the first " << N << " are used"
                   << std::endl;
      }
      usersets.push_back(to_set(line));
   }
   input_file_ifstream.close();

   // enumerate every K-subset of the N-bit set once, in increasing order
   // (their number is the N over K binomial); K > N means there are none
   std::vector<int> subsets;
   if (K <= N) {
      for (int s = start(K); s < limit(N); s = next(s)) subsets.push_back(s);
   }
   std::cout << "*** binominal = " << subsets.size() << std::endl;

   bool quit = false;
   do {
      // find the subset contained in the most remaining user sets;
      // the strict '>' keeps the first subset on ties
      int runSet = 0;
      int bestCount = 0;
      for (int s : subsets) {
         int count = 0;
         for (int set : usersets) {
            if ((set & s) == s) ++count;
         }
         if (count > bestCount) {
            bestCount = count;
            runSet = s;
         }
      }

      // print all user sets belonging to this run
      if (runSet > 0) {
         // gather output sets, removing them from the pool as we go
         std::vector<int> out;
         for (auto sp = usersets.begin(); sp != usersets.end();) {
            if ((*sp & runSet) == runSet) {
               out.push_back(*sp);
               sp = usersets.erase(sp); // remove printed user set
            } else ++sp;
         }

         // actual output
         std::sort(out.rbegin(), out.rend());  // from largest to smallest
         std::cout << to_string(runSet) << " *" << std::endl;
         for (int x : out) {
            std::cout << to_string(x) << " : " << x << std::endl;
         }
      } else quit = true; // no more runs

      std::cout << std::endl;

   } while (!quit);

   // print all remaining user sets
   if (!usersets.empty()) {
      std::cout << "*** remaining sets:" << std::endl;
      for (int x : usersets) std::cout << to_string(x) << std::endl;
   }

   return 0;
}
This was compiled and built with g++ 4.8 using:
g++ -O2 -c -std=c++11 sort3_02.cpp
g++ -std=c++11 -o sort3_02.exe sort3_02.o

The code is quite fast (instantaneous) for all of the examples I have tried, even up to subset size of 9/18.

I am going to get this learning on the data set I have now and then work on making the subset size lookup recursive, meaning that for strings with 8 "on" bits, the sorting will start with a subset size of 7. All strings that are not part of a run of at least 2 will be re-analyzed with a subset size of 6, then 5, etc. This will repeat until all strings have been assigned to a run, or there are only two unassigned strings remaining.

LMHmedchem