-
October 23rd, 2020, 01:53 AM
#1
Recursive wildcard expansion
I'm experimenting with a recursive function to do wildcard expansion. The asterisk, '*', can expand to 0-n characters.
My question is if there is something fundamentally wrong with my recursive code.
My goal is to have the expansion not creating duplicates.
The recursive code below is the bare minimum and I've added code so it's easy to see the unique strings and the duplicate ones.
Code:
#include <stdio.h>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
// Forward declarations
void WildcardExpansion(const char* key, int nodeLevel);
size_t SeparateDuplicates(vector<string>& vec, vector<string>& dup);
// Global varables
int g_maxWordLen;
int g_minWordLen;
vector <string> g_words; // Generated words here
int main()
{
int d, u, i;
vector<string> duplicates;
g_maxWordLen = 2; // Max length of generated words
g_minWordLen = 2; // Min length
WildcardExpansion("*b*", 0); // Collects all generated words in g_words
SeparateDuplicates(g_words, duplicates); // Separates the duplicates from g_words
u = (int)g_words.size(); // Size of unique words
d = (int)duplicates.size(); // Size of duplicated words
printf("Unique count: %d\n", u);
for (i = 0; i < u; i++)
printf("%s\n", g_words[i].c_str());
printf("Duplicate count: %d\n", d);
for (i = 0; i < d; i++)
printf("%s\n", duplicates[i].c_str());
return 0;
}
////////////////////////////////////////////////////////////////////////
// Recursive function to do wildcard expansion
// Key can contain one or more wildcards, '*' but not in sequence
// (** is equal to * but is not handled here)
//
void WildcardExpansion(const char* key, int charPos)
{
int letter;
int keyLen;
int astCount;
char c;
char* p;
char keyX[20];
strcpy(keyX, key);
astCount = 0;
p = keyX;
while (*p) // Count asterisks
if (*p++ == '*')
astCount++;
keyLen = (int)strlen(keyX) - astCount; // Letter count
if (keyLen > g_maxWordLen)
return; // Too long word
do // while c
{
c = key[charPos];
switch (c)
{
case '*': // -> key[nodeLevel] == '*'
{
//
// Remove one asterisk
//
strcpy(keyX + charPos, key + charPos + 1);
WildcardExpansion(keyX, charPos); // Recurs same level
strcpy(keyX, key); // Copy original with wildcard back for replacement below
//
// Replace * with letter a-z
// *b* -> bb* -> bb AND *b* -> *bb -> bb => Duplicates!
//
for (letter = 0; letter < 26; letter++)
{
keyX[charPos] = ('a' + letter); // Replace * with letter
strcpy(keyX + charPos + 1, key + charPos); // Expanded: abc -> abc*
WildcardExpansion(keyX, charPos + 1); // Recurs next level
}
return;
} // *
case '\0': // Found a complete word without wildcards
{
if (keyLen < g_minWordLen)
return;
g_words.push_back(key);
break;
}
default: // Dive deeper
{
charPos++;
}
} // switch
} while (c);
}
///////////////////////////////////////////////////////////////////////
// Helper function to store the duplicates in a separate vector
//
size_t SeparateDuplicates(vector<string>& vec, vector<string>& dup)
{
typename std::vector<string>::iterator it;
std::sort(vec.begin(), vec.end());
it = unique(vec.begin(), vec.end(), [&dup](auto& first, auto& second) -> bool
{
if (first == second)
{
dup.push_back(second);
return true;
}
return false;
});
vec.resize(distance(vec.begin(), it));
return dup.size();
}
Program input '*b*' with output length 2:
The pattern '*b*' will expand to bb*, the asterisk removed and 'bb' remains. So far, so good.
The pattern '*b*' will also expand to *bb, the asterisk removed and 'bb' remains. This creates a duplicate.
Program output:
Code:
Unique count: 51
ab
ba
bb
bc
bd
be
bf
bg
bh
bi
bj
bk
bl
bm
bn
bo
bp
bq
br
bs
bt
bu
bv
bw
bx
by
bz
cb
db
eb
fb
gb
hb
ib
jb
kb
lb
mb
nb
ob
pb
qb
rb
sb
tb
ub
vb
wb
xb
yb
zb
Duplicate count: 1
bb
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|