Genetic Mutations

Problem

First Try

  • Recursive DFS?
    Let’s see some examples.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

start: "AAAAACCC"
end: "AACCCCCC"
bank: ["AAAACCCC", "AAACCCCC", "AACCCCCC"]

return: 3

we need to check from start of string which letter we can change so that string exists in bank

start: "AA[A][A][A]CCC"
end: "AA[C][C][C]CCC"

tmp string AA(C)AACCC in bank? no
AAA(C)ACCC in bank? no
AAAA(C)CCC ? yes!
update string to AAAACCCC

start: AAAACCCC
end: AACCCCCC

check if in bank:
AA(C)ACCCC? no
AAA(C)CCCC? yes!

start:AAACCCCC
end:AACCCCCC

check if in bank:
AA(C)CCCCC? yes!

done....3 tries

Attempted Solution

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
int dfs(string start, string end, vector<string>& bank, int tries) {
//base case

if (start == end) return tries;

for (int i = 0; i < start.size(); i++) {


if (start[i] == end[i]) continue; // nothing to do if letter is same
// if letter is different in end string then we copy that letter and
// check if it exists in bank
string tmp = start;
tmp[i] = end[i];
int ret = -1;
if (find(bank.begin(), bank.end(), tmp) != bank.end()) {
ret = dfs(tmp, end, bank, tries+1);
}
if (ret != -1) return ret;
}

return -1; //oops don't forget this...if not found at all
}
int minMutation(string start, string end, vector<string>& bank) {
return dfs(start, end, bank, 0);
}

Unfortunately, there is a major flaw in the logic above. The problem is that we are only flipping the letters as per the end string. This worked in the example I chose above, but there may be paths we have not considered. This means we may not always find the path to the end string.

We should flip letters to {A, C, T, G} and not just the ones we want to change for the end string.

Notes for next approach

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
keep a visited vector of booleans to denote when we have visited a particular string from the bank

Using this example
start: "AACCGGTT"
end: "AAACGGTA"
["AACCGATT","AACCGATA","AAACGATA","AAACGGTA"]

Given:
queue Q of visiting
set V of visited
set B of Bank of valid strings
List L of available letters
level = 0
string target

Q.add(start)
while !Q.empty
s = Q.pop()
level++
if s == target return level
V.push(s)
for i=0 -> s.size()
for letter l2 in L
char c = s[i]
s[i] = l2
if (s in B) and (s not in V)
Q.push(s)
end
s[i] = c // restore char?
end
end
end
return -1

Buggy solution below

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
int minMutation(string start, string end, vector<string>& bank) {
unordered_set<string> V; // visited
unordered_set<string> B; // bank
vector<string> Q; // work queue
vector<char> letters = {'A', 'C', 'G', 'T'};
// copy strings from bank to set for quicker lookup
for (auto e: bank) {
B.push_back(e);
}

Q.push_back(start);

while (!Q.empty()) {
string s = Q.front();
Q.erase(Q.begin());
level++;
if (s == end) return level;
for (int i = 0; i < s.size(); i++) {
char c = s[i];
for (auto l: letters) {
s[i] = l;
if (!V.find(s) && B.find(s)) {
Q.push_back(s);
}
s[i] = c;
}
}
}

return -1;
}

FIxed

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class Solution {
public:
int minMutation(string start, string end, vector<string>& bank) {
unordered_set<string> V; // visited
unordered_set<string> B; // bank
vector<string> Q; // work queue
vector<char> letters = {'A', 'C', 'G', 'T'};
int level = 0;
// copy strings from bank to set for quicker lookup
for (auto e: bank) {
B.insert(e);
}

Q.push_back(start);

while (!(Q.empty())) {

int num = Q.size(); // Get number of strings to inspect at level
while (num-- > 0) {
string s = Q.front();
//cout << "pop " <<s << endl;
Q.erase(Q.begin());
V.insert(s);
if (s == end) return level;
for (int i = 0; i < s.size(); i++) {
char c = s[i];
for (auto l: letters) {
s[i] = l;
if ((V.find(s) == V.end()) && (B.find(s) != B.end())) {
//cout << "push " << s << endl;
Q.push_back(s);
}
s[i] = c;
}
}
}

// increment level after we have exhausted strings at previous level
level++;
}

return -1;
}
};