A contextually similar problem is Gene Finding via TATA box search.
/*
 * GC-content gene predictor.
 *
 * Reads one whitespace-delimited DNA sequence from standard input, counts its
 * G and C bases (upper or lower case), prints the count and the GC ratio
 * (GC count divided by sequence length), and reports the sequence as a gene
 * when the ratio is strictly greater than 0.6.
 *
 * Manual test cases (values as printed on the console):
 *
 * Test Case 1: Input: A (a string with no Gs or Cs)
 *   GC count 0, GC ratio 0        -> "Probably not a gene..."
 * Test Case 2: Input: G (a string with only Gs and Cs)
 *   GC count 1, GC ratio 1        -> "You've got a gene on your hands!"
 * Test Case 3: Input: ACATAGACTAG (a mix of all four bases)
 *   GC count 4, GC ratio 0.363636 -> "Probably not a gene..."
 * Test Case 4: Input: gcgc (lowercase bases are counted too)
 *   GC count 4, GC ratio 1        -> "You've got a gene on your hands!"
 * Test Case 5: no input at all (end-of-file on standard input)
 *   error message on stderr, non-zero exit status
 */
#include <cstdlib>
#include <iostream>
#include <string>

using namespace std;

// A sequence is reported as a gene only when its GC ratio is strictly above
// this value.
static const double GENE_GC_THRESHOLD = 0.6;

// Returns true when `base` is a guanine or cytosine symbol in either case.
static bool is_gc_base(char base)
{
    return base == 'G' || base == 'C' || base == 'g' || base == 'c';
}

// Returns how many characters of `dna` are G or C bases.
static string::size_type count_gc(const string& dna)
{
    string::size_type total = 0;
    // size_type index: avoids the signed/unsigned comparison against length().
    for (string::size_type i = 0; i < dna.length(); ++i) {
        if (is_gc_base(dna[i])) {
            ++total;
        }
    }
    return total;
}

// Keeps the console window open on Windows; does nothing on other platforms,
// where the "pause" shell command does not exist.
static void pause_console()
{
#ifdef _WIN32
    system("pause");
#endif
}

// Inputs: DNA sequence (one token read from standard input).
// Outputs: GC count, ratio of G&C to total length, and a prediction of
// whether or not the sequence is a gene.
// Returns 0 on success, EXIT_FAILURE when no sequence could be read.
int main()
{
    // Prompt the user
    cout << "Please input a DNA sequence: ";
    string dna_string;

    // A failed read (e.g. end-of-file) leaves the string empty; dividing by a
    // zero length would yield NaN, so stop here instead.
    if (!(cin >> dna_string) || dna_string.empty()) {
        cerr << "No DNA sequence was provided." << endl;
        pause_console();
        return EXIT_FAILURE;
    }

    // Calculate GC-content
    const string::size_type gc_count = count_gc(dna_string);
    cout << "Total GC count was " << gc_count << endl;

    // Convert to double before dividing so the ratio is not truncated by
    // integer division.
    const double gc_ratio = static_cast<double>(gc_count) / dna_string.length();
    cout << "GC ratio for sequence was " << gc_ratio << endl;

    // Report a gene only when the ratio exceeds the threshold.
    if (gc_ratio > GENE_GC_THRESHOLD) {
        cout << "You've got a gene on your hands!" << endl;
    } else {
        cout << "Probably not a gene..." << endl;
    }

    pause_console();
    return 0;
}