In this Leetcode Repeated DNA Sequences problem solution The DNA sequence is composed of a series of nucleotides abbreviated as 'A', 'C', 'G', and 'T'. For example, "ACGAATTCCG" is a DNA sequence. When studying DNA, it is useful to identify repeated sequences within the DNA.

Given a string s that represents a DNA sequence, return all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule. You may return the answer in any order.

Leetcode Repeated DNA Sequences problem solution


Problem solution in Python.

class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        mp = {}
        res = set()
        for i in range(len(s)-9):
            k = s[i:i+10]
            mp[k] = mp.get(k,0) + 1
            if mp[k] > 1:
                res.add(k)
        return list(res)



Problem solution in Java.

class Solution {
    public List<String> findRepeatedDnaSequences(String s) {
        
        List<String> list = new ArrayList<>();
        
        if(s == null || s.length()<=10){
            return list;
        }
        
        HashMap<String, Integer> map = new HashMap<>();
        
        for(int i = 0 ; i<=s.length()-10 ; i++){
            
            String current = s.substring(i,i+10);
            map.put(current, map.getOrDefault(current,0)+1);
        }
		
        for(Map.Entry<String,Integer> entry : map.entrySet()){
            
            if(entry.getValue()>=2){
                list.add(entry.getKey());
            }
            
        } 
        return list;
    }
}


Problem solution in C++.

class Solution {
public:
    vector<string> findRepeatedDnaSequences(string s) 
    {
        unordered_set<string> HT;
        unordered_set<string> Res;
        
        for(int i = 0; i <= (int)s.size() - 10; ++i)
        {
            string Curr = s.substr(i,10);
            
            if(HT.find(Curr) != HT.end())
            {
                Res.insert(Curr);
            }
            
            HT.insert(Curr);
        }
        
        return vector<string> (Res.begin(),Res.end());
    }
};


Problem solution in C.

#define INF -9999
typedef struct Trie{
    struct Trie *child[4];
    int leaf;
}Trie;
int arrSize;
int charTOint(char ch){
    int val;
    switch(ch){
        case 'A':
            val = 0;
            break;
        case 'C':
            val = 1;
            break;
        case 'G':
            val =2;
            break;
        default:
            val = 3;
            break;
    };
    return val;
}
Trie* create(){
    Trie* newNode = (Trie*)malloc(sizeof(Trie));
    int i;
    for(i=0;i<4;i++){
        newNode->child[i]=NULL;
    }
    newNode->leaf = 0;
    return newNode;
}
void insert(Trie* root, char* str, char** strArr){
    int i,j,index;
    Trie* cur = root;
    for(i = 0;i<=9;i++){
        index = charTOint(str[i]);
        if(!cur->child[index]){
            cur->child[index]=create();
        }
        cur=cur->child[index];
    }
    cur->leaf++;
    if(cur->leaf>1){
        for(j=0;j<=9;j++){
            strArr[arrSize][j]=str[j];
        }
        strArr[arrSize][j]='\0';
        arrSize++;
        cur->leaf=INF;
    }
}
char ** findRepeatedDnaSequences(char * s, int* returnSize){
    char **strArr = (char*)malloc(sizeof(char*)*10005);
    int i;
    for( i=0;i<10005;i++){
        strArr[i]=(char*)malloc(sizeof(char)*11);
    }
    for(i=0;s[i];i++);
    if(i<10){
        *returnSize = 0;
        return strArr;
    }
    Trie* root = create();
    arrSize = 0;
    for(i=0;s[i+9]!='\0';i++){
        insert(root,s+i,strArr);   
    }
    *returnSize = arrSize;
    return strArr;
}