In this Leetcode Repeated DNA Sequences problem solution The DNA sequence is composed of a series of nucleotides abbreviated as 'A', 'C', 'G', and 'T'. For example, "ACGAATTCCG" is a DNA sequence. When studying DNA, it is useful to identify repeated sequences within the DNA.
Given a string s that represents a DNA sequence, return all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule. You may return the answer in any order.
Problem solution in Python.
class Solution: def findRepeatedDnaSequences(self, s: str) -> List[str]: mp = {} res = set() for i in range(len(s)-9): k = s[i:i+10] mp[k] = mp.get(k,0) + 1 if mp[k] > 1: res.add(k) return list(res)
Problem solution in Java.
class Solution { public List<String> findRepeatedDnaSequences(String s) { List<String> list = new ArrayList<>(); if(s == null || s.length()<=10){ return list; } HashMap<String, Integer> map = new HashMap<>(); for(int i = 0 ; i<=s.length()-10 ; i++){ String current = s.substring(i,i+10); map.put(current, map.getOrDefault(current,0)+1); } for(Map.Entry<String,Integer> entry : map.entrySet()){ if(entry.getValue()>=2){ list.add(entry.getKey()); } } return list; } }
Problem solution in C++.
class Solution { public: vector<string> findRepeatedDnaSequences(string s) { unordered_set<string> HT; unordered_set<string> Res; for(int i = 0; i <= (int)s.size() - 10; ++i) { string Curr = s.substr(i,10); if(HT.find(Curr) != HT.end()) { Res.insert(Curr); } HT.insert(Curr); } return vector<string> (Res.begin(),Res.end()); } };
Problem solution in C.
#define INF -9999 typedef struct Trie{ struct Trie *child[4]; int leaf; }Trie; int arrSize; int charTOint(char ch){ int val; switch(ch){ case 'A': val = 0; break; case 'C': val = 1; break; case 'G': val =2; break; default: val = 3; break; }; return val; } Trie* create(){ Trie* newNode = (Trie*)malloc(sizeof(Trie)); int i; for(i=0;i<4;i++){ newNode->child[i]=NULL; } newNode->leaf = 0; return newNode; } void insert(Trie* root, char* str, char** strArr){ int i,j,index; Trie* cur = root; for(i = 0;i<=9;i++){ index = charTOint(str[i]); if(!cur->child[index]){ cur->child[index]=create(); } cur=cur->child[index]; } cur->leaf++; if(cur->leaf>1){ for(j=0;j<=9;j++){ strArr[arrSize][j]=str[j]; } strArr[arrSize][j]='\0'; arrSize++; cur->leaf=INF; } } char ** findRepeatedDnaSequences(char * s, int* returnSize){ char **strArr = (char*)malloc(sizeof(char*)*10005); int i; for( i=0;i<10005;i++){ strArr[i]=(char*)malloc(sizeof(char)*11); } for(i=0;s[i];i++); if(i<10){ *returnSize = 0; return strArr; } Trie* root = create(); arrSize = 0; for(i=0;s[i+9]!='\0';i++){ insert(root,s+i,strArr); } *returnSize = arrSize; return strArr; }
0 Comments