Header Ad

HackerEarth One mismatch problem solution

In this HackerEarth One mismatch problem solution we have given a string s and a set of n strings pi.

For each i find the number of occurrences of pi in s with at most one k-length gap.

The string s occurs in string t at position p with at most one k-length gap if strings s and tptp+1 ... tp+|s|-1 are equal with at most one k-length gap.

Strings s and r are equal with at most one k-length gap if:
  1. |s| = |r|;
  2. or all i and j such that si != ri and sj != rj, it holds |i - j| < k.


HackerEarth One mismatch problem solution


HackerEarth One mismatch problem solution.

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <algorithm>
#include <set>
#include <map>
#include <cmath>
#include <ctime>
#include <functional>
#include <sstream>
#include <fstream>
#include <valarray>
#include <complex>
#include <queue>
#include <cassert>
#include <bitset>
using namespace std;

#ifdef LOCAL
#define debug_flag 0
#else
#define debug_flag 0
#endif

template <class T1, class T2 >
std::ostream& operator << (std::ostream& os, const pair<T1, T2> &p)
{
os << "[" << p.first << ", " << p.second << "]";
return os;
}

template <class T >
std::ostream& operator << (std::ostream& os, const std::vector<T>& v)
{
os << "[";
bool first = true;
for (typename std::vector<T>::const_iterator it = v.begin(); it != v.end(); ++it)
{
if (!first)
os << ", ";
first = false;
os << *it;
}
os << "]";
return os;
}

template <class T >
std::ostream& operator << (std::ostream& os, const std::set<T>& v)
{
os << "[";
bool first = true;
for (typename std::set<T>::const_iterator it = v.begin(); it != v.end(); ++it)
{
if (!first)
os << ", ";
first = false;
os << *it;
}
os << "]";
return os;
}

#define dbg(args...) { if (debug_flag) { _print(_split(#args, ',').begin(), args); cerr << endl; } else { void(0);} }

vector<string> _split(const string& s, char c) {
vector<string> v;
stringstream ss(s);
string x;
while (getline(ss, x, c))
v.emplace_back(x);
return v;
}

void _print(vector<string>::iterator) {}
template<typename T, typename... Args>
void _print(vector<string>::iterator it, T a, Args... args) {
string name = it -> substr((*it)[0] == ' ', it -> length());
if (isalpha(name[0]))
cerr << name << " = " << a << " ";
else
cerr << name << " ";
_print(++it, args...);
}

typedef long long int int64;

const int N = (int)2e5 + 100;
const int ALP = 130;

//build sa with empty suffix

struct SuffixArray
{
int n;
string str;
vector<int> sa;
vector<int> rev_sa;

int first_pos_by_char[ALP];
int pref_sum[ALP][N];

SuffixArray()
{
}

SuffixArray(string _str)
{
str = _str;
n = (int)str.length();

build();
}

int add(int a, int b, int m)
{
a += b;
return a < m ? a : a - m;
}

void build()
{
int tmp[5][N];
int cnt[N];

int *pos = tmp[0];
int *arr = tmp[1], *narr = tmp[2];
int *col = tmp[3], *ncol = tmp[4];

str += " ";
n++;

int classes = 0;
memset(cnt, 0, sizeof(cnt));
for (int i = 0; i < n; i++)
{
cnt[(int)str[i]]++;
classes = max(classes, str[i] + 1);
}
pos[0] = 0;
for (int i = 1; i < classes; i++)
pos[i] = pos[i - 1] + cnt[i - 1];
for (int i = 0; i < n; i++)
{
col[i] = str[i];
arr[pos[col[i]]++] = i;
}

pos[0] = 0;
for (int i = 1; i < classes; i++)
pos[i] = pos[i - 1] + cnt[i - 1];

for (int shift = 1; shift < n; shift *= 2)
{
for (int i = 0; i < n; i++)
{
int npos = arr[i] - shift;
if (npos < 0)
npos += n;
narr[pos[col[npos]]++] = npos;
}

pos[0] = 0;
ncol[narr[0]] = 0;
for (int i = 1; i < n; i++)
{
ncol[narr[i]] = ncol[narr[i - 1]];
if (col[narr[i]] != col[narr[i - 1]] || col[add(narr[i], shift, n)] != col[add(narr[i - 1], shift, n)])
pos[++ncol[narr[i]]] = i;
}

swap(col, ncol);
swap(arr, narr);

if (col[arr[n - 1]] == n - 1)
break;
}

sa.resize(n);
rev_sa.resize(n);

for (int i = 0; i < n; i++)
{
sa[i] = arr[i];
rev_sa[arr[i]] = i;
}

str.pop_back();
n--;

memset(pref_sum, 0, sizeof(pref_sum));

for (int i = 0; i <= n; i++)
{
int suf_ind = sa[i];
if (suf_ind == 0)
continue;
char char_before = str[suf_ind - 1];
pref_sum[(int)char_before][i]++;
}

for (int c = 0; c < ALP; c++)
for (int i = 1; i < N; i++)
pref_sum[c][i] += pref_sum[c][i - 1];

fill(first_pos_by_char, first_pos_by_char + ALP, -1);
for (int i = 0; i <= n; i++)
{
int suf_ind = sa[i];
if (suf_ind == n)
continue;
char cur_char = str[suf_ind];
if (first_pos_by_char[(int)cur_char] == -1)
first_pos_by_char[(int)cur_char] = i;
}
}
};

struct Point
{
int x, y;
Point() : x(), y() {}
Point(int _x, int _y) : x(_x), y(_y) {}
};

std::ostream& operator << (std::ostream& os, const Point &p)
{
os << "(" << p.x << ", " << p.y << ")";
return os;
}

const int ADD_TYPE = 0;
const int QUERY_TYPE = 1;

struct Event
{
int x, type, id;
int y1, y2, sign;

Event() : x(), type(), id(), y1(), y2(), sign() {}
Event(int _x, int _type, int _id, int _y1, int _y2, int _sign) :
x(_x), type(_type), id(_id), y1(_y1), y2(_y2), sign(_sign) {}

bool operator < (const Event & other) const
{
if (x != other.x)
return x < other.x;
return type < other.type;
}
};

struct Fenwick
{
int sum[N];

Fenwick()
{
fill(sum, sum + N, 0);
}

void add_val(int pos, int val)
{
for (int i = pos; i < N; i |= (i + 1))
sum[i] += val;
}

int get_sum(int a)
{
int res = 0;
for (int i = a; i >= 0; i = (i & (i + 1)) - 1)
res += sum[i];
return res;
}

int get_sum(int a, int b)
{
return get_sum(b) - get_sum(a - 1);
}
};

struct RectSolver
{
vector<Event> e_list;

RectSolver()
{

}

void add_query(int id, const pair<int, int> & seg_x, const pair<int, int> & seg_y, int sign)
{
if (seg_x.first == -1 || seg_y.first == -1)
return;

e_list.push_back(Event(seg_x.first - 1, QUERY_TYPE, id, seg_y.first, seg_y.second, -sign));
e_list.push_back(Event(seg_x.second, QUERY_TYPE, id, seg_y.first, seg_y.second, sign));
}

void solve(const vector<Point> & points, int ans[])
{
for (Point p : points)
e_list.push_back(Event(p.x, ADD_TYPE, 0, p.y, p.y, 0));

sort(e_list.begin(), e_list.end());

Fenwick fenwick;

for (Event e : e_list)
{
if (e.type == ADD_TYPE)
{
fenwick.add_val(e.y1, 1);
}
else
{
int sum = fenwick.get_sum(e.y1, e.y2);
ans[e.id] += sum * e.sign;
}
}
}
};

string next_token()
{
char buf[N];
scanf("%s", buf);
return string(buf);
}

string reversed(string s)
{
reverse(s.begin(), s.end());
return s;
}

int k;
string s;
string rev_s;

SuffixArray direct_array;
SuffixArray reverse_array;

vector<Point> k_points;
vector<Point> k1_points;

RectSolver k_solver, k1_solver;

int ans[N];

int get_sum(const int pref_sum[], int l, int r)
{
if (l > r)
return 0;
int res = pref_sum[r];
if (l != 0)
res -= pref_sum[l - 1];
return res;
}

vector<pair<int, int> > get_segs(const SuffixArray & array, const string & pattern)
{
dbg(array.n, array.str, array.sa, array.rev_sa, pattern);

int pat_len = pattern.length();
vector<pair<int, int> > seg_list(pat_len, make_pair(-1, -1));

int cur_l = 0;
int cur_r = array.n;

dbg(cur_l, cur_r);

for (int i = pat_len - 1; i >= 0; i--)
{
char pat_c = pattern[i];

int seg_sum = get_sum(array.pref_sum[(int)pat_c], cur_l, cur_r);
if (seg_sum == 0)
break;

int sum_before = get_sum(array.pref_sum[(int)pat_c], 0, cur_l - 1);

int new_l = array.first_pos_by_char[(int)pat_c] + sum_before;
int new_r = new_l + seg_sum - 1;

dbg(i, seg_sum, sum_before, new_l, new_r);

seg_list[i] = make_pair(new_l, new_r);

cur_l = new_l;
cur_r = new_r;
}

dbg(seg_list);
dbg("---");

return seg_list;
}

void solve(int id, const string & pattern)
{
if (k > (int)pattern.length())
{
ans[id] = max(0, (int)s.length() - (int)pattern.length() + 1);
return;
}

vector<pair<int, int> > direct_seg_list = get_segs(direct_array, pattern);

vector<pair<int, int> > reverse_seg_list = get_segs(reverse_array, reversed(pattern));
reverse(reverse_seg_list.begin(), reverse_seg_list.end());

dbg(pattern, direct_seg_list, reverse_seg_list);

for (int i = 0; i + k - 1 < (int)pattern.length(); i++)
{
pair<int, int> x_seg = make_pair(0, (int)s.length());
pair<int, int> y_seg = make_pair(0, (int)s.length());

if (i - 1 >= 0)
y_seg = reverse_seg_list[i - 1];

if (i + k < (int)pattern.length())
x_seg = direct_seg_list[i + k];

pair<int, int> sh_y_seg = reverse_seg_list[i];

k_solver.add_query(id, x_seg, y_seg, 1);

if (i + k != (int)pattern.length())
k1_solver.add_query(id, x_seg, sh_y_seg, -1);
}
}

int main()
{
#ifdef LOCAL
freopen ("input.txt", "r", stdin);
#endif

scanf("%d", &k);

s = next_token();
rev_s = reversed(s);

direct_array = SuffixArray(s);
reverse_array = SuffixArray(rev_s);

dbg(direct_array.n, direct_array.str, direct_array.sa);

for (int l = 0; l < (int)s.length(); l++)
{
int r = l + k - 1;

if (r >= (int)s.length())
break;

int x = 0;
int y = 0;

if (l - 1 >= 0)
{
int rev_i = l - 1;
rev_i = (int)s.length() - 1 - rev_i;
y = reverse_array.rev_sa[rev_i];
}

if (r + 1 < (int)s.length())
x = direct_array.rev_sa[r + 1];

int new_rev_i = l;
new_rev_i = (int)s.length() - 1 - new_rev_i;
int new_y = reverse_array.rev_sa[new_rev_i];

k_points.push_back(Point(x, y));
k1_points.push_back(Point(x, new_y));
}

dbg(k_points);
dbg(k1_points);

int p_cnt;
scanf("%d", &p_cnt);
for (int i = 0; i < p_cnt; i++)
{
string pat = next_token();
solve(i, pat);
}

k_solver.solve(k_points, ans);
k1_solver.solve(k1_points, ans);

for (int i = 0; i < p_cnt; i++)
printf("%d\n", ans[i]);

return 0;
}


Post a Comment

0 Comments