Hashing With Application

Hashing is very important for competitive programming. Normally,what we do in hashing is ,We compress the whole structure (string, may be an array) to a single number..(hash_value)..
Okay, All the different structure should have different hash value, To compute hash_value, we need a hash function that is computes a unique hash value for each..


Let assume A={1,2,3,4} , B= {1,2,3,3}, C = {1,2,3,4,5} , D ={1,2,3,4};
Than getHash(A) should be equal to getHash(D) as they are same. Same rules apply for String

Constructing a hash function is very simple


Step 1: Choose A prime as mod ,typically I use P = 1e7 + 7
Step 2: Choose A prime as base value (which need to greater than in entries of the structure) For String I use B = 131
Step 3: Using hornar’s rule , calculate H

Here it is :
This is shown for string, same rule applicable for vector, array
Okay, I use Polynomial representation of vector
Here h = s[0] * B^(n-1) + s[1] * B^(n-2) + S[2] * B^(n-3) ….. + S[n-1] * B^0

long long getHash(string s)
{
	long long h = 0;
	for(int i = 0 ; s[i]; i ++ ){
		h = (h * B + s[i]) % P;
	}
	return h;
}

This hash_value is stored in a map … or set what you like to use which supports find or count operation…

Now I illustrate a simple problem with hashing

Problem : http://codeforces.com/contest/4/problem/C

Abridge statement: Given N strings, One by one, Process them ,
* If they have not occurred previously ,then print ‘OK’
* Else Print “The string + Previously occurred how many times”

Solution:

1. for each string , We compute its hash value ,Then check if it is already in the map ??
2. If yes, Then print The string + The number of times it occurred previously,
3. Else print NO


#include <bits/stdc++.h>
#include <cstring>
using namespace std;

/*------- Constants---- */
#define LMT                     15
#define ll                      long long
#define ull                     unsigned long long
#define mod                     1000000007
#define MEMSET_INF              63
#define MEM_VAL                 1061109567
#define FOR(i,n)                for( int i=0 ; i < n ; i++ )
#define mp(i,j)                 make_pair(i,j)
#define lop(i,a,b)              for( int i = (a) ; i < (b) ; i++)
#define pb(a)                   push_back((a))
#define gc                      getchar
#define PI                      acos(-1.0)
#define inf                     1<<30
#define lc                      ((n)<<1)
#define rc                      ((n)<<1 |1)
#define GREATER(x, y)           ((x) > (y) + EPS)
#define GREATER_EQUAL(x, y)     ((x) > (y) - EPS)
#define LESS(x, y)              ((x) < (y) - EPS)
#define LESS_EQUAL(x, y)        ((x) < (y) + EPS)
#define EQUAL(x, y)             (abs((x) - (y)) < EPS)
#define msg(x)                  cout << x << endl

/* -------Global Variables ------ */
ll euclid_x,euclid_y,d;

/*---- short Cuts ------- */
#define ms(ara_name,value) memset(ara_name,value,sizeof(ara_name))
typedef pair<int, int> ii;
typedef vector<int > vi ;
/*------ template functions ------ */

inline void sc(int &x)
{
      
	register int c = gc();
	x = 0;
	int neg = 0;
	for(;((c<48 | c>57) && c != '-');c = gc());
	if(c=='-') {neg=1;c=gc();}
	for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
	if(neg) x=-x;
}

template <class T> inline T bigmod(T p,T e,T M){
      ll ret = 1;
      for(; e > 0; e >>= 1){
            if(e & 1) ret = (ret * p) % M;
            p = (p * p) % M;
      } return (T)ret;
}
template <class T> inline T gcd(T a,T b){if(b==0)return a;return gcd(b,a%b);}
template <class T> inline T modinverse(T a,T M){return bigmod(a,M-2,M);}

/*************************** END OF TEMPLATE ****************************/





string str;

map<ll , int> kmp;
ll B = 131;
ll P = 1e7 + 7;


long long getHash(string s)
{
	long long h = 0;
	for(int i = 0 ; s[i]; i ++ ){
		h = (h * B + s[i]) % P;
	}
	return h;
}


int main()
{
      
      int n;
      
      
      sc(n) ;
      
      for(int i = 0 ; i < n ; i ++ ) {
            cin >> str;
            ll k = getHash(str);
            if( kmp.count(k) == 0 ) {
                  printf("OK\n");
            }
            else {
                  cout << str ;
                  printf("%d\n",kmp[k]);
            }
            kmp[k] ++;
            
      }
      
      return 0;
}

Yaahhh..
This is a wrong code… Why ?? Because My hash function is simple but it is producing some collision. So we need a way out…
1. if two strings have same length, Then the probability of collision is very very small. So we can do , We use a length wise map…

2.Use double hashing

3. Others ,I don’t know

Approach 1:
In the above solution we were using one map, But now we will use an array of map..
How many Map I need? Maximum Length

So This is an accepted code:


#include <bits/stdc++.h>
#include <cstring>
using namespace std;

/*------- Constants---- */
#define LMT                     15
#define ll                      long long
#define ull                     unsigned long long
#define mod                     1000000007
#define MEMSET_INF              63
#define MEM_VAL                 1061109567
#define FOR(i,n)                for( int i=0 ; i < n ; i++ )
#define mp(i,j)                 make_pair(i,j)
#define lop(i,a,b)              for( int i = (a) ; i < (b) ; i++)
#define pb(a)                   push_back((a))
#define gc                      getchar
#define PI                      acos(-1.0)
#define inf                     1<<30
#define lc                      ((n)<<1)
#define rc                      ((n)<<1 |1)
#define GREATER(x, y)           ((x) > (y) + EPS)
#define GREATER_EQUAL(x, y)     ((x) > (y) - EPS)
#define LESS(x, y)              ((x) < (y) - EPS)
#define LESS_EQUAL(x, y)        ((x) < (y) + EPS)
#define EQUAL(x, y)             (abs((x) - (y)) < EPS)
#define msg(x)                  cout << x << endl

/* -------Global Variables ------ */
ll euclid_x,euclid_y,d;

/*---- short Cuts ------- */
#define ms(ara_name,value) memset(ara_name,value,sizeof(ara_name))
typedef pair<int, int> ii;
typedef vector<int > vi ;
/*------ template functions ------ */

inline void sc(int &x)
{
      
	register int c = gc();
	x = 0;
	int neg = 0;
	for(;((c<48 | c>57) && c != '-');c = gc());
	if(c=='-') {neg=1;c=gc();}
	for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
	if(neg) x=-x;
}

template <class T> inline T bigmod(T p,T e,T M){
      ll ret = 1;
      for(; e > 0; e >>= 1){
            if(e & 1) ret = (ret * p) % M;
            p = (p * p) % M;
      } return (T)ret;
}
template <class T> inline T gcd(T a,T b){if(b==0)return a;return gcd(b,a%b);}
template <class T> inline T modinverse(T a,T M){return bigmod(a,M-2,M);}

/*************************** END OF TEMPLATE ****************************/





string str;

map<ll , int> kmp[LMT];
ll B = 131;
ll P = 1e7 + 7;


long long getHash(string s)
{
	long long h = 0;
	for(int i = 0 ; s[i]; i ++ ){
		h = (h * B + s[i]) % P;
	}
	return h;
}


int main()
{
      
      int n;
      
      
      sc(n) ;
      
      for(int i = 0 ; i < n ; i ++ ) {
            cin >> str;
            ll k = getHash(str);
            if( kmp[str.length()].count(k) == 0 ) {
                  printf("OK\n");
            }
            else {
                  cout << str ;
                  printf("%d\n",kmp[str.length()][k]);
            }
            kmp[str.length()][k] ++;
            
      }
      
      return 0;
}

Approach 2: Double Hashing
We use Two mod instead of 1.
so now P1 = 1e7+7 , P2 = 1e9+9;
now store pair of values for them,,

Here is the code



#include <bits/stdc++.h>
#include <cstring>
using namespace std;

/*------- Constants---- */
#define LMT                     15
#define ll                      long long
#define ull                     unsigned long long
#define mod                     1000000007
#define MEMSET_INF              63
#define MEM_VAL                 1061109567
#define FOR(i,n)                for( int i=0 ; i < n ; i++ )
#define mp(i,j)                 make_pair(i,j)
#define lop(i,a,b)              for( int i = (a) ; i < (b) ; i++)
#define pb(a)                   push_back((a))
#define gc                      getchar
#define PI                      acos(-1.0)
#define inf                     1<<30
#define lc                      ((n)<<1)
#define rc                      ((n)<<1 |1)
#define GREATER(x, y)           ((x) > (y) + EPS)
#define GREATER_EQUAL(x, y)     ((x) > (y) - EPS)
#define LESS(x, y)              ((x) < (y) - EPS)
#define LESS_EQUAL(x, y)        ((x) < (y) + EPS)
#define EQUAL(x, y)             (abs((x) - (y)) < EPS)
#define msg(x)                  cout << x << endl

/* -------Global Variables ------ */
ll euclid_x,euclid_y,d;

/*---- short Cuts ------- */
#define ms(ara_name,value) memset(ara_name,value,sizeof(ara_name))
typedef pair<int, int> ii;
typedef vector<int > vi ;
/*------ template functions ------ */

inline void sc(int &x)
{
      
	register int c = gc();
	x = 0;
	int neg = 0;
	for(;((c<48 | c>57) && c != '-');c = gc());
	if(c=='-') {neg=1;c=gc();}
	for(;c>47 && c<58;c = gc()) {x = (x<<1) + (x<<3) + c - 48;}
	if(neg) x=-x;
}

template <class T> inline T bigmod(T p,T e,T M){
      ll ret = 1;
      for(; e > 0; e >>= 1){
            if(e & 1) ret = (ret * p) % M;
            p = (p * p) % M;
      } return (T)ret;
}
template <class T> inline T gcd(T a,T b){if(b==0)return a;return gcd(b,a%b);}
template <class T> inline T modinverse(T a,T M){return bigmod(a,M-2,M);}

/*************************** END OF TEMPLATE ****************************/





string str;

map<pair<ll, ll>, int > kmp;
ll B = 131;
ll P1 = 1e7 + 7;
ll P2 = 1e9+9;

pair<ll , ll>  getHash(string s)
{
	long long h1 = 0 , h2 = 0;
	for(int i = 0 ; s[i]; i ++ ){
		h1 = (h1 * B + s[i]) % P1;
            h2 = (h2 * B + s[i]) % P2;
	}
	return mp(h1,h2);
}


int main()
{
      
      int n;
      
      
      sc(n) ;
      
      for(int i = 0 ; i < n ; i ++ ) {
            cin >> str;
            pair<ll, ll> k = getHash(str);
            if( kmp.count(k) == 0 ) {
                  printf("OK\n");
            }
            else {
                  cout << str ;
                  printf("%d\n",kmp[k]);
            }
            kmp[k] ++;
            
      }
      
      return 0;
}

We will discuss some other problem in other day 🙂