Algorithms FAQs Part 3.

Q: Implement Binary Heap in C++?

A: BinaryHeap.h


#ifndef BINARY_HEAP_H_

#define BINARY_HEAP_H_

#include “dsexceptions.h”

#include “vector.h”

// BinaryHeap class


// CONSTRUCTION: with an optional capacity (that defaults to 100)


// ******************PUBLIC OPERATIONS*********************

// void insert( x ) –> Insert x

// deleteMin( minItem ) –> Remove (and optionally return) smallest item

// Comparable findMin( ) –> Return smallest item

// bool isEmpty( ) –> Return true if empty; else false

// bool isFull( ) –> Return true if full; else false

// void makeEmpty( ) –> Remove all items

// ******************ERRORS********************************

// Throws Underflow and Overflow as warranted


class BinaryHeap



explicit BinaryHeap( int capacity = 100 );

bool isEmpty( ) const;

bool isFull( ) const;

const Comparable & findMin( ) const;

void insert( const Comparable & x );

void deleteMin( );

void deleteMin( Comparable & minItem );

void makeEmpty( );


int currentSize; // Number of elements in heap

vector array; // The heap array

void buildHeap( );

void percolateDown( int hole );





#include “BinaryHeap.h”


* Construct the binary heap.

* capacity is the capacity of the binary heap.



BinaryHeap::BinaryHeap( int capacity )

: array( capacity + 1 ), currentSize( 0 )




* Insert item x into the priority queue, maintaining heap order.

* Duplicates are allowed.

* Throw Overflow if container is full.



void BinaryHeap::insert( const Comparable & x )


if( isFull( ) )

throw Overflow( );

// Percolate up

int hole = ++currentSize;

for( ; hole > 1 && x < array[ hole / 2 ]; hole /= 2 )

array[ hole ] = array[ hole / 2 ];

array[ hole ] = x;



* Find the smallest item in the priority queue.

* Return the smallest item, or throw Underflow if empty.



const Comparable & BinaryHeap::findMin( ) const


if( isEmpty( ) )

throw Underflow( );

return array[ 1 ];



* Remove the smallest item from the priority queue.

* Throw Underflow if empty.



void BinaryHeap::deleteMin( )


if( isEmpty( ) )

throw Underflow( );

array[ 1 ] = array[ currentSize– ];

percolateDown( 1 );



* Remove the smallest item from the priority queue

* and place it in minItem. Throw Underflow if empty.



void BinaryHeap::deleteMin( Comparable & minItem )


if( isEmpty( ) )

throw Underflow( );

minItem = array[ 1 ];

array[ 1 ] = array[ currentSize– ];

percolateDown( 1 );



* Establish heap order property from an arbitrary

* arrangement of items. Runs in linear time.



void BinaryHeap::buildHeap( )


for( int i = currentSize / 2; i > 0; i– )

percolateDown( i );



* Test if the priority queue is logically empty.

* Return true if empty, false otherwise.



bool BinaryHeap::isEmpty( ) const


return currentSize == 0;



* Test if the priority queue is logically full.

* Return true if full, false otherwise.



bool BinaryHeap::isFull( ) const


return currentSize == array.size( ) – 1;



* Make the priority queue logically empty.



void BinaryHeap::makeEmpty( )


currentSize = 0;



* Internal method to percolate down in the heap.

* hole is the index at which the percolate begins.



void BinaryHeap::percolateDown( int hole )


/* 1*/ int child;

/* 2*/ Comparable tmp = array[ hole ];

/* 3*/ for( ; hole * 2 <= currentSize; hole = child )


/* 4*/ child = hole * 2;

/* 5*/ if( child != currentSize && array[ child + 1 ] < array[ child ] )

/* 6*/ child++;

/* 7*/ if( array[ child ] < tmp )

/* 8*/ array[ hole ] = array[ child ];


/* 9*/ break;


/*10*/ array[ hole ] = tmp;





#include “BinaryHeap.h”

#include “dsexceptions.h”

// Test program

int main( )


int numItems = 10000;

BinaryHeap h( numItems );

int i = 37;

int x;



for( i = 37; i != 0; i = ( i + 37 ) % numItems )

h.insert( i );

for( i = 1; i < numItems; i++ )


h.deleteMin( x );

if( x != i )

cout << “Oops! ” << i << endl;


for( i = 37; i != 0; i = ( i + 37 ) % numItems )

h.insert( i );

h.insert( 0 );

h.insert( i = 999999 ); // Should overflow


catch( Overflow )

{ cout << “Overflow (expected)! ” << i << endl; }

return 0;


Q: Implement Binary Search Tree in C++?

A: BinarySearchTree.h




#include “dsexceptions.h”

#include // For NULL

// Binary node and forward declaration because g++ does

// not understand nested classes.


class BinarySearchTree;


class BinaryNode


Comparable element;

BinaryNode *left;

BinaryNode *right;

BinaryNode( const Comparable & theElement, BinaryNode *lt, BinaryNode *rt )

: element( theElement ), left( lt ), right( rt ) { }

friend class BinarySearchTree;


// BinarySearchTree class


// CONSTRUCTION: with ITEM_NOT_FOUND object used to signal failed finds


// ******************PUBLIC OPERATIONS*********************

// void insert( x ) –> Insert x

// void remove( x ) –> Remove x

// Comparable find( x ) –> Return item that matches x

// Comparable findMin( ) –> Return smallest item

// Comparable findMax( ) –> Return largest item

// boolean isEmpty( ) –> Return true if empty; else false

// void makeEmpty( ) –> Remove all items

// void printTree( ) –> Print tree in sorted order


class BinarySearchTree



explicit BinarySearchTree( const Comparable & notFound );

BinarySearchTree( const BinarySearchTree & rhs );

~BinarySearchTree( );

const Comparable & findMin( ) const;

const Comparable & findMax( ) const;

const Comparable & find( const Comparable & x ) const;

bool isEmpty( ) const;

void printTree( ) const;

void makeEmpty( );

void insert( const Comparable & x );

void remove( const Comparable & x );

const BinarySearchTree & operator=( const BinarySearchTree & rhs );


BinaryNode *root;

const Comparable ITEM_NOT_FOUND;

const Comparable & elementAt( BinaryNode *t ) const;

void insert( const Comparable & x, BinaryNode * & t ) const;

void remove( const Comparable & x, BinaryNode * & t ) const;

BinaryNode * findMin( BinaryNode *t ) const;

BinaryNode * findMax( BinaryNode *t ) const;

BinaryNode * find( const Comparable & x, BinaryNode *t ) const;

void makeEmpty( BinaryNode * & t ) const;

void printTree( BinaryNode *t ) const;

BinaryNode * clone( BinaryNode *t ) const;





#include “BinarySearchTree.h”



* Implements an unbalanced binary search tree.

* Note that all “matching” is based on the < method.



* Construct the tree.



BinarySearchTree::BinarySearchTree( const Comparable & notFound ) :

root( NULL ), ITEM_NOT_FOUND( notFound )




* Copy constructor.




BinarySearchTree( const BinarySearchTree & rhs ) :



*this = rhs;



* Destructor for the tree.



BinarySearchTree::~BinarySearchTree( )


makeEmpty( );



* Insert x into the tree; duplicates are ignored.



void BinarySearchTree::insert( const Comparable & x )


insert( x, root );



* Remove x from the tree. Nothing is done if x is not found.



void BinarySearchTree::remove( const Comparable & x )


remove( x, root );



* Find the smallest item in the tree.

* Return smallest item or ITEM_NOT_FOUND if empty.



const Comparable & BinarySearchTree::findMin( ) const


return elementAt( findMin( root ) );



* Find the largest item in the tree.

* Return the largest item of ITEM_NOT_FOUND if empty.



const Comparable & BinarySearchTree::findMax( ) const


return elementAt( findMax( root ) );



* Find item x in the tree.

* Return the matching item or ITEM_NOT_FOUND if not found.



const Comparable & BinarySearchTree::

find( const Comparable & x ) const


return elementAt( find( x, root ) );



* Make the tree logically empty.



void BinarySearchTree::makeEmpty( )


makeEmpty( root );



* Test if the tree is logically empty.

* Return true if empty, false otherwise.



bool BinarySearchTree::isEmpty( ) const


return root == NULL;



* Print the tree contents in sorted order.



void BinarySearchTree::printTree( ) const


if( isEmpty( ) )

cout << “Empty tree” << endl;


printTree( root );



* Deep copy.



const BinarySearchTree &


operator=( const BinarySearchTree & rhs )


if( this != &rhs )


makeEmpty( );

root = clone( rhs.root );


return *this;



* Internal method to get element field in node t.

* Return the element field or ITEM_NOT_FOUND if t is NULL.



const Comparable & BinarySearchTree::

elementAt( BinaryNode *t ) const


if( t == NULL )



return t->element;



* Internal method to insert into a subtree.

* x is the item to insert.

* t is the node that roots the tree.

* Set the new root.



void BinarySearchTree::

insert( const Comparable & x, BinaryNode * & t ) const


if( t == NULL )

t = new BinaryNode( x, NULL, NULL );

else if( x < t->element )

insert( x, t->left );

else if( t->element < x )

insert( x, t->right );


; // Duplicate; do nothing



* Internal method to remove from a subtree.

* x is the item to remove.

* t is the node that roots the tree.

* Set the new root.



void BinarySearchTree::

remove( const Comparable & x, BinaryNode * & t ) const


if( t == NULL )

return; // Item not found; do nothing

if( x < t->element )

remove( x, t->left );

else if( t->element < x )

remove( x, t->right );

else if( t->left != NULL && t->right != NULL ) // Two children


t->element = findMin( t->right )->element;

remove( t->element, t->right );




BinaryNode *oldNode = t;

t = ( t->left != NULL ) ? t->left : t->right;

delete oldNode;




* Internal method to find the smallest item in a subtree t.

* Return node containing the smallest item.



BinaryNode *

BinarySearchTree::findMin( BinaryNode *t ) const


if( t == NULL )

return NULL;

if( t->left == NULL )

return t;

return findMin( t->left );



* Internal method to find the largest item in a subtree t.

* Return node containing the largest item.



BinaryNode *

BinarySearchTree::findMax( BinaryNode *t ) const


if( t != NULL )

while( t->right != NULL )

t = t->right;

return t;



* Internal method to find an item in a subtree.

* x is item to search for.

* t is the node that roots the tree.

* Return node containing the matched item.



BinaryNode *


find( const Comparable & x, BinaryNode *t ) const


if( t == NULL )

return NULL;

else if( x < t->element )

return find( x, t->left );

else if( t->element < x )

return find( x, t->right );


return t; // Match


/****** NONRECURSIVE VERSION*************************


BinaryNode *


find( const Comparable & x, BinaryNode *t ) const


while( t != NULL )

if( x < t->element )

t = t->left;

else if( t->element < x )

t = t->right;


return t; // Match

return NULL; // No match




* Internal method to make subtree empty.



void BinarySearchTree::

makeEmpty( BinaryNode * & t ) const


if( t != NULL )


makeEmpty( t->left );

makeEmpty( t->right );

delete t;


t = NULL;



* Internal method to print a subtree rooted at t in sorted order.



void BinarySearchTree::printTree( BinaryNode *t ) const


if( t != NULL )


printTree( t->left );

cout << t->element << endl;

printTree( t->right );




* Internal method to clone subtree.



BinaryNode *

BinarySearchTree::clone( BinaryNode * t ) const


if( t == NULL )

return NULL;


return new BinaryNode( t->element, clone( t->left ), clone( t->right ) );





#include “BinarySearchTree.h”

// Test program

int main( )


const int ITEM_NOT_FOUND = -9999;

BinarySearchTree t( ITEM_NOT_FOUND );

int NUMS = 4000;

const int GAP = 37;

int i;

cout << “Checking… (no more output means success)” << endl;

for( i = GAP; i != 0; i = ( i + GAP ) % NUMS )

t.insert( i );

for( i = 1; i < NUMS; i+= 2 )

t.remove( i );

if( NUMS < 40 )

t.printTree( );

if( t.findMin( ) != 2 || t.findMax( ) != NUMS – 2 )

cout << “FindMin or FindMax error!” << endl;

for( i = 2; i < NUMS; i+=2 )

if( t.find( i ) != i )

cout << “Find error1!” << endl;

for( i = 1; i < NUMS; i+=2 )


if( t.find( i ) != ITEM_NOT_FOUND )

cout << “Find error2!” << endl;


BinarySearchTree t2( ITEM_NOT_FOUND );

t2 = t;

for( i = 2; i < NUMS; i+=2 )

if( t2.find( i ) != i )

cout << “Find error1!” << endl;

for( i = 1; i < NUMS; i+=2 )


if( t2.find( i ) != ITEM_NOT_FOUND )

cout << “Find error2!” << endl;


return 0;




Vector: The Vector ADT extends the notion of an array by storing a sequence of arbitrary objects. An element can be accessed, inserted or removed by specifying its rank (number of elements preceding it). An exception is thrown if an incorrect rank is specified (e.g., a negative rank).


The Stack ADT stores arbitrary objects Insertions and deletions follow the last-in first-out scheme.

The Queue ADT stores arbitrary objects Insertions and deletions follow the first-in first-out scheme Insertions are at the rear of the queue and removals are at the front of the queue Main queue operations:

enqueue(object): inserts an element at the end of the queue object

dequeue(): removes and returns the element at the front of the queue


A singly linked list is a concrete data structure consisting of a sequence of nodes

Each node stores

- element
- link to the next node


A doubly linked list provides a natural implementation of the List ADT Nodes implement Position and store:

- element
- link to the previous node
- link to the next node


A tree is an abstract model of a hierarchical structure A tree consists of nodes with a parent-child relation


Root: node without parent (A)

Internal node: node with at least one child (A, B, C, F)

External node (a.k.a. leaf): node without children (E, I, J, K, G, H, D)

Ancestors of a node: parent, grandparent, grand-grandparent, etc.

Depth of a node: number of ancestors

Height of a tree: maximum depth of any node (3)

Descendant of a node: child, grandchild, grand-grandchild, etc.

Subtree: tree consisting of a node and its descendants



A binary tree is a tree with the following properties:

􀂄 Each internal node has two children

􀂄 The children of a node are an ordered pair

We call the children of an internal node left child and right child

Alternative recursive definition: a binary tree is either

􀂄 a tree consisting of a single node,


􀂄 a tree whose root has an ordered pair of children, each of which is a

binary tree



Notation for a proper binary tree: let n be the number of nodes, e the number of external nodes, i the number of internal nodes, and h the height. Then:

- e = i + 1
- n = 2e − 1
- h ≤ i
- h ≤ (n − 1)/2
- e ≤ 2^h
- h ≥ log2 e
- h ≥ log2 (n + 1) − 1


A priority queue stores a collection of items

An item is a pair (key, element)

Main methods of the Priority Queue ADT

􀂄 insertItem(k, o) inserts an item with key k and element o

􀂄 removeMin() removes the item with smallest key and returns its element.


A heap is a binary tree storing keys at its internal nodes and satisfying the

following properties:

- Heap-Order: for every internal node v other than the root,
  key(v) ≥ key(parent(v))
- Complete Binary Tree: let h be the height of the heap
  - for i = 0, ..., h − 1, there are 2^i nodes of depth i
  - at depth h − 1, the internal nodes are to the left of the external nodes

The last node of a heap is the rightmost internal node of depth h − 1


A hash function h maps keys of a given type to integers in a fixed interval [0, N − 1]

Example: h(x) = x mod N is a hash function for integer keys

The integer h(x) is called the hash value of key x

A hash table for a given key type consists of
- Hash function h
- Array (called table) of size N

When implementing a dictionary with a hash table, the goal is to store item (k, o) at index i = h(k)


A hash function is usually specified as the composition of two functions:

Hash code map:
h1: keys → integers

Compression map:
h2: integers → [0, N − 1]

The hash code map is applied first, and the compression map is applied next on the result, i.e.,
h(x) = h2(h1(x))

The goal of the hash function is to "disperse" the keys in an apparently random way.


The dictionary ADT models a searchable collection of key element items

The main operations of a dictionary are searching, inserting, and deleting items

Multiple items with the same key are allowed



Merge-sort on an input

sequence with n

elements consists of

three steps:

􀂄 Divide: partition into

two sequences S1 and S2

of about n/2 elements


􀂄 Recur: recursively sort S1

and S2

􀂄 Conquer: merge S1 and

S2 into a unique sorted




Algorithm mergeSort(S, C)
  Input: sequence S with n elements, comparator C
  Output: sequence S sorted according to C
  if S.size() > 1
    (S1, S2) ← partition(S, n/2)
    mergeSort(S1, C)
    mergeSort(S2, C)
    S ← merge(S1, S2)



Quick-sort is a randomized

sorting algorithm based

on the divide-and-conquer


- Divide: pick a random element x (called pivot) and partition S into
  - L: elements less than x
  - E: elements equal to x
  - G: elements greater than x
- Recur: sort L and G
- Conquer: join L, E and G


We represent a set by the sorted sequence of its elements

􀂊 By specializing the auxliliary methods the generic merge algorithm can be used to perform basic set operations:

􀂄 union

􀂄 intersection

􀂄 subtraction

- The running time of an operation on sets A and B should be at most O(nA + nB)


Quick-select is a randomized selection algorithm based on the prune-and-search paradigm:

- Prune: pick a random element x (called pivot) and partition S into
  - L: elements less than x
  - E: elements equal to x
  - G: elements greater than x
- Search: depending on k, either the answer is in E, or we need to recurse in either L or G


Big O:
f(n) ∈ O(g(n)) if there exist c > 0 and an integer n0 > 0 such that f(n) <= c*g(n) for all n >= n0.

Big Omega:
f(n) ∈ Ω(g(n)) if there exist c > 0 and an integer n0 > 0 such that f(n) >= c*g(n) for all n >= n0.

Big Theta:
f(n) ∈ Θ(g(n)) if there exist c1 > 0, c2 > 0 and an integer n0 > 0 such that c1*g(n) <= f(n) <= c2*g(n) for all n >= n0.

ModelTaxonomy AI

Machine Learning Essentials – Machine Learning Taxonomy

This Artificial Intelligence post covers all essentials Machine Learning concepts required for a data engineer to know before he embarks upon journey towards Development of Deep Learning products.

I have listed below the most popular and often used Machine Learning concepts, read them and provide feedback to us:

actor-critic – Refers to a class of agent architectures, where the actor plays out a particular policy, while the critic learns to evaluate actor’s policy. Both the actor and critic are simultaneously improving by bootstrapping on each other.

agent – A system that is embedded in an environment, and takes actions to change the state of the environment. Examples include mobile robots, software agents, or industrial controllers.

average-reward methods – A framework where the agent’s goal is to maximize the expected payoff per step. Average-reward methods are appropriate in problems where the goal is maximize the long-term performance. They are usually much more difficult to analyze than discounted algorithms.

discount factor – A scalar value between 0 and 1 which determines the present value of future rewards. If the discount factor is 0, the agent is concerned with maximizing immediate rewards. As the discount factor approaches 1, the agent takes more future rewards into account. Algorithms which discount future rewards include Q-learning and TD(lambda).

dynamic programming (DP) is a class of solution methods for solving sequential decision problems with a compositional cost structure. Richard Bellman was one of the principal founders of this approach.

environment – The external system that an agent is “embedded” in, and can perceive and act on.

function approximator refers to the problem of inducing a function from training examples.
Standard approximators include decision trees, neural networks, and nearest-neighbor methods.

Markov decision process (MDP) – A probabilistic model of a sequential decision problem, where states can be perceived exactly, and the current state and action selected determine a probability distribution on future states. Essentially, the outcome of applying an action to a state depends only on the current action and state (and not on preceding actions or states).

model-based algorithms
– These compute value functions using a model of the system dynamics. Adaptive Real-time DP (ARTDP) is a well-known example of a model-based algorithm.

model-free algorithms – these directly learn a value function without requiring knowledge of the consequences of doing actions. Q-learning is the best known example of a model-free algorithm. For full details on this concept watch this video :

model – The agent’s view of the environment, which maps state-action pairs to probability distributions over states. Note that not every reinforcement learning agent uses a model of its environment. For full details on this concept watch this video :

Monte Carlo methods – A class of methods for learning of value functions, which estimates the value of a state by running many trials starting at that state, then averages the total rewards received on those trials.

policy – The decision-making function (control strategy) of the agent, which represents a mapping from situations to actions.

reward – A scalar value which represents the degree to which a state or action is desirable. Reward functions can be used to specify a wide range of planning goals (e.g. by penalizing every non-goal state, an agent can be guided towards learning the fastest route to the final state).

sensor – Agents perceive the state of their environment using sensors, which can refer to physical transducers, such as ultrasound, or simulated feature-detectors.

state – this can be viewed as a summary of the past history of the system, that determines its future evolution.

stochastic approximation – Most RL algorithms can be viewed as stochastic approximations of exact DP algorithms, where instead of complete sweeps over the state space, only selected states are backed up (which are sampled according to the underlying probabilistic model). A rich theory of stochastic approximation (e.g Robbins Munro) can be brought to bear to understand the theoretical convergence of RL methods.

TD (temporal difference) algorithms – A class of learning methods, based on the idea of comparing temporally successive predictions. Possibly the single most fundamental idea in all of reinforcement learning.

reinforcement learning – The area of machine learning in which an agent learns from interaction with its environment, rather than from a knowledgeable teacher that specifies the action the agent should take in any given state. (Note: the original text labeled this entry "unsupervised learning", but the definition given is the standard characterization of reinforcement learning; unsupervised learning instead concerns finding structure in unlabeled data.)

value function is a mapping from states to real numbers, where the value of a state represents the long-term reward achieved starting from that state, and executing a particular policy. The key distinguishing feature of RL methods is that they learn policies indirectly, by instead learning value functions. RL methods can be constrasted with direct optimization methods, such as genetic algorithms (GA), which attempt to search the policy space directly.

Parametric Models can be defined as models that first make an assumption about the functional form, or shape, of
the function f (i.e. linear), and then fit the model. This reduces estimating the mapping function f to just estimating a set of parameters, but if our assumption was wrong, it will lead to bad results.

Algorithms that do not make strong assumptions about the form of the mapping function are called nonparametric machine learning algorithms. By not making assumptions, they are free to learn any functional form from the training data.

Supervised Models can be defined as models that fit input variables xi = (x 1 , x 2 , …x n ) to a known output variables y i = (y 1 , y 2 , …y n )

Unsupervised Models can be defined as models that take in input variables xi = (x1 , x2 , …xn ), but they do not have an associated output to supervise the training. The goal is to understand relationships between the variables or observations.

Blackbox Machine Learning models are models that make decisions, but we do not know what happens ”under the hood” e.g. deep learning, neural networks

Descriptive Machine Learning models are models that provide insight into why they make their decisions e.g. linear regression, decision trees

First-Principle models can be defined as models based on a prior belief of how the system under investigation works, incorporates domain knowledge on ad-hoc basis.

Data-Driven models can be defined as models based on observed correlations between input and output variables

Deterministic models are defined as models that produce a single ”prediction” e.g. yes or no, true or false

Stochastic models are defined as models that produce probability distributions over possible events

Flat models are models that solve problems on a single level,no notion of subproblems.
Flat clustering creates a flat set of clusters without any explicit structure that would relate clusters to each other.

Hierarchical models are models that solve several different nested subproblems. Hierarchical clustering creates a hierarchy of clusters.

There is a very self-explanatory video about these concepts, which briefly explains the different modelling techniques and models. Please have a look at it and provide your feedback.

Predictive Analytics AI

Predictive Analytics

2017 has seen the role of big data analytics grow and diversify its application into various business domains, ranging from retail business to the governance sphere. Analytics has facilitated the transformation of unstructured data into usable insights. As business houses continue to take leaps of growth with revolutionary data analytics, it is expected that this growing and expanding vertical shall leave an indelible mark in 2018 too.

Nowadays, the size of the data that is being generated and created in different organizations is increasing drastically. Due to this large amount of data, several areas in artificial intelligence and data science have been raised.

Businesses who don’t effectively use their data will be losing $1.2 trillion to their competitors every year by 2020. By that time, experts predict that there will be a 4300% increase in annual data production.

In order to stay competitive, companies need to find a way to leverage data into actionable strategies. Data science and artificial intelligence are the key to maximizing data utilization.

Predictive Analytics is among the most useful applications of data science.

Using it allows executives to predict upcoming challenges, identify opportunities for growth, and optimize their internal operations.

So to understand thoroughly about important role played by analytics in technology  we will start with basics,

What is Predictive Analytics?

According to Wikipedia, “Predictive analytics encompasses a variety of statistical techniques from predictive modeling, machine learning, and data mining that analyze current and historical facts to make predictions about future or otherwise unknown events.”


Predictive analytics is the branch of the advanced analytics which is used to make predictions about unknown future events. Predictive analytics uses many techniques from data mining, statistics, modelling, machine learning, and artificial intelligence to analyse current data to make predictions about future. It uses a number of data mining, and analytical techniques to bring together the management, information technology, and modelling business process to make predictions about future. The patterns found in historical and transactional data can be used to identify risks and opportunities for future. Predictive analytics models capture relationships among many factors to assess risk with a particular set of conditions to assign a score, or weight-age. By successfully applying predictive analytics the businesses can effectively interpret big data for their opportunities.

Predictive Analytics can also be defined using techniques, tools and technologies that use data to find models – models that can anticipate outcomes with a significant probability of accuracy.

Data Scientist explore data, formulate hypothesis, and use algorithms to find predictive models

The six steps of Predictive Analytics are
1. Understand and Collect Data
2. Prepare and Clean Data
3. Create Model using Statistical & Machine Learning Algorithm
4. Evaluate the model to make sure it will work
5. Deploy the model, use the model in applications
6. Monitor the model, Measure the effectiveness of the model in the real world.

Predictive analytics can be further categorized as –

  1. Predictive Modelling –What will happen next, if ?
  2. Root Cause Analysis-Why this actually happened?
  3. Data Mining- Identifying correlated data.
  4. Forecasting- What if the existing trends continue?
  5. Monte-Carlo Simulation – What could happen?
  6. Pattern Identification and Alerts –When should an action be invoked to correct a process.

Below are some of the important reasons organisations use Predictive Analytics. Organizations are turning to predictive analytics to help solve difficult problems and uncover new opportunities. Common uses include:

Detecting fraud. Combining multiple analytics methods can improve pattern detection and prevent criminal behavior. As cybersecurity becomes a growing concern, high-performance behavioral analytics examines all actions on a network in real time to spot abnormalities that may indicate fraud, zero-day vulnerabilities and advanced persistent threats.

Optimizing marketing campaigns. Predictive analytics are used to determine customer responses or purchases, as well as promote cross-sell opportunities. Predictive models help businesses attract, retain and grow their most profitable customers.

Improving operations. Many companies use predictive models to forecast inventory and manage resources. Airlines use predictive analytics to set ticket prices. Hotels try to predict the number of guests for any given night to maximize occupancy and increase revenue. Predictive analytics enables organizations to function more efficiently.

Reducing risk. Credit scores are used to assess a buyer’s likelihood of default for purchases and are a well-known example of predictive analytics. A credit score is a number generated by a predictive model that incorporates all data relevant to a person’s creditworthiness. Other risk-related uses include insurance claims and collections.


The types of Modeling techniques used in Predictive Analytics
1. Classifiers : An algorithm that maps the input data to a specific category. This algorithm is used in Predictive Data Classification process. This process has two stages: The Learning stage and Prediction stage.

This supervised algorithm is used to categorise structured or unstructured data into different sections by tagging it with a unique classification tag attribute. This process of classification is done after the learning process. The goal is to teach your model to extract and discover hidden relationships and rules — the classification rules — from training data. The model does so by employing a classification algorithm.

The prediction stage that follows the learning stage consists of having the model predict new class tag attributes or numerical values that classify data it has not seen before (that is, test data). The main goal of a classification problem is to identify the category/class to which a new data will fall under.

The following are the steps involved in building a classification model:
i. Initialise the classifier to be used.
ii. Train the classifier: All classifiers in scikit-learn use a fit(X, y) method to fit the model (training) for the given train data X and train label y.
iii. Predict the target: Given an unlabelled observation X, the predict(X) returns the predicted label y.
iv. Evaluate the classifier model.

Some of the examples of Classification Algorithms :
1. Logistic Regression
2. Naive Bayes
3. Stochastic Gradient Descent
4. K-Nearest Neighbours
5. Decision Tree
6. Random Forest
7. Support Vector Machine

2. Recommenders:
A Recommendation system or Recommenders works in well-defined, logical phases viz., data collection, ratings, and filtering. These phases are described below.
• Recommender System helps match users with item
• Implicit or explicit user feedback or item suggestion
• Different Recommender System designs are based on the availability of data or content/context of the data.

Recommendation allows you to make recommendations (similarly to Association Rules or Market Basket Analysis) by generating rules (for example, purchasing Product A leads to purchasing Product B). Recommendation uses the link analysis technique. This technique is optimized to work on large volumes of transactions. Recommendation triggers all the existing rules in a projected graph whose antecedent is a neighbor of the given user in the bipartite graph. Recommendation provides a specialized workflow to make it easy to obtain a set of recommendations for a given customer.

Data Collection:
How does the Recommendation system capture the details? If the user has logged in, then the details are extracted either from an HTTP session or from the system cookies. In case the Recommendation system depends on system cookies, then the data is available only till the time the user is using the same terminal. Events are fired almost in every case — a user liking a Product or adding it to a cart and purchasing it. So that is how user details are stored. But that is just one part of what Recommenders do.

Ratings are important in the sense that they tell you what a user feels about a product. User’s feelings about a product can be reflected to an extent in the actions he or she takes such as likes, adding to shopping cart, purchasing or just clicking. Recommendation systems can assign implicit ratings based on user actions.

Filtering means filtering products based on ratings and other user data. Recommendation systems use three types of filtering: collaborative, user-based and a hybrid approach. In collaborative filtering, a comparison of users’ choices is done and recommendations given. For example, if user X likes products A, B, C, and D and user Y likes products A, B, C, D and E, then it is likely that user X will be recommended product E because there are a lot of similarities between users X and Y as far as choice of products is concerned.

Several reputed brands such as Social Media Ecosystems use this model to provide effective and relevant recommendations to the users consuming those services. In user-based filtering, the user’s browsing history, likes and ratings are taken into account before providing recommendations. Many companies also use a hybrid approach. Netflix is known to use a hybrid approach.

While big data and Recommendation engines have already proved an extremely useful combination for big corporations, it raises a question of whether companies with smaller budgets can afford such investments. Powerful media recommendation engines can be built for anything from movies and videos to music, books, and products – think Netflix, Pandora, or Amazon.

3. Clusters
Using unsupervised techniques like clustering, we can seek to understand the relationships between the variables or between the observations by determining whether observations fall into relatively distinct groups. During Clustering case, most of the data is categorised using unsupervised learning — so we don’t have response variables telling us whether a customer is a frequent shopper or not. Hence, we can attempt to cluster the customers on the basis of the variables in order to identify distinct customer groups. There are other types of unsupervised statistical learning including k-means clustering, hierarchical clustering, principal component analysis, etc.

Clustering is an unsupervised data mining technique where the records in a data set are organised into different logical groupings. The groupings are in such a way that records inside the same group are more similar than records outside the group. Clustering has a wide variety of applications ranging from market segmentation to customer segmentation, electoral grouping, web analytics, and outlier detection.

Clustering is also used as a data compression technique and data preprocessing technique for supervised data mining tasks. Many different data mining approaches are available to cluster the data and are developed based on proximity between the records, density in the data set, or novel application of neural networks.

Clustering can help us explore the dataset and separate cases into groups representing similar traits or characteristics. Each group could be a potential candidate for a Category/class. Clustering is used for exploratory data analytics, i.e., as unsupervised learning, rather than for confirmatory analytics or for predicting specific outcomes. Examples of several interesting case-studies, including Divorce and Consequences on Young Adults, Paediatric Trauma, and Youth Development, demonstrate hierarchical clustering,

4. Numerical, time series forecasting :A time series is a sequence of measurements over time, usually obtained at equally spaced intervals

Any metric that is measured over regular time intervals forms a time series. Analysis of time series is of commercial importance because of industrial need and relevance, especially w.r.t. forecasting (demand, sales, supply, etc.).

An Ordered sequence of observations of a variable or captured object at equally distributed time interval. Time series is anything which is observed sequentially over the time at regular interval like hourly, daily, weekly, monthly, quarterly etc. Time series data is important when you are predicting something which is changing over the time using past data. In time series analysis the goal is to estimate the future value using the behaviours in the past data.

There are many statistical techniques available for time series forecast, however we have found few effective ones which are listed below:
Techniques of Forecasting:
1. Simple Moving Average (SMA)
2. Exponential Smoothing (SES)
3. Autoregressive Integrated Moving Average (ARIMA)
4. Neural Network (NN)
5. Croston's method

Components of a Time Series
• Secular Trend
– Linear
– Nonlinear

• Cyclical Variation
– Rises and Falls over periods longer than
one year

• Seasonal Variation
– Patterns of change within a year, typically
repeating themselves

• Residual Variation

Components of a Time Series

Yt = Tt + Ct + St + Rt   (trend, cyclical, seasonal and residual components)

Time series data mining combines traditional data mining and forecasting techniques. Data mining techniques such as sampling, clustering and decision trees are applied to data collected over time with the goal of improving predictions.

To know more in detail about the AI and Machine Learning and we will explore Predictive Analytics:

Some of the popular Predictive Algorithms are given below:

Predictive Algorithms:
1. K-means Clustering
2. Association rules
3. Boosting trees

5. Cluster Analysis
6. Feature Selection
7. Independent Component Analysis
8. Kohonen Networks (SOFM)
9. Neural Networks

10. Social network analysis (SNA)
11. Random Forest (Decision Trees)
12. Mars regression splines
13. Linear and logistic regression
14. Naive Bayesian classifiers

15.Optimal binning
16. Partial Least Squares
17. Response Optimisation
18. Root cause analysis.

In my next blog I will be explaining clearly each one of the predictive analytics algorithms mentioned above, starting with K-means Clustering. I hope you liked the article

Please let us know in your comments what more you would like to see in the upcoming articles on Predictive Analytics. Happy Reading!!




Prerequisite Skill sets for Artificial Intelligence Development. PART I

There is hardly any theory which is more elementary than linear algebra, in spite of the fact that generations of

data scientists and mathematicians have obscured its simplicity by preposterous calculations with matrices.

            – Jean Dieudonné, French mathematician

Mathematics requires a small dose, not of genius but of an imaginative freedom which in a larger dose would be insanity

               – August K Rodgers

It’ll be about linear algebra, which as a lot of you know is one of these subjects that’s required knowledge

for just about any technical education, but it’s also I have noticed generally poorly understood by IT professionals

taking it for the first time. A data scientist might go through a class and learn how to compute lots of things, like the determinant,

matrix multiplication, or the cross product (which uses the determinant), or eigenvalues, but they might come out

without really understanding why matrix multiplication is defined the way that it is, why the cross product has

anything to do with the determinant, or what eigenvalues really represent.

In this blog I have tried to help all those data scientists re-examine linear algebra from an altogether different

perspective — in terms of analytical geometry, or 3-dimensional spatial geometry — and help them understand how

this can be easily put to use in Computer vision , Image processing, Speech recognition, and Robotics.

So Let’s start with some basics viz; Vector Space.


        Fields and Vector Spaces As has been indicated in the preface, our first aim is to re-work

the linear algebra presented in our AAS course, generalising from R to an arbitrary field as domain

from which coefficients are to be taken. Fields are treated in the companion course, Rings and

Arithmetic, but to make this course and these notes self-contained we begin with a definition of

what we mean by a field.

Axioms for Fields 

A field is a set F with distinguished elements 0, 1, with a unary operation −, and with two binary operations +

and × satisfying the axioms below (the ‘axioms of arithmetic’).

Conventionally, for the image of (a, b) under the function + : F × F → F we write a + b; for the image of (a, b)

under the function × : F × F → F we write a b; and x − y means x + (−y), x + y z means x + (y z).

The axioms of arithmetic

(1) a + (b + c) = (a + b) + c        [+ is associative] 

(2) a + b = b + a                   [+ is commutative]           

(3) a +0= a                                

(4) a + (−a) = 0 

(5) a (b c) = (a b) c                 [× is associative]                

(6) ab = ba                               [× is commutative] 

(7) a 1 = a 

(8) a ≠ 0 ⇒ ∃ x ∈ F : a x = 1 

(9) a (b + c) = a b + a c                            [× distributes over +]  

(10) 0 ≠ 1  


Note 1: All axioms are understood to have ∀ a, b, . . . ∈ F in front.

Note 2: See my Notes on Rings and Arithmetic for more discussion.

Note 3: Examples are Q, R, C, Z 2 or more generally Z p , the field of integers modulo p for any prime number p.

Vector Spaces

Let F be a field. A vector space over F is a set V with distinguished element 0, with a unary operation −,

with a binary operation +, and with a function × : F × V → V satisfying the axioms below.

Conventionally: for the image of (a, b) under the function + : V × V → V

we write a + b; for a + (−b) we write a − b; and for the image of (α, v)

under the function × : F × V → V we write α v .

Vector space axioms

(1) u + (v + w) = (u + v) + w  (2) u + v = v + u  (3) u + 0 = u  (4) u + (−u) = 0  (5) α (β v) = (α β) v  (6) α (u + v) = α u + α v  (7) (α + β) v = α v + β v  (8) 1 v = v

Note: All axioms are understood to have appropriate quantifiers ∀ u, v, . . . ∈ V
and/or ∀ α, β, . . . ∈ F in front.


  1. F^n is a vector space over F;
  2. the polynomial ring F[x] is a vector space over F;
  3. M_{m×n}(F) is a vector space over F;
  4. if K is a field and F a sub-field then K is a vector space over F;

Exercise 1. Let X be any set, F any field. Define

F^X to be the set of all functions X → F with the usual pointwise addition and multiplication by scalars (elements of F). Show that F^X is a vector space over F.

Exactly as for rings, or for vector spaces over R, one can prove important “trivialities”.

Of course, if we could not prove them then we would add further axioms until we

had captured all properties that we require, or at least expect, of the algebra of vectors.

But the fact is that this set of axioms, feeble though it seems, is enough. For example:

PROPOSITION: Let V be a vector space over the field F . For any v ∈ V and any α ∈ F we have

(i) 0 v = 0;  (ii) α 0 = 0;  (iii) if α v = 0 then α = 0 or v = 0;  (iv) α (−v) = −(α v) = (−α) v.

Proof. For v ∈ V , from Field Axiom (3) and Vector Space Axiom (7) we have
0 v = (0 + 0) v = 0 v + 0 v.

Then adding −(0 v) to both sides of this equation and using Vector Space Axioms (4)
on the left and (1), (4), (3) on the right, we get that 0 = 0 v , as required for (i). The
reader is invited to give a proof of (ii).

For (iii), suppose that α v = 0 and α ≠ 0: our task is to show that v = 0. By Field
Axioms (8) and (6) there exists β ∈ F such that β α = 1. Then

v = 1 v = (β α) v = β (α v) = β 0

by Vector Space Axioms (8) and (5). But β 0 = 0 by (ii), and so v = 0, as required.
Clause (iv), like Clause (ii), is offered as an exercise:


Let V be a vector space over a field F . A subspace of V is a subset U such that
(1) 0 ∈ U and u + v ∈ U whenever u, v ∈ U
(2) if u ∈ U , α ∈ F then α u ∈ U . and −u ∈ U whenever u ∈ U ;

Note 1: Condition (1) says that U is an additive subgroup of V . Condition (2) is closure under

multiplication by scalars.

Note 2: We write U ≤ V to mean that U is a subspace of V . Note 3: Always {0} is a subspace; if U ≠ {0} then we say that U is nonzero or nontrivial. Likewise, V is a subspace; if U ≠ V then we say that U is a proper subspace. Note 4: A subset U of V is a subspace if and only if U ≠ ∅ and U is closed under + (that is, u, v ∈ U ⇒ u + v ∈ U ) and under multiplication by scalars.


(1) Let L1, . . . , Lm be homogeneous linear expressions Σ cij xj with coefficients cij ∈ F , and let U := {(x1, . . . , xn) ∈ F^n | L1 = 0, . . . , Lm = 0}. Then U ≤ F^n. (2) Let F[n][x] := {f ∈ F[x] | f = 0 or deg f ≤ n}. Then F[n][x] ≤ F[x]. (3) Upper triangular matrices form a subspace of M_{n×n}(F).


Suppose that U <= V where V is a vector space over a field F . Define the quotient
space V /U as follows:

set := {x + U | x ∈ V }   [additive cosets]

0 := U

additive inverse: −(x + U ) := (−x) + U

addition: (x + U ) + (y + U ) := (x + y) + U

multiplication by scalars: α(x + U ) :=αx +U

Note: The notion of quotient space is closely analogous with the notion of quotient
of a group by a normal subgroup or of a ring by an ideal. It is not in the Part A syllabus,
nor will it play a large part in this course. Nevertheless, it is an important and useful
construct which is well worth becoming familiar with.


Although technically new, the following ideas and results translate so simply from
the case of vector spaces over R to vector spaces over an arbitrary field F that I propose
simply to list headers for revision:
(1) spanning sets;
linear dependence and independence;
(2) dimension;

(3) dim V = d  ⇔  V ≅ F^d;

(4) any linearly independent set may be extended (usually in many ways) to a basis;
(5) intersection U ∩ W of subspaces; sum U + W ;

(6) dim (U + W ) = dim U + dim W − dim (U ∩ W );


Let F be a field, and V1, V2 vector spaces over F.

A map T : V1 → V2 is said to be linear if

T 0 = 0,
T (−x) = −T x,
T (x + y) = T x + T y,
and T (λ x) = λ (T x)
for all x, y ∈ V and all λ ∈ F .

Note 1: The definition is couched in terms which are intended to emphasise that
what should be required of a linear transformation  is
that it preserves all the ingredients, 0, +, − and multiplication by scalars, that go into

the making of a vector space. What we use in practice is the fact that T : V1 → V2 is

linear if and only if T (α x + β y) = α T x + β T y  for all x, y ∈ V1 and all α, β ∈ F .

Note 2:

The identity I : V → V is linear; if T : V1 → V2 and S : V2 → V3 are linear then S ∘ T : V1 → V3 is linear.

For a linear transformation T : V → W we define the kernel or null-space by Ker T := {x ∈ V | Tx = 0}.

We prove that Ker T ≤ V , that Im T ≤ W , and we define nullity T := dim Ker T , rank T := dim Im T .


nullity T + rank T = dim V

Note: The Rank-Nullity Theorem is a version of the First Isomorphism Theorem
for vector spaces, which states that

Im T ≅ V / Ker T.


The vector space V is said to be the direct sum of its subspaces U and W , and we
write V = U ⊕ W , if V = U + W and U ∩ W = {0}.

Lemma: Let U , W be subspaces of V . Then V = U ⊕ W if and only if for every
v ∈ V there exist unique vectors u ∈ U and w ∈ W such that v = u + w.

Proof. Suppose first that for every v ∈ V there exist unique vectors u ∈ U and

w ∈ W such that v = u + w . Certainly then V = U + W and what we need to prove

is that U ∩ W = {0}. So let x ∈ U ∩ W . Then x = x + 0 with x ∈ U and 0 ∈ W .

Equally, x = 0 + x with 0 ∈ U and x ∈ W . But the expression x = u + w with u ∈ U

and w ∈ W is, by assumption, unique, and it follows that x = 0. Thus U ∩ W = {0}, as required.

Now suppose that V = U ⊕ W . If v ∈ V then since V = U + W there certainly exist vectors u ∈ U

and w ∈ W such that v = u + w . The point at issue therefore is: are u, w uniquely determined by v?

Suppose that u + w = u ′ + w ′ , where u, u ′ ∈ U and w, w ′ ∈ W . Then u − u ′ = w ′ − w .

This vector lies both in U and in W .

By assumption, U ∩ W = {0} and so u − u ′ = w ′ − w = 0. Thus u = u ′ and w = w ′ ,

so the decomposition of a vector v as u + w with u ∈ U and w ∈ W is unique, as required.

Note: What we are discussing is sometimes (but rarely) called the “internal” direct

sum to distinguish it from the natural construction which starts from two vector spaces

V1, V2 over the same field F and constructs a new vector space whose set is the product set V1 × V2 and in which the vector space structure is defined componentwise — compare

the direct product of groups or of rings. This is (equally rarely) called the “external”

direct sum. These are two sides of the same coin: the external direct sum of V1 and V2 is the internal direct sum of its subspaces V1 × {0} and {0} × V2; while if V = U ⊕ W

then V is naturally isomorphic with the external direct sum of U and W .

We come now to projection operators. Suppose that V = U ⊕ W .

Define P : V → V as follows.

For v ∈ V write v = u + w where u ∈ U and w ∈ W and then define

P v := u. Strictly P depends on the ordered pair (U, W ) of summands of V , but to

keep things simple we will not build this dependence into the notation.


(1) P is well-defined;
(2) P is linear;
(3) Im P = U , Ker P = W ;

 (4) P2 = P

Proofs. That P is well-defined is an immediate consequence of the existence and
uniqueness of the decomposition v = u + w with u ∈ U , v ∈ V .

To see that P is linear, let v1, v2 ∈ V and α1, α2 ∈ F (where, as always, F is the field of scalars). Let v1 = u1 + w1 and v2 = u2 + w2 be the decompositions of v1 and v2. Then P v1 = u1 and P v2 = u2. What about P (α1 v1 + α2 v2)? Well, α1 v1 + α2 v2 = α1 (u1 + w1) + α2 (u2 + w2) = (α1 u1 + α2 u2) + (α1 w1 + α2 w2). Since α1 u1 + α2 u2 ∈ U and α1 w1 + α2 w2 ∈ W it follows that P (α1 v1 + α2 v2) = α1 u1 + α2 u2. Therefore P (α1 v1 + α2 v2) = α1 P(v1) + α2 P(v2), that is, P is linear.

For (3) it is clear from the definition that Im P ⊆ U ; but if u ∈ U then u = P u,

and therefore Im P = U . Similarly, it is clear that W ⊆ Ker P ;

but if v ∈ Ker P then

v = 0 + w for some w ∈ W , and therefore Ker P = W .

Finally, if v ∈ V and we write v = u + w with u ∈ U and w ∈ W then

P 2 v = P (P v) = P u = u = P v , and this shows that P2 = P , as required.

Terminology: the operator (linear transformation) P is called the projection of V onto U along W.

Note 1. Suppose that V is finite-dimensional. Choose a basis u1, . . . , ur for U and a basis w1, . . . , wm for W. Then the matrix of P with respect to the basis u1, . . . , ur, w1, . . . , wm of V is the block matrix ( I_r  0 ; 0  0 ).


Note 2. If P is the projection onto U along W then I − P , where I : V → V is the identity transformation,

is the projection onto W along U .

Note 3. If P is the projection onto U along W then u ∈ U if and only if P u = u. The fact that if u ∈ U then

P u = u is immediate from the definition of P , while if P u = u then obviously u ∈ Im P = U .

Our next aim is to characterise projection operators algebraically. It turns out that Observation (4) above is the key:


An operator T such that T2 = T is said to be idempotent.


THEOREM : A linear transformation is a projection operator if and only if it is idempotent.

Proof: We have seen already that every projection is idempotent, so the problem is to prove that an idempotent

linear transformation is a projection operator. Suppose that P : V → V is linear and idempotent. Define

U := Im P, W := Ker P. Our first task is to prove that V = U ⊕ W. Let v ∈ V. Then v = Pv + (v − Pv). Now Pv ∈ U (obviously), and P(v − Pv) = Pv − P²v = 0, so v − Pv ∈ W. Thus V = U + W. Now let x ∈ U ∩ W. Since x ∈ U there exists y ∈ V such that x = Py, and since x ∈ W also Px = 0. Then x = Py = P²y = Px = 0. Thus U ∩ W = {0}, and so V = U ⊕ W.

To finish the proof we need to convince ourselves that P is the projection onto U along W .

For v ∈ V write v = u + w where u ∈ U and w ∈ W . Since u ∈ U there must exist x ∈ V such that
u = P x. Then


Pv = P(u + w) = Pu + Pw = P2 x + 0 = Px = u ,


and therefore P is indeed the projection onto U along W , as we predicted.

The next result is a theorem which turns out to be very useful in many situations.
It is particularly important in applications of linear algebra in Quantum Mechanics.



Let P : V → V be the projection onto U along W and let T : V → V be any linear transformation. Then P T = T P if and only if U and W are T-invariant (that is, T U ⊆ U and T W ⊆ W ).

Proof : Suppose first that P T = T P . If u ∈ U , so that P u = u, then T u = T P u = P T u ∈ U , so T U ⊆ U .

And if w ∈ W then P (T w) = T P w = T 0 = 0, and therefore T W ⊆ W.

       Now suppose conversely that U and W are T -invariant. Let v ∈ V and write

v = u + w with u ∈ U and w ∈ W . Then

P T v = P T (u + w) = P (T u + T w) = T u ,

since T u ∈ U and T w ∈ W . Also,

T P v = T P (u + w) = T u .

Thus P T v = T P v for all v ∈ V and therefore P T = T P , as asserted.

We turn now to direct sums of more than two subspaces. The vector space V is said to be the direct sum of subspaces U1, . . . , Uk if for every v ∈ V there exist unique vectors ui ∈ Ui for 1 ≤ i ≤ k such that v = u1 + · · · + uk . We write V = U1 ⊕ · · · ⊕ Uk .

Note 1: If k = 2 then this reduces to exactly the same concept as we have just been discussing.

Moreover, if k > 2 then U1 ⊕ U2 ⊕ · · · ⊕ Uk = (· · · ((U1 ⊕ U2) ⊕ U3) ⊕ · · · ⊕ Uk ).

Note 2: 

If Ui ≤ V for 1 ≤ i ≤ k then V = U1 ⊕ · · · ⊕ Uk if and only if V = U1 + U2 + · · · + Uk and Ur ∩ (Σ_{i≠r} Ui) = {0} for 1 ≤ r ≤ k . It is NOT sufficient that Ui ∩ Uj = {0} whenever i ≠ j . Consider, for example, the 2-dimensional space F² of pairs (x1 , x2 ) with x1 , x2 ∈ F . Its three subspaces


U 1 := {(x, 0) | x  F }, U 2 := {(0, x) | x  F }, U 3 := {(x, x) | x  F }


U 1 ∩ U 2 = U 1 ∩ U 3 = U 2 ∩ U 3 = {0}

and yet it is clearly not true that F² = U1 ⊕ U2 ⊕ U3, that is, F² is not their direct sum.


Note 3:  If V = U1  U2  · · ·  Uk and Bi is a basis of Ui then B1  B2  · · ·  Bk   is a basis of V . In particular, dim V =  i=1k dim Ui . Let P1 , . . . , Pk be linear mappings V  V such that Pi2 = Pi for all i and PiPj = 0 whenever i  j . If P1 + · · · + Pk = I then {P1 , . . . , Pk } is known as a partition of the identity on V .


EXAMPLE : If P is a projection then {P, I − P } is a partition of the identity.


If V = U1  · · ·  Uk and Pi is the projection of V onto Ui along j i  Uj then {P1 , . . . , Pk } is a partition of the identity on V. Conversely, if {P1 , . . . , Pk } is a partition of the identity on V and Ui := Im Pi then V = U1  · · ·  Uk .



Suppose first that V = U1  · · ·  Uk . Let Pi be the projection of V onto 2 Ui along ji Uj . Certainly Pi2 = Pi , and if i j then Im Pj Ker Pi so Pi Pj = 0. If v  V then there are uniquely determined vectors ui  Ui for 1  i  k such that v = u1 + · · · + uk . Then L Pi v = ui by definition of what it means for Pi to be projection of V onto Ui along ji Uj . Therefore                            Iv = v = P1 v + · · · + Pk v = (P1 + · · · + Pk ) v . Since this equation holds for all v  V we have I = P1 + · · · + Pk . Thus {P 1 , . . . , Pk } is a partition of the Identity.To understand the converse, let {P1 , . . . , Pk } be a partition of the identity on V and let Ui := Im Pi.  For v  V , defining ui := Piv we have                            v = Iv = (P1 + · · · + Pk ) v = P1v + · · · + Pkv= u1 + · · · + uk .


Suppose that v = w1 + · · · + wk where wi  Ui for 1  i  k . Then Piwi = wi since Pi is a projection onto Ui. And if j  i then Pj wi = Pj (Pi wi ) = (PjPi ) wi, so Pjwi = 0 since PjPi = 0. Therefore                                                     Piv = Pi(w1 + · · · + w k ) = Piw1 + · · · + Pi wk = wi , that is, wi = ui. This shows the uniqueness of the decomposition v = u1 + · · · + uk with ui  Ui and so V = U1  · · ·  Uk, and the proof is complete.



A linear functional on the vector space V over the field F is a function f : V  F such that                  f (α1v1 + α2v2 ) = α1f (v1) + α2f(v2)for all α1 , α2  F and all v1, v2  V . Note: A linear functional, then, is a linear transformation V  F , where F is construed as a 1dimensional vector space over itself. Example. If V = F n (column vectors) and y is a 1 × n row vector then the map v 7 y v is a linear functional on V .


The dual space V  of V is defined as follows:                                              Set := set of linear functionals on V                                                 0 := zero function  [v0 for all v  V ]                                      (f )(v) := (f (v))                               (f1 + f2 )(v) := f1 (v) + f2(v)           [pointwise addition]                                         (λf )(v) := λf (v)                         [pointwise multiplication by scalars]


Note: It is a matter of important routine to check that the vector space axioms are

satisfied. It is also important that, when invited to define the dual space of a vector

space, you specify not only the set, but also the operations which make that set into a vector space.



Let V be a finitedimensional vector space over a field F . For every basis v1 , v2 , . . . , vn of V there is a basis f1 , f2 , . . . , fn of V such that fi (vj ) = 1 if i=j ,  0 if ij . in particular, dim V = dim V  Proof. Define f i as follows. For v  V we set f i (v) := αi where α1 , . . . , αn  F are such that v = α1v1 + · · · + αnvn. This definition is acceptable because v1 , . . . , vn span V and so such scalars α1 , . . . , αn certainly exist; moreover, since v1 , . . . , vn are linearly independent the coefficients α1 , . . . , αn are uniquely determined by v . If w  V , say w = β1v1 + · · · + βnvn , and λ, μ  F then f i (λ v + μ w) =fiλ jαjvj+μjβjvj                           =fiλ j(λαj+μβj)vj                           = λαi+μβi                            =λfi(v)+μfi(w), and so f i  V  . We have thus found elements f 1 , . . . , f n of V  such that  f i (v j ) =1 if i = j.0 if i  j.

To finish the proof we must show that they form a basis of V ′ .

To see that they are independent suppose that

jμ j f j = 0, where μ1 , . . . , μn  F . Evaluate at vi : 0 = jμjfj (vi) =  jμjfj(vi) =μiThus μ1 = · · · = μn = 0 and so f1 , . . . , fn are linearly independent.      To see that they span V let f  V and define g :=jf(vj) fj. Then also g  Vand for 1  i  n we have                                                  g(v i ) =j f(vj) fj(vi) = jf(vi) fj(vi) = f(vi) j Since f and g are linear and agree on a basis of V we have                                 f = g , that is, f =jf(vj)fj. Thus f1, . . . , fn is indeed a basis ofV , as the theorem states.


Note. The basis f1, f2 , . . . , fn is known as the dual basis of v1, v2 , . . . , vn . Clearly, it is uniquely determined by this basis of V . EXAMPLE: If V = Fn(n × 1 column vectors) then we may identify V with the space of 1 × n row vectors. The canonical basis e1 , e2 , . . . , en then has dual basis e1, e2 , . . . , en , the canonical basis of the space of row vectors.

Let V be a vector space over the field F . For a subset X of V the annihilator is defined by

X0 := {f  V | f(x) = 0 for all x  X } . Note: For any subset X the annihilator X0 is a subspace. For, if f1 , f2  X and α1 , α2  F then for any x  X(α1 f1 + α2f2 )(x) = α1 f1(x) + α2f2(x) = 0 + 0 = 0, and so α1f1 + α2f2  X.Note: X = {f  V | X  Ker f }.


Let V be a finite-dimensional vector space over a field F and let U be
a subspace. Then

dim U + dim U⁰ = dim V .


       Let u1 , . . . , um be a basis for U and extend it to a basis u1 , . . . , um , um+1 , . . . , un for V. Thus dim U = m and dim V = n. Let f1 , . . . , fn be the dual basis of V. Well prove that fm+1 , . . . , fn is a basis of U . Certainly, if m + 1 j  n then fj (ui ) = 0 for i  m and so fj  U since u1 , . . . , um span U. Now let f  U. There exist α1 , . . . , αn  F such that f = jαjfj . Then                                    f (u i ) = jαjfj(ui ) = αi,and so αi = 0 for 1 i  m, that is, f is a linear combination of fm+1, . . . , fn. Thus fm+1, . . . , fn span U and so they form a basis of it. Therefore dim U = n  m, that is, dim U + dim U = dim V .For example : Let V be a finitedimensional vector space over a field F . Show that if U 1 , U 2 are subspaces then (U 1 + U 2 ) = U1U2 and (U1  U2 ) = U1 + U2 .


Response. If f  U1  U2 then f (u1 + u2 ) = f(u1) + f(u2) = 0 + 0 = 0 for any u1U1and any u2  U2. Therefore U1 U2  (U1 + U2 ). On the other hand, U1  U1 + U2 and U2  U1 + U2 and so if f  (U1 + U2 ) then f  U1U2, that is (U1U2 )  U1 +U2. Therefore in fact (U1 U2 ) = U1 +U2 . Note that the assumption that V is finitedimensional is not needed here.For the second part, clearly U1  (U1  U2 ) and U2 (U1  U2 ) and so U1 + U2 (U1  U2 ). Now we compare dimensions:                  dim (U1 + U2 ) = dim U1 + dim U2 dim (U1  U2)                                                  = dim U1 + dim U2  dim (U1 + U2) [by the above]                                                  = (dim V  dim U1 ) + (dim V  dim U2 )  (dim V  dim (U1 + U2 ))                                                  = dim V  (dim U1 + dim U2  dim (U 1 + U2 ))                                                  = dim V  dim (U1  U2 )                                                  = dim (U1  U2) .                           Therefore U1 + U2  = (U1  U2 ) , as required.


To finish this study of dual spaces we examine the second dual, that is, the dual of
the dual. It turns out that if V is a finite-dimensional vector space then the second dual
V ′′ can be naturally identified with V itself.


Let V be a vector space over a field F . Define Φ : V  V  by (Φ v)(f ) := f (v) for all v  V and all f  V  . Then Φ is linear and oneone [injective]. If V is finitedimensional then Φ is an isomorphism.Proof. We check linearity as follows. For v1 , v2  V and α 1 , α 2  F , and for any f  V , Φ(α1 v1 + α2v2)(f) = f (α1v1 + α2v2 )                                    = α1f(v1) + α2f(v2)                                    = α1(Φv1)(f) + α2(Φv2 )(f)                                   = (α1(Φ v1) + α2 (Φv2 ))(f ),and so Φ(α1 v1 + α2 v2 )α1 (Φ v1 ) + α2 (Φ v2 ).Now Ker Φ = {v  V | Φ v = 0} = {v  V | f (v) = 0 for all f  V  } = {0} and therefore Φ is injective. If V is finitedimensional then dim Im (Φ) = dim V = dim V  = dim V  and so Φ is also surjective, that is, it is an isomorphism.
In the next article I will be writing about 3 dimensional implementation of Dual Spaces and Eigen Values & Eigenvectors.
to be continued.






“This is the beginning of the quantum age of computing and the latest advancement is towards building a universal quantum computer.

“A universal quantum computer, once built, will represent one of the greatest milestones in the history of information technology and has the potential to solve certain problems we couldn’t solve, and will never be able to solve, with today’s classical computers.”

quantum computing is likely to have the biggest impact on industries that are data-rich and time-sensitive. Machine Learning is the main application for quantum computing.

a quantum computer has quantum bits or qubits, which work in a particularly intriguing way. Where a bit can store either a zero or a 1, a qubit can store a zero, a one, both zero and one, or an infinite number of values in between—and be in multiple states (store multiple values) at the same time! If that sounds confusing, think back to light being a particle and a wave at the same time, Schrödinger’s cat being alive and dead, or a car being a bicycle and a bus.

A quantum computer exponentially expands the vocabulary of binary code words by using two spooky principles of quantum physics, namely ‘entanglement’ and ‘superposition’. Qubits can store a 0, a 1, or an arbitrary combination of 0 and 1 at the same time. Multiple qubits can be made in superposition states that have strictly no classical analogue, but constitute legitimate digital code in a quantum computer.

In a quantum computer, each quantum bit of information–or qubit–can therefore be unfixed, a mere probability; this in turn means that in some mysterious way, a qubit can have the value of one or zero simultaneously, a phenomenon called superposition.

And just as a quantum computer can store multiple values at once, so it can process them simultaneously. Instead of working in series, it can work in parallel, doing multiple operations at once.

In theory, a quantum computer could solve in less than a minute problems that it would take a classical computer millennia to solve.

To date, most quantum computers have been more or less successful science experiments. None have harnessed more than 12 qubits, and the problems the machines have solved have been trivial. Quantum computers have been complicated, finicky machines, employing delicate lasers, vacuum pumps, and other exotic machinery to shepherd their qubits.

The world’s fastest supercomputer, China’s Sunway TaihuLight, runs at 93 petaflops (93 quadrillion flops, or around 10¹⁷ flops) – but it relies on 10 million processing cores and uses massive amounts of energy.

In comparison, even a small 30-qubit universal quantum computer could, theoretically, run at the equivalent of a classical computer operating at 10 teraflops (10 trillion flops, or 10¹²).


IBM says “With a quantum computer built of just 50 qubits, none of today’s TOP500 supercomputers could successfully emulate it, reflecting the tremendous potential of this technology,” .

A screenshot of the author attempting quantum computing on IBM’s platform. (courtesy: Silicon)






The significant advance, by a team at the University of New South Wales (UNSW) in Sydney appears today in the international journal Nature.

“What we have is a game changer,” said team leader Andrew Dzurak, Scientia Professor and Director of the Australian National Fabrication Facility at UNSW.

“We’ve demonstrated a two-qubit logic gate – the central building block of a quantum computer – and, significantly, done it in silicon. Because we use essentially the same device technology as existing computer chips, we believe it will be much easier to manufacture a full-scale processor chip than for any of the leading designs, which rely on more exotic technologies.

“This makes the building of a quantum computer much more feasible, since it is based on the same manufacturing technology as today’s computer industry,” he added.


The benefits of quantum computing

quantum computers could be used for discovering new drugs, securing the internet, modeling the economy, or potentially even building far more powerful artificial intelligence systems—all sorts of exceedingly complicated tasks.

A functional quantum computer will provide much faster computation in a number of key areas, including: searching large databases, solving complicated sets of equations, and modelling atomic systems such as biological molecules and drugs. This means they’ll be enormously useful for finance and healthcare industries, and for government, security and defence organisations.

For example, they could be used to identify and develop new medicines by greatly accelerating the computer-aided design of pharmaceutical compounds (and minimizing lengthy trial and error testing); develop new, lighter and stronger materials spanning consumer electronics to aircraft; and achieve much faster information searching through large databases.

Functional quantum computers will also open the door for new types of computational applications and solutions that are probably too premature to even conceive.

The fusion of quantum computing and machine learning has become a booming research area. Can it possibly live up to its high expectations?

“These advantages that you end up seeing, they’re modest; they’re not exponential, but they are quadratic,” said Nathan Wiebe, a quantum-computing researcher at Microsoft Research. “Given a big enough and fast enough quantum computer, we could revolutionize many areas of machine learning.” And in the course of using the systems, computer scientists might solve the theoretical puzzle of whether they are inherently faster, and for what.

A recent Fortune essay states, “Companies like Microsoft, Google and IBM are making rapid breakthroughs that could make quantum computing viable in less than 10 years.”

Very soon you will find hardware and software with fundamentally new integrated circuits that store and process quantum information. Many important computational problems will only be solved by building quantum computers. Quantum computing has been picking up the momentum, and there are many startups and scholars discussing quantum machine learning. A basic knowledge of quantum two-level computation ought to be acquired.

I am sure that Quantum Information and Quantum Computation will play a crucial role in defining where the course of our technology is headed.

“For quantum computing to take traction and blossom, we must enable the world to use and to learn it,” said Gambetta. “This period is for the world of scientists and industry to focus on getting quantum-ready.”

The Era of Quantum Computing Is Here.


Algorithms FAQs Part 1.

The following are the list of most important FAQs of Algorithms and Data Structures used in most of the programming languages. To improve your

coding skills every programmer has to know the how to implement Algorithms and Data structures in his application.

1. What is an algorithm?
The process of providing solution to a problem is a sequence of steps is called an algorithm.


2. What are the properties of an algorithm?  

An algorithm must possess the following properties:
a. It should get input data.
b. It should produce output.
c. The algorithm should terminate.
d. Algorithm should be clear to understand.
e. It should be easy to perform.

3. What are the types of algorithms?
Algorithms are of two types: iterative and recursive algorithms.

4. Define iterative algorithms.
Algorithms which use loops and conditions to solve the problem are called iterative algorithms.

5. Define recursive algorithms.
Algorithms which perform the recursive steps in a divide and conquer method are called recursive algorithms.

6. What is an array?
An array is a sequential collection of same kind of data elements.

7. What is a data structure?

A data structure is a way of organizing data that considers not only the items stored, but also their relationship to each other. Advance knowledge about the relationship between data items allows designing of efficient algorithms for the manipulation of data.

8. Name the areas of application of data structures.

The following are the areas of application of data structures:

a. Compiler design
b. Operating system
c. Statistical analysis package
e. Numerical analysis
f. Simulation
g. Artificial Intelligence
h. Graphics

9. What are the major data structures used in the following areas: Network data model, Hierarchical data model, and RDBMS?

The major data structures used in the above areas are:
Network data model – Graph
Hierarchical data model – Trees
RDBMS – Array

10. What are the types of data structures?
Data structures are of two types: Linear and non linear data structures.

11. What is a linear data structure?
If the elements of a data structure are stored sequentially, then it is a linear data structure.

12. What is non linear data structure?
If the elements of a data structure are not stored in sequential order, then it is a non linear data structure.

13. What are the types of array operations?

The following are the operations which can be performed on an array:
Insertion, Deletion, Search, Sorting, Merging, Reversing and Traversal.

14. What is a matrix?
An array of two dimensions is called a matrix.

15. What are the types of matrix operations?

The following are the types of matrix operations:
Addition, multiplication, transposition, finding determinant of square matrix, subtraction, checking whether it is singular matrix or not etc.

16.  What is the condition to be checked for the multiplication of two matrices?
If matrices are to be multiplied, the number of columns of first matrix should be equal to the number of rows of second matrix.


17.  Write the syntax for multiplication of matrices?Syntax:
// Multiply matrix a (n x m) by matrix b (m x p) and return the n x p product.
// Classic triple loop: result[i][j] = sum over k of a[i][k] * b[k][j].
// Precondition: a's column count equals b's row count (a[0].size() == b.size()).
std::vector<std::vector<int>> multiplyMatrices(
    const std::vector<std::vector<int>>& a,
    const std::vector<std::vector<int>>& b)
{
    const std::size_t n = a.size();                 // rows of a
    const std::size_t m = b.size();                 // columns of a == rows of b
    const std::size_t p = m ? b[0].size() : 0;      // columns of b (0 if b empty)
    // Zero-initialized result; the inner loop accumulates into it.
    std::vector<std::vector<int>> result(n, std::vector<int>(p, 0));
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < p; ++j)
            for (std::size_t k = 0; k < m; ++k)
                result[i][j] += a[i][k] * b[k][j];
    return result;
}


18.  What is a string?
A sequential array of characters is called a string.

19.  What is the use of the terminating null character?
Null character is used to check the end of string.

20.  What is an empty string?
A string with zero character is called an empty string.

21.  What are the operations that can be performed on a string?
The following are the operations that can be performed on a string:
finding the length of string, copying string, string comparison, string concatenation, finding substrings etc.

22.  What is Brute Force algorithm?
Algorithm used to search the contents by comparing each element of array is called Brute Force algorithm.

23.  What are the limitations of arrays?
The following are the limitations of arrays:
Arrays are of fixed size.
Data elements are stored in continuous memory locations which may not be available always.
Adding and removing of elements is problematic because of shifting the locations.

24.  How can you overcome the limitations of arrays?
Limitations of arrays can be solved by using the linked list.

25.  What is a linked list?
Linked list is a data structure which store same kind of data elements but not in continuous memory locations and size is not fixed. The linked lists are related logically.

26.  What is the difference between an array and a linked list?
The size of an array is fixed whereas size of linked list is variable. In array the data elements are stored in continuous memory locations but in linked list it is non continuous memory locations. Addition, removal of data is easy in linked list whereas in arrays it is complicated.

27.  What is a node?
The data element of a linked list is called a node.

28.  What does node consist of?
Node consists of two fields:
data field to store the element and link field to store the address of the next node.

29.  Write the syntax of node creation?

// A singly linked list node: one data element plus a link to its successor.
struct node
{
    int data;    // the element stored in this node
    node *ptr;   // pointer to the next node in the list (NULL at the end)
};


30.  Write the syntax for pointing to the next node? Syntax: node_ptr = node_ptr->ptr; // advance to the next node in the list

32.  What is a data structure? What are the types of data structures? Briefly explain them

The scheme of organizing related information is known as ‘data structure’. The types of data structure are:

Lists: A group of similar items with connectivity to the previous or/and next data items.
Arrays: A set of homogeneous values
Records: A set of fields, where each field consists of data belongs to one data type.
Trees: A data structure where the data is organized in a hierarchical structure. This type of data structure follows the sorted order of insertion, deletion and modification of data items.
Tables: Data is persisted in the form of rows and columns. These are similar to records, where the result or manipulation of data is reflected for the whole table.

33.  Define a linear and non linear data structure.

Linear data structure: A linear data structure traverses the data elements sequentially, in which only one data element can directly be reached. Ex: Arrays, Linked Lists

Non-Linear data structure: Every data item is attached to several other data items in a way that is specific for reflecting relationships. The data items are not arranged in a sequential structure. Ex: Trees, Graphs

34.  Define in brief an array. What are the types of array operations?

An array is a set of homogeneous elements. Every element is referred by an index.

Arrays are used for storing the data until the application expires in the main memory of the computer system. So that, the elements can be accessed at any time. The operations are:

– Adding elements
– Sorting elements
– Searching elements
– Re-arranging the elements
– Performing matrix operations
– Pre-fix and post-fix operations

35.  What is a matrix? Explain its uses with an example

A matrix is a representation of certain rows and columns, to persist homogeneous data. It can also be called as double-dimensioned array.

– To represent class hierarchy using Boolean square matrix
– For data encryption and decryption
– To represent  traffic flow and plumbing in a network
– To implement graph theory of node representation

36.  Define an algorithm. What are the properties of an algorithm? What are the types of algorithms?


Algorithm: A step by step process to get the solution for a well defined problem.
Properties of an algorithm:

– Should be unambiguous, precise and lucid
– Should provide the correct solutions
– Should have an end point
– The output statements should follow input, process instructions
– The initial statements should be of input statements
– Should have finite number of steps
– Every statement should be definitive

37.  Types of algorithms:

– Simple recursive algorithms. Ex: Searching an element in a list
– Backtracking algorithms Ex: Depth-first recursive search in a tree
– Divide and conquer algorithms. Ex: Quick sort and merge sort
– Dynamic programming algorithms. Ex: Generation of Fibonacci series
– Greedy algorithms Ex: Counting currency
– Branch and bound algorithms. Ex: Travelling salesman (visiting each city once and minimize the total distance travelled)
– Brute force algorithms. Ex: Finding the best path for a travelling salesman
– Randomized algorithms. Ex. Using a random number to choose a pivot in quick sort).

38.  What is an iterative algorithm?

The process of attempting for solving a problem which finds successive approximations for solution, starting from an initial guess. The result of repeated calculations is a sequence of approximate values for the quantities of interest.

39.  What is an recursive algorithm?

Recursive algorithm is a method of simplification that divides the problem into sub-problems of the same nature. The result of one recursion is the input for the next recursion. The repetition is in the self-similar fashion. The algorithm calls itself with smaller input values and obtains the results by simply performing the operations on these smaller values. Generation of factorial, Fibonacci number series are the examples of recursive algorithms.

40.  Explain quick sort and merge sort algorithms.

Quick sort employs the ‘divide and conquer’ concept by dividing the list of elements into two sub elements.

The process is as follows:

1. Select an element, pivot, from the list.
2. Rearrange the elements in the list, so that all elements those are less than the pivot are arranged before the pivot and all elements those are greater than the pivot are arranged after the pivot. Now the pivot is in its position.
3. Sort the both sub lists – sub list of the elements which are less than the pivot and the list of elements which are more than the pivot recursively.

41.  Merge Sort: A comparison based sorting algorithm. The input order is preserved in the sorted output.

Merge Sort algorithm is as follows:

1. The length of the list is 0 or 1, and then it is considered as sorted.
2. Otherwise divide the unsorted list into two lists each about half the size.
3. Sort each sub list recursively. Implement the step 2 until the two sub lists are sorted.
4. As a final step, merge  both the lists back into one sorted list.

42.  What is Bubble Sort and Quick sort?

Bubble Sort: The simplest sorting algorithm. It involves the sorting the list in a repetitive fashion. It compares two adjacent elements in the list, and swaps them if they are not in the designated order. It continues until there are no swaps needed. This is the signal for the list that is sorted. It is also called as comparison sort as it uses comparisons.

Quick Sort: The best sorting algorithm which implements the ‘divide and conquer’ concept. It first divides the list into two parts by picking an element a ‘pivot’. It then arranges the elements those are smaller than pivot into one sub list and the elements those are greater than pivot into one sub list by keeping the pivot in its original place.

43.  What are the difference between a stack and a Queue?

Stack – Represents the collection of elements in Last In First Out order.
Operations includes testing null stack, finding the top element in the stack, removal of top most element and adding elements on the top of the stack.

Queue – Represents the collection of elements in First In First Out order.

Operations include testing null queue, finding the next element, removal of elements and inserting the elements from the queue.

Insertion of elements is at the end of the queue

Deletion of elements is from the beginning of the queue.

44.  Can a stack be described as a pointer? Explain.

A stack is represented as a pointer. The reason is that, it has a head pointer which points to the top of the stack. The stack operations are performed using the head pointer. Hence, the stack can be described as a pointer.

45.  Explain the terms Base case, Recursive case, Binding Time, Run-Time Stack and Tail Recursion.

Base case: A case in recursion, in which the answer is known when the termination for a recursive condition is to unwind back.

Recursive Case: A case which returns to the answer which is closer.

Run-time Stack: A run time stack used for saving the frame stack of a function when every recursion or every call occurs.

Tail Recursion: It is a situation where a single recursive call is consisted by a function, and it is the final statement to be executed. It can be replaced by iteration.

46.  Is it possible to insert different type of elements in a stack? How?

Different elements can be inserted into a stack. This is possible by implementing union / structure data type. It is efficient to use union rather than structure, as only one item’s memory is used at a time.

47.  Explain in brief a linked list.

A linked list is a dynamic data structure. It consists of a sequence of data elements and a reference to the next element in the sequence. Stacks, queues, hash tables, linear equations, prefix and post fix operations. The order of linked items is different that of arrays. The insertion or deletion operations are constant in number.

48.  Explain the types of linked lists.

The types of linked lists are:

Singly linked list: It has only head part and corresponding references to the next nodes.

Doubly linked list: A linked list which both has head and tail parts, thus allowing the traversal in bi-directional fashion. Except the first node, the head node refers to the previous node.

Circular linked list: A linked list whose last node has reference to the first node.

49.  How would you sort a linked list?

Step 1: Compare the current node in the unsorted list with every element in the rest of the list. If the current element is more than any other element go to step 2 otherwise go to step 3.

Step 2: Position the element with higher value after the position of the current element. Compare the next element. Go to step1 if an element exists, else stop the process.

Step 3: If the list is already in sorted order, insert the current node at the end of the list. Compare the next element, if any and go to step 1 or quit.


50.  What is sequential search? What is the average number of comparisons in a sequential search?


Sequential search: Searching an element in an array, the search starts from the first element till the last element.

The average number of comparisons in a sequential search is (N+1)/2 where N is the size of the array. If the element is in the 1st position, the number of comparisons will be 1 and if the element is in the last position, the number of comparisons will be N.

51.  What are binary search and Fibonacci search?

Binary Search: Binary search is the process of locating an element in a sorted list. The search starts by dividing the list into two parts. The algorithm compares the median value. If the search element is less than the median value, the top list only will be searched, after finding the middle element of that list. The process continues until the element is found or the search in the top list is completed. The same process is continued for the bottom list, until the element is found or the search in the bottom list is completed. If an element is found that must be the median value.

52.  Define Fibonacci Search: Fibonacci search is a process of searching a sorted array by utilising divide and conquer algorithm. Fibonacci search examines locations whose addresses have lower dispersion. When the search element has non-uniform access memory storage, the Fibonacci search algorithm reduces the average time needed for accessing a storage location.


Algorithms FAQs Part 2.

What is linear search?

Answer: Linear search is the simplest form of search. It searches for the element sequentially starting from the first element. This search has a disadvantage if the element is located at the end. Advantage lies in the simplicity of the search. Also it is most useful when the elements are arranged in a random order.

What is binary search?  

Answer: Binary search is most useful when the list is sorted. In binary search, element present in the middle of the list is determined. If the key (number to search) is smaller than the middle element, the binary search is done on the first half. If the key (number to search) is greater than the middle element, the binary search is done on the second half (right). The first and the last half are again divided into two by determining the middle element.

Explain the bubble sort algorithm.

Answer: Bubble sort algorithm is used for sorting a list. It makes use of a temporary variable for swapping. It compares two numbers at a time and swaps them if they are in wrong order. This process is repeated until no swapping is needed. The algorithm is very inefficient if the list is long.

E.g. List: – 7 4 5 3
1. 7 and 4 are compared

2. Since 4 < 7, 4 is stored in a temporary variable.

3. the content of 7 is now stored in the variable which was holding 4

4. Now, the content of temporary variable and the variable previously holding 7 are swapped.

What is quick sort?  

Answer: Quick sort is one the fastest sorting algorithm used for sorting a list. A pivot point is chosen. Remaining elements are portioned or divided such that elements less than the pivot point are in left and those greater than the pivot are on the right. Now, the elements on the left and right can be recursively sorted by repeating the algorithm.

Describe Trees using C++ with an example.

Tree is a structure that is similar to linked list. A tree will have two nodes that point to the left part of the tree and the right part of the tree. These two nodes must be of the similar type.

The following code snippet describes the declaration of trees. The advantage of trees is that the data is placed in nodes in sorted order.

// A node in a binary tree: the payload plus links to the two subtrees.
struct TreeNode
{
    int item;         // The data in this node.
    TreeNode *left;   // Pointer to the left subtree (NULL if none).
    TreeNode *right;  // Pointer to the right subtree (NULL if none).
};

The following code snippet illustrates the display of tree data.

void showTree( TreeNode *root )
if ( root != NULL ) { // (Otherwise, there’s nothing to print.)
showTree(root->left); // Print items in left sub tree.
cout << root->item << ” “; // Print the root item.
showTree(root->right ); // Print items in right sub tree.
} // end inorderPrint()

Data Structure trees – posted on August 03, 2008 at 22:30 PM by Amit Satpute

Question – What is a B tree?

Answer: A B-tree of order m (the maximum number of children for each node) is a tree which satisfies the following properties :

1. Every node has <= m children.
2. Every node (except root and leaves) has >= m/2 children.
3. The root has at least 2 children.
4. All leaves appear in the same level, and carry no information.
5. A non-leaf node with k children contains k – 1 keys

Question – What are splay trees?

Answer: A splay tree is a self-balancing binary search tree. In this, recently accessed elements are quick to access again

It is useful for implementing caches and garbage collection algorithms.

When we move left going down this path, its called a zig and when we move right, its a zag.

Following are the splaying steps. There are six different splaying steps.
1. Zig Rotation (Right Rotation)
2. Zag Rotation (Left Rotation)
3. Zig-Zag (Zig followed by Zag)
4. Zag-Zig (Zag followed by Zig)
5. Zig-Zig
6. Zag-Zag

Question – What are red-black trees?

Answer :A red-black tree is a type of self-balancing binary search tree.
In red-black trees, the leaf nodes are not relevant and do not contain data.
Red-black trees, like all binary search trees, allow efficient in-order traversal of elements.
Each node has a color attribute, the value of which is either red or black.

The root and leaves are black
Both children of every red node are black.
Every simple path from a node to a descendant leaf contains the same number of black nodes, either counting or not counting the null black nodes

Question – What are threaded binary trees?

Answer: In a threaded binary tree, if a node ‘A’ has a right child ‘B’ then B’s left pointer must be either a child, or a thread back to A.

In the case of a left child, that left child must also have a left child or a thread back to A, and so we can follow B’s left children until we find a thread, pointing back to A.

This data structure is useful when stack memory is scarce; using this tree, traversal around the tree becomes faster.

Question – What is a B+ tree?

Answer: It is a dynamic, multilevel index, with maximum and minimum bounds on the number of keys in each index segment. all records are stored at the lowest level of the tree; only keys are stored in interior blocks.

Describe Tree database. Explain its common uses.

A tree is a data structure which resembles a hierarchical tree structure. Every element in the structure is a node. Every node is linked with the next node, either to its left or to its right. Each node has zero or more child nodes. The length of the longest downward path to a leaf from that node is known as the height of the node and the length of the path to its root is known as the depth of a node.

Common Uses:

– To manipulate hierarchical data
– Makes the information search, called tree traversal, easier.
– To manipulate data that is in the form of sorted list.

What is binary tree? Explain its uses.

A binary tree is a tree structure, in which each node has only two child nodes. The first node is known as root node. The parent has two nodes namely left child and right child.

Uses of binary tree:

– To create sorting routine.
– Persisting data items for the purpose of fast lookup later.
– For inserting a new item faster

How do you find the depth of a binary tree?

The depth of a binary tree is found using the following process:

1. Send the root node of a binary tree to a function
2. If the tree node is null, then return false value.
3. Calculate the depth of the left tree; call it ‘d1’ by traversing every node. Increment the counter by 1, as the traversing progresses until it reaches the leaf node. These operations are done recursively.
4. Repeat the 3rd step for the left node. Name the counter variable ‘d2’.
5. Find the maximum value between d1 and d2. Add 1 to the max value. Let us call it ‘depth’.
6. The variable ‘depth’ is the depth of the binary tree.

Explain pre-order and in-order tree traversal.

A non-empty binary tree is traversed in 3 types, namely pre-order, in-order and post-order in a recursive fashion.

Pre-order process is as follows:

– Visit the root node
– Traverse the left sub tree
– Traverse the right sub tree

In order process is as follows:
– Traverse the left sub tree
– Visit the root node
– Traverse the right sub tree

What is a B+ tree? Explain its uses.

B+ tree represents the way of insertion, retrieval and removal of the nodes in a sorted fashion. Every operation is identified by a ‘key’. B+ tree has maximum and minimum bounds on the number of keys in each index segment, dynamically. All the records in a B+ tree are persisted at the last level, i.e., leaf level in the order of keys.

B+ tree is used to visit the nodes starting from the root to the left or / and right sub tree. Or starting from the first node of the leaf level. A bi directional tree traversal is possible in the B+ tree.

Define threaded binary tree. Explain its common uses

A threaded binary tree is structured in an order that, all right child pointers would normally be null and points to the ‘in-order successor’ of the node. Similarly, all the left child pointers would normally be null and points to the ‘in-order predecessor’ of the node.

Uses of Threaded binary tree:

– Traversal is faster than unthreaded binary trees
– More subtle, by enabling the determination of predecessor and successor nodes that starts from any node, in an efficient manner.
– No stack overload can be carried out with the threads.
– Accessibility of any node from any other node
– It is easy to implement to insertion and deletion from a threaded tree.

Explain implementation of traversal of a binary tree.

Binary tree traversal is a process of visiting each and every node of the tree. The two fundamental binary tree traversals are ‘depth-first’ and ‘breadth-first’.

The depth-first traversal are classified into 3 types, namely, pre-order, in-order and post-order.

Pre-order: Pre-order traversal involves visiting the root node first, then traversing the left sub tree and finally the right sub tree.

In-order: In-order traversal involves visiting the left sub tree first, then visiting the root node and finally the right sub tree.

Post-order: Post-order traversal involves visiting the left sub tree first, then visiting the right sub tree and finally visiting the root node.

The breadth-first traversal is the ‘level-order traversal’. The level-order traversal does not follow the branches of the tree. The first-in first-out queue is needed to traversal in level-order traversal.

Explain implementation of deletion from a binary tree.

To implement the deletion from a binary tree, there is a need to consider the possibilities of deleting the nodes. They are:

– Node is a terminal node: In case the node is the left child node of its parent, then the left pointer of its parent is set to NULL. In all other cases, if the node is right child node of its parent, then the right pointer of its parent is set to NULL.

– Node has only one child: In this scenario, the appropriate pointer of its parent is set to child node.

– Node has two children: The predecessor is replaced by the node value, and then the predecessor of the node is deleted.


Describe stack operation. 

Stack is a data structure that follows Last in First out strategy.

Stack Operations:-

- Push – Pushes (inserts) the element in the stack. The location is specified by the pointer.

- Pop – Pulls (removes) the element out of the stack. The location is specified by the pointer.

- Swap – The two top-most elements of the stack can be swapped.

- Peek – Returns the top element on the stack but does not remove it from the stack.

- Rotate – The topmost (n) items can be moved on the stack in a rotating fashion.

A stack has a fixed location in the memory. When a data element is pushed in the stack, the pointer points to the current element.

Describe queue operation.

Queue is a data structure that follows First in First out strategy.

Queue Operations:

- Enqueue – Inserts the element in the queue at the end.

- Dequeue – Removes the element out of the queue from the front.

- Size – Returns the size of the queue.

- Front – Returns the first element of the queue.

- Empty – Checks whether the queue is empty.

Discuss how to implement queue using stack.
A queue can be implemented by using 2 stacks:-

1. An element is inserted in the queue by pushing it into stack 1.

2. An element is extracted from the queue by popping it from stack 2.

3. If stack 2 is empty, all elements currently in stack 1 are transferred to stack 2, but in the reverse order.

4. If stack 2 is not empty, just pop the value from stack 2.

Explain stacks and queues in detail.

A stack is a data structure based on the principle Last In First Out. Stack is container to hold nodes and has two operations – push and pop. Push operation is to add nodes into the stack and pop operation is to delete nodes from the stack and returns the top most node.

A queue is a data structure based on the principle First in First Out. The nodes are kept in an order. A node is inserted from the rear of the queue and a node is deleted from the front. The first element inserted in the first element first to delete.

Question – What are priority queues?
A priority queue is essentially a list of items in which each item is associated with a priority
Items are inserted into a priority queue in any, arbitrary order. However, items are withdrawn from a priority queue in order of their priorities starting with the highest priority item first.

Priority queues are often used in the implementation of algorithms

Question – What is a circular singly linked list?
In a circular singly linked list, the last node of the list is made to point to the first node. This eases traversal through the list.

Describe the steps to insert data into a singly linked list.
Steps to insert data into a singly linked list:-

Insert in middle
Data: 1, 2, 3, 5
1. Locate the node after which a new node (say 4) needs to be inserted.(say after 3)

2. create the new node i.e. 4

3. Point the new node 4 to 5 (new_node->next = node->next(5))

4. Point the node 3 to node 4 (node->next =newnode)

Insert in the beginning

Data: 2, 3, 5

1. Locate the first node in the list (2)

2. Point the new node (say 1) to the first node. (new_node->next=first)

Insert in the end

Data: 2, 3, 5

1. Locate the last node in the list (5)

2. Point the last node (5) to the new node (say 6). (node->next=new_node)

Explain how to reverse singly link list.

Answer: Reverse a singly link list using iteration:-

1. First, set a pointer ( *current) to point to the first node i.e. current=base

2. Move ahead until current!=null (till the end)

3. set another pointer (* next) to point to the next node i.e. next=current->next

4. store reference of *next in a temporary variable ( *result) i.e current->next=result

5. swap the result value with current i.e. result=current

6. and now swap the current value with next. i.e. current=next

7. return result and repeat from step 2

A linked list can also be reversed using recursion which eliminates the use of a temporary variable.

Define circular linked list.

Answer: In a circular linked list the first and the last node are linked. This means that the last node points to the first node in the list. Circular linked list allow quick access to first and last record using a single pointer.

Describe linked list using C++ with an example.

A linked list is a chain of nodes. Each and every node has at least two nodes, one is the data and other is the link to the next node. These linked lists are known as single linked lists as they have only one pointer to point to the next node. If a linked list has two nodes, one is to point to the previous node and the other is to point to the next node, then the linked list is known as doubly linked list.

To use a linked list in C++ the following structure is to be declared:


// Singly linked list node: a data field plus a link to the next node.
struct List
{
    long Data;   // Payload stored in this node.
    List* Next;  // Pointer to the next node (0 at the end of the list).

    // Start out empty and unlinked so a freshly created node is safe to use.
    List() : Data(0), Next(0) {}
};

typedef List* ListPtr;  // Convenience alias used by the traversal code.

The following code snippet is used to add a node.

void List::AddANode()

ListPtr->Next = new List;


The following code snippet is used to traverse the list

void showList(ListPtr listPtr)
� while(listPtr!=NULL) {
return temp;



/ / LListItr class; maintains “current position”.

/ I

/ / CONSTRUCTION: With no parameters. The LList class may

/ / construct a LListItr with a pointer to a LListNode.

/ /

/ / ******************PUBLOIPCE RATIONS*********************

/ / boo1 isValid( j –> True if not NULL

/ / void advance ( j –> Advance (if not already NULL)

/ / Object retrieve( ) –> Return item in current position

/ / ******************ERRORS********************************

/ / Throws BadIterator for illegal retrieve.


template <class Object>

class LListItr



LListItr( ) : current( NULL ) { 1


boo1 isValid( j const

{ return current ! = NULL; )


void advance ( j

{ if( isValid( j j current = current->next; 1


const Object & retrieve( ) const

{ if( !isValid( j j throw BadIterator( ) ;

return current->element; }



LListNode<Object> *current; / / Current position


LListItr( LListNode<Object> *theNode )

: current ( theNode ) {


friend class LList<Object>; / / Grant access to constructor





/ / LList class.

/ /

/ / CONSTRUCTION: with no initializer.

/ / Access is via LListItr class.

/ /

/ / ******************PUBLIoC~ ERATIoNs******************X**

/ / boo1 isEmpty( ) –> Return true if empty; else false

/ / void makeEmpty( ) –> Remove all items

/ / LListItr zeroth( ) –> Return position to prior to first

10 / / LListItr first( ) –> Return first position

11 / / void insert( x, p ) –> Insert x after position p

12 / / void remove( x ) –> Remove x

13 / / LListItr find( x ) –> Return position that views x

14 / / LListItr findprevious( x )

15 / / –> Return position prior to x

16 / / ******************ERR~R~*****~****************************

17 / / No special errors.


19 template <class Object>

20 class LList

21 I

22 public:

23 LList( ) ;

24 LList( const LList & rhs ) ;

25 -LList( ) ;


27 boo1 isEmpty( const;

28 void makeEmpty( ) ;

29 LListItr<Object> zeroth( ) const;

30 LListItr<Object> first ( ) const;

31 void insert( const Object & x, const ~~istItr<Object&> p ) ;

32 LListItr<Object> find( const Object & x ) const;

33 LListItr<Object> findprevious( const Object & x ) const;

34 void remove( const Object & x ) ;


36 const LList & operator=( const LList & rhs ) ;


38 private:

39 LListNode<Object> *header;



/ / Construct the list.

template <class Object>

LList<Object>: :LList ( )


header = new LListNode<Object>;



/ / Test if the list is logically empty.

/ / Return true if empty, false, otherwise.

10 template <class Object>

11 boo1 LList<Object>: :isEmpty( ) const

12 {

13 return header->next == NULL;

14 1


16 / / Return an iterator representing the header node.

17 template <class Object>

18 LListItr<Object> LList<Object>::zeroth( ) const

19 I

20 return LListItr<Object>( header ) ;

21 1


23 / / Return an iterator representing the first node in the list.

24 / / This operation is valid for empty lists.

25 template <class Object>

26 LListItr<Object> LList<Object>::first( ) const

27 {

28 return LListItr<Object>( header->next ) ;

29 };


/ / Simple print function.

template <class Object>

void printlist( const LList<Object> & theList )


if( theList.isEmptyi ) )

6 cout << “Empty list” << endl;


8 }

LListItr<Object> itr = theList.first( ) ;

10 for( ; itr.isValid( ) ; itreadvance( ) )

11 cout << itr.retrieve( ) << ” “;

12 {

13 cout << endl;

14 }


/ / Return iterator corresponding to the first node matching x.

/ / Iterator is not valid if item is not found.

template <class Object>

LListItr<Object> LList<Object>::find( const Object & x ) const


6 LListNode<Object> *p = header->next;


8 while( p ! = NULL && p->element ! = x )

9 p = p->next;


11 return LListItr<Object>( p ) ;

12 }

/ / Remove the first occurrence of an item X .

template <class Object>

void LList<Object>::remove( const Object & x


LListNode<Object> *p = findprevious( x ) .current;


if( p->next ! = NULL )

8 I

LListNode<Object> *oldNode = p->next;

10 p->next = p->next->next; / / Bypass

11 delete oldNode;

12 }

13 1

Figure 17.13 The remove routine for the LList class.

/ / Return iterator prior to the first node containing item x.

template <class Object>


LList<Object>: : f indprevious ( const Object & x ) const


LListNode<Object> *p = header;


8 while( p-znext ! = NULL && p->next->element ! = x )

p = p->next;


11 return LListItr<Object>( p ) ;

12 }

Figure 17.14 The findprevious routine — similar to the find routine — for use

with remove.

/ / Insert item x after p.

template <class Object>

void LList<Object>::

insert( const Object & x, const LListItr<Object> & p )


if ( p.current ! = NULL )

p.current->next = new LListNodecObject>( x,

8 p.current->next ) ;


Figure 17.15 The insertion routine for the LList class.


/ / Make the list logically empty.

template <class Object>

void LList<Object>: :makeEmpty( )


while( !isEmptyi ) )

6 remove ( first ( ) .retrieve ( ) ) ;



/ / Destructor.

10 template <class Object>

11 LList<Object>::-LList( )

12 (

13 makeErnpty ( ) ;

14 delete header;

15 1

Figure 17.16 The makeEmpty method and the LList destructor.

/ / Copy constructor.

template <class Object>

LList<Object>::LList( const LList<Object> & rhs )


header = new LListNodeiObject>;

*this = rhs;



/ / Deep copy of linked lists.

10 template <class Object>

11 const LList<Object> &

12 LList<Object>::operator=( const ~~ist<Object&> rhs i

13 {

14 if ( this ! = &rhs )

15 (

16 makeEmpty ( ) ;


18 LListItr<Object> ritr = rhs.first( ) ;

19 LListItr<Object> itr = zeroth( ) ;

20 for( ; ritr.isValid( ) ; ritr.advance( ) , itr.advance( )

21 insert( ritr.retrieve( ) , itr ) ;

22 {

23 return *this;

24 }

Figure 17.17 Two LList copy routines: operator= and the copy constructor.


Q: Implement a Algorithm to check if the link list is in Ascending order?

A: template

bool linklist::isAscending() const{

nodeptr ptr = head;



if(ptr->_data > ptr->_next->_data)

return false;

ptr= ptr->_next;


return true;



Q: Write an algorithm to reverse a link list?

A: template

void linklist::reverselist()


nodeptr ptr= head;

nodeptr nextptr= ptr->_next;



nodeptr temp = nextptr->_next;

nextptr->_next = ptr;

ptr = nextptr;

nextptr = temp;


head->_next = 0;

head = ptr;




Scrum is an Agile Methodology, it is modelled and designed for Hyper-productive teams and high quality work, these teams operate at the rate of 5-10 times the velocity and very high quality than that of waterfall teams. Scrum is modelled so that it can scaled across the globe to any size.

In 2001, Sutherland, Schwaber, and fifteen colleagues got together in Snowbird, Colorado, and drafted the Agile Manifesto, which became a clarion call to software developers around the globe to pursue this radically different type of management.

Since then, Sutherland, Schwaber, and their colleagues have gone on to generate thousands of high-performance teams in hundreds of companies all around the world under the labels of Scrum and Agile.

SCRUM COACH Mike Cohn reports in his classic book, Succeeding with Agile :

“During the first year of making the switch, released 94 percent more features, delivered 38 percent more features per developer, and delivered over 500 percent more value to their customers compared to the previous year. . . . Fifteen months after adopting Scrum, surveyed its employees and found that 86 percent were having a ‘good time’ or the ‘best time’ working at the company. Prior to adopting Scrum, only 40 percent said the same thing. Further, 92 percent of employees said they would recommend an agile approach to others.”


High performance and productive is directly depends on the self-organizing capability of teams, Understanding this self-organizing capability and continuously improving on it is a challenge.

The huge forte of complex adaptive systems to be adapting new system and environments helps us to follow certain rules namely

  • Shock Therapy.
  • Choice uncertainty principle.
  • Punctuated Equilibrium.

There are teams who follow waterfall but use terms and jargon of “Agile”, thinking that is enough to be an agile team. Then there are teams who follow few SCRUM principles and not every person in the team and management follow completely SCRUM, these teams are known as “SCRUMBUTT”.

It is believed that majority of the teams are not SCRUM teams only 20% -25% are pure SCRUM teams capable of working above than the performance of Waterfall team.


The best way to check if the scrum team is truly following the practices of SCRUM and Agile principles is to ask the scrum team members to undertake NOKIA Test.
Following are the questions asked during NOKIA test:
– Do you know who the product owner is?
– Is there a product backlog prioritized by business value that has estimates created by the team?
– Does the team generate burndown charts and know their velocity?
– You cannot have project managers (or anyone else) disrupting the work of the team.
– Only 10% of teams worldwide meet this criteria.
If the average score of the team is more than 6, then the scrum is properly followed otherwise it is SCRUMBUTT team.


Another way of checking if the organization is implement very good scrum and have hyper productive scrum teams is by measuring if their revenue or return of investment of the organization per annum.
There are very few exceptional top performing scrum teams who had successfully meet the specified commitment and since the outcome was overwhelming. The management and sales & marketing team were so overwhelmed that were unable to share the pace of the output and had find excuses and drop the idea of marketing to the client. Few of the examples given by Jeff Sutherland are below:

  • The most productive team ever recorded at Borland produced a failed product.
  • The most productive distributed team (SirsiDynix) had quality problems, management problems, and internal company conflicts that caused the product to be killed.
  • The second most productive team in the world (Motorola – David Anderson data) was overwhelmed with bureaucracy, completely demotivated, their product was killed, and the team died a painful death


There are very few exceptional top performing scrum teams who had successfully meet the specified commitment and since the outcome was overwhelming. The management and sales & marketing team were so overwhelmed that were unable to share the pace of the output and had find excuses and drop the idea of marketing to the client. Few of the examples given by Jeff Sutherland are below:

  • The most productive team ever recorded at Borland produced a failed product.
  • The most productive distributed team (SirsiDynix) had quality problems, management problems, and internal company conflicts that caused the product to be killed.
  • The second most productive team in the world (Motorola – David Anderson data) was overwhelmed with bureaucracy, completely demotivated, their product was killed, and the team died a painful death

The best way to understand the scrum is by studying failed scrum projects which performing exceptionally well but were killed due to the other factors of the organization. It is similar to the analogy of Thomas Alva Edison who failed 10000 times to final invent a modern light bulb. So we need to study the failed projects and understand the impediments and try to overcome them in our scrum project.

The Toyota Way

One of the best way and well established process of hyperproductive teams is the “Toyota Way”. The development and manufacture process of No. 1 Car Maker of the World. The following are the guidelines implemented by Toyota

  • There are many things one doesn’t understand, and therefore we ask them why don’t you just go ahead and take action; try to do something? Agile Principle #3 #11
  • You realize how little you know and you face your own failures and redo it again and at the second trial you realize another mistake…so you can redo it once again. Agile Principle #11#12
  • So by constant improvement…one can rise to the higher level of practice and knowledge. Agile Principle #3
Toyota Fights Back!!

In 2007, Toyota had very bad week, they had to face lot of criticism and feedback from the owners of Toyota cars. Since Toyota follows very strict quality and agile policy they recalled all the defect cars from Japan. Later Toyota strived and worked hard to rectify the defects and improved the cars to greater extent and got their market leader share. Earlier that week their market share less than their competitors almost down to fourth or fifth place but regained it back very soon to 1st place in the market.

Invention of SCRUM

A well-known technologist “Alan Kay” who invented Personal workstation, Mouse, Ethernet, Windows Interface, Laser Printer and Small Talk has followed a strategy of selecting the data points for research and innovation.

Alan Kay’s Innovation Strategy

Every time Alan Kay followed extreme data points of the IT & Technology Field for inventing something in that domain. He never used to look for data points which were incremental to already existing products in the IT field, he never looked for Cross Discipline products for innovation. By following this kind of strategy Alan has been successful in inventing very successful and high impact products.

Similarly Jeff and Ken followed the extreme data points for identifying the projects for inventing scrum, They choose IBM Surgical team, Takeuchi & Nonaka research paper on high productive quality teams and Borland Quattro Project as research data or extreme data for inventing scrum.

Anyone who is planning to use scrum and be an industry leader has to high motivated person first, this motivation helps the team perform better. So if you are planning on scrum you need to be motivated and ready to strive for greater goals like being Industry leader and revenue of the project is skyrocketing. Anyone can aspire to be great and that aspiration can be your starting point for scrum implementation.

Are you practicing SCRUM?

Once you have planned for scrum and start using scrum, how to verify you are doing it rightly. For that there is Nokia test. You and your team need to undergo Nokia test and make sure you have the scoring of more than 6. IF your team score is less than 6 then you are not doing SCRUM.

Two Siblings of Agile: Scrum and XP

The core and forte of a successful SCRUM has been utilizing the best practices of engineering and good communication. IF you see the first scrum at Easel Corporation implemented by Jeff Sutherland, it incorporated all the activities of extreme programming’s engineering practices. All the Most high performance teams use Scrum and XP together. It is hard to get a Scrum with extreme velocity without XP engineering practices. You cannot scale XP without Scrum.

Example 1: Anatomy of a failed project – SirsiDynix – ScrumButt

Example 2: Xebia ProRail project – Pretty Good Scrum

Learn from venture capital investments

As agile became popular both in Japan, US and slowly spread around the globe. VC start preferring companies which invest their process & practices in agile. Some VC have start using SCRUM and agile methodologies in their own management and their portfolio started to have SCRUM and XP practiced companies. All the VC felt that having clients in their portfolio are agile companies they felt it has double safe. So VC have started the following:

  • – Invest only in Agile projects
    • 1 hyperproductive company out of 10 is good enough to meet investment goals
    • Invest in Scrum training could get 2 hyperproductive
  • – Invest only in market leading, industry standard processes – this means Scrum and XP
  • – Ensure teams implement basic Scrum practices
    • Everyone must pass Nokia test
    • Management held accountable at Board level for impediments

First Non Software Company Strategy : OpenView Venture Partners Strategy

OpenView Venture Partners strategy is invest in Software company which practised SCRUM: They also incorporated SCRUM in their company. They were the first non-software company to:

  • Investment partners practice Scrum
  • Invest only in Agile projects
    • Use only market leading, Industry standard process – this means Scrum and XP
    • Ensure teams implement best Scrum practices
  • Drive Scrum implementation at Board level. Ensure management is totally involved and understands Lean Product Development
  • Many portfolio companies run senior management team, sales, marketing, client services, and support with Scrum.

What Investors know about Companies?

All the investors start searching for the secret ingredient for their portfolio recipe and start asking
What is the secret recipe for building hyperproductive teams? They started summarizing what Jeff had advised to Open View, i.e.

  • – What is the secret recipe for building hyperproductive teams?
  • – First step is implementing basic Scrum practices and passing Nokia test.
  • – Second, management needs to get totally involved, understand team velocity, and remove impediments.
  • – Third, basic XP engineering practices need to be implemented, namely: o Test first development (may be pair programming, TDD);Continuous integration
  • – Then they are ready for the fun stuff

To be a Hyper Productive Team

According to Scrum, Hyperproductivity is defined as at least Toyota level of performance, it used to require at least two years for a non-agile company to reach 200 to 240% improvement. Now the scrum company requires only 300% improvement in 3 two weeks sprints. The only challenge is to keep the consistency of the teams to that optimum level very quickly and then remain in the hyper productive state. This is the main truth of self-organized teams to remain in hyper productive state

(Courtesy InfoQ)

Courtesy (Jeff Sutherland Agile Seminar)

How can an agile coach achieve Hyperproductivity in typical company?

Following are the successful examples of Shock Therapy are
1. MySpace in Beverly Hills.
2. JayWay in Sweden.
3. Pivotal Labs in San Francisco.


MySpace has been very good example of successful agile projects in most of the studies done in Agile.
MySpace has several hundred developers
– About 1/3 waterfall
– About 1/3 Scrum Butt with Project Managers
– About 1/3 pure Scrum

Scott Downey (Owner at Rapid Scrum LLC), while coaching Agile practices and Scrum training at MySpace, reportedly took teams to a hyper-productive state in a few weeks.

It has been recorded that the average time to reach 240% of the velocity of a waterfall team is 2.9 days per team member, where the team includes the Scrum Master and the Product Owner.

Establishing a Scrum Team

When a new team is sent for Scrum training at MySpace, Scott Downey puts it through very strict and rigorous training, and he sets all the rules for effectively running the Scrum training and the projects. In his words: "My rules remain in effect until the team has met three criteria:"

  • They are Hyper-Productive(>240% higher targeted value contribution)
  • They have completed three successful Sprints consecutively
  • They have identified a good business reason to change the rule

– The rules are roughly these:

  • Everyone on the team will attend a Scrum training session. I conduct an extremely condensed "Scrum at MySpace" course in about four hours, and the entire team attends the session together. Until everyone has been trained, we won’t begin our first Sprint.
  • Sprints will be one week long.

The biggest and crucial point of defining a successful product from SCRUM Development team is the commitment to definition of DONE:

MySpace uses Jeff Definition of “DONE”

Definition of “Done” is this:

o 1. Feature Complete

o 2. Code Complete

o 3. No known defects

o 4. Approved by the Product Owner

o 5. Production Ready

Initially the Scrum Master guides and helps the new team run the Daily Scrum Meeting; he guides each and every move of the Scrum team until the new team has become familiar with the best practices of a Scrum team.

Initially the Scrum Master guides and monitors the estimation and discussions in the Sprint Planning meeting; as the team matures, it starts handling the sprint-planning activities in meetings of its own. The Product Owner participates as a visitor and advisor to the meeting, and the Scrum Master guides the development team in creating user stories. Stories are written to comply with the INVEST principles. In the Sprint Backlog meeting, the team's commitment to the Product Backlog is read aloud and the team's consent is taken again, after clearly explaining what "commit" does and does not mean, so there is no ambiguity about the product requirements and the definition of DONE. Once the team commits to the sprint work, the meeting ends.

The work should be completed in right order and with regression testing with no defects. A lot of emphasis is placed on priority order than multi-tasking, this helps in completing the commitments early rather than having incomplete items on the list. Every member of the scrum team follows the standard layouts for all the artifacts of SCRUM namely Sprint Planning Boards, User Stories, Story Cards, Burndown Charts and Velocity tracking.

Once the master Scrum Master feels that the team has matured enough, he moves on to another new team, and a new Scrum Master is assigned to monitor the team's Scrum development process. The main aim is to make the team a hyperproductive team.

Facilitating Self-Organization

As the team becomes mature and completely self-organized, the team members start correcting each other, which evokes a lot of positive energy in the team. Soon the team becomes very agile, active and focused.

Cosmic Stopping Problem

The Cosmic Stopping Problem, otherwise known as the choice uncertainty principle.

Jeff Sutherland, in his paper "AGILE DEVELOPMENT: LESSONS LEARNED FROM THE FIRST SCRUM", suggests that the most interesting effect of Scrum on Easel's development environment was an observed "punctuated equilibrium" effect. This occurs in biological evolution when a species is stable for long periods of time and then undergoes a sudden jump in capability. During the long period of apparent stability, many internal changes in the organism are reconfigured that cannot be observed externally. When all the pieces are in place to allow a significant jump in functionality, external change occurs suddenly. A fully integrated component design environment leads to unexpected, rapid evolution of a software system with emergent, adaptive properties resembling the process of punctuated equilibrium observed in biological species. Sudden leaps in functionality resulted in earlier-than-expected delivery of software in the first Scrum.

A punctuated equilibrium – the equilibrium being the “Safety Zone” of working in a stable system for a while (e.g. during a Scrum Sprint when the Sprint Backlog does not shift within the sprint) punctuated by events that allow the chaos/shifting world outside to affect the system, and then return to the “Safety Zone” to have an opportunity for behavior that fits the new reality to emerge. This has been observed in nature as well as contributing to effective evolution.

This punctuated equilibrium is accompanied by the Choice Uncertainty Principle, also known as the "Cosmic Problem". It is found everywhere, at every level of the world. A very good Scrum Master is able to handle this at every step of development; a good Scrum Master does the following to avoid the Cosmic Problem:

  • o Don’t accept backlog that is not ready
  • o Minimize work in progress
  • o Stop the line when bad things happen and fix it so it can never happen again
  • o If it is not “Done” put it back on the product backlog

Another major reason why Scrum is better than waterfall is that collocation affects velocity, which in turn affects the cost of the project. In a study done at PatientKeeper, when a project moved from a Scrum development team to a waterfall team, the company incurred extra expenses instead of 30% savings: $2M of Scrum development at PatientKeeper cost $6M when outsourced to waterfall teams. So never outsource to waterfall teams; only outsource to Scrum teams.

The Road to Hyper-Productivity

A success path to Hyperproductivity is based on the Complex Adaptive System theory.

The essential architectural design concept encourages refactoring at a granular level, with regression testing and rigorous system testing, to build a product with zero defects. The team develops the software components in the right hierarchical order, optimizing the speed of delivering the product's new features. This means working only on well-defined stories and removing requirements that are not ready (which would otherwise lead to the Cosmic Stopping Problem), minimizing work in progress, avoiding obstacles through self-organization, and finally applying the Shock Therapy that decreases the sprint count.

Finally, SCRUM recommends that those who can do "Pretty Good Scrum" should then do "Great SCRUM".
There are 3 types of companies namely,

  • – Doesn’t want to see or hear impediments. Suppress Scrum master. Prevents the process implementation. These companies always suffer and lose out.
  • – Talk about impediments, but doesn’t fix impediments, internal struggle goes on. These companies suffer for a long time to be better.
  • – Look for Impediments and fix it immediately. Avoid internal Conflicts. These companies are successful companies.

The Success of SCRUM driven work and its positive impact on the employees has forced almost all the non-software firms also to move to SCRUM, as clearly stated in Forbes

“The success of software development at firms like [CRM], along similar customer-driven iterative methods in auto manufacture at firms like Toyota, has led to the spread of this different way of managing to related fields.

  • • The Quality Software Engineering group at IBM [IBM] is responsible for software development processes and practices across the company. As part of the effort to promulgate Scrum in developing software, an iterative process of working was adopted for doing change management.
  • • At the Chicago software firm Total Attorneys, iterative work patterns were so successful that they spread to the staff of call centers: small cross-functional teams work in cycles of three weeks.
  • • At the Danish software firm, Systematic, iterative methods have been spreading from software development to other parts of the firm.
  • • At the Swedish software firm Trifork, iterative methods have spread from software development to conference management.
  • • And OpenView Venture Partners, a Boston-based venture capital firm, has expanded client-driven iterations into consulting and finance.”

The Power of SCRUM is now enthusing other firms to join the main stream of new methodology of management of work.