BP231021 | SandeeP R
II MCA
ELECTIVE V : DATA MINING TECHNIQUES
UNIT 2 - ASSOCIATION RULES
Problem solving based Association Rule Algorithms
Use any transaction dataset, and apply
i. Frequent set algorithm
ii. Apriori algorithm
iii. Partition Algorithm
iv. Pincer Search
v. Dynamic Itemset Counting
Generate association rules.
Record your inference about one algorithm over the other.
According to you, which algorithm do you consider to be the most efficient for the dataset you have
chosen?
1. FREQUENT SET ALGORITHM
from itertools import combinations
from collections import defaultdict
def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k by joining pairs of itemsets.

    Args:
        itemsets: iterable of frozenset itemsets (typically the frequent
            (k-1)-itemsets from the previous level).
        k: target candidate size.

    Returns:
        Set of frozenset candidates whose pairwise union has exactly k items.
    """
    # Set comprehension instead of set([list-comp]): no throwaway list is
    # built and duplicates are collapsed directly (ruff C401).
    return {
        frozenset(a.union(b))
        for a in itemsets
        for b in itemsets
        if len(a.union(b)) == k
    }
def calculate_support(transactions, candidates):
    """Calculate the support count for candidate itemsets.

    Bug fix: the original increment line read `support_count[candidate] +=`
    with no right-hand operand, which is a SyntaxError.  Restored the
    intended `+= 1`.

    Args:
        transactions: iterable of transaction sets.
        candidates: iterable of frozenset candidate itemsets.

    Returns:
        defaultdict(int) mapping each candidate that occurs in at least one
        transaction to its occurrence count (zero-support candidates get no
        key, matching the defaultdict-based design used in this section).
    """
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count
def frequent_set_algorithm(transactions, min_support):
    """Level-wise search for every itemset meeting the support threshold.

    Args:
        transactions: list of transaction sets.
        min_support: minimum absolute support count.

    Returns:
        Dict mapping frozenset itemsets to their support counts.
    """
    all_frequent = {}
    size = 1
    # Seed level 1 with every distinct item observed in the data.
    candidates = {frozenset([item]) for basket in transactions for item in basket}
    while candidates:
        counts = calculate_support(transactions, candidates)
        # Keep only the itemsets that clear the support threshold.
        survivors = {s: c for s, c in counts.items() if c >= min_support}
        all_frequent.update(survivors)
        # Join the survivors into candidates one item larger.
        size += 1
        candidates = generate_candidates(set(survivors), size)
    return all_frequent
# Example usage
if __name__ == "__main__":
    # Toy market-basket data: four transactions over four items.
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]
    min_support = 2  # itemset must occur in at least 2 transactions

    print("Frequent Itemsets:")
    for itemset, support in frequent_set_algorithm(transactions, min_support).items():
        print(f"{set(itemset)}: {support}")
OUTPUT :
Frequent Itemsets:
{'butter'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer'}: 3
{'butter', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'beer'}: 2
{'milk', 'butter', 'bread'}: 2
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
2. APRIORI ALGORITHM
from itertools import combinations
def generate_candidates(itemsets, k):
    """Join pairs of itemsets; keep unions that have exactly k items."""
    candidates = set()
    for first in itemsets:
        for second in itemsets:
            merged = first | second
            if len(merged) == k:
                candidates.add(frozenset(merged))
    return candidates
def calculate_support(transactions, candidates):
    """Count, for every candidate, how many transactions contain it.

    Every candidate gets a key, including those with zero support.
    """
    return {
        candidate: sum(1 for basket in transactions if candidate.issubset(basket))
        for candidate in candidates
    }
def apriori(transactions, min_support):
    """Apriori level-wise mining of frequent itemsets.

    Args:
        transactions: list of transaction sets.
        min_support: minimum absolute support count.

    Returns:
        Dict mapping frozenset itemsets to their support counts.
    """
    result = {}
    level = 1
    # Level 1: one candidate per distinct item in the dataset.
    candidates = {frozenset((item,)) for basket in transactions for item in basket}
    while candidates:
        counted = calculate_support(transactions, candidates)
        keep = {}
        for itemset, count in counted.items():
            if count >= min_support:
                keep[itemset] = count
        result.update(keep)
        # Next level: join surviving itemsets into (level+1)-candidates.
        level += 1
        candidates = generate_candidates(set(keep), level)
    return result
# Example usage
if __name__ == "__main__":
    # Four toy market-basket transactions.
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]
    min_support = 2  # minimum number of transactions an itemset must occur in

    print("Frequent Itemsets:")
    for itemset, support in apriori(transactions, min_support).items():
        print(f"{set(itemset)}: {support}")
OUTPUT :
Frequent Itemsets:
{'butter'}: 3
{'beer'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer', 'butter'}: 2
{'butter', 'bread'}: 3
{'milk', 'beer'}: 2
{'beer', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
{'milk', 'butter', 'bread'}: 2
3. PARTITION ALGORITHM
from itertools import combinations
from collections import defaultdict
# Helper function to generate candidate itemsets
def generate_candidates(itemsets, k):
    """Join pairs of itemsets and keep the unions with exactly k items."""
    # Set comprehension instead of set([list-comp]): avoids the intermediate
    # list and deduplicates in one pass (ruff C401).
    return {
        frozenset(a.union(b))
        for a in itemsets
        for b in itemsets
        if len(a.union(b)) == k
    }
# Helper function to calculate support for itemsets in a partition
def calculate_support(transactions, candidates):
    """Count occurrences of each candidate; zero-support candidates get no key."""
    support_count = defaultdict(int)
    for candidate in candidates:
        for basket in transactions:
            if candidate <= basket:  # subset test
                support_count[candidate] += 1
    return support_count
# Partition Algorithm implementation
def _count_support(transactions, candidates):
    """Count how many transactions contain each candidate itemset."""
    counts = {candidate: 0 for candidate in candidates}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                counts[candidate] += 1
    return counts


def _join_candidates(itemsets, k):
    """Join pairs of itemsets into candidates of exactly k items."""
    return {
        frozenset(a | b) for a in itemsets for b in itemsets if len(a | b) == k
    }


def _mine_partition(partition, local_min):
    """Level-wise mining of one partition; returns its locally frequent itemsets."""
    frequent = set()
    k = 1
    current = {frozenset([item]) for basket in partition for item in basket}
    while current:
        counts = _count_support(partition, current)
        level = {itemset for itemset, c in counts.items() if c >= local_min}
        if not level:
            break
        frequent |= level
        k += 1
        current = _join_candidates(level, k)
    return frequent


def partition_algorithm(transactions, n_partitions, min_support):
    """Partition algorithm (Savasere et al.) for frequent itemset mining.

    Phase I mines each partition with a locally *scaled* support threshold;
    the merge phase unions the local results; Phase II re-counts those
    candidates over the full dataset and filters by the global threshold.

    Bug fix: the original applied the absolute global min_support count to
    every partition.  Since partitions are smaller than the full dataset,
    that over-pruned the local results and silently dropped globally
    frequent itemsets (false negatives).  The local threshold is now
    min_support scaled by the partition's share of the transactions
    (rounded up), restoring the algorithm's completeness guarantee.

    Args:
        transactions: list of transaction sets.
        n_partitions: number of partitions to split the data into.
        min_support: minimum absolute support count over the whole dataset.

    Returns:
        Dict mapping frozenset itemsets to their global support counts.
    """
    total = len(transactions)
    partition_size = total // n_partitions
    partitions = [
        transactions[i * partition_size:(i + 1) * partition_size]
        for i in range(n_partitions)
    ]
    if total % n_partitions != 0:
        # Leftover transactions go to the last partition.
        partitions[-1] = partitions[-1] + transactions[n_partitions * partition_size:]

    # Phase I: locally frequent itemsets per partition (scaled threshold).
    global_candidates = set()
    for partition in partitions:
        # ceil(min_support * len(partition) / total) via integer arithmetic,
        # never below 1 so tiny partitions still contribute candidates.
        local_min = max(1, -(-min_support * len(partition) // total)) if total else 1
        global_candidates.update(_mine_partition(partition, local_min))

    # Phase II: validate every candidate against the entire dataset.
    final_counts = _count_support(transactions, global_candidates)
    return {
        itemset: count
        for itemset, count in final_counts.items()
        if count >= min_support
    }
# Example usage
if __name__ == "__main__":
    # Toy dataset: four transactions split into two partitions of two.
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]
    n_partitions = 2
    min_support = 2

    print("Frequent Itemsets:", partition_algorithm(transactions, n_partitions, min_support))
OUTPUT :
Frequent Itemsets: {frozenset({'butter', 'bread'}): 3, frozenset({'butter'}): 3,
frozenset({'milk'}): 3, frozenset({'bread'}): 4, frozenset({'milk', 'bread'}): 3,
frozenset({'beer', 'bread'}): 3, frozenset({'beer'}): 3, frozenset({'milk', 'beer', 'bread'}): 2,
frozenset({'milk', 'beer'}): 2}
4. PINCER SEARCH ALGORITHM
from itertools import combinations
from collections import defaultdict
def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    joined = set()
    for left in itemsets:
        for right in itemsets:
            union = left.union(right)
            if len(union) == k:
                joined.add(frozenset(union))
    return joined
def calculate_support(transactions, candidates):
    """Count how often each candidate occurs; zero-support candidates get no key."""
    support = defaultdict(int)
    for candidate in candidates:
        for basket in transactions:
            if candidate <= basket:  # subset test
                support[candidate] += 1
    return support
def pincer_search(transactions, min_support):
    """Bottom-up itemset mining with infrequent-subset pruning (Pincer-style).

    Bug fix: the original prune compared the *tuples* produced by
    itertools.combinations against a set of *frozensets*, so the membership
    test was always False and no candidate was ever pruned.  Each subset is
    now wrapped in frozenset() before the lookup, making the prune effective.
    (The final result is unchanged — pruned candidates could never become
    frequent — but the wasted support counting is eliminated.)

    Args:
        transactions: list of transaction sets.
        min_support: minimum absolute support count.

    Returns:
        Dict mapping frozenset itemsets to their support counts.
    """
    frequent_itemsets = {}      # all frequent itemsets found so far
    infrequent_itemsets = set() # itemsets proven infrequent (used for pruning)
    global_support = {}         # support bookkeeping for every candidate examined

    k = 1
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )
    while current_itemsets:
        support_count = calculate_support(transactions, current_itemsets)
        global_support.update(support_count)

        # Split the level into frequent and infrequent itemsets.
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        frequent_itemsets.update(current_frequent)
        infrequent_itemsets.update(
            itemset for itemset, count in support_count.items() if count < min_support
        )

        # Terminate once a level produces no frequent itemsets.
        if not current_frequent:
            break

        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)
        # Prune candidates that contain a known-infrequent (k-1)-subset.
        # combinations() yields tuples, so each subset must be wrapped in
        # frozenset() for the membership test against infrequent_itemsets.
        current_itemsets = {
            candidate
            for candidate in current_itemsets
            if not any(
                frozenset(subset) in infrequent_itemsets
                for subset in combinations(candidate, k - 1)
            )
        }
    return frequent_itemsets
# Example usage
if __name__ == "__main__":
    # Same four-transaction toy dataset used throughout this exercise.
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]
    min_support = 2

    print("Frequent Itemsets:")
    for itemset, support in pincer_search(transactions, min_support).items():
        print(f"{set(itemset)}: {support}")
OUTPUT :
Frequent Itemsets:
{'butter'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer'}: 3
{'butter', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'beer'}: 2
{'milk', 'butter', 'bread'}: 2
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
5. DYNAMIC ITEMSET COUNTING
from itertools import combinations
def calculate_support(transactions, candidates):
    """Return a support count for every candidate (zero-support keys included)."""
    support_count = dict.fromkeys(candidates, 0)
    for basket in transactions:
        for candidate in candidates:
            if candidate <= basket:  # subset test
                support_count[candidate] += 1
    return support_count
def dic_algorithm(transactions, min_support):
    """Dynamic Itemset Counting (simplified level-wise variant).

    Improvement over the original: the `inactive_candidates` set was
    populated every iteration but never read anywhere, so it has been
    removed as dead state.  Behavior is otherwise unchanged.

    Args:
        transactions: list of transaction sets.
        min_support: minimum absolute support count.

    Returns:
        Dict mapping frozenset itemsets to their support counts.
    """
    frequent_itemsets = {}  # all frequent itemsets discovered so far
    k = 1                   # current itemset size
    active_candidates = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )
    while active_candidates:
        support_count = calculate_support(transactions, active_candidates)
        # Keep the active candidates that clear the support threshold.
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        frequent_itemsets.update(current_frequent)

        # Dynamically derive (k+1)-candidates from proven-frequent itemsets,
        # keeping only those whose k-subsets are all frequent (Apriori prune).
        new_candidates = set()
        for itemset in current_frequent:
            for other in frequent_itemsets:
                union = itemset.union(other)
                if len(union) == k + 1 and all(
                    frozenset(subset) in frequent_itemsets
                    for subset in combinations(union, k)
                ):
                    new_candidates.add(union)

        active_candidates = new_candidates
        k += 1
    return frequent_itemsets
# Example usage
if __name__ == "__main__":
    # Four toy market-basket transactions over four items.
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]
    min_support = 2

    print("Frequent Itemsets:")
    for itemset, support in dic_algorithm(transactions, min_support).items():
        print(f"{set(itemset)}: {support}")
OUTPUT :
Frequent Itemsets:
{'butter'}: 3
{'beer'}: 3
{'milk'}: 3
{'bread'}: 4
{'butter', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'beer'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
{'milk', 'butter', 'bread'}: 2