Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Learning: Naive Bayes Classifier #618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,13 +306,35 @@ def predict(example):
# ______________________________________________________________________________


def NaiveBayesLearner(dataset, continuous=True):
def NaiveBayesLearner(dataset, continuous=True, simple=False):
    """Dispatch to one of the naive Bayes learner variants.

    When `simple` is true, `dataset` is handed to NaiveBayesSimple and the
    `continuous` flag is ignored. Otherwise `continuous` selects between
    NaiveBayesContinuous (true) and NaiveBayesDiscrete (false). The result
    of the chosen constructor is returned unchanged.
    """
    # The simple variant takes precedence over the continuous/discrete choice.
    if simple:
        return NaiveBayesSimple(dataset)
    if continuous:
        return NaiveBayesContinuous(dataset)
    return NaiveBayesDiscrete(dataset)


def NaiveBayesSimple(distribution):
    """Build a naive Bayes classifier from ready-made per-class distributions.

    `distribution` is a dictionary whose keys are (ClassName, ClassProb)
    tuples and whose values are CountingProbDist objects, one per class.
    Returns a `predict(example)` function that picks the class name with
    the highest score.
    """
    # Split the composite (name, prior) keys into two lookups keyed by name.
    priors = {}
    likelihoods = {}
    for (label, prior), dist in distribution.items():
        priors[label] = prior
        likelihoods[label] = dist

    def predict(example):
        """Return the class name whose prior times the product of the
        per-item probabilities of `example` is largest."""
        def score(label):
            per_class = likelihoods[label]
            # Each item of the example is looked up in this class's distribution.
            return priors[label] * product(per_class[item] for item in example)

        return argmax(priors.keys(), key=score)

    return predict


def NaiveBayesDiscrete(dataset):
"""Just count how many times each value of each input attribute
occurs, conditional on the target value. Count the different
Expand Down
14 changes: 14 additions & 0 deletions tests/test_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,20 @@ def test_naive_bayes():
assert nBC([6, 5, 3, 1.5]) == "versicolor"
assert nBC([7, 3, 6.5, 2]) == "virginica"

# Simple
data1 = 'a'*50 + 'b'*30 + 'c'*15
dist1 = CountingProbDist(data1)
data2 = 'a'*30 + 'b'*45 + 'c'*20
dist2 = CountingProbDist(data2)
data3 = 'a'*20 + 'b'*20 + 'c'*35
dist3 = CountingProbDist(data3)

dist = {('First', 0.5): dist1, ('Second', 0.3): dist2, ('Third', 0.2): dist3}
nBS = NaiveBayesLearner(dist, simple=True)
assert nBS('aab') == 'First'
assert nBS(['b', 'b']) == 'Second'
assert nBS('ccbcc') == 'Third'


def test_k_nearest_neighbors():
iris = DataSet(name="iris")
Expand Down