Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ indent-after-paren=4
indent-string=' '

# Maximum number of characters on a single line.
max-line-length=100
max-line-length=120

# Maximum number of lines in a module.
max-module-lines=1000
Expand Down Expand Up @@ -419,7 +419,8 @@ disable=raw-checker-failed,
missing-module-docstring,
missing-function-docstring,
invalid-name,
missing-class-docstring
missing-class-docstring,
R0801

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ notebooks/.ipynb_checkpoints
images
cache
.env
*.log

*.png
*__pycache__
.DS_Store
.vscode
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[![CI](https://github.com/alexgiving/LKMeans/actions/workflows/test.yml/badge.svg)](https://github.com/alexgiving/LKMeans/actions/workflows/test.yml)

To activate library
A library that supports the Minkowski metric in the KMeans algorithm.

```
export PYTHONPATH=${PYTHONPATH}:.
export PYTHONPATH=${PYTHONPATH}:$(pwd)
```
9 changes: 3 additions & 6 deletions main.py → experiment_1_100.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,15 @@ def main():
n_clusters = 2
T_parameter = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
minkowski_parameter = [0.2, 0.5, 1, 2]
repeats = 1000
n_points = [100, 500, 1000]
repeats = 5
n_points = [100]

sigma_list = [1, 1]
prob = 0.5
mu_list = [np.array([x + [0] * (dimension-2)])
for x in [[-4, 0], [4, 0]]]

repeats = 2
n_points = [100]
T_parameter = [0, 0.2, 0.4, 0.6, 0.8]
minkowski_parameter = [0.2, 0.5, 2]
minkowski_parameter = [0.2, 0.5, 0.7, 0.9, 1, 2, 5, 10]

for points in n_points:
experiment_name = f'Experiment 1, N_points:{points}'
Expand Down
45 changes: 45 additions & 0 deletions experiment_1_500.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path

import numpy as np

from lib import run_experiment


def main():
    """Configure and launch experiment 1 with 500 points per run.

    Builds the parameter grid and the two cluster means, then calls
    ``run_experiment`` once per entry of ``n_points``. Results are written
    under ``experiments/experiment_1_<points>``.
    """
    experiments_path = Path('experiments')

    dimension = 20
    n_clusters = 2
    # Passed to run_experiment as ``distance_parameters``.
    T_parameter = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    # Single definition of the Minkowski grid; the earlier shorter list was
    # overwritten before use and has been dropped.
    minkowski_parameter = [0.2, 0.5, 0.7, 0.9, 1, 2, 5, 10]
    repeats = 5
    n_points = [500]

    sigma_list = [1, 1]
    prob = 0.5
    # Each mean is a 1 x dimension row: the 2 leading coordinates from the
    # literal below, zero-padded up to ``dimension``.
    mu_list = [np.array([x + [0] * (dimension-2)])
               for x in [[-4, 0], [4, 0]]]

    for points in n_points:
        experiment_name = f'Experiment 1, N_points:{points}'
        output_path = experiments_path / f'experiment_1_{points}'

        run_experiment(
            dimension=dimension,
            n_clusters=n_clusters,
            distance_parameters=T_parameter,
            minkowski_parameters=minkowski_parameter,
            repeats=repeats,
            n_points=points,
            sigma_list=sigma_list,
            prob=prob,
            mu_list=mu_list,
            experiment_name=experiment_name,
            output_path=output_path
        )


if __name__ == '__main__':
    main()
45 changes: 45 additions & 0 deletions experiment_2_100.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path

import numpy as np

from lib import run_experiment


def main():
    """Configure and launch experiment 2 with 100 points per run.

    Builds the parameter grid and the three cluster means, then calls
    ``run_experiment`` once per entry of ``n_points``. Results are written
    under ``experiments/experiment_2_<points>``.
    """
    experiments_path = Path('experiments')

    dimension = 20
    n_clusters = 3
    # Passed to run_experiment as ``distance_parameters``.
    T_parameter = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    # Single definition of the Minkowski grid; the earlier shorter list was
    # overwritten before use and has been dropped.
    minkowski_parameter = [0.2, 0.5, 0.7, 0.9, 1, 2, 5, 10]
    repeats = 5
    n_points = [100]

    sigma_list = [1, 1, 1]
    prob = 1/3
    # Each mean is a 1 x dimension row: the 3 leading coordinates from the
    # literal below, zero-padded up to ``dimension``.
    mu_list = [np.array([x + [0] * (dimension-3)])
               for x in [[4, 0, 0], [0, 4, 0], [0, 0, 4]]]

    for points in n_points:
        experiment_name = f'Experiment 2, N_points:{points}'
        output_path = experiments_path / f'experiment_2_{points}'

        run_experiment(
            dimension=dimension,
            n_clusters=n_clusters,
            distance_parameters=T_parameter,
            minkowski_parameters=minkowski_parameter,
            repeats=repeats,
            n_points=points,
            sigma_list=sigma_list,
            prob=prob,
            mu_list=mu_list,
            experiment_name=experiment_name,
            output_path=output_path
        )


if __name__ == '__main__':
    main()
45 changes: 45 additions & 0 deletions experiment_2_500.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path

import numpy as np

from lib import run_experiment


def main():
    """Configure and launch experiment 2 with 500 points per run.

    Builds the parameter grid and the three cluster means, then calls
    ``run_experiment`` once per entry of ``n_points``. Results are written
    under ``experiments/experiment_2_<points>``.
    """
    experiments_path = Path('experiments')

    dimension = 20
    n_clusters = 3
    # Passed to run_experiment as ``distance_parameters``.
    T_parameter = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    # Single definition of the Minkowski grid; the earlier shorter list was
    # overwritten before use and has been dropped.
    minkowski_parameter = [0.2, 0.5, 0.7, 0.9, 1, 2, 5, 10]
    repeats = 5
    n_points = [500]

    sigma_list = [1, 1, 1]
    prob = 1/3
    # Each mean is a 1 x dimension row: the 3 leading coordinates from the
    # literal below, zero-padded up to ``dimension``.
    mu_list = [np.array([x + [0] * (dimension-3)])
               for x in [[4, 0, 0], [0, 4, 0], [0, 0, 4]]]

    for points in n_points:
        experiment_name = f'Experiment 2, N_points:{points}'
        output_path = experiments_path / f'experiment_2_{points}'

        run_experiment(
            dimension=dimension,
            n_clusters=n_clusters,
            distance_parameters=T_parameter,
            minkowski_parameters=minkowski_parameter,
            repeats=repeats,
            n_points=points,
            sigma_list=sigma_list,
            prob=prob,
            mu_list=mu_list,
            experiment_name=experiment_name,
            output_path=output_path
        )


if __name__ == '__main__':
    main()
1 change: 0 additions & 1 deletion lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from lib.decomposition import get_tsne_clusters
from lib.experiment_metrics import get_average_experiment_metrics
from lib.kmeans import KMeans
from lib.metric_meter import MetricTable, insert_hline
from lib.experiment import run_experiment
137 changes: 100 additions & 37 deletions lib/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
from typing import List

import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

from lib.decomposition import get_tsne_clusters
from lib.experiment_metrics import get_average_experiment_metrics
from lib.kmeans import KMeans
from lib.metric_meter import MetricTable, insert_hline
from lib.metric_meter import (GraphicMeter, MetricMeter, MetricTable,
insert_hline)
from lib.points_generator import generate_mix_distribution
from lib.types import p_type

Expand All @@ -17,6 +18,52 @@ def get_covariance_matrix(sigma: float, dimension: int) -> np.ndarray:
return np.eye(dimension) * sigma


# pylint: disable=too-many-arguments, too-many-locals
def repeat_iteration(
        repeats: int,
        n_clusters: int,
        n_points: int,
        prob: float,
        cov_matrix_list: List,
        t: float,
        mu_list: List[np.ndarray],
        p: p_type,
        makes_plot: bool,
        output_path: Path

):
    """Run ``repeats`` clustering trials for one (t, p) pair and average them.

    Every trial draws a new sample with ``generate_mix_distribution``, fits a
    ``KMeans(n_clusters, p)`` model on it and records ARI, AMI, inertia and
    the wall-clock time of model construction plus ``fit``.

    Args:
        repeats: Number of trials to run.
        n_clusters: Cluster count handed to ``KMeans``.
        n_points: Forwarded to the generator as ``n_samples``.
        prob: Forwarded to the generator as ``probability``.
        cov_matrix_list: Covariance matrices forwarded to the generator.
        t: Forwarded to the generator as ``t``; also used in the plot name.
        mu_list: Cluster means forwarded to the generator.
        p: Minkowski parameter handed to ``KMeans``.
        makes_plot: When true, save the figure from ``get_tsne_clusters``.
        output_path: Directory that receives the saved figure.

    Returns:
        The four values unpacked from ``MetricMeter.get_average()``, in the
        order (ari, ami, inertia, time).
    """
    meter = MetricMeter()

    for _ in range(repeats):
        clusters, labels, centroids = generate_mix_distribution(
            probability=prob,
            mu_list=mu_list,
            cov_matrix_list=cov_matrix_list,
            n_samples=n_points,
            t=t
        )

        # Only model construction and fitting are timed.
        started_at = time.perf_counter()
        kmeans = KMeans(n_clusters=n_clusters, p=p)
        # The generator's centroids are replaced by the fitted ones here.
        centroids, generated_labels = kmeans.fit(clusters)
        elapsed = time.perf_counter() - started_at

        ari_score = adjusted_rand_score(labels, generated_labels)
        ami_score = float(adjusted_mutual_info_score(labels, generated_labels))
        inertia_value = kmeans.inertia(clusters, centroids)
        meter.add_combination(
            ari=ari_score,
            ami=ami_score,
            inertia=inertia_value,
            time=elapsed
        )

    # NOTE(review): indentation was not recoverable from the reviewed text;
    # the plot is assumed to run once, after the loop, on the last trial's
    # data — confirm against the repository.
    if makes_plot:
        # The name depends only on ``t``, so calls sharing ``t`` and
        # ``output_path`` write to the same file.
        figure_name = f'factor_{t:.1f}'.replace('.', '_')
        target = output_path / f'{figure_name}.png'
        fig = get_tsne_clusters(clusters, labels, centroids)
        fig.savefig(str(target), dpi=300, bbox_inches='tight')
        # Close the figure explicitly so it is not kept open between calls.
        plt.close(fig)

    mean_ari, mean_ami, mean_inertia, mean_time = meter.get_average()
    return mean_ari, mean_ami, mean_inertia, mean_time


# pylint: disable= too-many-arguments, too-many-locals
def run_experiment(
dimension: int,
Expand All @@ -38,47 +85,63 @@ def run_experiment(
cov_matrix_list = [get_covariance_matrix(
sigma, dimension) for sigma in sigma_list]

metrics = MetricTable()
for t in distance_parameters:
for p in minkowski_parameters:

repeats_ari = []
repeats_ami = []
repeats_time = []

for _ in range(repeats):

clusters, labels, centroids = generate_mix_distribution(
probability=prob,
mu_list=mu_list,
cov_matrix_list=cov_matrix_list,
n_samples=n_points,
t=t
)
table = MetricTable()

experiment_time = time.perf_counter()
kmeans = KMeans(n_clusters=n_clusters, p=p)
_, generated_labels = kmeans.fit(clusters)
generator = [GraphicMeter(distance_parameters, 't')
for _ in minkowski_parameters]
graphic_t_metrics_dict = dict(zip(minkowski_parameters, generator))

repeats_time.append(time.perf_counter()-experiment_time)
repeats_ari.append(adjusted_rand_score(
labels, generated_labels))
repeats_ami.append(adjusted_mutual_info_score(
labels, generated_labels))
for t in distance_parameters:

name = f'{experiment_name}, T:{t:.1f}, P:{p}'
frame = get_average_experiment_metrics(
repeats_ari, repeats_ami, name=name, time=repeats_time)
metrics.add_frame(frame)
graphic_p_metrics = GraphicMeter(minkowski_parameters, 'p')
for p in minkowski_parameters:

if makes_plot:
figure_name = f'factor_{t:.1f}'.replace('.', '_')
fig = get_tsne_clusters(clusters, labels, centroids)
fig.savefig(output_path / f'{figure_name}.png')
print(metrics.get_table())
average_ari, average_ami, average_inertia, average_time = repeat_iteration(
repeats, n_clusters, n_points, prob,
cov_matrix_list, t, mu_list, p, makes_plot, output_path
)

table.add_to_frame(
ari=average_ari,
ami=average_ami,
inertia=average_inertia,
time=average_time,
name=f'{experiment_name}, T:{t:.1f}, P:{p}'
)

graphic_p_metrics.add_combination(
ari=average_ari,
ami=average_ami,
inertia=average_inertia,
time=average_time
)

graphic_t_metrics_dict[p].add_combination(
ari=average_ari,
ami=average_ami,
inertia=average_inertia,
time=average_time
)

for metric_graph in ['ARI', 'AMI', 'Inertia', 'Time']:
figure_name = f'factor_{t:.1f}_{metric_graph}'.replace('.', '_')
fig = graphic_p_metrics.get_graph(metric_graph)
fig.savefig(
str(output_path / f'{figure_name}.png'), dpi=300, bbox_inches='tight')
plt.close(fig)

print(table.get_table())

for p, graph_t_meter in graphic_t_metrics_dict.items():
for metric in ['ARI', 'AMI', 'Inertia', 'Time']:
figure_name = f'{metric}_by_t_with_p_{p}'.replace('.', '_')
fig = graph_t_meter.get_graph(metric)
fig.savefig(
str(output_path / f'{figure_name}.png'), dpi=300, bbox_inches='tight')
plt.close(fig)

table_name = 'experiment 1'
table = metrics.get_latex_table(caption='Experiment 1')
table = table.get_latex_table(caption='Experiment 1')
table = insert_hline(table)

latex_logs = output_path / f'{table_name.replace(" ", "_")}.tex'
Expand Down
17 changes: 0 additions & 17 deletions lib/experiment_metrics.py

This file was deleted.

Loading