Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4549230

Browse files
committed
packaging up aging work related to cleanup on some ML models and their tests
1 parent e56c308 commit 4549230

File tree

13 files changed

+223
-20
lines changed

13 files changed

+223
-20
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ With Tablesaw, you can manipulate half a billion rows on a laptop and over 2 bil
1010
<dependency>
1111
<groupId>com.github.lwhite1</groupId>
1212
<artifactId>tablesaw</artifactId>
13-
<version>0.7.6</version>
13+
<version>0.7.6.3</version>
1414
</dependency>
1515

1616
### Documentation and support:

dependency-reduced-pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<groupId>com.github.lwhite1</groupId>
55
<artifactId>tablesaw</artifactId>
66
<name>Tablesaw</name>
7-
<version>0.7.6</version>
7+
<version>0.7.6.3</version>
88
<description>High-performance Java Dataframe with integrated columnar storage</description>
99
<url>https://jtablesaw.wordpress.com</url>
1010
<developers>

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.github.lwhite1</groupId>
88
<artifactId>tablesaw</artifactId>
9-
<version>0.7.6</version>
9+
<version>0.7.6.3</version>
1010
<name>Tablesaw</name>
1111
<description>High-performance Java Dataframe with integrated columnar storage</description>
1212
<url>https://jtablesaw.wordpress.com</url>

src/main/java/com/github/lwhite1/tablesaw/api/BooleanColumn.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@
2626
import java.util.Iterator;
2727
import java.util.Map;
2828

29-
import static com.github.lwhite1.tablesaw.columns.BooleanColumnUtils.isMissing;
30-
import static com.github.lwhite1.tablesaw.columns.BooleanColumnUtils.isNotMissing;
29+
import static com.github.lwhite1.tablesaw.columns.BooleanColumnUtils.*;
3130

3231
/**
3332
* A column in a base table that contains boolean values
@@ -460,6 +459,15 @@ public int[] toIntArray() {
460459
return output;
461460
}
462461

462+
public IntColumn toIntColumn() {
463+
IntColumn intColumn = IntColumn.create(this.name() + ": ints", size());
464+
ByteArrayList data = data();
465+
for (int i = 0; i < size(); i++) {
466+
intColumn.add(data.getByte(i));
467+
}
468+
return intColumn;
469+
}
470+
463471
static class BooleanColumnIterator implements Iterator<Boolean> {
464472

465473
final ByteIterator iterator;

src/main/java/com/github/lwhite1/tablesaw/api/ml/association/FrequentItemset.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,36 @@ public FrequentItemset(IntColumn sets, CategoryColumn items, double support) {
9696
this.model = new FPGrowth(itemsets, support);
9797
}
9898

99+
public FrequentItemset(ShortColumn sets, CategoryColumn items, double support) {
100+
101+
labelMap = items.dictionaryMap().keyToValueMap();
102+
Table temp = Table.create("temp");
103+
temp.addColumn(sets.copy());
104+
IntColumn encodedItems = items.toIntColumn();
105+
encodedItems.setName(items.name()); // Needs to keep the original column's name: the sort below looks it up by items.name()
106+
temp.addColumn(encodedItems);
107+
temp.sortAscendingOn(sets.name(), items.name());
108+
109+
ViewGroup baskets = temp.splitOn(temp.column(0));
110+
111+
this.setCount = baskets.size();
112+
113+
int[][] itemsets = new int[setCount][];
114+
int basketIndex = 0;
115+
for (TemporaryView basket : baskets) {
116+
IntRBTreeSet set = new IntRBTreeSet(basket.intColumn(1).data());
117+
int itemIndex = 0;
118+
itemsets[basketIndex] = new int[set.size()];
119+
for (int item : set) {
120+
itemsets[basketIndex][itemIndex] = item;
121+
itemIndex++;
122+
}
123+
basketIndex++;
124+
}
125+
126+
this.model = new FPGrowth(itemsets, support);
127+
}
128+
99129
public FrequentItemset(ShortColumn sets, ShortColumn items, double support) {
100130

101131
Table temp = Table.create("temp");

src/main/java/com/github/lwhite1/tablesaw/api/ml/classification/AbstractClassifier.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ public abstract class AbstractClassifier {
99

1010
abstract int predictFromModel(double[] data);
1111

12+
1213
void populateMatrix(int[] labels, ConfusionMatrix confusion, NumericColumn[] predictors) {
1314
double[] data = new double[predictors.length];
1415
for (int row = 0; row < predictors[0].size(); row++) {

src/main/java/com/github/lwhite1/tablesaw/table/Relation.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ default NumericColumn numericColumn(int columnIndex) {
241241
if (c.type() == ColumnType.CATEGORY) {
242242
CategoryColumn categoryColumn = (CategoryColumn) c;
243243
return categoryColumn.toIntColumn();
244+
} else if (c.type() == ColumnType.BOOLEAN) {
245+
BooleanColumn booleanColumn = (BooleanColumn) c;
246+
return booleanColumn.toIntColumn();
244247
}
245248
return (NumericColumn) column(columnIndex);
246249
}
@@ -250,6 +253,9 @@ default NumericColumn numericColumn(String columnName) {
250253
if (c.type() == ColumnType.CATEGORY) {
251254
CategoryColumn categoryColumn = (CategoryColumn) c;
252255
return categoryColumn.toIntColumn();
256+
} else if (c.type() == ColumnType.BOOLEAN) {
257+
BooleanColumn booleanColumn = (BooleanColumn) c;
258+
return booleanColumn.toIntColumn();
253259
}
254260
return (NumericColumn) column(columnName);
255261
}

src/main/java/com/github/lwhite1/tablesaw/util/DictionaryMap.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,5 +91,4 @@ public Int2ObjectMap<String> keyToValueMap() {
9191
public Object2IntMap<String> valueToKeyMap() {
9292
return valueToKey;
9393
}
94-
9594
}

src/test/java/com/github/lwhite1/tablesaw/api/FloatColumnTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ public void testCountMissing() {
273273
assertEquals(10, floats.countMissing());
274274
}
275275

276+
276277
@Test
277278
public void testCountUnique() {
278279
FloatColumn floats = new FloatColumn("floats", 10);

src/test/java/com/github/lwhite1/tablesaw/api/ml/association/FrequentItemsetExample.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package com.github.lwhite1.tablesaw.api.ml.association;
22

3+
import com.github.lwhite1.tablesaw.api.CategoryColumn;
4+
import com.github.lwhite1.tablesaw.api.ShortColumn;
35
import com.github.lwhite1.tablesaw.api.Table;
46
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
57
import it.unimi.dsi.fastutil.objects.Object2DoubleMap;
@@ -17,7 +19,18 @@ public static void main(String[] args) throws Exception {
1719

1820
Table table = Table.createFromCsv("data/movielens.data", true, '\t');
1921
out(table.structure().print());
20-
FrequentItemset model = new FrequentItemset(table.shortColumn("user"), table.shortColumn("movie"), .24);
22+
out(table.shape());
23+
ShortColumn movie = table.shortColumn("movie");
24+
CategoryColumn moviecat = CategoryColumn.create("MovieCat");
25+
for (int i = 0; i < movie.size(); i++) {
26+
moviecat.addCell(movie.getString(i));
27+
}
28+
table.addColumn(moviecat);
29+
30+
out(table.shortColumn("user").unique().size());
31+
out(table.shortColumn("movie").unique().size());
32+
33+
FrequentItemset model = new FrequentItemset(table.shortColumn("user"), table.categoryColumn("MovieCat"), .24);
2134
List<ItemSet> itemSetList = model.learn();
2235

2336
out("Frequent Itemsets");

0 commit comments

Comments
 (0)