-
Notifications
You must be signed in to change notification settings - Fork 96
Expand file tree
/
Copy pathTestWord2Vec.java
More file actions
62 lines (47 loc) · 1.62 KB
/
Copy pathTestWord2Vec.java
File metadata and controls
62 lines (47 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package test;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.nlp.util.LineIterator;
import org.nlp.util.Tokenizer;
import org.nlp.vec.VectorModel;
import org.nlp.vec.Word2Vec;
/**
 * Manual smoke test for the Word2Vec implementation: feeds a text corpus to a
 * {@link Word2Vec} trainer, saves the resulting model, reloads it as a
 * {@link VectorModel} and prints the result of a similarity query.
 *
 * @author siegfang
 */
public class TestWord2Vec {

    /** Corpus location used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_TEXT_FILE_PATH = "D:/data/corpus.dat";

    /** Model location used by {@link #main(String[])} when no second argument is supplied. */
    private static final String DEFAULT_MODEL_FILE_PATH = "D:/data/corpus.nn";

    /** Word whose neighbours are printed by {@link #testVector(String)}. */
    private static final String QUERY_WORD = "亲";

    /**
     * Reads the corpus line by line, trains a model and writes it to disk.
     *
     * <p>The trainer is built with the {@code Skip_Gram} method and one thread.
     * Each line is wrapped in a {@link Tokenizer} together with a single space
     * (presumably the token delimiter -- confirm against {@code Tokenizer}) and
     * handed to {@code readTokens}.
     *
     * <p>The corpus is decoded as UTF-8 regardless of the platform default
     * charset, so the same file yields the same tokens on every machine.
     *
     * @param textFilePath  path of the text corpus to read
     * @param modelFilePath path the trained model is saved to
     * @throws IllegalStateException if the corpus cannot be opened or read; the
     *         original {@link IOException} is attached as the cause, and neither
     *         training nor saving is attempted
     */
    public static void readByJava(String textFilePath, String modelFilePath) {
        Word2Vec wv = new Word2Vec.Factory()
                .setMethod(Word2Vec.Method.Skip_Gram)
                .setNumOfThread(1)
                .build();

        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(textFilePath), StandardCharsets.UTF_8))) {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                wv.readTokens(new Tokenizer(line, " "));
            }
        } catch (IOException ioe) {
            // Abort instead of training on a partial corpus and overwriting the model file.
            throw new IllegalStateException(
                    "Failed to read corpus file: " + textFilePath, ioe);
        }

        wv.training();
        wv.saveModel(new File(modelFilePath));
    }

    /**
     * Loads a saved model and prints every entry returned by
     * {@code similar} for the fixed query word, one per line, as
     * {@code name :<TAB>score}.
     *
     * @param modelFilePath path of the model file to load
     */
    public static void testVector(String modelFilePath) {
        VectorModel vm = VectorModel.loadFromFile(modelFilePath);
        Set<VectorModel.WordScore> result1 = vm.similar(QUERY_WORD);
        for (VectorModel.WordScore we : result1) {
            System.out.println(we.name + " :\t" + we.score);
        }
    }

    /**
     * Trains a model from the corpus and then runs the similarity query on it.
     *
     * @param args optional overrides: {@code args[0]} is the corpus path and
     *             {@code args[1]} the model path; any argument that is absent
     *             falls back to the built-in default path
     */
    public static void main(String[] args) {
        String textFilePath = args.length > 0 ? args[0] : DEFAULT_TEXT_FILE_PATH;
        String modelFilePath = args.length > 1 ? args[1] : DEFAULT_MODEL_FILE_PATH;
        readByJava(textFilePath, modelFilePath);
        testVector(modelFilePath);
    }
}