Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 47ae84f

Browse files
committed
fix deprecation warnings for Julia 0.4
1 parent e1a242b commit 47ae84f

File tree

2 files changed

+18
-10
lines changed

2 files changed

+18
-10
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@ document. The space whence the words are drawn is termed the lexicon.
1414

1515
Formally, the model is defined as
1616

17+
```
1718
For each topic k,
1819
phi_k ~ Dirichlet(beta)
1920
For each document d,
2021
theta ~ Dirichlet(alpha)
2122
For each word w,
2223
z ~ Multinomial(theta)
2324
w ~ Multinomial(phi_z)
25+
```
2426

2527
alpha and beta are hyperparameters of the model. The number of topics, K,
2628
is a fixed parameter of the model, and w is observed. This package fits
@@ -31,8 +33,10 @@ the topics using collapsed Gibbs sampling (Griffiths and Steyvers, 2004).
3133
We describe the functions of the package using an example. First we load
3234
corpora from data files as follows:
3335

36+
```
3437
testDocuments = readDocuments(open("cora.documents"))
3538
testLexicon = readLexicon(open("cora.lexicon"))
39+
```
3640

3741
These read files in LDA-C format. The lexicon file is assumed to have one
3842
word per line. The document file consists of one document per line. Each
@@ -45,7 +49,9 @@ the number of tuples for that document.
4549

4650
With the documents loaded, we instantiate a model that we want to train:
4751

52+
```
4853
model = Model(fill(0.1, 10), 0.01, length(testLexicon), testDocuments)
54+
```
4955

5056
This is a model with 10 topics. alpha is set to a uniform Dirichlet prior
5157
with 0.1 weight on each topic (the dimension of this variable is used
@@ -54,7 +60,9 @@ the prior weight on phi (i.e. beta) should be set to 0.01. The third
5460
parameter is the lexicon size; here we just use the lexicon we have
5561
just read. The fourth parameter is the collection of documents.
5662

63+
```
5764
trainModel(testDocuments, model, 30)
65+
```
5866

5967
With the model defined, we can train the model on a corpus of documents.
6068
The trainModel command takes the corpus as the first argument, the model
@@ -64,7 +72,9 @@ will be mutated in place.
6472

6573
Finally we can examine the output of the trained model using topTopicWords.
6674

75+
```
6776
topWords = topTopicWords(model, testLexicon, 10)
77+
```
6878

6979
This function retrieves the top words associated with each topic; this
7080
serves as a useful summary of the model. The first parameter is the model,

src/TopicModels.jl

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ function updateSufficientStatistics(word::Int64,
133133
document::Int64,
134134
scale::Float64,
135135
model::Model)
136-
fr = float64(!model.frozen)
136+
fr = Float64(!model.frozen)
137137
@inbounds model.documentSums[topic, document] += scale
138138
@inbounds model.topicSums[topic] += scale * fr
139139
@inbounds model.topics[topic, word] += scale * fr
@@ -167,9 +167,9 @@ function sampleCorpus(model::Model)
167167
end
168168

169169
# Note, files are zero indexed, but we are 1-indexed.
170-
function termToWordSequence(term::String)
170+
function termToWordSequence(term::AbstractString)
171171
parts = split(term, ":")
172-
fill(int64(parts[1]) + 1, int64(parts[2]))
172+
fill(parse(Int64, parts[1]) + 1, parse(Int64, parts[2]))
173173
end
174174

175175
# The functions below are designed for public consumption
@@ -190,16 +190,15 @@ function topTopicWords(model::Model,
190190
end
191191

192192
function readDocuments(stream)
193-
lines = readlines(stream)
194-
convert(
195-
RaggedMatrix{Int64},
196-
[apply(vcat, [termToWordSequence(term) for term in split(line, " ")[2:end]])
197-
for line in lines])
193+
lines = readlines(stream)
194+
convert(RaggedMatrix{Int64},
195+
[vcat([termToWordSequence(term) for term in split(line, " ")[2:end]]...)
196+
for line in lines])
198197
end
199198

200199
function readLexicon(stream)
201200
lines = readlines(stream)
202-
map(chomp, convert(Array{String,1}, lines))
201+
map(chomp, convert(Array{AbstractString,1}, lines))
203202
end
204203

205204
export Corpus,
@@ -208,5 +207,4 @@ export Corpus,
208207
readLexicon,
209208
topTopicWords,
210209
trainModel
211-
212210
end

0 commit comments

Comments
 (0)