Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f7620eb

Browse files
authored
v1.3 compat (#8)
v1.3 compat fixed lexicon
1 parent 443615f commit f7620eb

File tree

5 files changed

+223
-17
lines changed

5 files changed

+223
-17
lines changed

.DS_Store

6 KB
Binary file not shown.

Manifest.toml

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# This file is machine-generated - editing it directly is not advised
2+
3+
[[Arpack]]
4+
deps = ["Arpack_jll", "Libdl", "LinearAlgebra"]
5+
git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9"
6+
uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97"
7+
version = "0.4.0"
8+
9+
[[Arpack_jll]]
10+
deps = ["Libdl", "OpenBLAS_jll", "Pkg"]
11+
git-tree-sha1 = "68a90a692ddc0eb72d69a6993ca26e2a923bf195"
12+
uuid = "68821587-b530-5797-8361-c406ea357684"
13+
version = "3.5.0+2"
14+
15+
[[Base64]]
16+
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
17+
18+
[[BinaryProvider]]
19+
deps = ["Libdl", "SHA"]
20+
git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c"
21+
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
22+
version = "0.5.8"
23+
24+
[[DataAPI]]
25+
git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
26+
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
27+
version = "1.1.0"
28+
29+
[[DataStructures]]
30+
deps = ["InteractiveUtils", "OrderedCollections"]
31+
git-tree-sha1 = "5a431d46abf2ef2a4d5d00bd0ae61f651cf854c8"
32+
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
33+
version = "0.17.10"
34+
35+
[[Dates]]
36+
deps = ["Printf"]
37+
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
38+
39+
[[Distributed]]
40+
deps = ["Random", "Serialization", "Sockets"]
41+
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
42+
43+
[[Distributions]]
44+
deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
45+
git-tree-sha1 = "6b19601c0e98de3a8964ed33ad73e130c7165b1d"
46+
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
47+
version = "0.22.4"
48+
49+
[[FillArrays]]
50+
deps = ["LinearAlgebra", "Random", "SparseArrays"]
51+
git-tree-sha1 = "85c6b57e2680fa28d5c8adc798967377646fbf66"
52+
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
53+
version = "0.8.5"
54+
55+
[[InteractiveUtils]]
56+
deps = ["Markdown"]
57+
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
58+
59+
[[LibGit2]]
60+
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
61+
62+
[[Libdl]]
63+
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
64+
65+
[[LinearAlgebra]]
66+
deps = ["Libdl"]
67+
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
68+
69+
[[Logging]]
70+
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
71+
72+
[[Markdown]]
73+
deps = ["Base64"]
74+
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
75+
76+
[[Missings]]
77+
deps = ["DataAPI"]
78+
git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
79+
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
80+
version = "0.4.3"
81+
82+
[[OpenBLAS_jll]]
83+
deps = ["Libdl", "Pkg"]
84+
git-tree-sha1 = "e2551d7c25d52f35b76d86a50917a3ba8988f519"
85+
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
86+
version = "0.3.7+5"
87+
88+
[[OpenSpecFun_jll]]
89+
deps = ["Libdl", "Pkg"]
90+
git-tree-sha1 = "65f672edebf3f4e613ddf37db9dcbd7a407e5e90"
91+
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
92+
version = "0.5.3+1"
93+
94+
[[OrderedCollections]]
95+
deps = ["Random", "Serialization", "Test"]
96+
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
97+
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
98+
version = "1.1.0"
99+
100+
[[PDMats]]
101+
deps = ["Arpack", "LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"]
102+
git-tree-sha1 = "5f303510529486bb02ac4d70da8295da38302194"
103+
uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
104+
version = "0.9.11"
105+
106+
[[Pkg]]
107+
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"]
108+
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
109+
110+
[[Printf]]
111+
deps = ["Unicode"]
112+
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
113+
114+
[[QuadGK]]
115+
deps = ["DataStructures", "LinearAlgebra"]
116+
git-tree-sha1 = "dc84e810393cfc6294248c9032a9cdacc14a3db4"
117+
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
118+
version = "2.3.1"
119+
120+
[[REPL]]
121+
deps = ["InteractiveUtils", "Markdown", "Sockets"]
122+
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
123+
124+
[[Random]]
125+
deps = ["Serialization"]
126+
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
127+
128+
[[Rmath]]
129+
deps = ["BinaryProvider", "Libdl", "Random", "Statistics"]
130+
git-tree-sha1 = "2bbddcb984a1d08612d0c4abb5b4774883f6fa98"
131+
uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
132+
version = "0.6.0"
133+
134+
[[SHA]]
135+
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
136+
137+
[[Serialization]]
138+
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
139+
140+
[[Sockets]]
141+
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
142+
143+
[[SortingAlgorithms]]
144+
deps = ["DataStructures", "Random", "Test"]
145+
git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd"
146+
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
147+
version = "0.3.1"
148+
149+
[[SparseArrays]]
150+
deps = ["LinearAlgebra", "Random"]
151+
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
152+
153+
[[SpecialFunctions]]
154+
deps = ["OpenSpecFun_jll"]
155+
git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c"
156+
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
157+
version = "0.10.0"
158+
159+
[[Statistics]]
160+
deps = ["LinearAlgebra", "SparseArrays"]
161+
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
162+
163+
[[StatsBase]]
164+
deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
165+
git-tree-sha1 = "be5c7d45daa449d12868f4466dbf5882242cf2d9"
166+
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
167+
version = "0.32.1"
168+
169+
[[StatsFuns]]
170+
deps = ["Rmath", "SpecialFunctions"]
171+
git-tree-sha1 = "f290ddd5fdedeadd10e961eb3f4d3340f09d030a"
172+
uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
173+
version = "0.9.4"
174+
175+
[[SuiteSparse]]
176+
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
177+
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
178+
179+
[[Test]]
180+
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
181+
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
182+
183+
[[UUIDs]]
184+
deps = ["Random", "SHA"]
185+
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
186+
187+
[[Unicode]]
188+
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

Project.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name = "TopicModels"
2+
uuid = "e9825ca3-3499-4c9b-97dc-a93734876e50"
3+
authors = ["Jonathan Chang <slycoder@gmail.com>"]
4+
version = "0.1.0"
5+
6+
[deps]
7+
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
8+
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
9+
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
10+
11+
[compat]
12+
julia = "1.3"
13+
14+
[extras]
15+
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
16+
17+
[targets]
18+
test = ["Test"]

REQUIRE

Whitespace-only changes.

src/TopicModels.jl

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ module TopicModels
22

33
import Base.length
44

5-
typealias RaggedMatrix{T} Array{Array{T,1},1}
5+
RaggedMatrix{T} = Array{Array{T,1},1}
66

7-
type Corpus
7+
struct Corpus
88
documents::RaggedMatrix{Int64}
99
weights::RaggedMatrix{Float64}
1010

@@ -15,7 +15,7 @@ type Corpus
1515
weights
1616
)
1717
end
18-
18+
1919
Corpus(documents::RaggedMatrix{Int64}) = begin
2020
weights = map(documents) do doc
2121
ones(Float64, length(doc))
@@ -27,7 +27,7 @@ type Corpus
2727
end
2828
end
2929

30-
type Model
30+
struct Model
3131
alphaPrior::Vector{Float64}
3232
betaPrior::Float64
3333
topics::Array{Float64,2}
@@ -37,9 +37,9 @@ type Model
3737
frozen::Bool
3838
corpus::Corpus
3939

40-
Model(alphaPrior::Vector{Float64},
41-
betaPrior::Float64,
42-
V::Int64,
40+
Model(alphaPrior::Vector{Float64},
41+
betaPrior::Float64,
42+
V::Int64,
4343
corpus::Corpus) = begin
4444
K = length(alphaPrior)
4545
m = new(
@@ -48,7 +48,7 @@ type Model
4848
zeros(Float64, K, V), # topics
4949
zeros(Float64, K), # topicSums
5050
zeros(Float64, K, length(corpus.documents)), #documentSums
51-
fill(Array(Int64, 0), length(corpus.documents)), # assignments
51+
Array{Array{Int64,1},1}(undef,length(corpus.documents)), # assignments
5252
false,
5353
corpus
5454
)
@@ -111,8 +111,8 @@ function wordDistribution(word::Int,
111111
out::Vector{Float64})
112112
V = size(model.topics, 2)
113113
for ii in 1:length(out)
114-
u = (model.documentSums[ii, document] + model.alphaPrior[ii]) *
115-
(model.topics[ii, word] + model.betaPrior) /
114+
u = (model.documentSums[ii, document] + model.alphaPrior[ii]) *
115+
(model.topics[ii, word] + model.betaPrior) /
116116
(model.topicSums[ii] + V * model.betaPrior)
117117
@inbounds out[ii] = u
118118
end
@@ -128,10 +128,10 @@ function sampleWord(word::Int,
128128
end
129129

130130

131-
function updateSufficientStatistics(word::Int64,
131+
function updateSufficientStatistics(word::Int64,
132132
topic::Int64,
133133
document::Int64,
134-
scale::Float64,
134+
scale::Float64,
135135
model::Model)
136136
fr = Float64(!model.frozen)
137137
@inbounds model.documentSums[topic, document] += scale
@@ -146,7 +146,7 @@ function sampleDocument(document::Int,
146146
Nw = length(words)
147147
@inbounds weights = model.corpus.weights[document]
148148
K = length(model.alphaPrior)
149-
p = Array(Float64, K)
149+
p = Array{Float64,1}(undef,K)
150150
@inbounds assignments = model.assignments[document]
151151
for ii in 1:Nw
152152
@inbounds word = words[ii]
@@ -170,10 +170,10 @@ end
170170
function termToWordSequence(term::AbstractString)
171171
parts = split(term, ":")
172172
fill(parse(Int64, parts[1]) + 1, parse(Int64, parts[2]))
173-
end
173+
end
174174

175175
# The functions below are designed for public consumption
176-
function trainModel(model::Model,
176+
function trainModel(model::Model,
177177
numIterations::Int64)
178178
for ii in 1:numIterations
179179
println(string("Iteration ", ii, "..."))
@@ -183,7 +183,7 @@ function trainModel(model::Model,
183183
end
184184

185185
function topTopicWords(model::Model,
186-
lexicon::Array{ASCIIString,1},
186+
lexicon::Array{String,1},
187187
numWords::Int64)
188188
[lexicon[reverse(sortperm(model.topics'[1:end, row]))[1:numWords]]
189189
for row in 1:size(model.topics,1)]
@@ -198,7 +198,7 @@ end
198198

199199
function readLexicon(stream)
200200
lines = readlines(stream)
201-
map(chomp, convert(Array{AbstractString,1}, lines))
201+
convert(Array{String,1},map(chomp, convert(Array{AbstractString,1}, lines)))
202202
end
203203

204204
export Corpus,

0 commit comments

Comments
 (0)