@@ -50,9 +50,9 @@ class MultipartiteRank(TopicRank):
         stoplist += stopwords.words('english')
         extractor.candidate_selection(pos=pos, stoplist=stoplist)
 
-        # 4. build the Multipartite graph and rank candidates using random walk,
-        #    alpha controls the weight adjustment mechanism, see TopicRank for
-        #    threshold/method parameters.
+        # 4. build the Multipartite graph and rank candidates using random
+        #    walk, alpha controls the weight adjustment mechanism, see
+        #    TopicRank for threshold/method parameters.
         extractor.candidate_weighting(alpha=1.1,
                                       threshold=0.74,
                                       method='average')
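
The hunk above only reflows a comment inside the class-level usage example. For orientation, a minimal end-to-end sketch of the pipeline that example describes, assuming pke's public API around this commit (the input path is a placeholder, and exact signatures can vary between pke releases):

```python
import string

import pke
from nltk.corpus import stopwords

# 1. create a MultipartiteRank extractor
extractor = pke.unsupervised.MultipartiteRank()

# 2. load the content of the document (placeholder path)
extractor.load_document(input='/path/to/input.txt')

# 3. select the longest sequences of nouns and adjectives as candidates,
#    filtered against punctuation marks and stopwords
pos = {'NOUN', 'PROPN', 'ADJ'}
stoplist = list(string.punctuation)
stoplist += stopwords.words('english')
extractor.candidate_selection(pos=pos, stoplist=stoplist)

# 4. build the Multipartite graph and rank candidates using random walk
extractor.candidate_weighting(alpha=1.1, threshold=0.74, method='average')

# 5. get the 10 highest-scored candidates as keyphrases
keyphrases = extractor.get_n_best(n=10)
```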
@@ -82,7 +82,7 @@ def topic_clustering(self,
         Args:
             threshold (float): the minimum similarity for clustering,
                 defaults to 0.74, i.e. more than 1/4 of stem overlap
-                 similarity.
+                similarity.
             method (str): the linkage method, defaults to average.
         """
 
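The clustering this docstring documents is inherited from TopicRank: hierarchical agglomerative clustering over candidate stem sets, cut at the distance threshold. A self-contained sketch of that mechanism with SciPy, on a toy candidate-by-stem incidence matrix (illustrative only, not pke's internal code):

```python
import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial.distance import pdist

# toy binary matrix: one row per candidate, one column per stem
X = np.array([[1, 1, 0, 0],    # "machine learning"
              [1, 1, 1, 0],    # "machine learning model"
              [0, 0, 1, 1]])   # "model checking"

# pairwise distances between candidates (1 - stem overlap similarity)
Y = pdist(X, 'jaccard')

# average-linkage clustering, cut into topics at the distance threshold
Z = linkage(Y, method='average')
topics = fcluster(Z, t=0.74, criterion='distance')
print(topics)  # rows 0 and 1 end up in the same topic, row 2 alone
```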
@@ -125,7 +125,8 @@ def build_topic_graph(self):
         for node_i, node_j in combinations(self.candidates.keys(), 2):
 
             # discard intra-topic edges
-            if self.topic_identifiers[node_i] == self.topic_identifiers[node_j]:
+            if self.topic_identifiers[node_i] \
+                    == self.topic_identifiers[node_j]:
                 continue
 
             weights = []
@@ -136,14 +137,23 @@ def build_topic_graph(self):
                     gap = abs(p_i - p_j)
 
                     # alter gap according to candidate length
+                    # if candidates overlap, gap is 1
                     if p_i < p_j:
-                        gap -= len(self.candidates[node_i].lexical_form) - 1
+                        len_i = len(self.candidates[node_i].lexical_form)
+                        if gap < len_i:
+                            gap = 1
+                        else:
+                            gap -= len_i - 1
                     if p_j < p_i:
-                        gap -= len(self.candidates[node_j].lexical_form) - 1
+                        len_j = len(self.candidates[node_j].lexical_form)
+                        if gap < len_j:
+                            gap = 1
+                        else:
+                            gap -= len_j - 1
 
                     weights.append(1.0 / gap)
 
-             # add weighted edges
+            # add weighted edges
             if weights:
                 # node_i -> node_j
                 self.graph.add_edge(node_i, node_j, weight=sum(weights))
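
The clamp introduced here matters for overlapping or adjacent occurrences: before this change, the length adjustment could drive gap to zero or below, so the subsequent weights.append(1.0 / gap) raised ZeroDivisionError. A standalone sketch of the patched logic (positional_gap is a hypothetical helper for illustration, not part of the module):

```python
def positional_gap(p_i, p_j, len_i, len_j):
    """Offset gap between two candidate occurrences, adjusted for
    candidate length and clamped to 1 when the spans overlap."""
    gap = abs(p_i - p_j)
    if p_i < p_j:
        gap = 1 if gap < len_i else gap - (len_i - 1)
    if p_j < p_i:
        gap = 1 if gap < len_j else gap - (len_j - 1)
    return gap

# pre-patch, a 2-word candidate at offset 10 followed immediately by
# another candidate at offset 11 gave gap = 1 - (2 - 1) = 0, and
# 1.0 / gap blew up; the clamp now yields gap = 1, i.e. the
# strongest possible positional link.
assert positional_gap(10, 11, len_i=2, len_j=1) == 1
```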
@@ -154,8 +164,8 @@ def weight_adjustment(self, alpha=1.1):
         """ Adjust edge weights for boosting some candidates.
 
             Args:
-                alpha (float): hyper-parameter that controls the strength of the
-                    weight adjustment, defaults to 1.1.
+                alpha (float): hyper-parameter that controls the strength of
+                    the weight adjustment, defaults to 1.1.
         """
 
         # weighted_edges = defaultdict(list)
@@ -195,7 +205,8 @@ def weight_adjustment(self, alpha=1.1):
             node_i, node_j = nodes
             position_i = 1.0 / (1 + self.candidates[node_i].offsets[0])
             position_i = math.exp(position_i)
-            self.graph[node_j][node_i]['weight'] += (boosters * alpha * position_i)
+            self.graph[node_j][node_i]['weight'] += (
+                boosters * alpha * position_i)
 
     def candidate_weighting(self,
                             threshold=0.74,
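
Numerically, the increment being wrapped here is boosters * alpha * exp(1 / (1 + first_offset)), so candidates whose topic appears early in the document receive the largest boost. A quick check of the alpha * exp(...) factor (the boosters term is omitted, i.e. treated as 1):

```python
import math

alpha = 1.1
for first_offset in (0, 10, 100):
    boost = alpha * math.exp(1.0 / (1 + first_offset))
    print(first_offset, round(boost, 3))

# 0   -> 2.99    a candidate opening the document is boosted most
# 10  -> 1.205
# 100 -> 1.111   the factor flattens towards alpha * e**0 = 1.1
```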
@@ -207,8 +218,8 @@ def candidate_weighting(self,
             threshold (float): the minimum similarity for clustering,
                 defaults to 0.74.
             method (str): the linkage method, defaults to average.
-            alpha (float): hyper-parameter that controls the strength of the
-                weight adjustment, defaults to 1.1.
+            alpha (float): hyper-parameter that controls the strength of
+                the weight adjustment, defaults to 1.1.
         """
         if not self.candidates:
             return
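
For the overall flow: after the guard above, candidate_weighting chains the stages this commit touches and then scores nodes with PageRank over the multipartite graph. A condensed sketch, assuming pke's structure around this commit (recent networkx exposes the ranking as nx.pagerank; older pke versions called nx.pagerank_scipy):

```python
import networkx as nx

def candidate_weighting(self, threshold=0.74, method='average', alpha=1.1):
    """Sketch of the method body; mirrors the hunks shown above."""
    if not self.candidates:
        return

    # 1. group candidates into topics (topic_clustering, inherited)
    self.topic_clustering(threshold=threshold, method=method)

    # 2. build the graph, skipping intra-topic edges (build_topic_graph)
    self.build_topic_graph()

    # 3. boost the first occurring candidate of each topic
    if alpha > 0.0:
        self.weight_adjustment(alpha)

    # 4. rank candidates with a random walk over the weighted graph
    self.weights = nx.pagerank(self.graph, weight='weight')
```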