svn-gvsig-desktop / branches / v2_0_0_prep / libraries / libGeocoding / src / org / gvsig / geocoding / impl / LuceneGeocoderImpl.java @ 32091
History | View | Annotate | Download (12 KB)
1 |
/* gvSIG. Geographic Information System of the Valencian Government
|
---|---|
2 |
*
|
3 |
* Copyright (C) 2007-2008 Infrastructures and Transports Department
|
4 |
* of the Valencian Government (CIT)
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 2
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/*
|
24 |
* AUTHORS (In addition to CIT):
|
25 |
* 2008 PRODEVELOP Main development
|
26 |
*/
|
27 |
|
28 |
package org.gvsig.geocoding.impl; |
29 |
|
30 |
import java.io.File; |
31 |
import java.io.IOException; |
32 |
import java.util.ArrayList; |
33 |
import java.util.Iterator; |
34 |
import java.util.List; |
35 |
import java.util.Set; |
36 |
import java.util.TreeSet; |
37 |
|
38 |
import org.apache.lucene.analysis.standard.StandardAnalyzer; |
39 |
import org.apache.lucene.document.Document; |
40 |
import org.apache.lucene.document.Field; |
41 |
import org.apache.lucene.index.CorruptIndexException; |
42 |
import org.apache.lucene.index.IndexWriter; |
43 |
import org.apache.lucene.queryParser.MultiFieldQueryParser; |
44 |
import org.apache.lucene.queryParser.ParseException; |
45 |
import org.apache.lucene.search.Hits; |
46 |
import org.apache.lucene.search.IndexSearcher; |
47 |
import org.apache.lucene.search.Query; |
48 |
import org.apache.lucene.store.LockObtainFailedException; |
49 |
import org.gvsig.fmap.dal.DALLocator; |
50 |
import org.gvsig.fmap.dal.DataManager; |
51 |
import org.gvsig.fmap.dal.DataStore; |
52 |
import org.gvsig.fmap.dal.exception.DataException; |
53 |
import org.gvsig.fmap.dal.feature.Feature; |
54 |
import org.gvsig.fmap.dal.feature.FeatureReference; |
55 |
import org.gvsig.fmap.dal.feature.FeatureSet; |
56 |
import org.gvsig.fmap.dal.feature.FeatureStore; |
57 |
import org.gvsig.fmap.dal.feature.spi.FeatureReferenceProviderServices; |
58 |
import org.gvsig.geocoding.DataGeocoder; |
59 |
import org.gvsig.geocoding.address.Address; |
60 |
import org.gvsig.geocoding.address.ComposedAddress; |
61 |
import org.gvsig.geocoding.address.Literal; |
62 |
import org.gvsig.geocoding.address.NumberAddress; |
63 |
import org.gvsig.geocoding.index.FeatureIndexedEvent; |
64 |
import org.gvsig.geocoding.index.IndexListener; |
65 |
import org.gvsig.geocoding.pattern.GeocodingPattern; |
66 |
import org.gvsig.geocoding.result.GeocodingResult; |
67 |
import org.gvsig.geocoding.result.ScoredFeature; |
68 |
import org.gvsig.geocoding.result.impl.DefaultScoredFeature; |
69 |
import org.gvsig.geocoding.styles.AbstractGeocodingStyle; |
70 |
import org.gvsig.geocoding.styles.impl.Composed; |
71 |
import org.gvsig.geocoding.styles.impl.DoubleRange; |
72 |
import org.gvsig.geocoding.styles.impl.SimpleCentroid; |
73 |
import org.gvsig.geocoding.styles.impl.SimpleRange; |
74 |
import org.gvsig.tools.locator.LocatorException; |
75 |
import org.slf4j.Logger; |
76 |
import org.slf4j.LoggerFactory; |
77 |
|
78 |
/**
|
79 |
* Data geocoder implementation
|
80 |
*
|
81 |
* @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
|
82 |
* @author <a href="mailto:vsanjaime@prodevelop.es"> Vicent Sanjaime Calvet</a>
|
83 |
*/
|
84 |
|
85 |
public class LuceneGeocoderImpl implements DataGeocoder { |
86 |
|
87 |
private Logger log = LoggerFactory.getLogger(LuceneGeocoderImpl.class); |
88 |
private GeocodingPattern pattern = null; |
89 |
private DataStore store = null; |
90 |
@SuppressWarnings("unused") |
91 |
private DataManager manager = null; |
92 |
private int position = 0; |
93 |
|
94 |
|
95 |
private IndexListener indexListener;
|
96 |
|
97 |
/*
|
98 |
* This path is a temporal folder. This code is a copy of the
|
99 |
* libRemoteServices and LibRaster
|
100 |
*/
|
101 |
private static String tempDirectoryPath = System |
102 |
.getProperty("java.io.tmpdir")
|
103 |
+ File.separator + "tmp-andami"; |
104 |
private static String GeocoID = "GEOC_ID"; |
105 |
|
106 |
/**
 * Creates a geocoder without a pattern. The pattern stays {@code null}
 * until {@link #setPattern(GeocodingPattern)} is called.
 */
public LuceneGeocoderImpl() {
    this.manager = DALLocator.getDataManager();
}
114 |
|
115 |
/**
 * Creates a geocoder that works with the given pattern.
 *
 * @param pattern
 *            geocoding pattern to use
 */
public LuceneGeocoderImpl(GeocodingPattern pattern) {
    this.manager = DALLocator.getDataManager();
    this.pattern = pattern;
}
124 |
|
125 |
/**
|
126 |
* set the pattern
|
127 |
*
|
128 |
* @param pattern
|
129 |
*/
|
130 |
public void setPattern(GeocodingPattern pattern) { |
131 |
this.pattern = pattern;
|
132 |
|
133 |
} |
134 |
|
135 |
/**
|
136 |
* Get the pattern
|
137 |
*
|
138 |
* @return pattern
|
139 |
*/
|
140 |
public GeocodingPattern getPattern() {
|
141 |
return this.pattern; |
142 |
|
143 |
} |
144 |
|
145 |
/**
|
146 |
* Set the store
|
147 |
*/
|
148 |
public void setStore(DataStore store) { |
149 |
this.store = store;
|
150 |
|
151 |
} |
152 |
|
153 |
/**
|
154 |
* Get the store
|
155 |
*
|
156 |
* @return
|
157 |
*/
|
158 |
public DataStore getStore() {
|
159 |
return this.store; |
160 |
|
161 |
} |
162 |
|
163 |
/**
|
164 |
*
|
165 |
*/
|
166 |
public Set<GeocodingResult> geocode(Address address) |
167 |
throws LocatorException, DataException {
|
168 |
|
169 |
Set<GeocodingResult> results = new TreeSet<GeocodingResult>(); |
170 |
// get the style
|
171 |
AbstractGeocodingStyle astyle = getPattern().getSource().getStyle(); |
172 |
// get literal with relations with the store
|
173 |
Literal relationsLiteral = astyle.getRelationsLiteral(); |
174 |
// list of list with one or more literal results
|
175 |
List<List<ScoredFeature>> lists = new ArrayList<List<ScoredFeature>>(); |
176 |
// get layer store
|
177 |
|
178 |
// SIMPLE CENTROID
|
179 |
if (astyle instanceof SimpleCentroid) { |
180 |
SimpleCentroid style = (SimpleCentroid) astyle; |
181 |
Literal addressLiteral = address.getMainLiteral(); |
182 |
// literal search
|
183 |
List<ScoredFeature> literalResults = indexSearch(relationsLiteral,
|
184 |
addressLiteral, store); |
185 |
lists.add(literalResults); |
186 |
// Geom search
|
187 |
results = style.match(lists, address); |
188 |
} |
189 |
|
190 |
// SIMPLE RANGE
|
191 |
else if (astyle instanceof SimpleRange) { |
192 |
SimpleRange style = (SimpleRange) astyle; |
193 |
Literal addressLiteral = ((NumberAddress) address).getMainLiteral(); |
194 |
// literal search
|
195 |
List<ScoredFeature> literalResults = indexSearch(relationsLiteral,
|
196 |
addressLiteral, store); |
197 |
lists.add(literalResults); |
198 |
// geom search
|
199 |
results = style.match(lists, address); |
200 |
} |
201 |
|
202 |
// DOUBLE RANGE
|
203 |
else if (astyle instanceof DoubleRange) { |
204 |
DoubleRange style = (DoubleRange) astyle; |
205 |
Literal addressLiteral = ((NumberAddress) address).getMainLiteral(); |
206 |
// literal search
|
207 |
List<ScoredFeature> literalResults = indexSearch(relationsLiteral,
|
208 |
addressLiteral, store); |
209 |
lists.add(literalResults); |
210 |
// number search
|
211 |
results = style.match(lists, address); |
212 |
} |
213 |
|
214 |
// STYLE COMPOSED
|
215 |
else if (astyle instanceof Composed) { |
216 |
Composed style = (Composed) astyle; |
217 |
ComposedAddress cAddress = (ComposedAddress) address; |
218 |
|
219 |
// main literal search
|
220 |
Literal mainAddressLiteral = cAddress.getMainLiteral(); |
221 |
List<ScoredFeature> literalResults = indexSearch(relationsLiteral,
|
222 |
mainAddressLiteral, store); |
223 |
lists.add(literalResults); |
224 |
// search in others literals
|
225 |
List<Literal> intersectslist = cAddress.getIntersectionLiterals();
|
226 |
for (Literal addrLiteral : intersectslist) {
|
227 |
// literal search
|
228 |
List<ScoredFeature> secList = indexSearch(relationsLiteral,
|
229 |
addrLiteral, store); |
230 |
lists.add(secList); |
231 |
} |
232 |
// Match
|
233 |
results = style.match(lists, address); |
234 |
} |
235 |
return results;
|
236 |
} |
237 |
|
238 |
/**
|
239 |
*
|
240 |
* @param relationsLiteral
|
241 |
* @param addressLiteral
|
242 |
* @param store
|
243 |
* @return
|
244 |
* @throws DataException
|
245 |
*/
|
246 |
private List<ScoredFeature> indexSearch(Literal relationsLiteral, |
247 |
Literal addressLiteral, DataStore store) throws DataException {
|
248 |
|
249 |
List<ScoredFeature> scorefeats = new ArrayList<ScoredFeature>(); |
250 |
|
251 |
double minScore = getPattern().getSettings().getScore();
|
252 |
|
253 |
// search
|
254 |
Hits hits = seachOnIndex(relationsLiteral, addressLiteral); |
255 |
|
256 |
// if there are hits in search process
|
257 |
if (hits != null) { |
258 |
// filter results
|
259 |
try {
|
260 |
scorefeats = filterHitsByScore(store, hits, minScore); |
261 |
} catch (Exception e) { |
262 |
log.error("Error filtering hits by score", e);
|
263 |
scorefeats.clear(); |
264 |
} |
265 |
} |
266 |
|
267 |
return scorefeats;
|
268 |
} |
269 |
|
270 |
/**
|
271 |
*
|
272 |
* @param relationsLiteral
|
273 |
* @param addressLiteral
|
274 |
* @return
|
275 |
*/
|
276 |
private Hits seachOnIndex(Literal relationsLiteral, Literal addressLiteral)
|
277 |
{ |
278 |
|
279 |
log.debug("Buscar ...");
|
280 |
|
281 |
IndexSearcher searcher = null;
|
282 |
try {
|
283 |
searcher = new IndexSearcher(tempDirectoryPath);
|
284 |
} catch (Exception e) { |
285 |
log.debug("Error building index searcher", e);
|
286 |
log.debug("Rebuild the index file because the initial index is wrong");
|
287 |
try {
|
288 |
this.indexer(store, relationsLiteral);
|
289 |
} catch (Exception e1) { |
290 |
log.debug("Error building index searcher", e1);
|
291 |
return null; |
292 |
} |
293 |
} |
294 |
if (searcher != null) { |
295 |
String[] input = addressLiteral.getStringValuesArray(); |
296 |
String[] fields = relationsLiteral.getStringValuesArray(); |
297 |
|
298 |
Query query = null; |
299 |
try {
|
300 |
query = MultiFieldQueryParser.parse(input, fields, |
301 |
new StandardAnalyzer());
|
302 |
} catch (ParseException e) { |
303 |
log.debug("Error building the query", e);
|
304 |
return null; |
305 |
} |
306 |
Hits hits = null;
|
307 |
if(query != null){ |
308 |
try {
|
309 |
hits = searcher.search(query); |
310 |
} catch (IOException e) { |
311 |
log.debug("Error in the searching process", e);
|
312 |
return null; |
313 |
} |
314 |
} |
315 |
return hits;
|
316 |
} else {
|
317 |
return null; |
318 |
} |
319 |
} |
320 |
|
321 |
/**
|
322 |
*
|
323 |
* @param store
|
324 |
* @param hits
|
325 |
* @param score
|
326 |
* @return
|
327 |
* @throws IOException
|
328 |
* @throws DataException
|
329 |
*/
|
330 |
private List<ScoredFeature> filterHitsByScore(DataStore store, Hits hits, |
331 |
double score) throws IOException, DataException { |
332 |
|
333 |
FeatureStore fstore = (FeatureStore) store; |
334 |
List<ScoredFeature> scoreFeats = new ArrayList<ScoredFeature>(); |
335 |
for (int i = 0; i < hits.length(); i++) { |
336 |
if (hits.score(i) * 100 > score) { |
337 |
Document doc = hits.doc(i);
|
338 |
String id = doc.get(GeocoID);
|
339 |
FeatureSet fset = fstore.getFeatureSet(); |
340 |
Iterator it = fset.fastIterator((new Long(id)).longValue()); |
341 |
Feature feature = (Feature) it.next(); |
342 |
ScoredFeature scoFeat = new DefaultScoredFeature();
|
343 |
scoFeat.setReference(feature.getReference()); |
344 |
scoFeat.setScore(hits.score(i) * 100);
|
345 |
scoreFeats.add(scoFeat); |
346 |
} |
347 |
} |
348 |
return scoreFeats;
|
349 |
} |
350 |
|
351 |
/**
|
352 |
*
|
353 |
* @param store
|
354 |
* @param relationsLiteral
|
355 |
* @throws DataException
|
356 |
* @throws CorruptIndexException
|
357 |
* @throws LockObtainFailedException
|
358 |
* @throws IOException
|
359 |
*/
|
360 |
public void indexer(DataStore store, Literal relationsLiteral) |
361 |
throws DataException, CorruptIndexException,
|
362 |
LockObtainFailedException, IOException {
|
363 |
|
364 |
log.debug("Create indexes ...");
|
365 |
|
366 |
IndexWriter indexWriter = new IndexWriter(tempDirectoryPath,
|
367 |
new StandardAnalyzer(), true); |
368 |
FeatureStore fstore = (FeatureStore) store; |
369 |
FeatureSet features = fstore.getFeatureSet(); |
370 |
|
371 |
String[] fields = relationsLiteral.getStringValuesArray(); |
372 |
|
373 |
Feature feat = null;
|
374 |
|
375 |
Iterator it = features.fastIterator();
|
376 |
// process situation
|
377 |
this.position = 0; |
378 |
while (it.hasNext()) {
|
379 |
|
380 |
Document doc = new org.apache.lucene.document.Document(); |
381 |
feat = (Feature) it.next(); |
382 |
FeatureReference rfeat = feat.getReference(); |
383 |
FeatureReferenceProviderServices servFeat = (FeatureReferenceProviderServices) rfeat; |
384 |
Object oid = servFeat.getOID();
|
385 |
|
386 |
Field idfield = new Field(GeocoID, oid + "", Field.Store.YES, |
387 |
Field.Index.UN_TOKENIZED);
|
388 |
doc.add(idfield); |
389 |
for (int i = 0; i < fields.length; i++) { |
390 |
String fieldName = (String) fields[i]; |
391 |
String attribute = feat.getString(fieldName);
|
392 |
Field field = new Field(fieldName, attribute, Field.Store.YES, |
393 |
Field.Index.TOKENIZED);
|
394 |
doc.add(field); |
395 |
} |
396 |
indexWriter.addDocument(doc); |
397 |
// throw indexed event
|
398 |
if(this.indexListener != null){ |
399 |
this.indexListener.featureIndexed(new FeatureIndexedEvent(this)); |
400 |
} |
401 |
// new position
|
402 |
this.position++;
|
403 |
} |
404 |
indexWriter.optimize(); |
405 |
} |
406 |
|
407 |
|
408 |
|
409 |
/**
|
410 |
*
|
411 |
* @param listener
|
412 |
*/
|
413 |
public void registreIndexListener(IndexListener listener){ |
414 |
this.indexListener = listener;
|
415 |
} |
416 |
|
417 |
/**
|
418 |
*
|
419 |
* @return
|
420 |
*/
|
421 |
public int getIndexPosition() { |
422 |
return position;
|
423 |
} |
424 |
|
425 |
} |