svn-gvsig-desktop / branches / v2_0_0_prep / libraries / org.gvsig.geocoding / src-test / org / gvsig / geocoding / index / LuceneIndexDataStoreTest.java @ 32479
History | View | Annotate | Download (8.35 KB)
1 |
/* gvSIG. Geographic Information System of the Valencian Government
|
---|---|
2 |
*
|
3 |
* Copyright (C) 2007-2008 Infrastructures and Transports Department
|
4 |
* of the Valencian Government (CIT)
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 2
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/*
|
24 |
* AUTHORS (In addition to CIT):
|
25 |
* 2008 Prodevelop S.L vsanjaime programador
|
26 |
*/
|
27 |
|
28 |
package org.gvsig.geocoding.index; |
29 |
|
30 |
import java.io.File; |
31 |
import java.util.Iterator; |
32 |
|
33 |
import org.apache.lucene.analysis.snowball.SnowballAnalyzer; |
34 |
import org.apache.lucene.analysis.standard.StandardAnalyzer; |
35 |
import org.apache.lucene.document.Document; |
36 |
import org.apache.lucene.document.Field; |
37 |
import org.apache.lucene.index.IndexWriter; |
38 |
import org.apache.lucene.index.Term; |
39 |
import org.apache.lucene.queryParser.MultiFieldQueryParser; |
40 |
import org.apache.lucene.search.BooleanClause; |
41 |
import org.apache.lucene.search.BooleanQuery; |
42 |
import org.apache.lucene.search.FuzzyQuery; |
43 |
import org.apache.lucene.search.Hits; |
44 |
import org.apache.lucene.search.IndexSearcher; |
45 |
import org.apache.lucene.search.Query; |
46 |
import org.gvsig.LayerServer; |
47 |
import org.gvsig.fmap.dal.feature.Feature; |
48 |
import org.gvsig.fmap.dal.feature.FeatureReference; |
49 |
import org.gvsig.fmap.dal.feature.FeatureSet; |
50 |
import org.gvsig.fmap.dal.feature.FeatureStore; |
51 |
import org.gvsig.fmap.dal.feature.spi.FeatureReferenceProviderServices; |
52 |
import org.gvsig.geocoding.address.Literal; |
53 |
import org.gvsig.geocoding.address.impl.DefaultLiteral; |
54 |
import org.gvsig.geocoding.pattern.GeocodingPattern; |
55 |
import org.gvsig.geocoding.pattern.GeocodingSource; |
56 |
import org.gvsig.geocoding.pattern.impl.DefaultGeocodingPattern; |
57 |
import org.gvsig.geocoding.pattern.impl.DefaultGeocodingSource; |
58 |
import org.gvsig.geocoding.styles.AbstractGeocodingStyle; |
59 |
import org.gvsig.geocoding.styles.impl.SimpleCentroid; |
60 |
import org.slf4j.Logger; |
61 |
import org.slf4j.LoggerFactory; |
62 |
|
63 |
/**
|
64 |
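* Test that indexes the attributes of a street layer with Lucene and then
* runs a sample query against the resulting index.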
|
65 |
*
|
66 |
* @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
|
67 |
* @author <a href="mailto:vsanjaime@prodevelop.es"> Vicent Sanjaime Calvet</a>
|
68 |
*/
|
69 |
public class LuceneIndexDataStoreTest extends |
70 |
org.gvsig.tools.junit.AbstractLibraryAutoInitTestCase { |
71 |
|
72 |
private static final Logger log = LoggerFactory |
73 |
.getLogger(LuceneIndexDataStoreTest.class); |
74 |
|
75 |
@Override
|
76 |
protected void doSetUp() throws Exception { |
77 |
// TODO Auto-generated method stub
|
78 |
|
79 |
} |
80 |
|
81 |
public void testSnowballIndex() throws Exception { |
82 |
log.debug("///////////// SNOWBALL TEST ...");
|
83 |
log.debug("Load pattern ...");
|
84 |
GeocodingPattern pat = getPattern(); |
85 |
|
86 |
log.debug("Load data store ...");
|
87 |
FeatureStore store = LayerServer.getSHPStore(LayerServer |
88 |
.getStreetsFile()); |
89 |
|
90 |
log.debug("Create indexes ...");
|
91 |
File tmpFile = new File("/home/vsanjaime/tmp/index"); |
92 |
IndexWriter indexWriter = new IndexWriter(tmpFile.getAbsolutePath(),
|
93 |
new SnowballAnalyzer("Spanish"), true); |
94 |
FeatureSet features = store.getFeatureSet(); |
95 |
Iterator it = features.fastIterator();
|
96 |
|
97 |
Literal lfields = pat.getDataSource().getStyle().getRelationsLiteral(); |
98 |
String[] fields = lfields.getStringValuesArray(); |
99 |
|
100 |
Feature feat = null;
|
101 |
|
102 |
|
103 |
while (it.hasNext()) {
|
104 |
|
105 |
Document doc = new org.apache.lucene.document.Document(); |
106 |
feat = (Feature) it.next(); |
107 |
|
108 |
|
109 |
FeatureReference ref = feat.getReference(); |
110 |
FeatureReferenceProviderServices serv = (FeatureReferenceProviderServices)ref; |
111 |
// Field field = new Field("id", attribute, Field.Store.YES,
|
112 |
// Field.Index.UTOKENIZED);
|
113 |
// doc.add(field);
|
114 |
for (int i = 0; i < fields.length; i++) { |
115 |
String fieldName = (String) fields[i]; |
116 |
String attribute = feat.getString(fieldName);
|
117 |
Field field = new Field(fieldName, attribute, Field.Store.YES, |
118 |
Field.Index.TOKENIZED);
|
119 |
|
120 |
doc.add(field); |
121 |
|
122 |
} |
123 |
//log.info("Field 1: "+doc.getFields().get(0).toString()+"Field 2: "+doc.getFields().get(1).toString());
|
124 |
indexWriter.addDocument(doc); |
125 |
} |
126 |
indexWriter.optimize(); |
127 |
|
128 |
log.debug("Buscar ...");
|
129 |
String path = tmpFile.getAbsolutePath();
|
130 |
IndexSearcher searcher = new IndexSearcher(path);
|
131 |
|
132 |
String[] input = new String[] { "castr","valencia" }; |
133 |
|
134 |
Query query = MultiFieldQueryParser.parse(input, fields,
|
135 |
new SnowballAnalyzer("Spanish")); |
136 |
Hits hits = searcher.search(query); |
137 |
for (int i = 0; i < 20; i++) { |
138 |
Document docu = hits.doc(i);
|
139 |
|
140 |
log.debug(docu.get("GID") + "\t " + docu.get("STREET_NAM") +" - "+docu.get("PROV") + "(" |
141 |
+ hits.score(i) + ")");
|
142 |
} |
143 |
log.debug("Resultados: " + hits.length());
|
144 |
log.debug("FIN ....");
|
145 |
} |
146 |
|
147 |
// public void testStandardIndex() throws Exception {
|
148 |
// log.info("///////////// STANDARD TEST ...");
|
149 |
// log.info("Load pattern ...");
|
150 |
// GeocodingPattern pat = getPattern();
|
151 |
//
|
152 |
// log.info("Load data store ...");
|
153 |
// FeatureStore store = LayerServer.getSHPStore(LayerServer
|
154 |
// .getStreetsFile());
|
155 |
//
|
156 |
// log.info("Create indexes ...");
|
157 |
// File tmpFile = new File("/home/vsanjaime/tmp/index");
|
158 |
// IndexWriter indexWriter = new IndexWriter(tmpFile.getAbsolutePath(),
|
159 |
// new StandardAnalyzer(), true);
|
160 |
// FeatureSet features = store.getFeatureSet();
|
161 |
// Iterator it = features.fastIterator();
|
162 |
//
|
163 |
// Literal lfields = pat.getSource().getStyle().getRelationsLiteral();
|
164 |
// String[] fields = lfields.getStringValuesArray();
|
165 |
//
|
166 |
// Feature feat = null;
|
167 |
//
|
168 |
// int idd = 0;
|
169 |
// while (it.hasNext()) {
|
170 |
//
|
171 |
// Document doc = new org.apache.lucene.document.Document();
|
172 |
// feat = (Feature) it.next();
|
173 |
// Field idfield = new Field("GID", idd+"", Field.Store.YES,
|
174 |
// Field.Index.UN_TOKENIZED);
|
175 |
// doc.add(idfield);
|
176 |
// for (int i = 0; i < fields.length; i++) {
|
177 |
// String fieldName = (String) fields[i];
|
178 |
// String attribute = feat.getString(fieldName);
|
179 |
// Field field = new Field(fieldName, attribute, Field.Store.YES,
|
180 |
// Field.Index.TOKENIZED);
|
181 |
// doc.add(field);
|
182 |
//
|
183 |
// }
|
184 |
// indexWriter.addDocument(doc);
|
185 |
// idd++;
|
186 |
// }
|
187 |
// indexWriter.optimize();
|
188 |
//
|
189 |
// log.info("Buscar ...");
|
190 |
// String path = tmpFile.getAbsolutePath();
|
191 |
// IndexSearcher searcher = new IndexSearcher(path);
|
192 |
//
|
193 |
//// String[] input = new String[] { "guilem","valenci" };
|
194 |
//
|
195 |
//// Query query = MultiFieldQueryParser.parse(input, fields,
|
196 |
//// new StandardAnalyzer());
|
197 |
//
|
198 |
// Query consulta = new BooleanQuery();
|
199 |
// BooleanQuery boolConsulta = (BooleanQuery) consulta;
|
200 |
//// boolConsulta.add(
|
201 |
//// new FuzzyQuery(new Term("STREET_NAM", "na"), 0.5f),
|
202 |
//// BooleanClause.Occur.SHOULD);
|
203 |
// boolConsulta.add(
|
204 |
// new FuzzyQuery(new Term("STREET_NAM", "jordana"), 0.5f),
|
205 |
// BooleanClause.Occur.SHOULD);
|
206 |
// boolConsulta.add(
|
207 |
// new FuzzyQuery(new Term("PROV", "valencia"), 0.5f),
|
208 |
// BooleanClause.Occur.SHOULD);
|
209 |
//
|
210 |
// Hits hits = searcher.search(consulta);
|
211 |
// for (int i = 0; i < 20; i++) {
|
212 |
// Document docu = hits.doc(i);
|
213 |
//
|
214 |
// log.info(docu.get("GID") + "\t " + docu.get("STREET_NAM") +" - "+docu.get("PROV") + "("
|
215 |
// + hits.score(i) + ")");
|
216 |
// }
|
217 |
// log.info("Resultados: " + hits.length());
|
218 |
// log.info("FIN ....");
|
219 |
// }
|
220 |
|
221 |
/**
|
222 |
* get pattern
|
223 |
*
|
224 |
* @return
|
225 |
*/
|
226 |
private GeocodingPattern getPattern() {
|
227 |
|
228 |
GeocodingPattern pat = new DefaultGeocodingPattern();
|
229 |
|
230 |
try {
|
231 |
pat.setPatternName("indexLucenePattern");
|
232 |
|
233 |
GeocodingSource source = new DefaultGeocodingSource();
|
234 |
source.setLayerName("Streets.shp");
|
235 |
source.setLayerProvider("Shape file");
|
236 |
|
237 |
AbstractGeocodingStyle style = new SimpleCentroid();
|
238 |
Literal relations = new DefaultLiteral();
|
239 |
relations.put("Calle", "STREET_NAM"); |
240 |
relations.put("Provincia", "PROV"); |
241 |
|
242 |
style.setRelationsLiteral(relations); |
243 |
source.setStyle(style); |
244 |
|
245 |
pat.setSource(source); |
246 |
|
247 |
} catch (Exception e) { |
248 |
log.error("Building a pattern", e);
|
249 |
} |
250 |
|
251 |
return pat;
|
252 |
} |
253 |
|
254 |
} |