Statistics
| Revision:

svn-gvsig-desktop / branches / v2_0_0_prep / libraries / org.gvsig.geocoding / src-test / org / gvsig / geocoding / index / LuceneIndexDataStoreTest.java @ 32479

History | View | Annotate | Download (8.35 KB)

1
/* gvSIG. Geographic Information System of the Valencian Government
2
 *
3
 * Copyright (C) 2007-2008 Infrastructures and Transports Department
4
 * of the Valencian Government (CIT)
5
 * 
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 * 
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 * 
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
19
 * MA  02110-1301, USA.
20
 * 
21
 */
22

    
23
/*
24
 * AUTHORS (In addition to CIT):
25
 * 2008 Prodevelop S.L  vsanjaime   programador
26
 */
27

    
28
package org.gvsig.geocoding.index;
29

    
30
import java.io.File;
31
import java.util.Iterator;
32

    
33
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
34
import org.apache.lucene.analysis.standard.StandardAnalyzer;
35
import org.apache.lucene.document.Document;
36
import org.apache.lucene.document.Field;
37
import org.apache.lucene.index.IndexWriter;
38
import org.apache.lucene.index.Term;
39
import org.apache.lucene.queryParser.MultiFieldQueryParser;
40
import org.apache.lucene.search.BooleanClause;
41
import org.apache.lucene.search.BooleanQuery;
42
import org.apache.lucene.search.FuzzyQuery;
43
import org.apache.lucene.search.Hits;
44
import org.apache.lucene.search.IndexSearcher;
45
import org.apache.lucene.search.Query;
46
import org.gvsig.LayerServer;
47
import org.gvsig.fmap.dal.feature.Feature;
48
import org.gvsig.fmap.dal.feature.FeatureReference;
49
import org.gvsig.fmap.dal.feature.FeatureSet;
50
import org.gvsig.fmap.dal.feature.FeatureStore;
51
import org.gvsig.fmap.dal.feature.spi.FeatureReferenceProviderServices;
52
import org.gvsig.geocoding.address.Literal;
53
import org.gvsig.geocoding.address.impl.DefaultLiteral;
54
import org.gvsig.geocoding.pattern.GeocodingPattern;
55
import org.gvsig.geocoding.pattern.GeocodingSource;
56
import org.gvsig.geocoding.pattern.impl.DefaultGeocodingPattern;
57
import org.gvsig.geocoding.pattern.impl.DefaultGeocodingSource;
58
import org.gvsig.geocoding.styles.AbstractGeocodingStyle;
59
import org.gvsig.geocoding.styles.impl.SimpleCentroid;
60
import org.slf4j.Logger;
61
import org.slf4j.LoggerFactory;
62

    
63
/**
64
 * Test
65
 * 
66
 * @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
67
 * @author <a href="mailto:vsanjaime@prodevelop.es"> Vicent Sanjaime Calvet</a>
68
 */
69
public class LuceneIndexDataStoreTest extends
70
                org.gvsig.tools.junit.AbstractLibraryAutoInitTestCase {
71

    
72
        private static final Logger log = LoggerFactory
73
                        .getLogger(LuceneIndexDataStoreTest.class);
74

    
75
        @Override
76
        protected void doSetUp() throws Exception {
77
                // TODO Auto-generated method stub
78

    
79
        }
80

    
81
        public void testSnowballIndex() throws Exception {
82
                log.debug("/////////////  SNOWBALL TEST ...");
83
                log.debug("Load pattern ...");
84
                GeocodingPattern pat = getPattern();
85

    
86
                log.debug("Load data store ...");
87
                FeatureStore store = LayerServer.getSHPStore(LayerServer
88
                                .getStreetsFile());
89

    
90
                log.debug("Create indexes ...");
91
                File tmpFile = new File("/home/vsanjaime/tmp/index");
92
                IndexWriter indexWriter = new IndexWriter(tmpFile.getAbsolutePath(),
93
                                new SnowballAnalyzer("Spanish"), true);
94
                FeatureSet features = store.getFeatureSet();
95
                Iterator it = features.fastIterator();
96

    
97
                Literal lfields = pat.getDataSource().getStyle().getRelationsLiteral();
98
                String[] fields = lfields.getStringValuesArray();
99

    
100
                Feature feat = null;
101

    
102
                
103
                while (it.hasNext()) {
104
                        
105
                        Document doc = new org.apache.lucene.document.Document();
106
                        feat = (Feature) it.next();
107
                        
108
                        
109
                        FeatureReference ref = feat.getReference();
110
                        FeatureReferenceProviderServices serv = (FeatureReferenceProviderServices)ref;
111
//                        Field field = new Field("id", attribute, Field.Store.YES,
112
//                                        Field.Index.UTOKENIZED);
113
//                        doc.add(field);
114
                        for (int i = 0; i < fields.length; i++) {
115
                                String fieldName = (String) fields[i];
116
                                String attribute = feat.getString(fieldName);
117
                                Field field = new Field(fieldName, attribute, Field.Store.YES,
118
                                                Field.Index.TOKENIZED);
119
                                
120
                                doc.add(field);
121

    
122
                        }
123
                        //log.info("Field 1: "+doc.getFields().get(0).toString()+"Field 2: "+doc.getFields().get(1).toString());
124
                        indexWriter.addDocument(doc);
125
                }
126
                indexWriter.optimize();
127

    
128
                log.debug("Buscar ...");
129
                String path = tmpFile.getAbsolutePath();
130
                IndexSearcher searcher = new IndexSearcher(path);
131

    
132
                String[] input = new String[] { "castr","valencia" };
133

    
134
                Query query = MultiFieldQueryParser.parse(input, fields,
135
                                new SnowballAnalyzer("Spanish"));
136
                Hits hits = searcher.search(query);
137
                for (int i = 0; i < 20; i++) {
138
                         Document docu = hits.doc(i);
139
                        
140
                         log.debug(docu.get("GID") + "\t " + docu.get("STREET_NAM") +" - "+docu.get("PROV") + "("
141
                         + hits.score(i) + ")");
142
                }
143
                log.debug("Resultados: " + hits.length());
144
                log.debug("FIN ....");
145
        }
146
        
147
//        public void testStandardIndex() throws Exception {
148
//                log.info("/////////////  STANDARD TEST ...");
149
//                log.info("Load pattern ...");
150
//                GeocodingPattern pat = getPattern();
151
//
152
//                log.info("Load data store ...");
153
//                FeatureStore store = LayerServer.getSHPStore(LayerServer
154
//                                .getStreetsFile());
155
//
156
//                log.info("Create indexes ...");
157
//                File tmpFile = new File("/home/vsanjaime/tmp/index");
158
//                IndexWriter indexWriter = new IndexWriter(tmpFile.getAbsolutePath(),
159
//                                new StandardAnalyzer(), true);
160
//                FeatureSet features = store.getFeatureSet();
161
//                Iterator it = features.fastIterator();
162
//
163
//                Literal lfields = pat.getSource().getStyle().getRelationsLiteral();
164
//                String[] fields = lfields.getStringValuesArray();
165
//
166
//                Feature feat = null;
167
//
168
//                int idd = 0;
169
//                while (it.hasNext()) {
170
//                        
171
//                        Document doc = new org.apache.lucene.document.Document();
172
//                        feat = (Feature) it.next();
173
//                        Field idfield = new Field("GID", idd+"", Field.Store.YES,
174
//                                        Field.Index.UN_TOKENIZED);                                
175
//                        doc.add(idfield);
176
//                        for (int i = 0; i < fields.length; i++) {
177
//                                String fieldName = (String) fields[i];
178
//                                String attribute = feat.getString(fieldName);
179
//                                Field field = new Field(fieldName, attribute, Field.Store.YES,
180
//                                                Field.Index.TOKENIZED);                                
181
//                                doc.add(field);
182
//
183
//                        }                        
184
//                        indexWriter.addDocument(doc);
185
//                        idd++;
186
//                }
187
//                indexWriter.optimize();
188
//
189
//                log.info("Buscar ...");
190
//                String path = tmpFile.getAbsolutePath();
191
//                IndexSearcher searcher = new IndexSearcher(path);
192
//
193
////                String[] input = new String[] { "guilem","valenci" };
194
//
195
////                Query query = MultiFieldQueryParser.parse(input, fields,
196
////                                new StandardAnalyzer());
197
//                
198
//                Query consulta = new BooleanQuery();
199
//                 BooleanQuery boolConsulta = (BooleanQuery) consulta;
200
////                 boolConsulta.add(
201
////                 new FuzzyQuery(new Term("STRET_NAM", "na"), 0.5f),
202
////                 BooleanClause.Occur.SHOULD);
203
//                 boolConsulta.add(
204
//                 new FuzzyQuery(new Term("STRET_NAM", "jordana"), 0.5f),
205
//                 BooleanClause.Occur.SHOULD);
206
//                 boolConsulta.add(
207
//                 new FuzzyQuery(new Term("PROV", "valencia"), 0.5f),
208
//                 BooleanClause.Occur.SHOULD);
209
//                
210
//                Hits hits = searcher.search(consulta);
211
//                for (int i = 0; i < 20; i++) {
212
//                         Document docu = hits.doc(i);
213
//                        
214
//                         log.info(docu.get("GID") + "\t " + docu.get("STREET_NAM") +" - "+docu.get("PROV") + "("
215
//                                         + hits.score(i) + ")");
216
//                }
217
//                log.info("Resultados: " + hits.length());
218
//                log.info("FIN ....");
219
//        }
220

    
221
        /**
222
         * get pattern
223
         * 
224
         * @return
225
         */
226
        private GeocodingPattern getPattern() {
227

    
228
                GeocodingPattern pat = new DefaultGeocodingPattern();
229

    
230
                try {
231
                        pat.setPatternName("indexLucenePattern");
232

    
233
                        GeocodingSource source = new DefaultGeocodingSource();
234
                        source.setLayerName("Streets.shp");
235
                        source.setLayerProvider("Shape file");
236

    
237
                        AbstractGeocodingStyle style = new SimpleCentroid();
238
                        Literal relations = new DefaultLiteral();
239
                        relations.put("Calle", "STREET_NAM");
240
                        relations.put("Provincia", "PROV");
241

    
242
                        style.setRelationsLiteral(relations);
243
                        source.setStyle(style);
244

    
245
                        pat.setSource(source);
246

    
247
                } catch (Exception e) {
248
                        log.error("Building a pattern", e);
249
                }
250

    
251
                return pat;
252
        }
253

    
254
}