root / branches / v2_0_0_prep / libraries / libGeocoding / src / org / gvsig / normalization / operations / NormAlgorithm.java @ 29125
History | View | Annotate | Download (14.8 KB)
1 |
/* gvSIG. Geographic Information System of the Valencian Government
|
---|---|
2 |
*
|
3 |
* Copyright (C) 2007-2008 Infrastructures and Transports Department
|
4 |
* of the Valencian Government (CIT)
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 2
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/*
|
24 |
* AUTHORS (In addition to CIT):
|
25 |
* 2008 Prodevelop S.L. main development
|
26 |
*/
|
27 |
|
28 |
package org.gvsig.normalization.operations; |
29 |
|
30 |
import java.text.DecimalFormat; |
31 |
import java.text.DecimalFormatSymbols; |
32 |
import java.util.ArrayList; |
33 |
import java.util.List; |
34 |
import java.util.Locale; |
35 |
|
36 |
import javax.swing.event.ChangeEvent; |
37 |
import javax.swing.event.ChangeListener; |
38 |
|
39 |
import org.apache.log4j.Logger; |
40 |
import org.gvsig.normalization.pattern.Datevalue; |
41 |
import org.gvsig.normalization.pattern.Decimalvalue; |
42 |
import org.gvsig.normalization.pattern.Element; |
43 |
import org.gvsig.normalization.pattern.Fieldtype; |
44 |
import org.gvsig.normalization.pattern.Integervalue; |
45 |
import org.gvsig.normalization.pattern.Patternnormalization; |
46 |
import org.gvsig.normalization.pattern.Stringvalue; |
47 |
|
48 |
/**
|
49 |
* This class tokenizes strings
|
50 |
*
|
51 |
* @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
|
52 |
* @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
|
53 |
*
|
54 |
*/
|
55 |
|
56 |
public class NormAlgorithm { |
57 |
|
58 |
/** Logger of this class. */
private static final Logger log = Logger.getLogger(NormAlgorithm.class);

/** Normalization pattern received in the constructor. */
private Patternnormalization pat;

/** Number of elements of the pattern (length of {@link #elements}). */
private int numFields;

/** Row stored by {@link #setRow(int)}; this class never reads it. */
private int row;

/** Elements of the pattern, as returned by {@code pat.getArrayElements()}. */
private Element[] elements;

/** Registered {@link ChangeListener}s, in registration order. */
private List listeners = new ArrayList();

/**
 * Creates the algorithm for one normalization pattern and caches the
 * pattern elements and their count.
 *
 * @param _pat
 *            Normalization pattern
 */
public NormAlgorithm(Patternnormalization _pat) {
    pat = _pat;
    elements = _pat.getArrayElements();
    numFields = elements.length;
}
77 |
|
78 |
// METHODS
|
79 |
|
80 |
/**
|
81 |
* This method cuts a chain in several parts and they are returned in a List
|
82 |
*
|
83 |
* @param _chain
|
84 |
* strings
|
85 |
* @return list with the strings tokenized
|
86 |
*/
|
87 |
public List splitChain(String _chain) { |
88 |
String preChain = _chain;
|
89 |
String postChain = ""; |
90 |
List subStrings = new ArrayList(); |
91 |
|
92 |
// EXCEPTIONAL CASES
|
93 |
if (_chain == null) { |
94 |
return subStrings;
|
95 |
} |
96 |
|
97 |
else if (_chain.compareToIgnoreCase("") == 0) { |
98 |
subStrings.add(_chain); |
99 |
return subStrings;
|
100 |
} |
101 |
|
102 |
// NORMAL CASE
|
103 |
else {
|
104 |
int fw = 0; |
105 |
int init = 0; |
106 |
String subChain = ""; |
107 |
|
108 |
for (int i = 0; i < numFields; i++) { |
109 |
|
110 |
if (preChain.length() == 0) { |
111 |
return subStrings;
|
112 |
} else {
|
113 |
|
114 |
fw = elements[i].getFieldwidth(); |
115 |
// Cut chain with fixed width
|
116 |
if (fw > 0 && preChain.length() > fw) { |
117 |
subChain = preChain.substring(init, fw); |
118 |
subStrings.add(subChain); |
119 |
postChain = preChain.substring(fw); |
120 |
preChain = postChain; |
121 |
|
122 |
} else if (fw > 0 && preChain.length() <= fw) { |
123 |
subStrings.add(preChain); |
124 |
return subStrings;
|
125 |
} |
126 |
// Cut chain with separators
|
127 |
else {
|
128 |
// Load specific separators
|
129 |
List separators = loadSpecificSeparators(elements[i]);
|
130 |
boolean join = withJoinSeparators(elements[i]);
|
131 |
// Search the first delimiter in the chain
|
132 |
int posi = calculatePosition(separators, preChain);
|
133 |
int tamSep = 0; |
134 |
if (!join) {
|
135 |
tamSep = calculateSizeSep(separators, preChain); |
136 |
} else {
|
137 |
tamSep = calculateSizeJoinSep(separators, preChain, |
138 |
posi); |
139 |
} |
140 |
// Firsts elements
|
141 |
if (i < numFields - 1) { |
142 |
|
143 |
if (join) {
|
144 |
while (posi == 0) { |
145 |
preChain = deleteFirst(preChain); |
146 |
posi = calculatePosition(separators, |
147 |
preChain); |
148 |
if (preChain.length() == 0) { |
149 |
break;
|
150 |
} |
151 |
} |
152 |
subChain = preChain.substring(0, posi);
|
153 |
try {
|
154 |
postChain = preChain.substring(posi |
155 |
+ tamSep); |
156 |
} catch (Exception e) { |
157 |
postChain = "";
|
158 |
} |
159 |
subStrings.add(subChain); |
160 |
preChain = postChain; |
161 |
} else {
|
162 |
subChain = preChain.substring(0, posi);
|
163 |
postChain = preChain.substring(posi + tamSep); |
164 |
subStrings.add(subChain); |
165 |
preChain = postChain; |
166 |
} |
167 |
|
168 |
} |
169 |
// Last element
|
170 |
else {
|
171 |
subStrings.add(preChain); |
172 |
} |
173 |
} |
174 |
} |
175 |
} |
176 |
} |
177 |
return subStrings;
|
178 |
} |
179 |
|
180 |
/**
|
181 |
* This method cuts a chain in several parts from separators
|
182 |
*
|
183 |
* @param chain
|
184 |
* string
|
185 |
* @param fields
|
186 |
* fields number
|
187 |
* @param separators
|
188 |
* array of characters
|
189 |
* @param joinDelimiters
|
190 |
* with or without joinDelimiters
|
191 |
* @return list with the strings tokenized
|
192 |
*/
|
193 |
public static List splitChainBySeparators(String chain, int fields, |
194 |
String[] separators, boolean joinDelimiters) { |
195 |
|
196 |
List subStrings = new ArrayList(); |
197 |
int posTemp = -1; |
198 |
String separator;
|
199 |
String chain2 = chain;
|
200 |
int campos = fields;
|
201 |
|
202 |
// EXCEPTIONAL CASES
|
203 |
if (chain.compareToIgnoreCase("") == 0 || campos == 0 |
204 |
|| separators.length < 1) {
|
205 |
subStrings.add(chain); |
206 |
return subStrings;
|
207 |
} |
208 |
|
209 |
// NORMAL CASE
|
210 |
else {
|
211 |
|
212 |
// Only (parts-1) loops
|
213 |
for (int i = 0; i < (campos - 1); i++) { |
214 |
int posi = Integer.MAX_VALUE; |
215 |
String firstChain;
|
216 |
for (int j = 0; j < separators.length; j++) { |
217 |
separator = separators[j]; |
218 |
posTemp = chain2.indexOf(separator); |
219 |
if (posTemp != -1 && posTemp < posi) { |
220 |
posi = posTemp; |
221 |
} |
222 |
posTemp = -1;
|
223 |
} |
224 |
if (posi == 0 && joinDelimiters) { |
225 |
campos++; |
226 |
chain2 = chain2.substring(posi + 1);
|
227 |
} else {
|
228 |
firstChain = chain2.substring(0, posi);
|
229 |
chain2 = chain2.substring(posi + 1);
|
230 |
subStrings.add(firstChain); |
231 |
// In the last loop add the first chain and the rest of
|
232 |
// chain
|
233 |
if (i == (campos - 2)) { |
234 |
subStrings.add(chain2); |
235 |
} |
236 |
} |
237 |
} |
238 |
return subStrings;
|
239 |
} |
240 |
} |
241 |
|
242 |
/**
|
243 |
* This method cuts a chain in several parts from fixed width
|
244 |
*
|
245 |
* @param chain
|
246 |
* string
|
247 |
* @param fieldWidth
|
248 |
* array with fields widths
|
249 |
* @return list with the strings tokenized
|
250 |
*/
|
251 |
public static List splitChainByFixedWidth(String chain, int[] fieldWidth) { |
252 |
|
253 |
List subStrings = new ArrayList(); |
254 |
int elements = fieldWidth.length;
|
255 |
String subChain;
|
256 |
int inicio = 0; |
257 |
int fin = 0; |
258 |
|
259 |
// EXCEPTIONAL CASES
|
260 |
if (chain.compareToIgnoreCase("") == 0 || fieldWidth.length < 1) { |
261 |
subStrings.add(chain); |
262 |
return subStrings;
|
263 |
} |
264 |
|
265 |
// NORMAL CASE
|
266 |
else {
|
267 |
for (int i = 0; i < elements; i++) { |
268 |
fin = fin + fieldWidth[i]; |
269 |
subChain = chain.substring(inicio, fin); |
270 |
subStrings.add(subChain); |
271 |
inicio = fin; |
272 |
} |
273 |
return subStrings;
|
274 |
} |
275 |
} |
276 |
|
277 |
/**
|
278 |
* This method filters the split chains with the in-separators
|
279 |
*
|
280 |
* @param chains
|
281 |
* strings of the one row
|
282 |
* @return strings filtered by type
|
283 |
*/
|
284 |
public List filterSplitChains(List chains) { |
285 |
|
286 |
DecimalFormat numForm = (DecimalFormat) DecimalFormat |
287 |
.getInstance(Locale.getDefault());
|
288 |
DecimalFormatSymbols simb = numForm.getDecimalFormatSymbols();
|
289 |
|
290 |
Fieldtype nft = null;
|
291 |
|
292 |
String decsep;
|
293 |
char cdecsep;
|
294 |
String thosep;
|
295 |
char cthosep;
|
296 |
String txsep;
|
297 |
|
298 |
List postChain = new ArrayList(); |
299 |
String aux = ""; |
300 |
|
301 |
for (int i = 0; i < numFields; i++) { |
302 |
|
303 |
nft = elements[i].getFieldtype(); |
304 |
|
305 |
decsep = elements[i].getInfieldseparators().getDecimalseparator() |
306 |
.trim(); |
307 |
cdecsep = decsep.compareTo("") == 0 ? (char) 0x20 : decsep |
308 |
.charAt(0);
|
309 |
thosep = elements[i].getInfieldseparators().getThousandseparator() |
310 |
.trim(); |
311 |
cthosep = thosep.compareTo("") == 0 ? (char) 0x20 : thosep |
312 |
.charAt(0);
|
313 |
|
314 |
txsep = elements[i].getInfieldseparators().getTextseparator() |
315 |
.trim(); |
316 |
|
317 |
if (txsep.compareToIgnoreCase("\"") == 0) { |
318 |
txsep = "\"";
|
319 |
} |
320 |
|
321 |
simb.setDecimalSeparator(cdecsep); |
322 |
try {
|
323 |
simb.setGroupingSeparator(cthosep); |
324 |
} catch (RuntimeException e1) { |
325 |
log.error("Error setting the group separator", e1);
|
326 |
} |
327 |
// Fill fields empties
|
328 |
if (chains.size() < numFields) {
|
329 |
|
330 |
for (int j = chains.size(); j < numFields; j++) { |
331 |
chains.add(j, "");
|
332 |
} |
333 |
} |
334 |
|
335 |
if (((Integervalue) nft.getIntegervalue()) != null |
336 |
|| ((Decimalvalue) nft.getDecimalvalue()) != null) {
|
337 |
numForm.setDecimalFormatSymbols(simb); |
338 |
try {
|
339 |
String cadena = ((String) chains.get(i)).trim(); |
340 |
// aux = (numForm.parse(cadena)).toString().trim();
|
341 |
|
342 |
int num = cadena.length();
|
343 |
boolean comproba = test(cadena, cdecsep, cthosep);
|
344 |
if (num > 0 && comproba) { |
345 |
aux = (numForm.parse(cadena)).toString().trim(); |
346 |
} else {
|
347 |
aux = "";
|
348 |
} |
349 |
|
350 |
} catch (Exception e) { |
351 |
aux = "";
|
352 |
} |
353 |
} |
354 |
|
355 |
// Field type (VARCHAR)
|
356 |
if (((Stringvalue) nft.getStringvalue()) != null) { |
357 |
String cadena = (String) chains.get(i); |
358 |
if (cadena != null) { |
359 |
aux = cadena.replace(txsep, "");
|
360 |
} else {
|
361 |
aux = "";
|
362 |
} |
363 |
} |
364 |
|
365 |
// Field type (DATE)
|
366 |
if (((Datevalue) nft.getDatevalue()) != null) { |
367 |
String cadena = (String) chains.get(i); |
368 |
if (cadena.length() > 0 || cadena != null) { |
369 |
aux = cadena.replace(txsep, "");
|
370 |
} else {
|
371 |
aux = "";
|
372 |
} |
373 |
} |
374 |
postChain.add(aux); |
375 |
} |
376 |
return postChain;
|
377 |
|
378 |
} |
379 |
|
380 |
/**
|
381 |
* This method registers the listeners
|
382 |
*
|
383 |
* @param l
|
384 |
* listener
|
385 |
*/
|
386 |
public void registerListener(ChangeListener l) { |
387 |
this.listeners.add(l);
|
388 |
} |
389 |
|
390 |
/**
|
391 |
* This method remove the listeners registred
|
392 |
*
|
393 |
* @param l
|
394 |
* listener
|
395 |
*/
|
396 |
public void removeListener(ChangeListener l) { |
397 |
this.listeners.remove(l);
|
398 |
} |
399 |
|
400 |
/**
|
401 |
* This method removes all listeners
|
402 |
*/
|
403 |
public void removeAllListeners() { |
404 |
this.listeners.clear();
|
405 |
} |
406 |
|
407 |
/**
|
408 |
*
|
409 |
* @param evt
|
410 |
* event
|
411 |
*/
|
412 |
public void update(ChangeEvent evt) { |
413 |
|
414 |
for (int i = 0; i < listeners.size(); i++) { |
415 |
((ChangeListener) listeners.get(i)).stateChanged(evt);
|
416 |
} |
417 |
} |
418 |
|
419 |
/**
|
420 |
* Add message
|
421 |
*
|
422 |
* @param message
|
423 |
*/
|
424 |
public void update(String message) { |
425 |
ChangeEvent evt = new ChangeEvent(message); |
426 |
update(evt); |
427 |
} |
428 |
|
429 |
/**
|
430 |
* Set the row
|
431 |
*
|
432 |
* @param _row
|
433 |
*/
|
434 |
public void setRow(int _row) { |
435 |
row = _row; |
436 |
} |
437 |
|
438 |
/**
|
439 |
* This method loads the join separators attribute of one Element
|
440 |
*
|
441 |
* @param ad
|
442 |
* @return with or without joinSeparators
|
443 |
*/
|
444 |
private boolean withJoinSeparators(Element ad) { |
445 |
return ad.getFieldseparator().getJoinsep();
|
446 |
} |
447 |
|
448 |
/**
|
449 |
* This method deletes the first element of one substring
|
450 |
*
|
451 |
* @param chain
|
452 |
* initial string
|
453 |
* @return string
|
454 |
*/
|
455 |
private String deleteFirst(String chain) { |
456 |
String del = chain.substring(1); |
457 |
return del;
|
458 |
} |
459 |
|
460 |
/**
|
461 |
* This method gets the first position in the string of the separators
|
462 |
* group.
|
463 |
*
|
464 |
* @param separators
|
465 |
* separators characters list
|
466 |
* @param preChain
|
467 |
* initial string
|
468 |
* @return
|
469 |
*/
|
470 |
private int calculatePosition(List separators, String preChain) { |
471 |
|
472 |
String separator;
|
473 |
int posTemp = -1; |
474 |
int posi = Integer.MAX_VALUE; |
475 |
for (int j = 0; j < separators.size(); j++) { |
476 |
separator = (String) separators.get(j);
|
477 |
posTemp = preChain.indexOf(separator); |
478 |
if (posTemp != -1 && posTemp < posi) { |
479 |
posi = posTemp; |
480 |
} |
481 |
posTemp = -1;
|
482 |
} |
483 |
if (posi > preChain.length()) {
|
484 |
posi = preChain.length(); |
485 |
} |
486 |
|
487 |
return posi;
|
488 |
} |
489 |
|
490 |
/**
|
491 |
* This method calculates the number of elements of separators
|
492 |
*
|
493 |
* @param separators
|
494 |
* @param preChain
|
495 |
* @return number of elements
|
496 |
*/
|
497 |
|
498 |
private int calculateSizeSep(List separators, String preChain) { |
499 |
|
500 |
String separator;
|
501 |
int posTemp = -1; |
502 |
int posi = Integer.MAX_VALUE; |
503 |
String sep = ""; |
504 |
for (int j = 0; j < separators.size(); j++) { |
505 |
separator = (String) separators.get(j);
|
506 |
posTemp = preChain.indexOf(separator); |
507 |
if (posTemp != -1 && posTemp < posi) { |
508 |
posi = posTemp; |
509 |
sep = separator; |
510 |
|
511 |
} |
512 |
posTemp = -1;
|
513 |
} |
514 |
|
515 |
return sep.length();
|
516 |
} |
517 |
|
518 |
/**
|
519 |
* This method calculates the number of elements of joins separators
|
520 |
*
|
521 |
* @param separators
|
522 |
* @param preChain
|
523 |
* @return number of elements
|
524 |
*/
|
525 |
|
526 |
private int calculateSizeJoinSep(List separators, String preChain, |
527 |
int position) {
|
528 |
|
529 |
String chain = preChain.substring(position);
|
530 |
int tam = 0; |
531 |
for (int i = 0; i < chain.length(); i++) { |
532 |
boolean exist = false; |
533 |
String cha = chain.substring(i, i + 1); |
534 |
for (int j = 0; j < separators.size(); j++) { |
535 |
String sep = (String) separators.get(j); |
536 |
if (cha.compareTo(sep) == 0) { |
537 |
tam = i; |
538 |
exist = true;
|
539 |
break;
|
540 |
} |
541 |
} |
542 |
if (!exist) {
|
543 |
break;
|
544 |
} |
545 |
} |
546 |
return tam + 1; |
547 |
} |
548 |
|
549 |
/**
|
550 |
* This method loads the specifics separators of one Element
|
551 |
*
|
552 |
* @param adrElem
|
553 |
* @return separators list
|
554 |
*/
|
555 |
private List loadSpecificSeparators(Element adrElem) { |
556 |
List separators = new ArrayList(); |
557 |
if (adrElem.getFieldseparator().getColonsep()) {
|
558 |
separators.add(",");
|
559 |
} |
560 |
if (adrElem.getFieldseparator().getSemicolonsep()) {
|
561 |
separators.add(";");
|
562 |
} |
563 |
if (adrElem.getFieldseparator().getTabsep()) {
|
564 |
separators.add("\t");
|
565 |
} |
566 |
if (adrElem.getFieldseparator().getSpacesep()) {
|
567 |
separators.add(" ");
|
568 |
} |
569 |
if (adrElem.getFieldseparator().getOthersep() != null |
570 |
&& adrElem.getFieldseparator().getOthersep() |
571 |
.compareToIgnoreCase("") != 0) { |
572 |
String sepOth = (String) adrElem.getFieldseparator().getOthersep(); |
573 |
separators.add(sepOth); |
574 |
} |
575 |
|
576 |
return separators;
|
577 |
} |
578 |
|
579 |
/**
|
580 |
* This method tests the numbers format
|
581 |
*
|
582 |
* @param str
|
583 |
* @param dec
|
584 |
* @param sep
|
585 |
* @return true if there aren't other characters
|
586 |
*/
|
587 |
private boolean test(String str, char dec, char sep) { |
588 |
String str2 = str.replaceAll("[0-9]", ""); |
589 |
str2 = str2.replaceAll("-", ""); |
590 |
str2 = str2.replace("E", ""); |
591 |
|
592 |
String str3 = str2;
|
593 |
if (str2.indexOf(dec) >= 0) { |
594 |
int ind = str2.indexOf(String.valueOf(dec)); |
595 |
str3 = str2.substring(0, ind)
|
596 |
+ str2.substring(ind + 1, str2.length());
|
597 |
} |
598 |
String str4 = str3;
|
599 |
if (str3.indexOf(sep) >= 0) { |
600 |
int ind = str3.indexOf(String.valueOf(sep)); |
601 |
str4 = str3.substring(0, ind)
|
602 |
+ str3.substring(ind + 1, str3.length());
|
603 |
} |
604 |
|
605 |
return str4.length() == 0; |
606 |
} |
607 |
|
608 |
} |