Statistics
| Revision:

root / branches / v2_0_0_prep / libraries / libGeocoding / src / org / gvsig / normalization / operations / NormAlgorithm.java @ 29125

History | View | Annotate | Download (14.8 KB)

1
/* gvSIG. Geographic Information System of the Valencian Government
2
 *
3
 * Copyright (C) 2007-2008 Infrastructures and Transports Department
4
 * of the Valencian Government (CIT)
5
 * 
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 * 
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 * 
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
19
 * MA  02110-1301, USA.
20
 * 
21
 */
22

    
23
/*
24
 * AUTHORS (In addition to CIT):
25
 * 2008 Prodevelop S.L. main development
26
 */
27

    
28
package org.gvsig.normalization.operations;
29

    
30
import java.text.DecimalFormat;
31
import java.text.DecimalFormatSymbols;
32
import java.util.ArrayList;
33
import java.util.List;
34
import java.util.Locale;
35

    
36
import javax.swing.event.ChangeEvent;
37
import javax.swing.event.ChangeListener;
38

    
39
import org.apache.log4j.Logger;
40
import org.gvsig.normalization.pattern.Datevalue;
41
import org.gvsig.normalization.pattern.Decimalvalue;
42
import org.gvsig.normalization.pattern.Element;
43
import org.gvsig.normalization.pattern.Fieldtype;
44
import org.gvsig.normalization.pattern.Integervalue;
45
import org.gvsig.normalization.pattern.Patternnormalization;
46
import org.gvsig.normalization.pattern.Stringvalue;
47

    
48
/**
49
 * This class tokenizes strings
50
 * 
51
 * @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
52
 * @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
53
 * 
54
 */
55

    
56
public class NormAlgorithm {
57

    
58
        /** Class logger. */
        private static final Logger log = Logger.getLogger(NormAlgorithm.class);

        /** Normalization pattern received in the constructor. */
        private final Patternnormalization pat;
        /** Number of elements (fields) declared by the pattern. */
        private final int numFields;
        /** Row index stored by {@link #setRow(int)}. */
        private int row;
        /** Elements of the pattern, one per output field. */
        private final Element[] elements;
        /** Listeners notified by the <code>update</code> methods. */
        private final List<ChangeListener> listeners = new ArrayList<ChangeListener>();

        /**
         * Constructor
         * 
         * @param _pat
         *            Normalization pattern; its element array defines the fields
         *            produced by this algorithm. A pattern without element array
         *            is treated as a pattern with zero fields.
         * @throws NullPointerException
         *             if <code>_pat</code> is <code>null</code>
         */
        public NormAlgorithm(Patternnormalization _pat) {
                if (_pat == null) {
                        throw new NullPointerException(
                                        "The normalization pattern is null");
                }
                Element[] patternElements = _pat.getArrayElements();
                this.pat = _pat;
                // Fall back to an empty array so later loops simply do nothing
                this.elements = patternElements != null ? patternElements
                                : new Element[0];
                this.numFields = this.elements.length;
        }
77

    
78
        // METHODS
79

    
80
        /**
81
         * This method cuts a chain in several parts and they are returned in a List
82
         * 
83
         * @param _chain
84
         *            strings
85
         * @return list with the strings tokenized
86
         */
87
        public List splitChain(String _chain) {
88
                String preChain = _chain;
89
                String postChain = "";
90
                List subStrings = new ArrayList();
91

    
92
                // EXCEPTIONAL CASES
93
                if (_chain == null) {
94
                        return subStrings;
95
                }
96

    
97
                else if (_chain.compareToIgnoreCase("") == 0) {
98
                        subStrings.add(_chain);
99
                        return subStrings;
100
                }
101

    
102
                // NORMAL CASE
103
                else {
104
                        int fw = 0;
105
                        int init = 0;
106
                        String subChain = "";
107

    
108
                        for (int i = 0; i < numFields; i++) {
109

    
110
                                if (preChain.length() == 0) {
111
                                        return subStrings;
112
                                } else {
113

    
114
                                        fw = elements[i].getFieldwidth();
115
                                        // Cut chain with fixed width
116
                                        if (fw > 0 && preChain.length() > fw) {
117
                                                subChain = preChain.substring(init, fw);
118
                                                subStrings.add(subChain);
119
                                                postChain = preChain.substring(fw);
120
                                                preChain = postChain;
121

    
122
                                        } else if (fw > 0 && preChain.length() <= fw) {
123
                                                subStrings.add(preChain);
124
                                                return subStrings;
125
                                        }
126
                                        // Cut chain with separators
127
                                        else {
128
                                                // Load specific separators
129
                                                List separators = loadSpecificSeparators(elements[i]);
130
                                                boolean join = withJoinSeparators(elements[i]);
131
                                                // Search the first delimiter in the chain
132
                                                int posi = calculatePosition(separators, preChain);
133
                                                int tamSep = 0;
134
                                                if (!join) {
135
                                                        tamSep = calculateSizeSep(separators, preChain);
136
                                                } else {
137
                                                        tamSep = calculateSizeJoinSep(separators, preChain,
138
                                                                        posi);
139
                                                }
140
                                                // Firsts elements
141
                                                if (i < numFields - 1) {
142

    
143
                                                        if (join) {
144
                                                                while (posi == 0) {
145
                                                                        preChain = deleteFirst(preChain);
146
                                                                        posi = calculatePosition(separators,
147
                                                                                        preChain);
148
                                                                        if (preChain.length() == 0) {
149
                                                                                break;
150
                                                                        }
151
                                                                }
152
                                                                subChain = preChain.substring(0, posi);
153
                                                                try {
154
                                                                        postChain = preChain.substring(posi
155
                                                                                        + tamSep);
156
                                                                } catch (Exception e) {
157
                                                                        postChain = "";
158
                                                                }
159
                                                                subStrings.add(subChain);
160
                                                                preChain = postChain;
161
                                                        } else {
162
                                                                subChain = preChain.substring(0, posi);
163
                                                                postChain = preChain.substring(posi + tamSep);
164
                                                                subStrings.add(subChain);
165
                                                                preChain = postChain;
166
                                                        }
167

    
168
                                                }
169
                                                // Last element
170
                                                else {
171
                                                        subStrings.add(preChain);
172
                                                }
173
                                        }
174
                                }
175
                        }
176
                }
177
                return subStrings;
178
        }
179

    
180
        /**
181
         * This method cuts a chain in several parts from separators
182
         * 
183
         * @param chain
184
         *            string
185
         * @param fields
186
         *            fields number
187
         * @param separators
188
         *            array of characters
189
         * @param joinDelimiters
190
         *            with or without joinDelimiters
191
         * @return list with the strings tokenized
192
         */
193
        public static List splitChainBySeparators(String chain, int fields,
194
                        String[] separators, boolean joinDelimiters) {
195

    
196
                List subStrings = new ArrayList();
197
                int posTemp = -1;
198
                String separator;
199
                String chain2 = chain;
200
                int campos = fields;
201

    
202
                // EXCEPTIONAL CASES
203
                if (chain.compareToIgnoreCase("") == 0 || campos == 0
204
                                || separators.length < 1) {
205
                        subStrings.add(chain);
206
                        return subStrings;
207
                }
208

    
209
                // NORMAL CASE
210
                else {
211

    
212
                        // Only (parts-1) loops
213
                        for (int i = 0; i < (campos - 1); i++) {
214
                                int posi = Integer.MAX_VALUE;
215
                                String firstChain;
216
                                for (int j = 0; j < separators.length; j++) {
217
                                        separator = separators[j];
218
                                        posTemp = chain2.indexOf(separator);
219
                                        if (posTemp != -1 && posTemp < posi) {
220
                                                posi = posTemp;
221
                                        }
222
                                        posTemp = -1;
223
                                }
224
                                if (posi == 0 && joinDelimiters) {
225
                                        campos++;
226
                                        chain2 = chain2.substring(posi + 1);
227
                                } else {
228
                                        firstChain = chain2.substring(0, posi);
229
                                        chain2 = chain2.substring(posi + 1);
230
                                        subStrings.add(firstChain);
231
                                        // In the last loop add the first chain and the rest of
232
                                        // chain
233
                                        if (i == (campos - 2)) {
234
                                                subStrings.add(chain2);
235
                                        }
236
                                }
237
                        }
238
                        return subStrings;
239
                }
240
        }
241

    
242
        /**
243
         * This method cuts a chain in several parts from fixed width
244
         * 
245
         * @param chain
246
         *            string
247
         * @param fieldWidth
248
         *            array with fields widths
249
         * @return list with the strings tokenized
250
         */
251
        public static List splitChainByFixedWidth(String chain, int[] fieldWidth) {
252

    
253
                List subStrings = new ArrayList();
254
                int elements = fieldWidth.length;
255
                String subChain;
256
                int inicio = 0;
257
                int fin = 0;
258

    
259
                // EXCEPTIONAL CASES
260
                if (chain.compareToIgnoreCase("") == 0 || fieldWidth.length < 1) {
261
                        subStrings.add(chain);
262
                        return subStrings;
263
                }
264

    
265
                // NORMAL CASE
266
                else {
267
                        for (int i = 0; i < elements; i++) {
268
                                fin = fin + fieldWidth[i];
269
                                subChain = chain.substring(inicio, fin);
270
                                subStrings.add(subChain);
271
                                inicio = fin;
272
                        }
273
                        return subStrings;
274
                }
275
        }
276

    
277
        /**
278
         * This method filters the split chains with the in-separators
279
         * 
280
         * @param chains
281
         *            strings of the one row
282
         * @return strings filtered by type
283
         */
284
        public List filterSplitChains(List chains) {
285

    
286
                DecimalFormat numForm = (DecimalFormat) DecimalFormat
287
                                .getInstance(Locale.getDefault());
288
                DecimalFormatSymbols simb = numForm.getDecimalFormatSymbols();
289

    
290
                Fieldtype nft = null;
291

    
292
                String decsep;
293
                char cdecsep;
294
                String thosep;
295
                char cthosep;
296
                String txsep;
297

    
298
                List postChain = new ArrayList();
299
                String aux = "";
300

    
301
                for (int i = 0; i < numFields; i++) {
302

    
303
                        nft = elements[i].getFieldtype();
304

    
305
                        decsep = elements[i].getInfieldseparators().getDecimalseparator()
306
                                        .trim();
307
                        cdecsep = decsep.compareTo("") == 0 ? (char) 0x20 : decsep
308
                                        .charAt(0);
309
                        thosep = elements[i].getInfieldseparators().getThousandseparator()
310
                                        .trim();
311
                        cthosep = thosep.compareTo("") == 0 ? (char) 0x20 : thosep
312
                                        .charAt(0);
313

    
314
                        txsep = elements[i].getInfieldseparators().getTextseparator()
315
                                        .trim();
316

    
317
                        if (txsep.compareToIgnoreCase("\"") == 0) {
318
                                txsep = "\"";
319
                        }
320

    
321
                        simb.setDecimalSeparator(cdecsep);
322
                        try {
323
                                simb.setGroupingSeparator(cthosep);
324
                        } catch (RuntimeException e1) {
325
                                log.error("Error setting the group separator", e1);
326
                        }
327
                        // Fill fields empties
328
                        if (chains.size() < numFields) {
329

    
330
                                for (int j = chains.size(); j < numFields; j++) {
331
                                        chains.add(j, "");
332
                                }
333
                        }
334

    
335
                        if (((Integervalue) nft.getIntegervalue()) != null
336
                                        || ((Decimalvalue) nft.getDecimalvalue()) != null) {
337
                                numForm.setDecimalFormatSymbols(simb);
338
                                try {
339
                                        String cadena = ((String) chains.get(i)).trim();
340
                                        // aux = (numForm.parse(cadena)).toString().trim();
341

    
342
                                        int num = cadena.length();
343
                                        boolean comproba = test(cadena, cdecsep, cthosep);
344
                                        if (num > 0 && comproba) {
345
                                                aux = (numForm.parse(cadena)).toString().trim();
346
                                        } else {
347
                                                aux = "";
348
                                        }
349

    
350
                                } catch (Exception e) {
351
                                        aux = "";
352
                                }
353
                        }
354

    
355
                        // Field type (VARCHAR)
356
                        if (((Stringvalue) nft.getStringvalue()) != null) {
357
                                String cadena = (String) chains.get(i);
358
                                if (cadena != null) {
359
                                        aux = cadena.replace(txsep, "");
360
                                } else {
361
                                        aux = "";
362
                                }
363
                        }
364

    
365
                        // Field type (DATE)
366
                        if (((Datevalue) nft.getDatevalue()) != null) {
367
                                String cadena = (String) chains.get(i);
368
                                if (cadena.length() > 0 || cadena != null) {
369
                                        aux = cadena.replace(txsep, "");
370
                                } else {
371
                                        aux = "";
372
                                }
373
                        }
374
                        postChain.add(aux);
375
                }
376
                return postChain;
377

    
378
        }
379

    
380
        /**
381
         * This method registers the listeners
382
         * 
383
         * @param l
384
         *            listener
385
         */
386
        public void registerListener(ChangeListener l) {
387
                this.listeners.add(l);
388
        }
389

    
390
        /**
391
         * This method remove the listeners registred
392
         * 
393
         * @param l
394
         *            listener
395
         */
396
        public void removeListener(ChangeListener l) {
397
                this.listeners.remove(l);
398
        }
399

    
400
        /**
401
         * This method removes all listeners
402
         */
403
        public void removeAllListeners() {
404
                this.listeners.clear();
405
        }
406

    
407
        /**
408
         * 
409
         * @param evt
410
         *            event
411
         */
412
        public void update(ChangeEvent evt) {
413

    
414
                for (int i = 0; i < listeners.size(); i++) {
415
                        ((ChangeListener) listeners.get(i)).stateChanged(evt);
416
                }
417
        }
418

    
419
        /**
420
         * Add message
421
         * 
422
         * @param message
423
         */
424
        public void update(String message) {
425
                ChangeEvent evt = new ChangeEvent(message);
426
                update(evt);
427
        }
428

    
429
        /**
430
         * Set the row
431
         * 
432
         * @param _row
433
         */
434
        public void setRow(int _row) {
435
                row = _row;
436
        }
437

    
438
        /**
439
         * This method loads the join separators attribute of one Element
440
         * 
441
         * @param ad
442
         * @return with or without joinSeparators
443
         */
444
        private boolean withJoinSeparators(Element ad) {
445
                return ad.getFieldseparator().getJoinsep();
446
        }
447

    
448
        /**
449
         * This method deletes the first element of one substring
450
         * 
451
         * @param chain
452
         *            initial string
453
         * @return string
454
         */
455
        private String deleteFirst(String chain) {
456
                String del = chain.substring(1);
457
                return del;
458
        }
459

    
460
        /**
461
         * This method gets the first position in the string of the separators
462
         * group.
463
         * 
464
         * @param separators
465
         *            separators characters list
466
         * @param preChain
467
         *            initial string
468
         * @return
469
         */
470
        private int calculatePosition(List separators, String preChain) {
471

    
472
                String separator;
473
                int posTemp = -1;
474
                int posi = Integer.MAX_VALUE;
475
                for (int j = 0; j < separators.size(); j++) {
476
                        separator = (String) separators.get(j);
477
                        posTemp = preChain.indexOf(separator);
478
                        if (posTemp != -1 && posTemp < posi) {
479
                                posi = posTemp;
480
                        }
481
                        posTemp = -1;
482
                }
483
                if (posi > preChain.length()) {
484
                        posi = preChain.length();
485
                }
486

    
487
                return posi;
488
        }
489

    
490
        /**
491
         * This method calculates the number of elements of separators
492
         * 
493
         * @param separators
494
         * @param preChain
495
         * @return number of elements
496
         */
497

    
498
        private int calculateSizeSep(List separators, String preChain) {
499

    
500
                String separator;
501
                int posTemp = -1;
502
                int posi = Integer.MAX_VALUE;
503
                String sep = "";
504
                for (int j = 0; j < separators.size(); j++) {
505
                        separator = (String) separators.get(j);
506
                        posTemp = preChain.indexOf(separator);
507
                        if (posTemp != -1 && posTemp < posi) {
508
                                posi = posTemp;
509
                                sep = separator;
510

    
511
                        }
512
                        posTemp = -1;
513
                }
514

    
515
                return sep.length();
516
        }
517

    
518
        /**
519
         * This method calculates the number of elements of joins separators
520
         * 
521
         * @param separators
522
         * @param preChain
523
         * @return number of elements
524
         */
525

    
526
        private int calculateSizeJoinSep(List separators, String preChain,
527
                        int position) {
528

    
529
                String chain = preChain.substring(position);
530
                int tam = 0;
531
                for (int i = 0; i < chain.length(); i++) {
532
                        boolean exist = false;
533
                        String cha = chain.substring(i, i + 1);
534
                        for (int j = 0; j < separators.size(); j++) {
535
                                String sep = (String) separators.get(j);
536
                                if (cha.compareTo(sep) == 0) {
537
                                        tam = i;
538
                                        exist = true;
539
                                        break;
540
                                }
541
                        }
542
                        if (!exist) {
543
                                break;
544
                        }
545
                }
546
                return tam + 1;
547
        }
548

    
549
        /**
550
         * This method loads the specifics separators of one Element
551
         * 
552
         * @param adrElem
553
         * @return separators list
554
         */
555
        private List loadSpecificSeparators(Element adrElem) {
556
                List separators = new ArrayList();
557
                if (adrElem.getFieldseparator().getColonsep()) {
558
                        separators.add(",");
559
                }
560
                if (adrElem.getFieldseparator().getSemicolonsep()) {
561
                        separators.add(";");
562
                }
563
                if (adrElem.getFieldseparator().getTabsep()) {
564
                        separators.add("\t");
565
                }
566
                if (adrElem.getFieldseparator().getSpacesep()) {
567
                        separators.add(" ");
568
                }
569
                if (adrElem.getFieldseparator().getOthersep() != null
570
                                && adrElem.getFieldseparator().getOthersep()
571
                                                .compareToIgnoreCase("") != 0) {
572
                        String sepOth = (String) adrElem.getFieldseparator().getOthersep();
573
                        separators.add(sepOth);
574
                }
575

    
576
                return separators;
577
        }
578

    
579
        /**
580
         * This method tests the numbers format
581
         * 
582
         * @param str
583
         * @param dec
584
         * @param sep
585
         * @return true if there aren't other characters
586
         */
587
        private boolean test(String str, char dec, char sep) {
588
                String str2 = str.replaceAll("[0-9]", "");
589
                str2 = str2.replaceAll("-", "");
590
                str2 = str2.replace("E", "");
591

    
592
                String str3 = str2;
593
                if (str2.indexOf(dec) >= 0) {
594
                        int ind = str2.indexOf(String.valueOf(dec));
595
                        str3 = str2.substring(0, ind)
596
                                        + str2.substring(ind + 1, str2.length());
597
                }
598
                String str4 = str3;
599
                if (str3.indexOf(sep) >= 0) {
600
                        int ind = str3.indexOf(String.valueOf(sep));
601
                        str4 = str3.substring(0, ind)
602
                                        + str3.substring(ind + 1, str3.length());
603
                }
604

    
605
                return str4.length() == 0;
606
        }
607

    
608
}