Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / AutomaticDetectionOfTypes.java @ 45775

History | View | Annotate | Download (12.1 KB)

1
package org.gvsig.fmap.dal.store.csv;
2

    
3
import java.io.IOException;
4
import java.math.BigDecimal;
5
import java.net.URL;
6
import java.util.ArrayList;
7
import java.util.List;
8
import java.util.Locale;
9
import org.apache.commons.lang3.StringUtils;
10
import org.gvsig.fmap.dal.DataTypes;
11
import org.gvsig.tools.ToolsLocator;
12
import org.gvsig.tools.dataTypes.DataTypeUtils;
13
import org.gvsig.tools.dataTypes.DataTypesManager;
14
import org.gvsig.tools.dataTypes.Coercion;
15
import org.gvsig.tools.dataTypes.CoercionContext;
16

    
17
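/**
 * Infers, column by column, the data type, display size, precision and scale
 * of the values of a tabular text source by trying to parse every value of
 * every row it is given.
 *
 * @author jjdelcerro
 */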
21
public class AutomaticDetectionOfTypes {
22

    
23
    public interface Rows {
24

    
25
        public List<String> nextRowValues();
26
    }
27
    
28
    public interface DetectedValue {
29
        public int getType();
30
        public int getDisplaySize();
31
        public int getPrecision();
32
        public int getScale();
33
        public boolean isBlank();
34
    }
35
    
36
    private static class DetectedValueImpl implements DetectedValue {
37

    
38
        private int type;
39
        private int displaySize;
40
        private int integerDigits;
41
        private int decimalDigits;
42
        private boolean blank;
43
        
44
        @Override
45
        public int getType() {
46
            return this.type;
47
        }
48

    
49
        @Override
50
        public int getDisplaySize() {
51
            return this.displaySize;
52
        }
53

    
54
        @Override
55
        public int getPrecision() {
56
            return this.decimalDigits + this.integerDigits;
57
        }
58

    
59
        @Override
60
        public int getScale() {
61
            return this.decimalDigits;
62
        }
63
        
64
        public boolean isBlank() {
65
            return this.blank;
66
        }
67
    }
68

    
69
    private static class PossibleDataType {
70

    
71
        public boolean possibleInt = true;
72
        public boolean possibleFloat = true;
73
        public boolean possibleDouble = true;
74
        public boolean possibleDecimal = true;
75
        public boolean possibleLong = true;
76
        public boolean possibleURL = true;
77
        public boolean possibleDate = true;
78
        public boolean possibleTime = true;
79
        public boolean possibleTimestamp = true;
80
        public boolean possibleGeometry = true;
81
    }
82

    
83
    private final String filename;
84

    
85
    /**
     * Creates a detector that is not bound to a known file; error messages
     * will name the file as "(unknown)".
     */
    public AutomaticDetectionOfTypes() {
        this("(unknown)");
    }
88

    
89
    /**
     * Creates a detector for the given file.
     *
     * @param filename name of the file being analysed; it is only used to
     * build error messages
     */
    public AutomaticDetectionOfTypes(String filename) {
        this.filename = filename;
    }
92

    
93
    private String getFullFileName() {
94
        return this.filename;
95
    }
96

    
97
    @SuppressWarnings({"UseSpecificCatch", "ResultOfObjectAllocationIgnored"})
98
    public DetectedValue[] detect(int columns,
99
            Rows rows,
100
            boolean isFirstLineHeader,
101
            Locale locale
102
    ) throws IOException {
103
        List<PossibleDataType> possibleDataTypes;
104
        DetectedValueImpl[] detectedValues = new DetectedValueImpl [columns];
105

    
106
        int lineno = 0;
107
        try {
108
            if (isFirstLineHeader) {
109
                rows.nextRowValues();
110
                lineno++;
111
            }
112
            possibleDataTypes = new ArrayList<>(columns);
113
            for (int i = 0; i < columns; i++) {
114
                possibleDataTypes.add(new PossibleDataType());
115
                detectedValues[i] = new DetectedValueImpl();
116
            }
117
            if (locale == null) {
118
                locale = Locale.getDefault();
119
            }
120
            DataTypesManager typeManager = ToolsLocator.getDataTypesManager();
121
            Coercion toDecimal = typeManager.getCoercion(DataTypes.DECIMAL);
122
            Coercion toDouble = typeManager.getCoercion(DataTypes.DOUBLE);
123
            Coercion toFloat = typeManager.getCoercion(DataTypes.FLOAT);
124
            Coercion toDate = typeManager.getCoercion(DataTypes.DATE);
125
            Coercion toTime = typeManager.getCoercion(DataTypes.TIME);
126
            Coercion toTimestamp = typeManager.getCoercion(DataTypes.TIMESTAMP);
127
            Coercion toInt = typeManager.getCoercion(DataTypes.INT);
128
            Coercion toLong = typeManager.getCoercion(DataTypes.LONG);
129
            Coercion toGeom = typeManager.getCoercion(DataTypes.GEOMETRY);
130

    
131
            CoercionContext coercionContext = DataTypeUtils.coerceContextLocale(locale);
132
            List<String> row = rows.nextRowValues();
133
            lineno++;
134

    
135
            while (row != null) {
136
                for (int i = 0; i < row.size(); i++) {
137
                    while( possibleDataTypes.size()<row.size() ) {
138
                        possibleDataTypes.add(new PossibleDataType());
139
                    }
140
                    String rawvalue = row.get(i);
141
                    if( rawvalue == null ) {
142
                       continue; 
143
                    }
144
                    PossibleDataType possibleDataType = possibleDataTypes.get(i);
145
                    DetectedValueImpl detectedValue = detectedValues[i];
146
                    if( detectedValue.blank ) {
147
                        detectedValue.blank = StringUtils.isBlank(rawvalue);
148
                    }
149
                    int displaySize = rawvalue.length();
150
                    if( displaySize>detectedValue.displaySize ) {
151
                        detectedValue.displaySize = displaySize;
152
                    }
153
                    if (possibleDataType.possibleDecimal) {
154
                        try {
155
                            BigDecimal decimal = (BigDecimal) toDecimal.coerce(rawvalue, coercionContext);
156
                            possibleDataType.possibleDecimal = true;
157
                            if( decimal.scale() > detectedValue.decimalDigits ) {
158
                                detectedValue.decimalDigits = decimal.scale();
159
                            }
160
                            int integerDigits = decimal.precision() - decimal.scale();
161
                            if( integerDigits>detectedValue.integerDigits ) {
162
                                detectedValue.integerDigits = integerDigits;
163
                            }
164
                        } catch (Exception ex) {
165
                            possibleDataType.possibleDecimal = false;
166
                        }
167
                    }
168
                    if (possibleDataType.possibleDouble) {
169
                        try {
170
                            toDouble.coerce(rawvalue, coercionContext);
171
                            possibleDataType.possibleDouble = true;
172
                        } catch (Exception ex) {
173
                            possibleDataType.possibleDouble = false;
174
                        }
175
                    }
176
                    if (possibleDataType.possibleFloat) {
177
                        try {
178
                            toFloat.coerce(rawvalue, coercionContext);
179
                            possibleDataType.possibleFloat = true;
180
                        } catch (Exception ex) {
181
                            possibleDataType.possibleFloat = false;
182
                        }
183
                    }
184
                    if (possibleDataType.possibleLong) {
185
                        possibleDataType.possibleLong = isValidLong(rawvalue);
186
                    }
187
                    if (possibleDataType.possibleInt) {
188
                        possibleDataType.possibleInt = isValidInteger(rawvalue);
189
                    }
190
                    if (possibleDataType.possibleDate) {
191
                        try {
192
                            toDate.coerce(rawvalue, coercionContext);
193
                            possibleDataType.possibleDate = true;
194
                        } catch (Exception ex) {
195
                            possibleDataType.possibleDate = false;
196
                        }
197
                    }
198
                    if (possibleDataType.possibleTime) {
199
                        try {
200
                            toTime.coerce(rawvalue, coercionContext);
201
                            possibleDataType.possibleTime = true;
202
                        } catch (Exception ex) {
203
                            possibleDataType.possibleTime = false;
204
                        }
205
                    }
206
                    if (possibleDataType.possibleTimestamp) {
207
                        try {
208
                            toTimestamp.coerce(rawvalue, coercionContext);
209
                            possibleDataType.possibleTimestamp = true;
210
                        } catch (Exception ex) {
211
                            possibleDataType.possibleTimestamp = false;
212
                        }
213
                    }
214
                    if (possibleDataType.possibleURL) {
215
                        try {
216
                            new URL((String) rawvalue);
217
                            possibleDataType.possibleURL = true;
218
                        } catch (Exception ex) {
219
                            possibleDataType.possibleURL = false;
220
                        }
221
                    }
222
                    
223
                    if (possibleDataType.possibleGeometry) {
224
                        try {
225
                            toGeom.coerce((String) rawvalue);
226
                            possibleDataType.possibleGeometry = true;
227
                        } catch (Exception ex) {
228
                            possibleDataType.possibleGeometry = false;
229
                        }
230
                    }
231
                }
232
                row = rows.nextRowValues();
233
                lineno++;
234
            }
235
            int n = 0;
236
            for (PossibleDataType possibleDataType : possibleDataTypes) {
237
                if (possibleDataType.possibleInt) {
238
                    detectedValues[n++].type = DataTypes.INT;
239
                    continue;
240
                }
241
                if (possibleDataType.possibleLong) {
242
                    detectedValues[n++].type = DataTypes.LONG;
243
                    continue;
244
                }
245
                if (possibleDataType.possibleDecimal) {
246
                    // Preferimos un Decimal que un Float/Double
247
                    detectedValues[n++].type = DataTypes.DECIMAL;
248
                    continue;
249
                }
250
                if (possibleDataType.possibleFloat) {
251
                    // Forzamos los float a double para evitar perder precision
252
                    detectedValues[n++].type = DataTypes.DOUBLE;
253
                    continue;
254
                }
255
                if (possibleDataType.possibleDouble) {
256
                    detectedValues[n++].type = DataTypes.DOUBLE;
257
                    continue;
258
                }
259
                if (possibleDataType.possibleURL) {
260
                    detectedValues[n++].type = DataTypes.URL;
261
                    continue;
262
                }
263
                if (possibleDataType.possibleDate) {
264
                    detectedValues[n++].type = DataTypes.DATE;
265
                    continue;
266
                }
267
                if (possibleDataType.possibleTime) {
268
                    detectedValues[n++].type = DataTypes.TIME;
269
                    continue;
270
                }
271
                if (possibleDataType.possibleTimestamp) {
272
                    detectedValues[n++].type = DataTypes.TIMESTAMP;
273
                    continue;
274
                }
275
                if (possibleDataType.possibleGeometry) {
276
                    detectedValues[n++].type = DataTypes.GEOMETRY;
277
                    continue;
278
                }
279
                detectedValues[n++].type = DataTypes.STRING;
280
            }
281
        } catch (Throwable ex) {
282
            throw new RuntimeException("Problems reading file '" + this.getFullFileName() + "' near line " + lineno + ".", ex);
283
        }
284
        return detectedValues;
285
    }
286

    
287
    @SuppressWarnings("UseSpecificCatch")
288
    private boolean isValidLong(String s) {
289
        if (s == null) {
290
            return true;
291
        }
292
        s = s.trim().toLowerCase();
293
        if (s.isEmpty()) {
294
            return true;
295
        }
296
        try {
297
            if (s.startsWith("0x")) {
298
                Long.valueOf(s.substring(2), 16);
299
            } else {
300
                Long.valueOf(s);
301
            }
302
            return true;
303
        } catch (Exception ex) {
304
            return false;
305
        }
306
    }
307

    
308
    @SuppressWarnings("UseSpecificCatch")
309
    private boolean isValidInteger(String s) {
310
        if (s == null) {
311
            return true;
312
        }
313
        s = s.trim().toLowerCase();
314
        if (s.isEmpty()) {
315
            return true;
316
        }
317
        try {
318
            if (s.startsWith("0x")) {
319
                Integer.valueOf(s.substring(2), 16);
320
            } else {
321
                Integer.valueOf(s);
322
            }
323
            return true;
324
        } catch (Exception ex) {
325
            return false;
326
        }
327
    }
328

    
329
}