svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.lib / src / main / java / org / gvsig / fmap / dal / store / simplereader / AutomaticDetectionOfTypes.java @ 47655
History | View | Annotate | Download (13.8 KB)
1 |
package org.gvsig.fmap.dal.store.simplereader; |
---|---|
2 |
|
3 |
import java.io.IOException; |
4 |
import java.math.BigDecimal; |
5 |
import java.net.URL; |
6 |
import java.util.ArrayList; |
7 |
import java.util.List; |
8 |
import java.util.Locale; |
9 |
import org.apache.commons.lang3.StringUtils; |
10 |
import org.gvsig.fmap.dal.DataTypes; |
11 |
import org.gvsig.fmap.geom.GeometryCoercionContext; |
12 |
import org.gvsig.fmap.geom.GeometryLocator; |
13 |
import org.gvsig.tools.ToolsLocator; |
14 |
import org.gvsig.tools.dataTypes.DataTypeUtils; |
15 |
import org.gvsig.tools.dataTypes.DataTypesManager; |
16 |
import org.gvsig.tools.dataTypes.Coercion; |
17 |
import org.gvsig.tools.dataTypes.CoercionContext; |
18 |
import org.gvsig.tools.i18n.I18nManager; |
19 |
import org.gvsig.tools.task.SimpleTaskStatus; |
20 |
|
21 |
/**
|
22 |
*
|
23 |
* @author jjdelcerro
|
24 |
*/
|
25 |
public class AutomaticDetectionOfTypes { |
26 |
|
27 |
public interface Rows { |
28 |
|
29 |
public List<String> nextRowValues(); |
30 |
} |
31 |
|
32 |
public interface DetectedValue { |
33 |
public int getType(); |
34 |
public int getDisplaySize(); |
35 |
public int getPrecision(); |
36 |
public int getScale(); |
37 |
public boolean isBlank(); |
38 |
} |
39 |
|
40 |
private static class DetectedValueImpl implements DetectedValue { |
41 |
|
42 |
private int type; |
43 |
private int displaySize; |
44 |
private int integerDigits; |
45 |
private int decimalDigits; |
46 |
private boolean blank; |
47 |
|
48 |
@Override
|
49 |
public int getType() { |
50 |
return this.type; |
51 |
} |
52 |
|
53 |
@Override
|
54 |
public int getDisplaySize() { |
55 |
return this.displaySize; |
56 |
} |
57 |
|
58 |
@Override
|
59 |
public int getPrecision() { |
60 |
return this.decimalDigits + this.integerDigits; |
61 |
} |
62 |
|
63 |
@Override
|
64 |
public int getScale() { |
65 |
return this.decimalDigits; |
66 |
} |
67 |
|
68 |
public boolean isBlank() { |
69 |
return this.blank; |
70 |
} |
71 |
} |
72 |
|
73 |
private static class PossibleDataType { |
74 |
|
75 |
public boolean possibleInt = true; |
76 |
public boolean possibleFloat = true; |
77 |
public boolean possibleDouble = true; |
78 |
public boolean possibleDecimal = true; |
79 |
public boolean possibleLong = true; |
80 |
public boolean possibleURL = true; |
81 |
public boolean possibleDate = true; |
82 |
public boolean possibleTime = true; |
83 |
public boolean possibleTimestamp = true; |
84 |
public boolean possibleGeometry = true; |
85 |
} |
86 |
|
87 |
private final String filename; |
88 |
|
89 |
public AutomaticDetectionOfTypes() {
|
90 |
this("(unknown)"); |
91 |
} |
92 |
|
93 |
public AutomaticDetectionOfTypes(String filename) { |
94 |
this.filename = filename;
|
95 |
} |
96 |
|
97 |
private String getFullFileName() { |
98 |
return this.filename; |
99 |
} |
100 |
|
101 |
@SuppressWarnings({"UseSpecificCatch", "ResultOfObjectAllocationIgnored"}) |
102 |
public DetectedValue[] detect(int columns, |
103 |
Rows rows, |
104 |
boolean isFirstLineHeader,
|
105 |
Locale locale,
|
106 |
SimpleTaskStatus status |
107 |
) throws IOException { |
108 |
List<PossibleDataType> possibleDataTypes;
|
109 |
List<DetectedValueImpl> detectedValues = new ArrayList<>(columns); |
110 |
I18nManager i18n = ToolsLocator.getI18nManager(); |
111 |
|
112 |
if( status!=null ) { |
113 |
status.message(i18n.getTranslation("_Types_detection"));
|
114 |
} |
115 |
|
116 |
int lineno = 0; |
117 |
try {
|
118 |
if (isFirstLineHeader) {
|
119 |
rows.nextRowValues(); |
120 |
lineno++; |
121 |
} |
122 |
possibleDataTypes = new ArrayList<>(columns); |
123 |
for (int i = 0; i < columns; i++) { |
124 |
possibleDataTypes.add(new PossibleDataType());
|
125 |
detectedValues.add(new DetectedValueImpl());
|
126 |
} |
127 |
if (locale == null) { |
128 |
locale = Locale.getDefault();
|
129 |
} |
130 |
DataTypesManager typeManager = ToolsLocator.getDataTypesManager(); |
131 |
Coercion toDecimal = typeManager.getCoercion(DataTypes.DECIMAL); |
132 |
Coercion toDouble = typeManager.getCoercion(DataTypes.DOUBLE); |
133 |
Coercion toFloat = typeManager.getCoercion(DataTypes.FLOAT); |
134 |
Coercion toDate = typeManager.getCoercion(DataTypes.DATE); |
135 |
Coercion toTime = typeManager.getCoercion(DataTypes.TIME); |
136 |
Coercion toTimestamp = typeManager.getCoercion(DataTypes.TIMESTAMP); |
137 |
Coercion toInt = typeManager.getCoercion(DataTypes.INT); |
138 |
Coercion toLong = typeManager.getCoercion(DataTypes.LONG); |
139 |
Coercion toGeom = typeManager.getCoercion(DataTypes.GEOMETRY); |
140 |
|
141 |
GeometryCoercionContext geometryCoercionContext = GeometryLocator.getGeometryManager().createGeometryCoercionContext(); |
142 |
geometryCoercionContext.setMode(GeometryCoercionContext.MODE_ONERROR_THROW); |
143 |
|
144 |
CoercionContext coercionContext = DataTypeUtils.coerceContextLocale(locale); |
145 |
List<String> row = rows.nextRowValues(); |
146 |
lineno++; |
147 |
|
148 |
int detectedValuesSize = detectedValues.size();
|
149 |
while (row != null) { |
150 |
if( status!=null ) { |
151 |
status.incrementCurrentValue(); |
152 |
if( status.isCancellationRequested() ) {
|
153 |
status.cancel(); |
154 |
break;
|
155 |
} |
156 |
} |
157 |
int rowsize = row.size();
|
158 |
if( rowsize>detectedValuesSize ) {
|
159 |
for (int i = detectedValuesSize; i < rowsize; i++) { |
160 |
possibleDataTypes.add(new PossibleDataType());
|
161 |
detectedValues.add(new DetectedValueImpl());
|
162 |
} |
163 |
detectedValuesSize = detectedValues.size(); |
164 |
} |
165 |
for (int i = 0; i < rowsize; i++) { |
166 |
while( possibleDataTypes.size()<row.size() ) {
|
167 |
possibleDataTypes.add(new PossibleDataType());
|
168 |
} |
169 |
String rawvalue = row.get(i);
|
170 |
if( rawvalue == null ) { |
171 |
continue;
|
172 |
} |
173 |
|
174 |
PossibleDataType possibleDataType = possibleDataTypes.get(i); |
175 |
DetectedValueImpl detectedValue = detectedValues.get(i); |
176 |
if( detectedValue.blank ) {
|
177 |
detectedValue.blank = StringUtils.isBlank(rawvalue); |
178 |
} |
179 |
int displaySize = rawvalue.length();
|
180 |
if( displaySize>detectedValue.displaySize ) {
|
181 |
detectedValue.displaySize = displaySize; |
182 |
} |
183 |
if (possibleDataType.possibleDecimal) {
|
184 |
try {
|
185 |
BigDecimal decimal = (BigDecimal) toDecimal.coerce(rawvalue, coercionContext); |
186 |
possibleDataType.possibleDecimal = true;
|
187 |
if( decimal.scale() > detectedValue.decimalDigits ) {
|
188 |
detectedValue.decimalDigits = decimal.scale(); |
189 |
} |
190 |
int integerDigits = decimal.precision() - decimal.scale();
|
191 |
if( integerDigits>detectedValue.integerDigits ) {
|
192 |
detectedValue.integerDigits = integerDigits; |
193 |
} |
194 |
} catch (Exception ex) { |
195 |
possibleDataType.possibleDecimal = false;
|
196 |
} |
197 |
} |
198 |
if (possibleDataType.possibleDouble) {
|
199 |
try {
|
200 |
toDouble.coerce(rawvalue, coercionContext); |
201 |
possibleDataType.possibleDouble = true;
|
202 |
} catch (Exception ex) { |
203 |
possibleDataType.possibleDouble = false;
|
204 |
} |
205 |
} |
206 |
if (possibleDataType.possibleFloat) {
|
207 |
try {
|
208 |
toFloat.coerce(rawvalue, coercionContext); |
209 |
possibleDataType.possibleFloat = true;
|
210 |
} catch (Exception ex) { |
211 |
possibleDataType.possibleFloat = false;
|
212 |
} |
213 |
} |
214 |
if (possibleDataType.possibleLong) {
|
215 |
possibleDataType.possibleLong = isValidLong(rawvalue); |
216 |
} |
217 |
if (possibleDataType.possibleInt) {
|
218 |
possibleDataType.possibleInt = isValidInteger(rawvalue); |
219 |
} |
220 |
if (possibleDataType.possibleDate) {
|
221 |
try {
|
222 |
toDate.coerce(rawvalue, coercionContext); |
223 |
possibleDataType.possibleDate = true;
|
224 |
} catch (Exception ex) { |
225 |
possibleDataType.possibleDate = false;
|
226 |
} |
227 |
} |
228 |
if (possibleDataType.possibleTime) {
|
229 |
try {
|
230 |
toTime.coerce(rawvalue, coercionContext); |
231 |
possibleDataType.possibleTime = true;
|
232 |
} catch (Exception ex) { |
233 |
possibleDataType.possibleTime = false;
|
234 |
} |
235 |
} |
236 |
if (possibleDataType.possibleTimestamp) {
|
237 |
try {
|
238 |
toTimestamp.coerce(rawvalue, coercionContext); |
239 |
possibleDataType.possibleTimestamp = true;
|
240 |
} catch (Exception ex) { |
241 |
possibleDataType.possibleTimestamp = false;
|
242 |
} |
243 |
} |
244 |
if (possibleDataType.possibleURL) {
|
245 |
try {
|
246 |
new URL((String) rawvalue); |
247 |
possibleDataType.possibleURL = true;
|
248 |
} catch (Exception ex) { |
249 |
possibleDataType.possibleURL = false;
|
250 |
} |
251 |
} |
252 |
|
253 |
if (possibleDataType.possibleGeometry) {
|
254 |
try {
|
255 |
toGeom.coerce((String) rawvalue, geometryCoercionContext);
|
256 |
possibleDataType.possibleGeometry = true;
|
257 |
} catch (Exception ex) { |
258 |
possibleDataType.possibleGeometry = false;
|
259 |
} |
260 |
} |
261 |
} |
262 |
row = rows.nextRowValues(); |
263 |
lineno++; |
264 |
} |
265 |
if( status!=null ) { |
266 |
status.setRangeOfValues(0, lineno);
|
267 |
status.setCurValue(0);
|
268 |
} |
269 |
int n = 0; |
270 |
for (PossibleDataType possibleDataType : possibleDataTypes) {
|
271 |
if (possibleDataType.possibleInt) {
|
272 |
detectedValues.get(n++).type = DataTypes.INT; |
273 |
continue;
|
274 |
} |
275 |
if (possibleDataType.possibleLong) {
|
276 |
detectedValues.get(n++).type = DataTypes.LONG; |
277 |
continue;
|
278 |
} |
279 |
if (possibleDataType.possibleDecimal) {
|
280 |
// Preferimos un Decimal que un Float/Double
|
281 |
detectedValues.get(n++).type = DataTypes.DECIMAL; |
282 |
continue;
|
283 |
} |
284 |
if (possibleDataType.possibleFloat) {
|
285 |
// Forzamos los float a double para evitar perder precision
|
286 |
detectedValues.get(n++).type = DataTypes.DOUBLE; |
287 |
continue;
|
288 |
} |
289 |
if (possibleDataType.possibleDouble) {
|
290 |
detectedValues.get(n++).type = DataTypes.DOUBLE; |
291 |
continue;
|
292 |
} |
293 |
if (possibleDataType.possibleURL) {
|
294 |
detectedValues.get(n++).type = DataTypes.URL; |
295 |
continue;
|
296 |
} |
297 |
if (possibleDataType.possibleDate) {
|
298 |
detectedValues.get(n++).type = DataTypes.DATE; |
299 |
continue;
|
300 |
} |
301 |
if (possibleDataType.possibleTime) {
|
302 |
detectedValues.get(n++).type = DataTypes.TIME; |
303 |
continue;
|
304 |
} |
305 |
if (possibleDataType.possibleTimestamp) {
|
306 |
detectedValues.get(n++).type = DataTypes.TIMESTAMP; |
307 |
continue;
|
308 |
} |
309 |
if (possibleDataType.possibleGeometry) {
|
310 |
detectedValues.get(n++).type = DataTypes.GEOMETRY; |
311 |
continue;
|
312 |
} |
313 |
detectedValues.get(n++).type = DataTypes.STRING; |
314 |
} |
315 |
} catch (Throwable ex) { |
316 |
status.abort(); |
317 |
throw new RuntimeException("Problems reading file '" + this.getFullFileName() + "' near line " + lineno + ".", ex); |
318 |
} |
319 |
DetectedValue[] r = detectedValues.toArray(new DetectedValue[detectedValues.size()]); |
320 |
return r;
|
321 |
} |
322 |
|
323 |
@SuppressWarnings("UseSpecificCatch") |
324 |
private boolean isValidLong(String s) { |
325 |
if (s == null) { |
326 |
return true; |
327 |
} |
328 |
s = s.trim().toLowerCase(); |
329 |
if (s.isEmpty()) {
|
330 |
return true; |
331 |
} |
332 |
try {
|
333 |
if (s.startsWith("0x")) { |
334 |
Long.valueOf(s.substring(2), 16); |
335 |
} else {
|
336 |
Long.valueOf(s);
|
337 |
} |
338 |
return true; |
339 |
} catch (Exception ex) { |
340 |
return false; |
341 |
} |
342 |
} |
343 |
|
344 |
@SuppressWarnings("UseSpecificCatch") |
345 |
private boolean isValidInteger(String s) { |
346 |
if (s == null) { |
347 |
return true; |
348 |
} |
349 |
s = s.trim().toLowerCase(); |
350 |
if (s.isEmpty()) {
|
351 |
return true; |
352 |
} |
353 |
try {
|
354 |
if (s.startsWith("0x")) { |
355 |
Integer.valueOf(s.substring(2), 16); |
356 |
} else {
|
357 |
Integer.valueOf(s);
|
358 |
} |
359 |
return true; |
360 |
} catch (Exception ex) { |
361 |
return false; |
362 |
} |
363 |
} |
364 |
|
365 |
} |