svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / AutomaticDetectionOfTypes.java @ 45775
History | View | Annotate | Download (12.1 KB)
1 |
package org.gvsig.fmap.dal.store.csv; |
---|---|
2 |
|
3 |
import java.io.IOException; |
4 |
import java.math.BigDecimal; |
5 |
import java.net.URL; |
6 |
import java.util.ArrayList; |
7 |
import java.util.List; |
8 |
import java.util.Locale; |
9 |
import org.apache.commons.lang3.StringUtils; |
10 |
import org.gvsig.fmap.dal.DataTypes; |
11 |
import org.gvsig.tools.ToolsLocator; |
12 |
import org.gvsig.tools.dataTypes.DataTypeUtils; |
13 |
import org.gvsig.tools.dataTypes.DataTypesManager; |
14 |
import org.gvsig.tools.dataTypes.Coercion; |
15 |
import org.gvsig.tools.dataTypes.CoercionContext; |
16 |
|
17 |
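/**
 * Detects, column by column, the most specific data type able to hold every
 * value found in a set of text rows, together with size information
 * (display size, precision and scale) for each column.
 *
 * @author jjdelcerro
 */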
|
21 |
public class AutomaticDetectionOfTypes { |
22 |
|
23 |
/**
 * Source of the rows to analyse, consumed one row at a time.
 */
public interface Rows {

    /**
     * Supplies the cell values of the next row.
     *
     * @return the values of the next row; the detection loop stops reading as
     *         soon as this returns {@code null}
     */
    List<String> nextRowValues();
}
27 |
|
28 |
/**
 * Read-only view of what was detected for a single column.
 */
public interface DetectedValue {

    /** @return the detected data type code of the column. */
    int getType();

    /** @return the display size recorded for the column. */
    int getDisplaySize();

    /** @return the numeric precision recorded for the column. */
    int getPrecision();

    /** @return the numeric scale recorded for the column. */
    int getScale();

    /** @return whether the column was flagged as blank. */
    boolean isBlank();
}
35 |
|
36 |
private static class DetectedValueImpl implements DetectedValue { |
37 |
|
38 |
private int type; |
39 |
private int displaySize; |
40 |
private int integerDigits; |
41 |
private int decimalDigits; |
42 |
private boolean blank; |
43 |
|
44 |
@Override
|
45 |
public int getType() { |
46 |
return this.type; |
47 |
} |
48 |
|
49 |
@Override
|
50 |
public int getDisplaySize() { |
51 |
return this.displaySize; |
52 |
} |
53 |
|
54 |
@Override
|
55 |
public int getPrecision() { |
56 |
return this.decimalDigits + this.integerDigits; |
57 |
} |
58 |
|
59 |
@Override
|
60 |
public int getScale() { |
61 |
return this.decimalDigits; |
62 |
} |
63 |
|
64 |
public boolean isBlank() { |
65 |
return this.blank; |
66 |
} |
67 |
} |
68 |
|
69 |
private static class PossibleDataType { |
70 |
|
71 |
public boolean possibleInt = true; |
72 |
public boolean possibleFloat = true; |
73 |
public boolean possibleDouble = true; |
74 |
public boolean possibleDecimal = true; |
75 |
public boolean possibleLong = true; |
76 |
public boolean possibleURL = true; |
77 |
public boolean possibleDate = true; |
78 |
public boolean possibleTime = true; |
79 |
public boolean possibleTimestamp = true; |
80 |
public boolean possibleGeometry = true; |
81 |
} |
82 |
|
83 |
private final String filename; |
84 |
|
85 |
/**
 * Creates a detector that is not associated with any file name; the
 * placeholder "(unknown)" is used instead.
 */
public AutomaticDetectionOfTypes() {
    this("(unknown)");
}
88 |
|
89 |
/**
 * Creates a detector for the given file.
 *
 * @param filename name of the file being analysed; it is only stored here
 *                 and later reported in the error message when detection
 *                 fails
 */
public AutomaticDetectionOfTypes(String filename) {
    this.filename = filename;
}
92 |
|
93 |
private String getFullFileName() { |
94 |
return this.filename; |
95 |
} |
96 |
|
97 |
@SuppressWarnings({"UseSpecificCatch", "ResultOfObjectAllocationIgnored"}) |
98 |
public DetectedValue[] detect(int columns, |
99 |
Rows rows, |
100 |
boolean isFirstLineHeader,
|
101 |
Locale locale
|
102 |
) throws IOException { |
103 |
List<PossibleDataType> possibleDataTypes;
|
104 |
DetectedValueImpl[] detectedValues = new DetectedValueImpl [columns]; |
105 |
|
106 |
int lineno = 0; |
107 |
try {
|
108 |
if (isFirstLineHeader) {
|
109 |
rows.nextRowValues(); |
110 |
lineno++; |
111 |
} |
112 |
possibleDataTypes = new ArrayList<>(columns); |
113 |
for (int i = 0; i < columns; i++) { |
114 |
possibleDataTypes.add(new PossibleDataType());
|
115 |
detectedValues[i] = new DetectedValueImpl();
|
116 |
} |
117 |
if (locale == null) { |
118 |
locale = Locale.getDefault();
|
119 |
} |
120 |
DataTypesManager typeManager = ToolsLocator.getDataTypesManager(); |
121 |
Coercion toDecimal = typeManager.getCoercion(DataTypes.DECIMAL); |
122 |
Coercion toDouble = typeManager.getCoercion(DataTypes.DOUBLE); |
123 |
Coercion toFloat = typeManager.getCoercion(DataTypes.FLOAT); |
124 |
Coercion toDate = typeManager.getCoercion(DataTypes.DATE); |
125 |
Coercion toTime = typeManager.getCoercion(DataTypes.TIME); |
126 |
Coercion toTimestamp = typeManager.getCoercion(DataTypes.TIMESTAMP); |
127 |
Coercion toInt = typeManager.getCoercion(DataTypes.INT); |
128 |
Coercion toLong = typeManager.getCoercion(DataTypes.LONG); |
129 |
Coercion toGeom = typeManager.getCoercion(DataTypes.GEOMETRY); |
130 |
|
131 |
CoercionContext coercionContext = DataTypeUtils.coerceContextLocale(locale); |
132 |
List<String> row = rows.nextRowValues(); |
133 |
lineno++; |
134 |
|
135 |
while (row != null) { |
136 |
for (int i = 0; i < row.size(); i++) { |
137 |
while( possibleDataTypes.size()<row.size() ) {
|
138 |
possibleDataTypes.add(new PossibleDataType());
|
139 |
} |
140 |
String rawvalue = row.get(i);
|
141 |
if( rawvalue == null ) { |
142 |
continue;
|
143 |
} |
144 |
PossibleDataType possibleDataType = possibleDataTypes.get(i); |
145 |
DetectedValueImpl detectedValue = detectedValues[i]; |
146 |
if( detectedValue.blank ) {
|
147 |
detectedValue.blank = StringUtils.isBlank(rawvalue); |
148 |
} |
149 |
int displaySize = rawvalue.length();
|
150 |
if( displaySize>detectedValue.displaySize ) {
|
151 |
detectedValue.displaySize = displaySize; |
152 |
} |
153 |
if (possibleDataType.possibleDecimal) {
|
154 |
try {
|
155 |
BigDecimal decimal = (BigDecimal) toDecimal.coerce(rawvalue, coercionContext); |
156 |
possibleDataType.possibleDecimal = true;
|
157 |
if( decimal.scale() > detectedValue.decimalDigits ) {
|
158 |
detectedValue.decimalDigits = decimal.scale(); |
159 |
} |
160 |
int integerDigits = decimal.precision() - decimal.scale();
|
161 |
if( integerDigits>detectedValue.integerDigits ) {
|
162 |
detectedValue.integerDigits = integerDigits; |
163 |
} |
164 |
} catch (Exception ex) { |
165 |
possibleDataType.possibleDecimal = false;
|
166 |
} |
167 |
} |
168 |
if (possibleDataType.possibleDouble) {
|
169 |
try {
|
170 |
toDouble.coerce(rawvalue, coercionContext); |
171 |
possibleDataType.possibleDouble = true;
|
172 |
} catch (Exception ex) { |
173 |
possibleDataType.possibleDouble = false;
|
174 |
} |
175 |
} |
176 |
if (possibleDataType.possibleFloat) {
|
177 |
try {
|
178 |
toFloat.coerce(rawvalue, coercionContext); |
179 |
possibleDataType.possibleFloat = true;
|
180 |
} catch (Exception ex) { |
181 |
possibleDataType.possibleFloat = false;
|
182 |
} |
183 |
} |
184 |
if (possibleDataType.possibleLong) {
|
185 |
possibleDataType.possibleLong = isValidLong(rawvalue); |
186 |
} |
187 |
if (possibleDataType.possibleInt) {
|
188 |
possibleDataType.possibleInt = isValidInteger(rawvalue); |
189 |
} |
190 |
if (possibleDataType.possibleDate) {
|
191 |
try {
|
192 |
toDate.coerce(rawvalue, coercionContext); |
193 |
possibleDataType.possibleDate = true;
|
194 |
} catch (Exception ex) { |
195 |
possibleDataType.possibleDate = false;
|
196 |
} |
197 |
} |
198 |
if (possibleDataType.possibleTime) {
|
199 |
try {
|
200 |
toTime.coerce(rawvalue, coercionContext); |
201 |
possibleDataType.possibleTime = true;
|
202 |
} catch (Exception ex) { |
203 |
possibleDataType.possibleTime = false;
|
204 |
} |
205 |
} |
206 |
if (possibleDataType.possibleTimestamp) {
|
207 |
try {
|
208 |
toTimestamp.coerce(rawvalue, coercionContext); |
209 |
possibleDataType.possibleTimestamp = true;
|
210 |
} catch (Exception ex) { |
211 |
possibleDataType.possibleTimestamp = false;
|
212 |
} |
213 |
} |
214 |
if (possibleDataType.possibleURL) {
|
215 |
try {
|
216 |
new URL((String) rawvalue); |
217 |
possibleDataType.possibleURL = true;
|
218 |
} catch (Exception ex) { |
219 |
possibleDataType.possibleURL = false;
|
220 |
} |
221 |
} |
222 |
|
223 |
if (possibleDataType.possibleGeometry) {
|
224 |
try {
|
225 |
toGeom.coerce((String) rawvalue);
|
226 |
possibleDataType.possibleGeometry = true;
|
227 |
} catch (Exception ex) { |
228 |
possibleDataType.possibleGeometry = false;
|
229 |
} |
230 |
} |
231 |
} |
232 |
row = rows.nextRowValues(); |
233 |
lineno++; |
234 |
} |
235 |
int n = 0; |
236 |
for (PossibleDataType possibleDataType : possibleDataTypes) {
|
237 |
if (possibleDataType.possibleInt) {
|
238 |
detectedValues[n++].type = DataTypes.INT; |
239 |
continue;
|
240 |
} |
241 |
if (possibleDataType.possibleLong) {
|
242 |
detectedValues[n++].type = DataTypes.LONG; |
243 |
continue;
|
244 |
} |
245 |
if (possibleDataType.possibleDecimal) {
|
246 |
// Preferimos un Decimal que un Float/Double
|
247 |
detectedValues[n++].type = DataTypes.DECIMAL; |
248 |
continue;
|
249 |
} |
250 |
if (possibleDataType.possibleFloat) {
|
251 |
// Forzamos los float a double para evitar perder precision
|
252 |
detectedValues[n++].type = DataTypes.DOUBLE; |
253 |
continue;
|
254 |
} |
255 |
if (possibleDataType.possibleDouble) {
|
256 |
detectedValues[n++].type = DataTypes.DOUBLE; |
257 |
continue;
|
258 |
} |
259 |
if (possibleDataType.possibleURL) {
|
260 |
detectedValues[n++].type = DataTypes.URL; |
261 |
continue;
|
262 |
} |
263 |
if (possibleDataType.possibleDate) {
|
264 |
detectedValues[n++].type = DataTypes.DATE; |
265 |
continue;
|
266 |
} |
267 |
if (possibleDataType.possibleTime) {
|
268 |
detectedValues[n++].type = DataTypes.TIME; |
269 |
continue;
|
270 |
} |
271 |
if (possibleDataType.possibleTimestamp) {
|
272 |
detectedValues[n++].type = DataTypes.TIMESTAMP; |
273 |
continue;
|
274 |
} |
275 |
if (possibleDataType.possibleGeometry) {
|
276 |
detectedValues[n++].type = DataTypes.GEOMETRY; |
277 |
continue;
|
278 |
} |
279 |
detectedValues[n++].type = DataTypes.STRING; |
280 |
} |
281 |
} catch (Throwable ex) { |
282 |
throw new RuntimeException("Problems reading file '" + this.getFullFileName() + "' near line " + lineno + ".", ex); |
283 |
} |
284 |
return detectedValues;
|
285 |
} |
286 |
|
287 |
@SuppressWarnings("UseSpecificCatch") |
288 |
private boolean isValidLong(String s) { |
289 |
if (s == null) { |
290 |
return true; |
291 |
} |
292 |
s = s.trim().toLowerCase(); |
293 |
if (s.isEmpty()) {
|
294 |
return true; |
295 |
} |
296 |
try {
|
297 |
if (s.startsWith("0x")) { |
298 |
Long.valueOf(s.substring(2), 16); |
299 |
} else {
|
300 |
Long.valueOf(s);
|
301 |
} |
302 |
return true; |
303 |
} catch (Exception ex) { |
304 |
return false; |
305 |
} |
306 |
} |
307 |
|
308 |
@SuppressWarnings("UseSpecificCatch") |
309 |
private boolean isValidInteger(String s) { |
310 |
if (s == null) { |
311 |
return true; |
312 |
} |
313 |
s = s.trim().toLowerCase(); |
314 |
if (s.isEmpty()) {
|
315 |
return true; |
316 |
} |
317 |
try {
|
318 |
if (s.startsWith("0x")) { |
319 |
Integer.valueOf(s.substring(2), 16); |
320 |
} else {
|
321 |
Integer.valueOf(s);
|
322 |
} |
323 |
return true; |
324 |
} catch (Exception ex) { |
325 |
return false; |
326 |
} |
327 |
} |
328 |
|
329 |
} |