Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / CSVUtils.java @ 47636

History | View | Annotate | Download (18.4 KB)

1
/**
2
 * gvSIG. Desktop Geographic Information System.
3
 *
4
 * Copyright (C) 2007-2013 gvSIG Association.
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 3
9
 * of the License, or (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
 * MA 02110-1301, USA.
20
 *
21
 * For any additional information, do not hesitate to contact us
22
 * at info AT gvsig.com, or visit our website www.gvsig.com.
23
 */
24

    
25
package org.gvsig.fmap.dal.store.csv;
26

    
27
import java.io.File;
28
import java.io.FileInputStream;
29
import java.io.FileNotFoundException;
30
import java.io.FileReader;
31
import java.io.IOException;
32
import java.io.InputStream;
33
import java.io.InputStreamReader;
34
import java.io.Reader;
35
import java.nio.charset.Charset;
36
import java.util.Locale;
37
import java.util.Map;
38
import org.apache.commons.io.FilenameUtils;
39
import org.apache.commons.io.IOUtils;
40
import org.apache.commons.io.input.BOMInputStream;
41
import org.apache.commons.lang3.ArrayUtils;
42
import org.apache.commons.lang3.StringUtils;
43
import org.gvsig.fmap.dal.DataTypes;
44
import org.gvsig.fmap.dal.feature.EditableFeatureAttributeDescriptor;
45
import org.gvsig.fmap.dal.feature.EditableFeatureType;
46
import static org.gvsig.fmap.dal.store.csv.CSVStoreProvider.NAME;
47
import org.gvsig.fmap.dal.store.csv.simplereaders.CSVReaderSuperCSV;
48
import org.gvsig.fmap.dal.store.csv.simplereaders.FixedLenReader;
49
import org.gvsig.fmap.dal.store.csv.simplereaders.GMLReader;
50
import org.gvsig.fmap.dal.store.csv.simplereaders.JSonReader;
51
import org.gvsig.fmap.dal.store.simplereader.AutomaticDetectionOfTypes;
52
import org.gvsig.fmap.dal.store.simplereader.FieldTypeParser;
53
import org.gvsig.fmap.dal.store.simplereader.SimpleReader;
54
import org.gvsig.fmap.geom.Geometry;
55
import org.gvsig.fmap.geom.GeometryLocator;
56
import org.gvsig.fmap.geom.GeometryManager;
57
import org.gvsig.fmap.geom.type.GeometryType;
58
import org.gvsig.tools.dynobject.Tags;
59
import org.gvsig.tools.task.SimpleTaskStatus;
60
import org.slf4j.Logger;
61
import org.slf4j.LoggerFactory;
62

    
63
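/**
 * Static helpers for the CSV store: opening the source file, choosing a
 * reader for it and building the feature type from its headers.
 *
 * @author gvSIG Team
 */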
67
@SuppressWarnings("UseSpecificCatch")
68
public class CSVUtils {
69
    
70
    private static final Logger LOGGER = LoggerFactory.getLogger(CSVUtils.class);
71

    
72
    /**
     * Not instantiable: every member of this class is static.
     */
    private CSVUtils() {
        // Intentionally empty; the constructor only exists to be private.
    }
75
    public static InputStreamReader openFile(File f, String charsetName) throws FileNotFoundException {
76
        String fullFileName = f==null? "NULL":f.getAbsolutePath();
77
        Charset charset = Charset.defaultCharset();
78
        InputStream fis = new BOMInputStream(new FileInputStream(f));
79
        if (StringUtils.isNotBlank(charsetName)) {
80
            if (Charset.isSupported(charsetName)) {
81
                try {
82
                    charset = Charset.forName(charsetName);
83
                } catch (Throwable th) {
84
                    LOGGER.warn("Can't use charset '" + charsetName + "' for read csv '" + fullFileName + "'.", th);
85
                }
86
            } else {
87
                LOGGER.warn("charset '" + charsetName + "' not supported for read csv '" + fullFileName + "'.");
88
            }
89
        }
90
        InputStreamReader isr = new InputStreamReader(fis, charset);
91
        return isr;
92
    }
93
    
94
    public static boolean loadFeatureType(CSVStoreParameters parameters, EditableFeatureType featureType, boolean  detectTypes, SimpleTaskStatus status) throws IOException {
95
        InputStreamReader in = null;
96
        SimpleReader reader = null;
97
        try {
98
            String headers[];
99

    
100
            in = openFile(
101
                    parameters.getFile(),
102
                    CSVStoreParameters.getCharset(parameters)
103
            );
104

    
105
            reader = getSimpleReader(parameters, in);
106

    
107
            headers = CSVStoreParameters.getHeaders(parameters);
108
            if (headers == null) {
109
                if (CSVStoreParameters.isFirstLineHeader(parameters)) {
110
                    headers = reader.getHeader();
111
                    if (headers == null) {
112
                        if (CSVStoreParameters.getIgnoreErrors(parameters)) {
113
                            headers = getFixedHeaders(reader.getColumnsCount());
114
                        } else {
115
                            String msg = "Can't retrieve header from csv file '"
116
                                    + parameters.getFile()
117
                                            .getAbsolutePath()
118
                                    + "' and not specified in the parameters.";
119
                            LOGGER.warn(msg);
120
                            throw new RuntimeException(msg);
121
                        }
122
                    }
123
                } else {
124
                    headers = getFixedHeaders(reader.getColumnsCount());
125
                }
126
            } else {
127
                if (CSVStoreParameters.isFirstLineHeader(parameters)) {
128
                    reader.getHeader(); // Skip and ignore the header of file
129
                }
130
            }
131

    
132
            AutomaticDetectionOfTypes.DetectedValue[] detectedTypes = null;
133
            if( detectTypes ) {
134
                detectedTypes = automaticDetectionOfTypes(parameters, headers, status);
135
            }
136
            if( StringUtils.isBlank(headers[headers.length-1]) &&
137
                (detectedTypes==null || detectedTypes[headers.length-1].isBlank()) ) {
138
                headers = ArrayUtils.remove(headers, headers.length-1);
139
            }
140
            if (detectedTypes != null && detectedTypes.length > headers.length) {
141
                // Se han detectado mas columnas que las que hay en la cabezera,
142
                // a?adimos mas columnas a la cabezera.
143
                String[] headers2 = new String[detectedTypes.length];
144
                for (int i = 0; i < headers2.length; i++) {
145
                    if (i < headers.length) {
146
                        headers2[i] = headers[i];
147
                    } else {
148
                        headers2[i] = getFixedHeader(i);
149
                    }
150
                }
151
                headers = headers2;
152
            }
153
            for (int i = 0; i < headers.length; i++) {
154
                if (StringUtils.isBlank(headers[i])) {
155
                    headers[i] = getFixedHeader(i);
156
                }
157
            }
158
            // Initialize the feature types
159
            return fillFeatureType(parameters, featureType,  headers, detectedTypes);
160
        } finally {
161
            IOUtils.closeQuietly(in);
162
            IOUtils.closeQuietly(reader);
163
        }
164
    }
165
    public static SimpleReader getSimpleReader(CSVStoreParameters parameters, Reader in) throws IOException {
166
        SimpleReader reader;
167
        String filename = CSVStoreParameters.getFileName(parameters);
168
        if (FilenameUtils.isExtension(filename, "json")){
169
            reader= new JSonReader(in,parameters);
170
        } else if (FilenameUtils.isExtension(filename, "gml")){
171
            reader= new GMLReader(in,parameters);
172
        } else if (CSVStoreParameters.getRawFieldsDefinition(parameters) != null) {
173
            reader = new FixedLenReader(in, parameters);
174
        } else {
175
            reader = new CSVReaderSuperCSV(in, parameters);
176
        }
177
        return reader;
178
    }
179

    
180
    private static String getFixedHeader(int column) {
181
        char[] header = new char[3];
182

    
183
        String s = String.format("%03d", column);
184
        header[0] = (char) (s.charAt(0) + 17);
185
        header[1] = (char) (s.charAt(1) + 17);
186
        header[2] = (char) (s.charAt(2) + 17);
187
        return String.valueOf(header);
188
    }
189

    
190
    private static String[] getFixedHeaders(int count) {
191
        String[] headers = new String[count];
192
        for (int i = 0; i < headers.length; i++) {
193
            headers[i] = getFixedHeader(i);
194
        }
195
        return headers;
196
    }
197

    
198
    private static AutomaticDetectionOfTypes.DetectedValue[] automaticDetectionOfTypes(CSVStoreParameters parameters, String[] headers, SimpleTaskStatus status) throws IOException {
199
        String fullFileName = parameters.getFile()==null? "NULL":parameters.getFile().getAbsolutePath();
200
        boolean automatic_types_detection = CSVStoreParameters.getAutomaticTypesDetection(parameters);
201
        if (!automatic_types_detection) {
202
            return null;
203
        }
204
        AutomaticDetectionOfTypes.DetectedValue[] types = null;
205

    
206
        Reader in = null;
207
        SimpleReader reader = null;
208

    
209
        try {
210
            in = openFile(
211
                    parameters.getFile(),
212
                    CSVStoreParameters.getCharset(parameters)
213
            );
214
            reader = getSimpleReader(parameters, in);
215
            AutomaticDetectionOfTypes x = new AutomaticDetectionOfTypes(
216
                    fullFileName
217
            );
218
            types = x.detect(
219
                    headers.length,
220
                    reader,
221
                    CSVStoreParameters.isFirstLineHeader(parameters),
222
                    CSVStoreParameters.getLocale(parameters),
223
                    status
224
            );
225
        } catch (Exception ex) {
226
            int lineno = 0;
227
            if (reader != null) {
228
                lineno = reader.getLine();
229
            }
230
            throw new RuntimeException("Problems reading file '" + fullFileName + "' near line " + lineno + ".", ex);
231

    
232
        } finally {
233
            IOUtils.closeQuietly(reader);
234
            IOUtils.closeQuietly(in);
235
        }
236
        return types;
237
    }
238
    
239
    private static boolean fillFeatureType(CSVStoreParameters parameters, EditableFeatureType fType, String headers[], AutomaticDetectionOfTypes.DetectedValue automaticTypes[]) {
240
        String fullFileName = parameters.getFile()==null? "":parameters.getFile().getAbsolutePath();
241
        String providerName = NAME;
242
        
243
        fType.setHasOID(true);
244

    
245

    
246
        FieldTypeParser[] fieldTypes = new FieldTypeParser[headers.length];
247
        //
248
        // Calculamos cuales pueden ser los tipos de datos
249
        //
250
        for (int i = 0; i < fieldTypes.length; i++) {
251
            fieldTypes[i] = new FieldTypeParser(providerName, fullFileName);
252
        }
253

    
254
        // Asuminos los tipos pasados por parametro, que se supone
255
        // son los detectados automaticamente.
256
        if (automaticTypes != null) {
257
            for (int i = 0; i < fieldTypes.length && i < automaticTypes.length; i++) {
258
                fieldTypes[i].detectedValue = automaticTypes[i];
259
                fieldTypes[i].type = automaticTypes[i].getType();
260
            }
261
        }
262
        // Luego probamos con lo que diga las cabezeras del CVS, sobreescribiendo
263
        // los tipos anteriores en caso de definirse en la cabezara.
264
        boolean all_fields_declare_type = true;
265
        for (int i = 0; i < fieldTypes.length; i++) {
266
            if (!fieldTypes[i].parse(headers[i])) {
267
                LOGGER.warn("Can't parse header of field "+i+ "( "+headers[i]+") in '"+providerName+"' file '" + fullFileName + "'.");
268
            }
269
            if( fieldTypes[i].type == DataTypes.UNKNOWN ) {
270
                all_fields_declare_type = false;
271
                fieldTypes[i].type = DataTypes.STRING;
272
            }
273
        }
274

    
275
        // Y por ultimo hacemos caso a lo que se haya especificado en los parametros
276
        // de apertura del CSV, teniendo esto prioridad sobre todo.
277
        String param_types_def = CSVStoreParameters.getRawFieldTypes(parameters);
278
        if (StringUtils.isNotBlank(param_types_def)) {
279
            String sep = CSVStoreParameters.getDelimiter(param_types_def);
280
            if (StringUtils.isNotBlank(sep)) {
281
                String[] param_types = param_types_def.split(sep);
282
                FieldTypeParser parser = new FieldTypeParser(providerName, fullFileName);
283
                for (String param_type : param_types) {
284
                    parser.clear();
285
                    parser.parse(param_type);
286
                    for (FieldTypeParser fieldType : fieldTypes) {
287
                        if (StringUtils.equalsIgnoreCase(fieldType.name, parser.name)) {
288
                            fieldType.copyFrom(parser);
289
                            break;
290
                        }
291
                    }
292
                }
293
            }
294
        }
295
        //
296
        // Una vez ya sabemos los tipos de datos rellenamos el feature-type
297
        //
298
        Tags ftypeTags = fType.getTags();
299
        for (FieldTypeParser fieldType : fieldTypes) {
300
            EditableFeatureAttributeDescriptor fad = fType.add(fieldType.name, fieldType.type);
301
            if( fieldType.detectedValue!=null ) {
302
                fad.setDisplaySize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
303
                fad.setSize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
304
                if( fad.getPrecision()<fieldType.detectedValue.getPrecision() ) {
305
                    fad.setPrecision(fieldType.detectedValue.getPrecision());
306
                }
307
                if( fad.getScale()<fieldType.detectedValue.getScale()) {
308
                    fad.setScale(fieldType.detectedValue.getScale());
309
                }
310
            } else {
311
                fad.setDisplaySize(fieldType.size);
312
            }
313
            if (fieldType.type == DataTypes.GEOMETRY ) {
314
                fad.setGeometryType(fieldType.geomType, fieldType.geomSubtype);
315
                if( fType.getDefaultGeometryAttributeName() == null ) {
316
                    fType.setDefaultGeometryAttributeName(fieldType.name);
317
                }
318
            } 
319
            Locale locale = null;
320
            if (fieldType.type == DataTypes.TIMESTAMP ) {
321
                if(!CSVStoreParameters.isBlankOrDefaultLocale(parameters)){
322
                    locale = CSVStoreParameters.getLocale(parameters);
323
                }
324
            } else {
325
                locale = CSVStoreParameters.getLocale(parameters);
326
            }
327
            fad.setLocale(locale);
328
            for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) {
329
                try {
330
                    switch(entry.getKey().toLowerCase()) {
331
                        case "expression":
332
                            // Los campos calculados los procesamos en una segunda
333
                            // pasada, cuando ya estan definidos el resto de los campos
334
                            // ya que pueden requerir campos que aun no se han definido.
335
                            break;
336
                        default:
337
                                fad.set(entry.getKey(), entry.getValue());
338
                            }
339
                } catch (Exception ex) {
340
                    LOGGER.warn("Can't set property '"+entry.getKey()+"' of '"+fad.getName()+"'.", ex);
341
                }
342
            }            
343
            Tags tags = fad.getTags();
344
            for (Map.Entry<String, String> entry : fieldType.tags.entrySet()) {
345
                tags.set(entry.getKey(), entry.getValue());
346
            }
347
            for (Map.Entry<String, String> entry : fieldType.typetags.entrySet()) {
348
                ftypeTags.set(entry.getKey(), entry.getValue());
349
            }
350
            for (Map.Entry<String, String> entry : fieldType.typeAssignments.entrySet()) {
351
                try {
352
                    fType.set(entry.getKey(), entry.getValue());
353
                } catch(Exception ex) {
354
                    LOGGER.warn("Can't set attribute '"+entry.getKey()+"' in the feature type.", ex);
355
                }
356
            }
357
        }
358
        // Processamos ahora los campos calculados
359
        for (FieldTypeParser fieldType : fieldTypes) {
360
            EditableFeatureAttributeDescriptor fad = fType.getEditableAttributeDescriptor(fieldType.name);
361
            for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) {
362
                try {
363
                    switch(entry.getKey().toLowerCase()) {
364
                        case "expression":
365
                            fad.set(entry.getKey(), entry.getValue());
366
                            break;
367
                    }
368
                } catch (Exception ex) {
369
                    LOGGER.warn("Can't set property '"+entry.getKey()+"' in '"+fad.getName()+"' of '"+fullFileName+"'.", ex);
370
                }
371
            }
372
        }
373
        String[] pointDimensionNames = CSVStoreParameters.getPointDimensionNames(parameters);
374
        if ( pointDimensionNames != null ) {
375
            CSVPointAttributeEmulator emulator = new CSVPointAttributeEmulator(pointDimensionNames);
376
            String columnName = CSVStoreParameters.getPointColumnName(parameters);
377
            if( StringUtils.isBlank(columnName) ) {
378
                columnName = "geom";
379
            }
380
            EditableFeatureAttributeDescriptor attr = fType.add(columnName, DataTypes.GEOMETRY, emulator);
381
            GeometryManager geommgr = GeometryLocator.getGeometryManager();
382
            GeometryType gt;
383
            try {
384
                if ( emulator.getFieldNames() != null && emulator.getFieldNames().length <= 2 ) {
385
                        gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM2D);
386
                } else {
387
                        gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM3D);
388
                }
389
                attr.setGeometryType(gt);
390
            } catch (Exception e) {
391
                LOGGER.warn("Can't set geometry type for the calculated field in '"+providerName+"' file '" + fullFileName + "'.", e);
392
            }
393
        }        
394
        
395
        String geometry_column = CSVStoreParameters.getGeometryColumn(parameters);
396
        if (!StringUtils.isEmpty(geometry_column)) {
397
            EditableFeatureAttributeDescriptor attr = (EditableFeatureAttributeDescriptor) fType.get(geometry_column);
398
            if (attr != null ) {
399
                if( attr.getType() != DataTypes.GEOMETRY ) {
400
                    attr.setDataType(DataTypes.GEOMETRY);
401
                }
402
                GeometryManager geommgr = GeometryLocator.getGeometryManager();
403
                GeometryType gt;
404
                try {
405
                    gt = geommgr.getGeometryType(
406
                            CSVStoreParameters.getGeometryType(parameters),
407
                            CSVStoreParameters.getGeometrySubType(parameters)
408
                    );
409
                    attr.setGeometryType(gt);
410
                } catch (Exception e) {
411
                    LOGGER.warn("Can't set geometry type for the calculated field in CSV file '" + fullFileName + "'.", e);
412
                }
413
                fType.setDefaultGeometryAttributeName(geometry_column);
414
            }
415
        }
416
        return all_fields_declare_type;
417
    }
418
    
419
}