Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / simplereaders / CSVReaderSuperCSV.java @ 47652

History | View | Annotate | Download (11.3 KB)

1
package org.gvsig.fmap.dal.store.csv.simplereaders;
2

    
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileInputStream;
6
import java.io.FileNotFoundException;
7
import java.io.FileReader;
8
import java.io.IOException;
9
import java.io.InputStreamReader;
10
import java.io.Reader;
11
import java.io.UnsupportedEncodingException;
12
import java.util.List;
13
import java.util.function.Function;
14
import org.apache.commons.io.FilenameUtils;
15
import org.apache.commons.io.IOUtils;
16
import org.apache.commons.io.input.CloseShieldReader;
17
import org.apache.commons.lang3.StringUtils;
18
import org.apache.commons.text.StringEscapeUtils;
19
import org.gvsig.fmap.dal.store.csv.CSVStoreParameters;
20
import org.gvsig.fmap.dal.store.simplereader.virtualrows.RandomAccessFileIndex;
21
import org.gvsig.fmap.dal.store.simplereader.virtualrows.RandomAccessFileReader;
22
import static org.gvsig.fmap.dal.store.simplereader.virtualrows.RandomAccessFileReader.FILTER_NONE;
23
import org.gvsig.fmap.dal.store.csv.virtualrows.SuperCSVList;
24
import org.gvsig.fmap.dal.store.simplereader.simplereaders.AbstractSimpleReader;
25
import org.gvsig.tools.dynobject.DynObject;
26
import org.gvsig.tools.task.SimpleTaskStatus;
27
import org.gvsig.tools.util.GetItemWithSize64;
28
import org.slf4j.Logger;
29
import org.slf4j.LoggerFactory;
30
import org.supercsv.comment.CommentStartsWith;
31
import org.supercsv.io.CsvListReader;
32
import org.supercsv.prefs.CsvPreference;
33
import org.supercsv.quote.QuoteMode;
34

    
35
public class CSVReaderSuperCSV extends AbstractSimpleReader {
36

    
37
    //
38
    // http://supercsv.sourceforge.net/examples_reading.html
39
    // http://supercsv.sourceforge.net/apidocs/index.html
40
    //
41
    private static final Logger LOGGER = LoggerFactory.getLogger(CSVReaderSuperCSV.class);
42

    
43
    private CsvListReader reader;
44
    private final CSVStoreParameters parameters;
45
    private List<String>  nextLine;
46
    private int columns;
47

    
48
    public CSVReaderSuperCSV(Reader in, CSVStoreParameters parameters) {
49
        this(parameters);
50
        this.reader = new CsvListReader(in, getCSVPreferences());
51
    }    
52
    
53
    public CSVReaderSuperCSV(CSVStoreParameters parameters) {
54
        this.reader = null;
55
        this.parameters = parameters;
56
        this.reader = null;
57
        this.nextLine = null;
58
        this.columns = 0;
59
    }
60

    
61
    public CSVStoreParameters getParameters() {
62
        return this.parameters;
63
    }
64

    
65
    @Override
66
    public String[] getHeader() throws IOException {
67
        return this.reader.getHeader(true);
68
    }
69
    
70
    @Override
71
    public int getColumnsCount() throws IOException {
72
        if( this.columns <= 0 ) {
73
            this.columns = reader.length();
74
            if( this.columns <= 0 ) {
75
                this.nextLine = this.reader.read();
76
                this.columns = reader.length();
77
            }
78
        }
79
        return this.columns;
80
    }
81
    
82
    private boolean hasMultilineRecords(SimpleTaskStatus status) throws FileNotFoundException, UnsupportedEncodingException, IOException {
83
        FileInputStream fis = null;
84
        InputStreamReader theReader = null;
85
        BufferedReader breader = null;
86
        try {
87
            CSVStoreParameters params = getParameters();
88
            File data_file = CSVStoreParameters.getFile(params);
89
            String charset = CSVStoreParameters.getCharset(params);
90
            fis = new FileInputStream(data_file);
91
            theReader = new InputStreamReader(fis, charset);
92
            breader = new BufferedReader(theReader);
93
            CsvListReader parser = new CsvListReader(breader, getCSVPreferences());
94
//        int firstRecordLine = 0;
95
            while (parser.read() != null) {
96
                if (parser.getLineNumber() != parser.getRowNumber()) {
97
                    return true;
98
                }
99
            }
100
            return false;
101
        } finally {
102
            IOUtils.closeQuietly(breader);
103
            IOUtils.closeQuietly(theReader);
104
            IOUtils.closeQuietly(fis);
105
        }
106
    }
107

    
108
    @Override
109
    public GetItemWithSize64<List<String>>  getVirtualRows(SimpleTaskStatus status) {
110
        RandomAccessFileReader theReader = null;
111
        RandomAccessFileIndex theIndex = null;
112
        try {
113
            CSVStoreParameters params = getParameters();
114
            File data_file = CSVStoreParameters.getFile(params);
115
            if( data_file.length()< 10*1024*1024 ) {
116
                return null;
117
            }
118
            Function<BufferedReader, Integer> numberOfLinesInRecord = null;
119
            if(this.hasMultilineRecords(status)){
120
                numberOfLinesInRecord = new Function<BufferedReader, Integer>() {
121
                    @Override
122
                    public Integer apply(BufferedReader breader) {
123
                        CloseShieldReader theReader = CloseShieldReader.wrap(breader);
124
                        CsvListReader parser = new CsvListReader(theReader, getCSVPreferences());
125
                        try {
126
                            List<String> values = parser.read();
127
                        } catch (IOException ex) {
128
                            return 1;
129
                        }
130
                        return parser.getLineNumber();
131
                    }
132
                };
133
            }
134

    
135
            String charset = CSVStoreParameters.getCharset(params);
136
            File index_file = getIndexFile(data_file);
137
            
138
            theReader = new RandomAccessFileReader(data_file, charset);
139
            theIndex = theReader.createOrOpenIndexOfLines(index_file, false, FILTER_NONE, status, numberOfLinesInRecord);
140
            
141
            SuperCSVList list = new SuperCSVList(
142
                    theReader, 
143
                    theIndex, 
144
                    CSVStoreParameters.isFirstLineHeader(getParameters())?1:0
145
            );
146
            
147
            list.setPreferences(this.getCSVPreferences());
148
            return list;
149
        } catch (IOException ex) {
150
            return null;
151
        } finally {
152
            // We do not close the index or the reader because we need it to remain open
153
//            IOUtils.closeQuietly(theReader);
154
//            IOUtils.closeQuietly(theIndex);
155
        }
156
    }
157
    
158
    @Override
159
    public List<String> read() throws IOException {
160
        List<String> line;
161
        if( this.nextLine != null ) {
162
            line = this.nextLine;
163
            this.nextLine = null;
164
        } else {
165
            line = this.reader.read();
166
        }
167
        if( line!=null ) {
168
            for (int i = 0; i < line.size(); i++) {
169
                String s = line.get(i);
170
                if( s!=null ) {
171
                    line.set(i, unescapeCRLF(s));
172
                }
173
            }
174
        }
175
        return line;
176
    }
177

    
178
    @Override
179
    public void close() throws IOException {
180
        this.reader.close();
181
    }
182

    
183
    @Override
184
    public List<String> skip(int lines) throws IOException {
185
        if( lines <= 0 ) {
186
            return null;
187
        }
188
        if( this.nextLine != null ) {
189
            this.nextLine = null;
190
            lines--;
191
        }
192
        List<String> row = null;
193
        for ( int i = 0; i < lines; i++ ) {
194
            row = reader.read();
195
        }
196
        return row;
197
    }
198

    
199
    public final CsvPreference getCSVPreferences() {
200
        try {
201
            String s;
202
            char quoteChar;
203
            int delimiterChar;
204
            String endOfLineSymbols;
205

    
206
            DynObject params = this.getParameters();
207

    
208
            CsvPreference.Builder builder;
209

    
210
            CsvPreference defaultPreference = CSVStoreParameters
211
                    .getPredefinedCSVPreferences(params);
212
            if ( defaultPreference == null ) {
213
                defaultPreference = CsvPreference.STANDARD_PREFERENCE;
214
            }
215

    
216
            endOfLineSymbols = CSVStoreParameters.getRecordSeparator(params);
217
            if ( StringUtils.isBlank(endOfLineSymbols) ) {
218
                endOfLineSymbols = defaultPreference.getEndOfLineSymbols();
219
            }
220
            s = CSVStoreParameters.getQuoteCharacter(params);
221
            if ( StringUtils.isBlank(s) ) {
222
                quoteChar = (char) defaultPreference.getQuoteChar();
223
            } else {
224
                quoteChar = s.charAt(0);
225
            }
226
            s = CSVStoreParameters.getDelimiter(params);
227
            if ( StringUtils.isBlank(s) ) {
228
                delimiterChar = defaultPreference.getDelimiterChar();
229
            } else {
230
                delimiterChar = s.charAt(0);
231
            }
232

    
233
            builder = new CsvPreference.Builder(quoteChar, delimiterChar,
234
                    endOfLineSymbols);
235

    
236
            s = CSVStoreParameters.getCommentStartMarker(params);
237
            if ( !StringUtils.isBlank(s) ) {
238
                CommentStartsWith cs = new CommentStartsWith(s);
239
                builder.skipComments(cs);
240
            }
241

    
242
            builder.surroundingSpacesNeedQuotes(CSVStoreParameters
243
                    .getSurroundingSpacesNeedQuotes(params));
244
            QuoteMode quoteMode = CSVStoreParameters.getQuoteMode(params);
245
            if ( quoteMode != null ) {
246
                builder.useQuoteMode(quoteMode);
247
            }
248
            return builder.build();
249
        } catch (Exception e) {
250
            LOGGER.warn("Can't make preferences for CSV '" + getFullFileName()
251
                    + "'.", e);
252
            return null;
253
        }
254
    }
255
    
256
    private String getFullFileName() {
257
        // Usar solo para mostrar mensajes en el logger.
258
        String s;
259
        try {
260
            s = getParameters().getFile().getAbsolutePath();
261
        } catch (Exception e2) {
262
            s = "(unknow)";
263
        }
264
        return s;        
265
    }
266

    
267
    @Override
268
    public int getLine() {
269
        if( this.reader==null ) {
270
            return 0;
271
        }
272
        return this.reader.getLineNumber();
273
    }
274

    
275
    @Override
276
    public List<String> nextRowValues() {
277
        try {
278
            return this.read();
279
        } catch (IOException ex) {
280
            throw new RuntimeException(ex);
281
        }
282
    }
283

    
284
    public static String escapeCRLF(String s) {
285
        if( s==null ) {
286
            return s;
287
        }
288
        String s1 = s;
289
        s1 = StringUtils.replace(s1, "\\", "\\\\");
290
        s1 = StringUtils.replace(s1, "\n", "\\n");
291
        s1 = StringUtils.replace(s1, "\r", "\\r");
292
        return s1;
293
    }
294
    
295
    public static String unescapeCRLF(String s) {
296
        if( s==null || s.indexOf('\\')==-1 ) {
297
            return s;
298
        }
299
        String s1 = s;
300
        s1 = s.replaceAll("(?:^\\\\n)|(?:([^\\\\])\\\\n)","$1\n");
301
        s1 = s1.replaceAll("(?:^\\\\r)|(?:([^\\\\])\\\\n)","$1\r");
302
        s1 = StringUtils.replace(s1, "\\\\", "\\");
303
        return s1;
304
    }
305
    
306
    
307
    public static void main(String[] args) {
308
        String s0 = "\\n{\\n   \"ANGULO\":\"0.000\",\\n    \"\tEXTO\":\"RAVAL ROIG\\\\r\\\\n - \\\\r\\\\nVIRGEN DEL SOCORRO\",\\n    \"LINK_POLIGONO\":\"HVCSGISCODE_ENT_11230100000000001\",\\n    \"GEOMETRY\":\"00000000014125fe9b57b4a23441503411cb1c432d\"\\n}";
309
        System.out.println("#"+s0+"#");
310
        String s1 = s0.replaceAll("(?:^\\\\n)|(?:([^\\\\])\\\\n)","$1\n");
311
        System.out.println("#"+s1+"#");
312
        String s2 = s1.replaceAll("([^\\\\])\\\\r","$1{r}");
313
        System.out.println("#"+s2+"#");
314
        String s3 = StringEscapeUtils.unescapeCsv(s0);
315
        System.out.println("#"+s3+"#");
316
    }
317
    
318
    
319
    public static File getIndexFile(File data_file) {
320
        if (data_file == null){
321
            return null;
322
        }
323
        File index_file = new File(FilenameUtils.removeExtension(data_file.getAbsolutePath()) + ".idx");
324
        return index_file;
325
    }
326

    
327
}