Revision 47506

View differences:

trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/simplereaders/CSVReaderSuperCSV.java
1 1
package org.gvsig.fmap.dal.store.csv.simplereaders;
2 2

  
3
import java.io.BufferedReader;
3 4
import java.io.File;
4 5
import java.io.IOException;
5 6
import java.io.Reader;
6 7
import java.util.List;
8
import java.util.function.Function;
7 9
import org.apache.commons.io.FilenameUtils;
10
import org.apache.commons.io.input.CloseShieldReader;
11
import org.apache.commons.lang3.StringUtils;
8 12
import org.apache.commons.text.StringEscapeUtils;
9
import org.apache.commons.lang3.StringUtils;
10 13
import org.gvsig.fmap.dal.store.csv.CSVStoreParameters;
11 14
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileIndex;
12 15
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileReader;
......
79 82
            if( data_file.length()< 10*1024*1024 ) {
80 83
                return null;
81 84
            }
85
            
86
            Function<BufferedReader, Integer> numberOfLinesInRecord = new Function<BufferedReader, Integer>() {
87
                @Override
88
                public Integer apply(BufferedReader breader) {
89
                    CloseShieldReader theReader = CloseShieldReader.wrap(breader);
90
                    CsvListReader parser = new CsvListReader(theReader, getCSVPreferences());
91
                    try {
92
                        List<String> values = parser.read();
93
                    } catch (IOException ex) {
94
                        return 1;
95
                    }
96
                    return parser.getLineNumber();
97
                }
98
            };
82 99

  
83 100
            String charset = CSVStoreParameters.getCharset(params);
84 101
            File index_file = getIndexFile(data_file);
85 102
            
86 103
            theReader = new RandomAccessFileReader(data_file, charset);
87
            theIndex = theReader.createOrOpenIndexOfLines(index_file, false, FILTER_NONE, status);
104
            theIndex = theReader.createOrOpenIndexOfLines(index_file, false, FILTER_NONE, status, numberOfLinesInRecord);
88 105
            
89 106
            SuperCSVList list = new SuperCSVList(
90 107
                    theReader, 
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/virtualrows/RandomAccessFileIndex.java
8 8
import java.nio.MappedByteBuffer;
9 9
import java.nio.channels.FileChannel;
10 10
import java.util.AbstractList;
11
import java.util.Iterator;
12 11
import org.apache.commons.io.IOUtils;
13 12
import org.gvsig.tools.util.GetItemWithSize64;
14 13
import org.gvsig.tools.util.SetItem;
......
66 65
        this.create(theRaf,sz);
67 66
    }
68 67
    
69
    public void create(RandomAccessFile raf, long sz) throws IOException {
68
    public void create(RandomAccessFile raf, long numElements) throws IOException {
70 69
        this.raf = raf;
71
        this.raf.setLength((sz+HEADER_SIZE)*SIZE_OF_LONG);
70
        this.raf.setLength((numElements+HEADER_SIZE)*SIZE_OF_LONG);
72 71
        this.fileByteBuffer = this.raf.getChannel().map(
73 72
                FileChannel.MapMode.READ_WRITE, 
74 73
                0, 
......
77 76
        this.buffer = this.fileByteBuffer.asLongBuffer();
78 77
        this.sz = buffer.limit()-HEADER_SIZE;
79 78
    }
79
    
80
    public void setNumElements(long numElements) throws IOException {
81
        long size = ((numElements+HEADER_SIZE)*SIZE_OF_LONG);
82
        this.fileByteBuffer.force();
83
        this.raf.setLength(size);
84
        this.sz=numElements;
85
    }
80 86

  
81 87
    @Override
82 88
    public void close() throws IOException {
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/virtualrows/RandomAccessFileReader.java
14 14
import java.util.NoSuchElementException;
15 15
import java.util.Spliterator;
16 16
import java.util.Spliterators;
17
import java.util.function.Function;
17 18
import java.util.function.Predicate;
18 19
import java.util.stream.Stream;
19 20
import java.util.stream.StreamSupport;
20 21
import org.apache.commons.io.FilenameUtils;
21 22
import org.apache.commons.io.IOUtils;
23
import org.apache.commons.lang3.StringUtils;
22 24
import org.gvsig.tools.ToolsLocator;
23 25
import org.gvsig.tools.i18n.I18nManager;
24 26
import org.gvsig.tools.library.impl.DefaultLibrariesInitializer;
......
37 39

  
38 40
    protected static final int INDEX_HEADER_FILESIZE = 0;
39 41
    protected static final int INDEX_HEADER_INDEXCREATIONCOST = 1;
42
    
43
    protected static final int MAX_BUFFER_FOR_LINE = 50*1024; //50K
40 44

  
41 45
    protected RandomAccessFile raf;
42 46
    protected Reader reader;
......
169 173
            status.message(i18n.getTranslation("_Calculating_number_of_lines"));
170 174
            status.setIndeterminate();
171 175
        }
172
        BufferedReader breader = new BufferedReader(this, 10240);
176
        BufferedReader breader = new BufferedReader(this, MAX_BUFFER_FOR_LINE);
173 177
        try {
174 178
            String line;
175 179
            count = 0;
......
188 192
                }
189 193
                count++;
190 194
            }
191
            status.setCurValue(count);
192 195
            if (status != null) {
196
                status.setCurValue(count);
193 197
                status.message("");
194 198
                status.setIndeterminate();
195 199
            }
......
234 238
    }
235 239

  
236 240
    public RandomAccessFileIndex createOrOpenIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status) throws IOException {
241
        return createOrOpenIndexOfLines(index, safe, filter, status, null);
242
    }
243

  
244
    public RandomAccessFileIndex createOrOpenIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status, Function<BufferedReader,Integer> numberOfLines) throws IOException {
237 245
        if (this.isRecomemendedTheRecreationOfTheLinesIndex(index)) {
238
            return this.createIndexOfLines(index, safe, filter, status);
246
            return this.createIndexOfLines(index, safe, filter, status, numberOfLines);
239 247
        }
240 248
        return new RandomAccessFileIndex(index);
241 249
    }
......
245 253
    }
246 254

  
247 255
    public RandomAccessFileIndex createIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status) throws IOException {
256
        return createIndexOfLines(index, safe, filter, status, null);
257
    }
258
    
259
    public RandomAccessFileIndex createIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status, Function<BufferedReader,Integer> numberOfLines) throws IOException {
248 260
        long countLines = this.countLines(filter, status);
249 261
        if (countLines < 1) {
250 262
            return null;
......
262 274
                status.setCurValue(0);
263 275
            }
264 276
            long t1 = System.currentTimeMillis();
265
            String line;
277
            String line = null;
266 278
            int lineno = 0;
267 279
            long position = 0;
268 280
//            line_idx.set(lineno++, position);
......
294 306
                status.setCurValue(lineno);
295 307
            } else {
296 308
                // Use a buffered reader: fast, but the position is calculated in an unsafe way.
309
                StringBuilder builder = new StringBuilder();
310
                MyBufferedReader breader = new MyBufferedReader(this, MAX_BUFFER_FOR_LINE);
297 311
                while (lineno < countLines) {
298 312
                    this.seek(position);
299
                    MyBufferedReader breader = new MyBufferedReader(this, 10240);
300
                    line = breader.readLine();
313
                    breader.clean();
314
                    if(numberOfLines == null){
315
                        line = breader.readLine();
316
                    } else {
317
                        breader.mark(MAX_BUFFER_FOR_LINE);
318
                        Integer nextLine = numberOfLines.apply(breader);
319
                        breader.reset();
320
                        builder.setLength(0);
321
                        for (int i = 0; i < nextLine; i++) {
322
                            String l = breader.readLine();
323
                            if(l != null){
324
                                builder.append(l);
325
                            } else {
326
                                break;
327
                            }
328
                        }
329
                        line = StringUtils.defaultIfBlank(builder.toString(), null);
330
                    }
301 331
                    if (line == null) {
302 332
                        break;
303 333
                    }
......
326 356
                }
327 357
            }
328 358
            long t2 = System.currentTimeMillis();
359
            line_idx.setNumElements(lineno);
329 360
            line_idx.setHeader(INDEX_HEADER_FILESIZE, this.raf.length());
330 361
            line_idx.setHeader(INDEX_HEADER_INDEXCREATIONCOST, t2 - t1);
331 362
            if (status != null) {
......
375 406
        for (int linenumber = 0; linenumber < lines_idx.size(); linenumber++) {
376 407
            long lineoffset = lines_idx.get(linenumber);
377 408
            reader.seek(lineoffset);
378
            MyBufferedReader breader = new MyBufferedReader(reader, 10240);
409
            MyBufferedReader breader = new MyBufferedReader(reader, MAX_BUFFER_FOR_LINE);
379 410
            String line = breader.readLine();
380 411
            if (linenumber < 100) {
381 412
                System.out.println(String.format("%6d/%d: %s", lineoffset, linenumber, line));
......
389 420
        for (int linenumber = lines_idx.size() - 1; linenumber >= 0; linenumber--) {
390 421
            long lineoffset = lines_idx.get(linenumber);
391 422
            reader.seek(lineoffset);
392
            MyBufferedReader breader = new MyBufferedReader(reader, 10240);
423
            MyBufferedReader breader = new MyBufferedReader(reader, MAX_BUFFER_FOR_LINE);
393 424
            String line = breader.readLine();
394 425
            if (linenumber < 100) {
395 426
                System.out.println(String.format("%6d/%d: %s", lineoffset, linenumber, line));
......
400 431

  
401 432
    }
402 433

  
434
    /*
435
        Copy of Java's BufferedReader, adding the clean and isSkipLf methods.
436
    */
403 437
    public static class MyBufferedReader extends BufferedReader {
404 438

  
405 439
        private Reader in;
......
454 488
        public MyBufferedReader(Reader in) {
455 489
            this(in, defaultCharBufferSize);
456 490
        }
457

  
491
        
458 492
        /**
459 493
         * Checks to make sure that the stream has not been closed
460 494
         */
......
516 550
         * reached
517 551
         * @exception IOException If an I/O error occurs
518 552
         */
553
        @Override
519 554
        public int read() throws IOException {
520 555
            synchronized (lock) {
521 556
                ensureOpen();
......
623 658
         *
624 659
         * @exception IOException If an I/O error occurs
625 660
         */
661
        @Override
626 662
        public int read(char cbuf[], int off, int len) throws IOException {
627 663
            synchronized (lock) {
628 664
                ensureOpen();
......
744 780
         *
745 781
         * @see java.nio.file.Files#readAllLines
746 782
         */
783
        @Override
747 784
        public String readLine() throws IOException {
748 785
            return readLine(false);
749 786
        }
......
758 795
         * @exception IllegalArgumentException If <code>n</code> is negative.
759 796
         * @exception IOException If an I/O error occurs
760 797
         */
798
        @Override
761 799
        public long skip(long n) throws IOException {
762 800
            if (n < 0L) {
763 801
                throw new IllegalArgumentException("skip value is negative");
......
799 837
         *
800 838
         * @exception IOException If an I/O error occurs
801 839
         */
840
        @Override
802 841
        public boolean ready() throws IOException {
803 842
            synchronized (lock) {
804 843
                ensureOpen();
......
829 868
         * Tells whether this stream supports the mark() operation, which it
830 869
         * does.
831 870
         */
871
        @Override
832 872
        public boolean markSupported() {
833 873
            return true;
834 874
        }
......
847 887
         * @exception IllegalArgumentException If {@code readAheadLimit < 0}
848 888
         * @exception IOException If an I/O error occurs
849 889
         */
890
        @Override
850 891
        public void mark(int readAheadLimit) throws IOException {
851 892
            if (readAheadLimit < 0) {
852 893
                throw new IllegalArgumentException("Read-ahead limit < 0");
......
865 906
         * @exception IOException If the stream has never been marked, or if the
866 907
         * mark has been invalidated
867 908
         */
909
        @Override
868 910
        public void reset() throws IOException {
869 911
            synchronized (lock) {
870 912
                ensureOpen();
......
878 920
            }
879 921
        }
880 922

  
923
        @Override
881 924
        public void close() throws IOException {
882 925
            synchronized (lock) {
883 926
                if (in == null) {
......
923 966
         *
924 967
         * @since 1.8
925 968
         */
969
        @Override
926 970
        public Stream<String> lines() {
927 971
            Iterator<String> iter = new Iterator<String>() {
928 972
                String nextLine = null;
......
959 1003
        public boolean isSkipLf() {
960 1004
            return this.skipLF;
961 1005
        }
962

  
1006
        
1007
        public void clean() {
1008
            nextChar = nChars = 0;
1009
            markedChar = UNMARKED;
1010
            readAheadLimit = 0;
1011
            skipLF = false;
1012
            markedSkipLF = false;
1013
            
1014
        }
963 1015
    }
964 1016

  
965 1017
}
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/virtualrows/SuperCSVList.java
5 5
 */
6 6
package org.gvsig.fmap.dal.store.csv.virtualrows;
7 7

  
8
import java.io.BufferedReader;
8 9
import java.io.File;
9 10
import java.io.IOException;
10 11
import java.nio.charset.Charset;
11 12
import java.util.Date;
12 13
import java.util.List;
14
import java.util.function.Function;
13 15
import org.apache.commons.io.FilenameUtils;
14 16
import org.apache.commons.io.IOUtils;
15 17
import org.apache.commons.io.input.CloseShieldReader;
......
64 66
        new DefaultLibrariesInitializer().fullInitialize();
65 67
        
66 68
        String fname;
67
        fname = "/home/fdiaz/Descargas/origen_coordenadas.csv";
69
        fname = "/home/fdiaz/Descargas/error_union_tablas/Expedientes_CON_REFCAT_trimmed.csv";
70
//        fname = "/home/fdiaz/Descargas/origen_coordenadas.csv";
68 71
//        fname = "/home/jjdelcerro/Descargas/test/origen_coordenadas.csv";
69 72
//        fname = "/home/jjdelcerro/Descargas/test/esp_poblaciones.csv";
70 73
//        fname = "/home/jjdelcerro/Descargas/test/esp_provincias.csv";
......
86 89
        });
87 90
        SimpleTaskStatus status = taskStatusManager.createDefaultSimpleTaskStatus(data_file.getName());
88 91
        status.add();
89

  
92
        Function<BufferedReader, Integer> numberOfLinesInRecord = (BufferedReader breader) -> {
93
            CloseShieldReader theReader = CloseShieldReader.wrap(breader);
94
            CsvListReader parser = new CsvListReader(theReader, CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE);
95
            try {
96
                List<String> values = parser.read();
97
            } catch (IOException ex) {
98
                return 1;
99
            }
100
            return parser.getLineNumber();
101
        };
90 102
        RandomAccessFileReader reader = new RandomAccessFileReader(data_file, "UTF-8");
91
        RandomAccessFileIndex index_lines = reader.createOrOpenIndexOfLines(index_file, FILTER_NONE, null);
103
        RandomAccessFileIndex index_lines = reader.createIndexOfLines(index_file, false, FILTER_NONE, null, numberOfLinesInRecord);
92 104

  
93 105
        CSVList csv = new SuperCSVList(reader, index_lines, 0);
94 106
        System.out.println("Lines " + csv.size());

Also available in: Unified diff