Revision 47506
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/simplereaders/CSVReaderSuperCSV.java | ||
---|---|---|
1 | 1 |
package org.gvsig.fmap.dal.store.csv.simplereaders; |
2 | 2 |
|
3 |
import java.io.BufferedReader; |
|
3 | 4 |
import java.io.File; |
4 | 5 |
import java.io.IOException; |
5 | 6 |
import java.io.Reader; |
6 | 7 |
import java.util.List; |
8 |
import java.util.function.Function; |
|
7 | 9 |
import org.apache.commons.io.FilenameUtils; |
10 |
import org.apache.commons.io.input.CloseShieldReader; |
|
11 |
import org.apache.commons.lang3.StringUtils; |
|
8 | 12 |
import org.apache.commons.text.StringEscapeUtils; |
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 | 13 |
import org.gvsig.fmap.dal.store.csv.CSVStoreParameters; |
11 | 14 |
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileIndex; |
12 | 15 |
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileReader; |
... | ... | |
79 | 82 |
if( data_file.length()< 10*1024*1024 ) { |
80 | 83 |
return null; |
81 | 84 |
} |
85 |
|
|
86 |
Function<BufferedReader, Integer> numberOfLinesInRecord = new Function<BufferedReader, Integer>() { |
|
87 |
@Override |
|
88 |
public Integer apply(BufferedReader breader) { |
|
89 |
CloseShieldReader theReader = CloseShieldReader.wrap(breader); |
|
90 |
CsvListReader parser = new CsvListReader(theReader, getCSVPreferences()); |
|
91 |
try { |
|
92 |
List<String> values = parser.read(); |
|
93 |
} catch (IOException ex) { |
|
94 |
return 1; |
|
95 |
} |
|
96 |
return parser.getLineNumber(); |
|
97 |
} |
|
98 |
}; |
|
82 | 99 |
|
83 | 100 |
String charset = CSVStoreParameters.getCharset(params); |
84 | 101 |
File index_file = getIndexFile(data_file); |
85 | 102 |
|
86 | 103 |
theReader = new RandomAccessFileReader(data_file, charset); |
87 |
theIndex = theReader.createOrOpenIndexOfLines(index_file, false, FILTER_NONE, status); |
|
104 |
theIndex = theReader.createOrOpenIndexOfLines(index_file, false, FILTER_NONE, status, numberOfLinesInRecord);
|
|
88 | 105 |
|
89 | 106 |
SuperCSVList list = new SuperCSVList( |
90 | 107 |
theReader, |
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/virtualrows/RandomAccessFileIndex.java | ||
---|---|---|
8 | 8 |
import java.nio.MappedByteBuffer; |
9 | 9 |
import java.nio.channels.FileChannel; |
10 | 10 |
import java.util.AbstractList; |
11 |
import java.util.Iterator; |
|
12 | 11 |
import org.apache.commons.io.IOUtils; |
13 | 12 |
import org.gvsig.tools.util.GetItemWithSize64; |
14 | 13 |
import org.gvsig.tools.util.SetItem; |
... | ... | |
66 | 65 |
this.create(theRaf,sz); |
67 | 66 |
} |
68 | 67 |
|
69 |
public void create(RandomAccessFile raf, long sz) throws IOException {
|
|
68 |
public void create(RandomAccessFile raf, long numElements) throws IOException {
|
|
70 | 69 |
this.raf = raf; |
71 |
this.raf.setLength((sz+HEADER_SIZE)*SIZE_OF_LONG);
|
|
70 |
this.raf.setLength((numElements+HEADER_SIZE)*SIZE_OF_LONG);
|
|
72 | 71 |
this.fileByteBuffer = this.raf.getChannel().map( |
73 | 72 |
FileChannel.MapMode.READ_WRITE, |
74 | 73 |
0, |
... | ... | |
77 | 76 |
this.buffer = this.fileByteBuffer.asLongBuffer(); |
78 | 77 |
this.sz = buffer.limit()-HEADER_SIZE; |
79 | 78 |
} |
79 |
|
|
80 |
public void setNumElements(long numElements) throws IOException { |
|
81 |
long size = ((numElements+HEADER_SIZE)*SIZE_OF_LONG); |
|
82 |
this.fileByteBuffer.force(); |
|
83 |
this.raf.setLength(size); |
|
84 |
this.sz=numElements; |
|
85 |
} |
|
80 | 86 |
|
81 | 87 |
@Override |
82 | 88 |
public void close() throws IOException { |
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/virtualrows/RandomAccessFileReader.java | ||
---|---|---|
14 | 14 |
import java.util.NoSuchElementException; |
15 | 15 |
import java.util.Spliterator; |
16 | 16 |
import java.util.Spliterators; |
17 |
import java.util.function.Function; |
|
17 | 18 |
import java.util.function.Predicate; |
18 | 19 |
import java.util.stream.Stream; |
19 | 20 |
import java.util.stream.StreamSupport; |
20 | 21 |
import org.apache.commons.io.FilenameUtils; |
21 | 22 |
import org.apache.commons.io.IOUtils; |
23 |
import org.apache.commons.lang3.StringUtils; |
|
22 | 24 |
import org.gvsig.tools.ToolsLocator; |
23 | 25 |
import org.gvsig.tools.i18n.I18nManager; |
24 | 26 |
import org.gvsig.tools.library.impl.DefaultLibrariesInitializer; |
... | ... | |
37 | 39 |
|
38 | 40 |
protected static final int INDEX_HEADER_FILESIZE = 0; |
39 | 41 |
protected static final int INDEX_HEADER_INDEXCREATIONCOST = 1; |
42 |
|
|
43 |
protected static final int MAX_BUFFER_FOR_LINE = 50*1024; //50K |
|
40 | 44 |
|
41 | 45 |
protected RandomAccessFile raf; |
42 | 46 |
protected Reader reader; |
... | ... | |
169 | 173 |
status.message(i18n.getTranslation("_Calculating_number_of_lines")); |
170 | 174 |
status.setIndeterminate(); |
171 | 175 |
} |
172 |
BufferedReader breader = new BufferedReader(this, 10240);
|
|
176 |
BufferedReader breader = new BufferedReader(this, MAX_BUFFER_FOR_LINE);
|
|
173 | 177 |
try { |
174 | 178 |
String line; |
175 | 179 |
count = 0; |
... | ... | |
188 | 192 |
} |
189 | 193 |
count++; |
190 | 194 |
} |
191 |
status.setCurValue(count); |
|
192 | 195 |
if (status != null) { |
196 |
status.setCurValue(count); |
|
193 | 197 |
status.message(""); |
194 | 198 |
status.setIndeterminate(); |
195 | 199 |
} |
... | ... | |
234 | 238 |
} |
235 | 239 |
|
236 | 240 |
public RandomAccessFileIndex createOrOpenIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status) throws IOException { |
241 |
return createOrOpenIndexOfLines(index, safe, filter, status, null); |
|
242 |
} |
|
243 |
|
|
244 |
public RandomAccessFileIndex createOrOpenIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status, Function<BufferedReader,Integer> numberOfLines) throws IOException { |
|
237 | 245 |
if (this.isRecomemendedTheRecreationOfTheLinesIndex(index)) { |
238 |
return this.createIndexOfLines(index, safe, filter, status); |
|
246 |
return this.createIndexOfLines(index, safe, filter, status, numberOfLines);
|
|
239 | 247 |
} |
240 | 248 |
return new RandomAccessFileIndex(index); |
241 | 249 |
} |
... | ... | |
245 | 253 |
} |
246 | 254 |
|
247 | 255 |
public RandomAccessFileIndex createIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status) throws IOException { |
256 |
return createIndexOfLines(index, safe, filter, status, null); |
|
257 |
} |
|
258 |
|
|
259 |
public RandomAccessFileIndex createIndexOfLines(File index, boolean safe, Predicate<String> filter, SimpleTaskStatus status, Function<BufferedReader,Integer> numberOfLines) throws IOException { |
|
248 | 260 |
long countLines = this.countLines(filter, status); |
249 | 261 |
if (countLines < 1) { |
250 | 262 |
return null; |
... | ... | |
262 | 274 |
status.setCurValue(0); |
263 | 275 |
} |
264 | 276 |
long t1 = System.currentTimeMillis(); |
265 |
String line; |
|
277 |
String line = null;
|
|
266 | 278 |
int lineno = 0; |
267 | 279 |
long position = 0; |
268 | 280 |
// line_idx.set(lineno++, position); |
... | ... | |
294 | 306 |
status.setCurValue(lineno); |
295 | 307 |
} else { |
296 | 308 |
// Use buffered reader, fast and unsafe calculate position. |
309 |
StringBuilder builder = new StringBuilder(); |
|
310 |
MyBufferedReader breader = new MyBufferedReader(this, MAX_BUFFER_FOR_LINE); |
|
297 | 311 |
while (lineno < countLines) { |
298 | 312 |
this.seek(position); |
299 |
MyBufferedReader breader = new MyBufferedReader(this, 10240); |
|
300 |
line = breader.readLine(); |
|
313 |
breader.clean(); |
|
314 |
if(numberOfLines == null){ |
|
315 |
line = breader.readLine(); |
|
316 |
} else { |
|
317 |
breader.mark(MAX_BUFFER_FOR_LINE); |
|
318 |
Integer nextLine = numberOfLines.apply(breader); |
|
319 |
breader.reset(); |
|
320 |
builder.setLength(0); |
|
321 |
for (int i = 0; i < nextLine; i++) { |
|
322 |
String l = breader.readLine(); |
|
323 |
if(l != null){ |
|
324 |
builder.append(l); |
|
325 |
} else { |
|
326 |
break; |
|
327 |
} |
|
328 |
} |
|
329 |
line = StringUtils.defaultIfBlank(builder.toString(), null); |
|
330 |
} |
|
301 | 331 |
if (line == null) { |
302 | 332 |
break; |
303 | 333 |
} |
... | ... | |
326 | 356 |
} |
327 | 357 |
} |
328 | 358 |
long t2 = System.currentTimeMillis(); |
359 |
line_idx.setNumElements(lineno); |
|
329 | 360 |
line_idx.setHeader(INDEX_HEADER_FILESIZE, this.raf.length()); |
330 | 361 |
line_idx.setHeader(INDEX_HEADER_INDEXCREATIONCOST, t2 - t1); |
331 | 362 |
if (status != null) { |
... | ... | |
375 | 406 |
for (int linenumber = 0; linenumber < lines_idx.size(); linenumber++) { |
376 | 407 |
long lineoffset = lines_idx.get(linenumber); |
377 | 408 |
reader.seek(lineoffset); |
378 |
MyBufferedReader breader = new MyBufferedReader(reader, 10240);
|
|
409 |
MyBufferedReader breader = new MyBufferedReader(reader, MAX_BUFFER_FOR_LINE);
|
|
379 | 410 |
String line = breader.readLine(); |
380 | 411 |
if (linenumber < 100) { |
381 | 412 |
System.out.println(String.format("%6d/%d: %s", lineoffset, linenumber, line)); |
... | ... | |
389 | 420 |
for (int linenumber = lines_idx.size() - 1; linenumber >= 0; linenumber--) { |
390 | 421 |
long lineoffset = lines_idx.get(linenumber); |
391 | 422 |
reader.seek(lineoffset); |
392 |
MyBufferedReader breader = new MyBufferedReader(reader, 10240);
|
|
423 |
MyBufferedReader breader = new MyBufferedReader(reader, MAX_BUFFER_FOR_LINE);
|
|
393 | 424 |
String line = breader.readLine(); |
394 | 425 |
if (linenumber < 100) { |
395 | 426 |
System.out.println(String.format("%6d/%d: %s", lineoffset, linenumber, line)); |
... | ... | |
400 | 431 |
|
401 | 432 |
} |
402 | 433 |
|
434 |
/* |
|
435 |
Copy of java's BufferedReader adding clean and isSkipLf methods |
|
436 |
*/ |
|
403 | 437 |
public static class MyBufferedReader extends BufferedReader { |
404 | 438 |
|
405 | 439 |
private Reader in; |
... | ... | |
454 | 488 |
public MyBufferedReader(Reader in) { |
455 | 489 |
this(in, defaultCharBufferSize); |
456 | 490 |
} |
457 |
|
|
491 |
|
|
458 | 492 |
/** |
459 | 493 |
* Checks to make sure that the stream has not been closed |
460 | 494 |
*/ |
... | ... | |
516 | 550 |
* reached |
517 | 551 |
* @exception IOException If an I/O error occurs |
518 | 552 |
*/ |
553 |
@Override |
|
519 | 554 |
public int read() throws IOException { |
520 | 555 |
synchronized (lock) { |
521 | 556 |
ensureOpen(); |
... | ... | |
623 | 658 |
* |
624 | 659 |
* @exception IOException If an I/O error occurs |
625 | 660 |
*/ |
661 |
@Override |
|
626 | 662 |
public int read(char cbuf[], int off, int len) throws IOException { |
627 | 663 |
synchronized (lock) { |
628 | 664 |
ensureOpen(); |
... | ... | |
744 | 780 |
* |
745 | 781 |
* @see java.nio.file.Files#readAllLines |
746 | 782 |
*/ |
783 |
@Override |
|
747 | 784 |
public String readLine() throws IOException { |
748 | 785 |
return readLine(false); |
749 | 786 |
} |
... | ... | |
758 | 795 |
* @exception IllegalArgumentException If <code>n</code> is negative. |
759 | 796 |
* @exception IOException If an I/O error occurs |
760 | 797 |
*/ |
798 |
@Override |
|
761 | 799 |
public long skip(long n) throws IOException { |
762 | 800 |
if (n < 0L) { |
763 | 801 |
throw new IllegalArgumentException("skip value is negative"); |
... | ... | |
799 | 837 |
* |
800 | 838 |
* @exception IOException If an I/O error occurs |
801 | 839 |
*/ |
840 |
@Override |
|
802 | 841 |
public boolean ready() throws IOException { |
803 | 842 |
synchronized (lock) { |
804 | 843 |
ensureOpen(); |
... | ... | |
829 | 868 |
* Tells whether this stream supports the mark() operation, which it |
830 | 869 |
* does. |
831 | 870 |
*/ |
871 |
@Override |
|
832 | 872 |
public boolean markSupported() { |
833 | 873 |
return true; |
834 | 874 |
} |
... | ... | |
847 | 887 |
* @exception IllegalArgumentException If {@code readAheadLimit < 0} |
848 | 888 |
* @exception IOException If an I/O error occurs |
849 | 889 |
*/ |
890 |
@Override |
|
850 | 891 |
public void mark(int readAheadLimit) throws IOException { |
851 | 892 |
if (readAheadLimit < 0) { |
852 | 893 |
throw new IllegalArgumentException("Read-ahead limit < 0"); |
... | ... | |
865 | 906 |
* @exception IOException If the stream has never been marked, or if the |
866 | 907 |
* mark has been invalidated |
867 | 908 |
*/ |
909 |
@Override |
|
868 | 910 |
public void reset() throws IOException { |
869 | 911 |
synchronized (lock) { |
870 | 912 |
ensureOpen(); |
... | ... | |
878 | 920 |
} |
879 | 921 |
} |
880 | 922 |
|
923 |
@Override |
|
881 | 924 |
public void close() throws IOException { |
882 | 925 |
synchronized (lock) { |
883 | 926 |
if (in == null) { |
... | ... | |
923 | 966 |
* |
924 | 967 |
* @since 1.8 |
925 | 968 |
*/ |
969 |
@Override |
|
926 | 970 |
public Stream<String> lines() { |
927 | 971 |
Iterator<String> iter = new Iterator<String>() { |
928 | 972 |
String nextLine = null; |
... | ... | |
959 | 1003 |
public boolean isSkipLf() { |
960 | 1004 |
return this.skipLF; |
961 | 1005 |
} |
962 |
|
|
1006 |
|
|
1007 |
public void clean() { |
|
1008 |
nextChar = nChars = 0; |
|
1009 |
markedChar = UNMARKED; |
|
1010 |
readAheadLimit = 0; |
|
1011 |
skipLF = false; |
|
1012 |
markedSkipLF = false; |
|
1013 |
|
|
1014 |
} |
|
963 | 1015 |
} |
964 | 1016 |
|
965 | 1017 |
} |
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/virtualrows/SuperCSVList.java | ||
---|---|---|
5 | 5 |
*/ |
6 | 6 |
package org.gvsig.fmap.dal.store.csv.virtualrows; |
7 | 7 |
|
8 |
import java.io.BufferedReader; |
|
8 | 9 |
import java.io.File; |
9 | 10 |
import java.io.IOException; |
10 | 11 |
import java.nio.charset.Charset; |
11 | 12 |
import java.util.Date; |
12 | 13 |
import java.util.List; |
14 |
import java.util.function.Function; |
|
13 | 15 |
import org.apache.commons.io.FilenameUtils; |
14 | 16 |
import org.apache.commons.io.IOUtils; |
15 | 17 |
import org.apache.commons.io.input.CloseShieldReader; |
... | ... | |
64 | 66 |
new DefaultLibrariesInitializer().fullInitialize(); |
65 | 67 |
|
66 | 68 |
String fname; |
67 |
fname = "/home/fdiaz/Descargas/origen_coordenadas.csv"; |
|
69 |
fname = "/home/fdiaz/Descargas/error_union_tablas/Expedientes_CON_REFCAT_trimmed.csv"; |
|
70 |
// fname = "/home/fdiaz/Descargas/origen_coordenadas.csv"; |
|
68 | 71 |
// fname = "/home/jjdelcerro/Descargas/test/origen_coordenadas.csv"; |
69 | 72 |
// fname = "/home/jjdelcerro/Descargas/test/esp_poblaciones.csv"; |
70 | 73 |
// fname = "/home/jjdelcerro/Descargas/test/esp_provincias.csv"; |
... | ... | |
86 | 89 |
}); |
87 | 90 |
SimpleTaskStatus status = taskStatusManager.createDefaultSimpleTaskStatus(data_file.getName()); |
88 | 91 |
status.add(); |
89 |
|
|
92 |
Function<BufferedReader, Integer> numberOfLinesInRecord = (BufferedReader breader) -> { |
|
93 |
CloseShieldReader theReader = CloseShieldReader.wrap(breader); |
|
94 |
CsvListReader parser = new CsvListReader(theReader, CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE); |
|
95 |
try { |
|
96 |
List<String> values = parser.read(); |
|
97 |
} catch (IOException ex) { |
|
98 |
return 1; |
|
99 |
} |
|
100 |
return parser.getLineNumber(); |
|
101 |
}; |
|
90 | 102 |
RandomAccessFileReader reader = new RandomAccessFileReader(data_file, "UTF-8"); |
91 |
RandomAccessFileIndex index_lines = reader.createOrOpenIndexOfLines(index_file, FILTER_NONE, null);
|
|
103 |
RandomAccessFileIndex index_lines = reader.createIndexOfLines(index_file, false, FILTER_NONE, null, numberOfLinesInRecord);
|
|
92 | 104 |
|
93 | 105 |
CSVList csv = new SuperCSVList(reader, index_lines, 0); |
94 | 106 |
System.out.println("Lines " + csv.size()); |
Also available in: Unified diff