svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.dbf / src / main / java / org / gvsig / fmap / dal / store / dbf / utils / DbaseFileWriter.java @ 46904
History | View | Annotate | Download (19 KB)
1 |
/**
|
---|---|
2 |
* gvSIG. Desktop Geographic Information System.
|
3 |
*
|
4 |
* Copyright (C) 2007-2013 gvSIG Association.
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 3
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
* For any additional information, do not hesitate to contact us
|
22 |
* at info AT gvsig.com, or visit our website www.gvsig.com.
|
23 |
*/
|
24 |
package org.gvsig.fmap.dal.store.dbf.utils; |
25 |
|
26 |
import java.io.IOException; |
27 |
import java.math.BigDecimal; |
28 |
import java.nio.Buffer; |
29 |
import java.nio.BufferOverflowException; |
30 |
import java.nio.ByteBuffer; |
31 |
import java.nio.MappedByteBuffer; |
32 |
import java.nio.channels.FileChannel; |
33 |
import java.nio.charset.Charset; |
34 |
import java.util.Date; |
35 |
import java.util.Iterator; |
36 |
import org.apache.commons.lang3.StringUtils; |
37 |
|
38 |
import org.gvsig.fmap.dal.DataTypes; |
39 |
import org.gvsig.fmap.dal.exception.CloseException; |
40 |
import org.gvsig.fmap.dal.exception.InitializeException; |
41 |
import org.gvsig.fmap.dal.exception.UnsupportedEncodingException; |
42 |
import org.gvsig.fmap.dal.exception.WriteException; |
43 |
import org.gvsig.fmap.dal.feature.Feature; |
44 |
import org.gvsig.fmap.dal.feature.FeatureAttributeDescriptor; |
45 |
import org.gvsig.fmap.dal.feature.FeatureType; |
46 |
import org.gvsig.tools.logger.FilteredLogger; |
47 |
import org.slf4j.Logger; |
48 |
import org.slf4j.LoggerFactory; |
49 |
|
50 |
/**
 * A DbaseFileWriter is used to write a dbase III format file. The general use
 * of this class is: <CODE><PRE>
 * DbaseFileHeader header = ...
 * WritableFileChannel out = new FileOutputStream("thefile.dbf").getChannel();
 * DbaseFileWriter w = new DbaseFileWriter(header,out);
 * while ( moreRecords ) {
 *   w.write( getMyRecord() );
 * }
 * w.close();
 * </PRE></CODE> You must supply the <CODE>moreRecords</CODE> and
 * <CODE>getMyRecord()</CODE> logic...
 *
 * @author Ian Schneider
 */
public class DbaseFileWriter {

    private static final Logger LOGGER = LoggerFactory.getLogger(DbaseFileWriter.class);

    /** Record layout of the target DBF: field names, types, sizes, record count. */
    private final DbaseFileHeader header;
    /** Formats numeric/date/time values into their DBF text representation. */
    private FieldFormatter formatter = new FieldFormatter();
    /** Destination channel; set to null by close(). */
    private FileChannel channel;
    /** Reusable buffer holding exactly one encoded record (allocated in init()). */
    private ByteBuffer buffer;
    // NOTE(review): "headDrity" is a long-standing typo for "headDirty".
    // It is referenced throughout this class, so it is kept as-is here.
    // True when the in-memory header must be rewritten to disk on close().
    private boolean headDrity = false;
    /** A single blank encoded in the current charset, used for padding. */
    private ByteBuffer blank;
    /** Byte length of {@link #blank} in the current charset. */
    private int blankSize;

    /** Charset used to encode every string written to the file. */
    private Charset charset;

    /** Lazily created, rate-limited logger; access via getLogger(). */
    private FilteredLogger logger;
/**
|
82 |
* Create a DbaseFileWriter using the specified header and writing to the
|
83 |
* given channel.
|
84 |
*
|
85 |
* @param header The DbaseFileHeader to write.
|
86 |
* @param out The Channel to write to.
|
87 |
* @param isNew
|
88 |
* @throws org.gvsig.fmap.dal.exception.InitializeException
|
89 |
*
|
90 |
*
|
91 |
*/
|
92 |
public DbaseFileWriter(DbaseFileHeader header, FileChannel out, |
93 |
boolean isNew) throws InitializeException { |
94 |
this.header = header;
|
95 |
this.channel = out;
|
96 |
this.headDrity = isNew;
|
97 |
this.setCharset(Charset.forName(header.mappingEncoding(header.getCharsetName()))); |
98 |
|
99 |
init(); |
100 |
} |
101 |
|
102 |
private void init() throws InitializeException { |
103 |
try {
|
104 |
if (this.channel.size() < this.header.getHeaderLength()) { |
105 |
this.writeHeader();
|
106 |
} |
107 |
buffer = ByteBuffer.allocateDirect(header.getRecordLength());
|
108 |
} catch (Exception e) { |
109 |
throw new InitializeException("DBF Writer", e); |
110 |
} |
111 |
} |
112 |
|
113 |
private void write() throws WriteException { |
114 |
((Buffer) buffer).position(0); |
115 |
int r = buffer.remaining();
|
116 |
try {
|
117 |
while ((r -= channel.write(buffer)) > 0) { |
118 |
// do nothing
|
119 |
} |
120 |
} catch (IOException e) { |
121 |
throw new WriteException("DBF Writer", e); |
122 |
} |
123 |
} |
124 |
|
125 |
    /**
     * Rewrites the DBF header at the beginning of the file (position 0).
     *
     * @throws WriteException wrapping any I/O failure
     */
    private void writeHeader() throws WriteException {
        try {
            channel.position(0);
            header.write(channel);
        } catch (IOException e) {
            throw new WriteException("DBF Writer", e);
        }
    }
133 |
|
134 |
/**
|
135 |
* Write a single dbase record.
|
136 |
*
|
137 |
* @param feature
|
138 |
* @throws UnsupportedEncodingException
|
139 |
* @throws WriteException
|
140 |
*/
|
141 |
public void append(Feature feature) throws WriteException, |
142 |
UnsupportedEncodingException {
|
143 |
this.fillBuffer(feature);
|
144 |
try {
|
145 |
this.moveToEOF();
|
146 |
} catch (IOException e) { |
147 |
throw new WriteException("DbaseFileWriter", e); |
148 |
} |
149 |
this.header.setNumRecords(this.header.getNumRecords() + 1); |
150 |
write(); |
151 |
|
152 |
this.headDrity = true; |
153 |
} |
154 |
|
155 |
private void fillBuffer(Feature feature) |
156 |
throws UnsupportedEncodingException, WriteException { |
157 |
FeatureType featureType = feature.getType(); |
158 |
try {
|
159 |
((Buffer) buffer).position(0); |
160 |
|
161 |
// put the 'not-deleted' marker
|
162 |
buffer.put((byte) ' '); |
163 |
|
164 |
@SuppressWarnings("unchecked") |
165 |
Iterator<FeatureAttributeDescriptor> iterator
|
166 |
= featureType.iterator(); |
167 |
|
168 |
while (iterator.hasNext()) {
|
169 |
FeatureAttributeDescriptor fad = iterator.next(); |
170 |
if (fad.isComputed()) {
|
171 |
continue;
|
172 |
} |
173 |
|
174 |
if (fad.getName().length() > DbaseFile.MAX_FIELD_NAME_LENGTH) {
|
175 |
throw new FieldNameTooLongException( |
176 |
"DBF file", fad.getName());
|
177 |
} |
178 |
|
179 |
int type = fad.getType();
|
180 |
if (type == DataTypes.GEOMETRY) {
|
181 |
continue;
|
182 |
} |
183 |
encodeField(fad, feature); |
184 |
} |
185 |
} catch (Exception e) { |
186 |
throw new WriteException("DbaseFileWriter", e); |
187 |
} |
188 |
} |
189 |
|
190 |
private void moveToEOF() throws IOException { |
191 |
this.moveTo(this.header.getNumRecords()); |
192 |
} |
193 |
|
194 |
private void moveTo(long numReg) throws IOException { |
195 |
// if (!(channel instanceof FileChannel)) {
|
196 |
// throw new IOException(
|
197 |
// "DbaseFileWriterNIO: channel is not a FileChannel. Cannot position properly");
|
198 |
// }
|
199 |
|
200 |
long newPos
|
201 |
= header.getHeaderLength() + numReg * header.getRecordLength(); |
202 |
if (this.channel.position() != newPos) { |
203 |
this.channel.position(newPos);
|
204 |
} |
205 |
} |
206 |
|
207 |
/**
|
208 |
* Write a single dbase record. Useful to update a dbf.
|
209 |
*
|
210 |
* @param feature
|
211 |
* @param numReg
|
212 |
* @throws WriteException
|
213 |
* @throws UnsupportedEncodingException
|
214 |
*/
|
215 |
public void update(Feature feature, long numReg) throws WriteException, |
216 |
UnsupportedEncodingException {
|
217 |
this.fillBuffer(feature);
|
218 |
|
219 |
try {
|
220 |
this.moveTo(numReg);
|
221 |
} catch (IOException e) { |
222 |
throw new WriteException("DbaseFileWriter", e); |
223 |
} |
224 |
|
225 |
write(); |
226 |
} |
227 |
|
228 |
    /**
     * Encodes one attribute of {@code feature} into the record buffer,
     * formatted and sized according to the field descriptor in the header.
     *
     * <p>NOTE(review): although this method declares checked exceptions,
     * the catch-all below rethrows every failure as a RuntimeException
     * carrying the field name and size.</p>
     *
     * @param attr descriptor of the attribute to encode (must not be null)
     * @param feature feature providing the value (must not be null)
     */
    private void encodeField(FeatureAttributeDescriptor attr, Feature feature) throws java.io.UnsupportedEncodingException, UnsupportedEncodingException {
        if (attr == null) {
            throw new NullPointerException("attr is NULL");
        }
        if (feature == null) {
            throw new NullPointerException("feature is NULL");
        }
        int fieldLen = -1;
        try {
            DbaseFieldDescriptor descriptor = this.header.getFieldDescription(attr.getName());

            int type = attr.getType();
            fieldLen = descriptor.getSize();
            String fieldString;

            String fieldName = attr.getName();
            // Null values are stored as an all-blank field.
            if (feature.isNull(attr.getIndex())) {
                safeEncode(fieldName, " ", fieldLen, false);
                return;
            }
            if (DataTypes.BOOLEAN == type) {
                // Logical fields are written as a single byte: 'T' or 'F'.
                boolean b = feature.getBoolean(attr.getIndex());
                safeEncode(fieldName, b ? "T" : "F", 1, true);

            } else if (DataTypes.TIME == type) {
                Date date = feature.getTime(attr.getIndex());
                fieldString = formatter.formatTime(date);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.TIMESTAMP == type) {
                Date date = feature.getTimestamp(attr.getIndex());
                fieldString = formatter.formatTimestamp(date);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.DATE == type) {
                Date date = feature.getDate(attr.getIndex());
                fieldString = formatter.formatDate(date);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.DECIMAL == type) {
                BigDecimal n = feature.getDecimal(attr.getIndex());
                fieldString = formatter.format(n, fieldLen);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.DOUBLE == type) {
                double n = feature.getDouble(attr.getIndex());
                fieldString = formatter.format(n, fieldLen, descriptor.getScale());
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.FLOAT == type) {
                float n = feature.getFloat(attr.getIndex());
                fieldString = formatter.format(n, fieldLen, descriptor.getScale());
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.LONG == type) {
                long l = feature.getLong(attr.getIndex());
                fieldString = formatter.format(l, fieldLen);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.INT == type) {
                int n = feature.getInt(attr.getIndex());
                fieldString = formatter.format(n, fieldLen);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.BYTE == type) {
                int n = feature.getInt(attr.getIndex());
                fieldString = formatter.format(n, fieldLen);
                safeEncode(fieldName, fieldString, fieldLen, false);

            } else if (DataTypes.STRING == type) {
                // Strings are left-aligned (right-padded with blanks).
                String s = feature.getString(attr.getIndex());
                safeEncode(fieldName, StringUtils.defaultIfEmpty(s, ""), fieldLen, true);

            } else {
                // If the type is unknown, fall back to storing it as a string.
                String s = feature.getString(attr.getIndex());
                safeEncode(fieldName, StringUtils.defaultIfEmpty(s, ""), fieldLen, true);

            }
        } catch (Exception ex) {
            throw new RuntimeException("Can't encode field '" + attr.getName() + "' with size "+fieldLen, ex);
        }

    }
315 |
|
316 |
    /**
     * Encodes {@code in} into the record buffer so that the encoded value
     * occupies exactly {@code limit} bytes, truncating and/or padding with
     * blanks as needed.
     *
     * This may truncate some record, but it is required to ensure that the
     * field limit is not overflowed when using variable-length charsets such
     * as UTF-8.
     *
     * @param fieldName name of the field, used only for error reporting
     * @param in value to encode
     * @param limit exact number of bytes the field occupies in the record
     * @param rightPadding true to left-align the value (pad on the right,
     *        used for strings); false to right-align it (pad on the left,
     *        used for numbers, dates and nulls)
     * @throws UnsupportedEncodingException if the value cannot be made to
     *         fit exactly {@code limit} bytes (e.g. a multi-byte blank that
     *         does not divide the remaining space)
     */
    private void safeEncode(String fieldName, String in, int limit, boolean rightPadding) throws UnsupportedEncodingException {
        try {
            byte[] encodedString = in.getBytes(this.charset);
            if (encodedString.length > limit) {
                this.getLogger().error("'"+fieldName+"' field size ["+encodedString.length+"] exceeds limit "+limit);
                // too long, truncating
                /*
                 * The block of code below is equivalent to this simple code
                 * fragment:
                 *
                 *   if (rightPadding) {
                 *       in = in.substring(0, in.length()-1);
                 *       encodedString = in.getBytes(charset);
                 *   }
                 *   else {
                 *       in.substring(1, in.length());
                 *       encodedString = in.getBytes(charset);
                 *   }
                 *
                 * However, the implemented algorithm has a much better performance
                 * for the average and worst cases (when the input string has a lot
                 * of multibyte characters), while keeping a good performance
                 * for the best case (when all the characters in the input string
                 * can be represented as single bytes using the selected charset).
                 *
                 * The general strategy is to compute the deviation from the
                 * required maximum number of bytes (limit) and the actual number
                 * of bytes of the encoded String.
                 *
                 * Then, we use this deviation to estimate the amount of characters
                 * to truncate, based on the average factor of bytes per char in the
                 * input string.
                 *
                 * We truncate the string using this approach until the deviation
                 * gets stable.
                 *
                 * Finally, as we should be close enough to the right truncation position,
                 * we increment/decrement the truncated string by only 1 character, to
                 * ensure we truncate in the exact position.
                 */
                String str = in;
                int estimatedDiff, deviation;
                int deviationPrev;
                double ratio;
                byte[] encodedChar;
                // For left-padded fields, truncatePos is the index of the first
                // kept character; for right-padded fields it is the cut point.
                int truncatePos = 0;
                deviation = encodedString.length - limit;
                // Seed deviationPrev so the loop runs at least once.
                deviationPrev = deviation - 1;
                while (Math.abs(deviation) > Math.abs(deviationPrev) && str.length() > 0) {
                    // Average bytes-per-character of the current candidate.
                    ratio = ((double) encodedString.length) / ((double) str.length());
                    // apply the estimated diff, ensuring it is at least >= 1.0 in absolute value
                    estimatedDiff = Math.max((int) (((double) deviation) / ratio), (int) (Math.signum(deviation) * 1));
                    // too long, truncating
                    if (rightPadding) {
                        truncatePos = Math.max(str.length() - estimatedDiff, 0);
                        str = in.substring(0, truncatePos);
                    } else {
                        truncatePos = Math.max(truncatePos + estimatedDiff, 0);
                        str = in.substring(truncatePos);
                    }
                    encodedString = str.getBytes(charset);
                    deviationPrev = deviation;
                    deviation = encodedString.length - limit;
                }
                // now we are close enough, get the exact position for truncating
                while (encodedString.length > limit) {
                    // too long, truncating one character at a time
                    if (rightPadding) {
                        str = in.substring(0, str.length() - 1);
                    } else {
                        truncatePos = truncatePos + 1;
                        str = in.substring(truncatePos);
                    }
                    encodedString = str.getBytes(charset);
                }
                while (encodedString.length < limit && str.length() < in.length()) {
                    // Extend if necessary:
                    // 1 - Get the length in bytes of the next char
                    // 2 - Add the char to the substring if we are still within the limits
                    if (rightPadding) {
                        encodedChar = in.substring(str.length(), str.length() + 1).getBytes(charset);
                    } else {
                        encodedChar = in.substring(truncatePos - 1, truncatePos).getBytes(charset);
                    }
                    if ((encodedString.length + encodedChar.length) > limit) {
                        // one more char would overflow the limit
                        break;
                    }
                    // too short, extending
                    if (rightPadding) {
                        str = in.substring(0, str.length() + 1);
                    } else {
                        truncatePos = truncatePos - 1;
                        str = in.substring(truncatePos);
                    }
                    encodedString = str.getBytes(charset);
                }
            }
            // Left-aligned values: data first, then blank padding.
            if (rightPadding) {
                buffer.put(encodedString);
            }
            if (encodedString.length < limit) {
                // too short, padding with encoded blanks
                int i = encodedString.length;
                while (i < limit) {
                    ((Buffer) blank).position(0);
                    buffer.put(blank);
                    i = i + blankSize;
                }
                if (i > limit) {
                    // Might happen for instance if charset is UTF16 and the
                    // limit of characters in the field is an odd number
                    throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset"));
                }
            }
            // Right-aligned values: blank padding first, then data.
            if (!rightPadding) {
                buffer.put(encodedString);
            }
        } catch (BufferOverflowException exc) {
            // Might happen for instance if charset is UTF16 and the
            // limit of characters in the field is an odd number
            throw new UnsupportedEncodingException(exc);
        }
    }
455 |
|
456 |
|
457 |
/**
|
458 |
* Release resources associated with this writer. <B>Highly recommended</B>
|
459 |
*
|
460 |
* @throws CloseException
|
461 |
*/
|
462 |
public void close() throws CloseException { |
463 |
// IANS - GEOT 193, bogus 0x00 written. According to dbf spec, optional
|
464 |
// eof 0x1a marker is, well, optional. Since the original code wrote a
|
465 |
// 0x00 (which is wrong anyway) lets just do away with this :)
|
466 |
// - produced dbf works in OpenOffice and ArcExplorer java, so it must
|
467 |
// be okay.
|
468 |
// buffer.position(0);
|
469 |
// buffer.put((byte) 0).position(0).limit(1);
|
470 |
// write();
|
471 |
|
472 |
if (headDrity) {
|
473 |
try {
|
474 |
this.writeHeader();
|
475 |
} catch (WriteException e) {
|
476 |
throw new CloseException("DbaseFileWriter", e); |
477 |
} |
478 |
} |
479 |
|
480 |
try {
|
481 |
channel.close(); |
482 |
} catch (IOException e) { |
483 |
throw new CloseException("DBF Writer", e); |
484 |
} |
485 |
if (buffer instanceof MappedByteBuffer) { |
486 |
// NIOUtilities.clean(buffer);
|
487 |
} |
488 |
|
489 |
buffer = null;
|
490 |
channel = null;
|
491 |
formatter = null;
|
492 |
} |
493 |
|
494 |
public void setCharset(Charset charset) { |
495 |
this.charset = charset;
|
496 |
blank = charset.encode(" ");
|
497 |
blankSize = blank.limit(); |
498 |
} |
499 |
|
500 |
protected FilteredLogger getLogger() {
|
501 |
if(this.logger == null){ |
502 |
this.logger = new FilteredLogger(LOGGER, "DbaseFileWriter", 2000L); |
503 |
} |
504 |
return this.logger; |
505 |
} |
506 |
|
507 |
} |