svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.dbf / src / main / java / org / gvsig / fmap / dal / store / dbf / utils / DbaseFileWriter.java @ 45209
History | View | Annotate | Download (17.7 KB)
1 |
/**
|
---|---|
2 |
* gvSIG. Desktop Geographic Information System.
|
3 |
*
|
4 |
* Copyright (C) 2007-2013 gvSIG Association.
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 3
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
* For any additional information, do not hesitate to contact us
|
22 |
* at info AT gvsig.com, or visit our website www.gvsig.com.
|
23 |
*/
|
24 |
package org.gvsig.fmap.dal.store.dbf.utils; |
25 |
|
26 |
import java.io.IOException; |
27 |
import java.math.BigDecimal; |
28 |
import java.nio.Buffer; |
29 |
import java.nio.BufferOverflowException; |
30 |
import java.nio.ByteBuffer; |
31 |
import java.nio.MappedByteBuffer; |
32 |
import java.nio.channels.FileChannel; |
33 |
import java.nio.charset.Charset; |
34 |
import java.util.Date; |
35 |
import java.util.Iterator; |
36 |
import org.apache.commons.lang3.StringUtils; |
37 |
|
38 |
import org.gvsig.fmap.dal.DataTypes; |
39 |
import org.gvsig.fmap.dal.exception.CloseException; |
40 |
import org.gvsig.fmap.dal.exception.InitializeException; |
41 |
import org.gvsig.fmap.dal.exception.UnsupportedEncodingException; |
42 |
import org.gvsig.fmap.dal.exception.WriteException; |
43 |
import org.gvsig.fmap.dal.feature.Feature; |
44 |
import org.gvsig.fmap.dal.feature.FeatureAttributeDescriptor; |
45 |
import org.gvsig.fmap.dal.feature.FeatureType; |
46 |
|
47 |
/**
|
48 |
* A DbaseFileWriter is used to write a dbase III format file. The general use of
|
49 |
* this class is: <CODE><PRE>
|
50 |
* DbaseFileHeader header = ...
|
51 |
* FileChannel out = new FileOutputStream("thefile.dbf").getChannel();
|
52 |
* DbaseFileWriter w = new DbaseFileWriter(header, out, true);
|
53 |
* while ( moreRecords ) {
|
54 |
* w.append( getMyRecord() );
|
55 |
* }
|
56 |
* w.close();
|
57 |
* </PRE></CODE> You must supply the <CODE>moreRecords</CODE> and
|
58 |
* <CODE>getMyRecord()</CODE> logic...
|
59 |
*
|
60 |
* @author Ian Schneider
|
61 |
*/
|
62 |
public class DbaseFileWriter { |
63 |
|
64 |
private final DbaseFileHeader header; |
65 |
private FieldFormatter formatter = new FieldFormatter(); |
66 |
private FileChannel channel; |
67 |
private ByteBuffer buffer; |
68 |
private boolean headDrity = false; |
69 |
private ByteBuffer blank; |
70 |
private int blankSize; |
71 |
|
72 |
private Charset charset; |
73 |
|
74 |
/**
|
75 |
* Create a DbaseFileWriter using the specified header and writing to the
|
76 |
* given channel.
|
77 |
*
|
78 |
* @param header The DbaseFileHeader to write.
|
79 |
* @param out The Channel to write to.
|
80 |
* @param isNew
|
81 |
* @throws org.gvsig.fmap.dal.exception.InitializeException
|
82 |
*
|
83 |
*
|
84 |
*/
|
85 |
public DbaseFileWriter(DbaseFileHeader header, FileChannel out, |
86 |
boolean isNew) throws InitializeException { |
87 |
this.header = header;
|
88 |
this.channel = out;
|
89 |
this.headDrity = isNew;
|
90 |
this.setCharset(Charset.forName(header.mappingEncoding(header.getCharsetName()))); |
91 |
|
92 |
init(); |
93 |
} |
94 |
|
95 |
private void init() throws InitializeException { |
96 |
try {
|
97 |
if (this.channel.size() < this.header.getHeaderLength()) { |
98 |
this.writeHeader();
|
99 |
} |
100 |
buffer = ByteBuffer.allocateDirect(header.getRecordLength());
|
101 |
} catch (Exception e) { |
102 |
throw new InitializeException("DBF Writer", e); |
103 |
} |
104 |
} |
105 |
|
106 |
private void write() throws WriteException { |
107 |
((Buffer) buffer).position(0); |
108 |
int r = buffer.remaining();
|
109 |
try {
|
110 |
while ((r -= channel.write(buffer)) > 0) { |
111 |
// do nothing
|
112 |
} |
113 |
} catch (IOException e) { |
114 |
throw new WriteException("DBF Writer", e); |
115 |
} |
116 |
} |
117 |
|
118 |
private void writeHeader() throws WriteException { |
119 |
try {
|
120 |
channel.position(0);
|
121 |
header.write(channel); |
122 |
} catch (IOException e) { |
123 |
throw new WriteException("DBF Writer", e); |
124 |
} |
125 |
} |
126 |
|
127 |
/**
|
128 |
* Write a single dbase record.
|
129 |
*
|
130 |
* @param feature
|
131 |
* @throws UnsupportedEncodingException
|
132 |
* @throws WriteException
|
133 |
*/
|
134 |
public void append(Feature feature) throws WriteException, |
135 |
UnsupportedEncodingException {
|
136 |
this.fillBuffer(feature);
|
137 |
try {
|
138 |
this.moveToEOF();
|
139 |
} catch (IOException e) { |
140 |
throw new WriteException("DbaseFileWriter", e); |
141 |
} |
142 |
this.header.setNumRecords(this.header.getNumRecords() + 1); |
143 |
write(); |
144 |
|
145 |
this.headDrity = true; |
146 |
} |
147 |
|
148 |
private void fillBuffer(Feature feature) |
149 |
throws UnsupportedEncodingException, WriteException { |
150 |
FeatureType featureType = feature.getType(); |
151 |
try {
|
152 |
((Buffer) buffer).position(0); |
153 |
|
154 |
// put the 'not-deleted' marker
|
155 |
buffer.put((byte) ' '); |
156 |
|
157 |
@SuppressWarnings("unchecked") |
158 |
Iterator<FeatureAttributeDescriptor> iterator
|
159 |
= featureType.iterator(); |
160 |
|
161 |
while (iterator.hasNext()) {
|
162 |
FeatureAttributeDescriptor fad = iterator.next(); |
163 |
if (fad.isComputed()) {
|
164 |
continue;
|
165 |
} |
166 |
|
167 |
if (fad.getName().length() > DbaseFile.MAX_FIELD_NAME_LENGTH) {
|
168 |
throw new FieldNameTooLongException( |
169 |
"DBF file", fad.getName());
|
170 |
} |
171 |
|
172 |
int type = fad.getType();
|
173 |
if (type == DataTypes.GEOMETRY) {
|
174 |
continue;
|
175 |
} |
176 |
encodeField(fad, feature); |
177 |
} |
178 |
} catch (Exception e) { |
179 |
throw new WriteException("DbaseFileWriter", e); |
180 |
} |
181 |
} |
182 |
|
183 |
private void moveToEOF() throws IOException { |
184 |
this.moveTo(this.header.getNumRecords()); |
185 |
} |
186 |
|
187 |
private void moveTo(long numReg) throws IOException { |
188 |
// if (!(channel instanceof FileChannel)) {
|
189 |
// throw new IOException(
|
190 |
// "DbaseFileWriterNIO: channel is not a FileChannel. Cannot position properly");
|
191 |
// }
|
192 |
|
193 |
long newPos
|
194 |
= header.getHeaderLength() + numReg * header.getRecordLength(); |
195 |
if (this.channel.position() != newPos) { |
196 |
this.channel.position(newPos);
|
197 |
} |
198 |
} |
199 |
|
200 |
/**
|
201 |
* Write a single dbase record. Useful to update a dbf.
|
202 |
*
|
203 |
* @param feature
|
204 |
* @param numReg
|
205 |
* @throws WriteException
|
206 |
* @throws UnsupportedEncodingException
|
207 |
*/
|
208 |
public void update(Feature feature, long numReg) throws WriteException, |
209 |
UnsupportedEncodingException {
|
210 |
this.fillBuffer(feature);
|
211 |
|
212 |
try {
|
213 |
this.moveTo(numReg);
|
214 |
} catch (IOException e) { |
215 |
throw new WriteException("DbaseFileWriter", e); |
216 |
} |
217 |
|
218 |
write(); |
219 |
} |
220 |
|
221 |
private void encodeField(FeatureAttributeDescriptor attr, Feature feature) throws java.io.UnsupportedEncodingException, UnsupportedEncodingException { |
222 |
DbaseFieldDescriptor descriptor = this.header.getFieldDescription(attr.getName());
|
223 |
|
224 |
int type = attr.getType();
|
225 |
final int fieldLen = descriptor.getSize(); |
226 |
String fieldString;
|
227 |
|
228 |
// if( buffer.position()!=descriptor.getOffsetInRecord() ) {
|
229 |
// throw new RuntimeException("Encoding field '"+descriptor.getName()+"' found an incorrect offset.");
|
230 |
// }
|
231 |
if( feature.isNull(attr.getIndex()) ) {
|
232 |
safeEncode(" ", fieldLen, false); |
233 |
return;
|
234 |
} |
235 |
if (DataTypes.BOOLEAN == type) {
|
236 |
boolean b = feature.getBoolean(attr.getIndex());
|
237 |
safeEncode(b? "T":"F", 1, true); |
238 |
|
239 |
} else if (DataTypes.TIME == type) { |
240 |
Date date = feature.getTime(attr.getIndex());
|
241 |
fieldString = formatter.formatTime(date); |
242 |
safeEncode(fieldString, fieldLen, false);
|
243 |
|
244 |
} else if (DataTypes.TIMESTAMP == type) { |
245 |
Date date = feature.getTimestamp(attr.getIndex());
|
246 |
fieldString = formatter.formatTimestamp(date); |
247 |
safeEncode(fieldString, fieldLen, false);
|
248 |
|
249 |
} else if (DataTypes.DATE == type) { |
250 |
Date date = feature.getDate(attr.getIndex());
|
251 |
fieldString = formatter.formatDate(date); |
252 |
safeEncode(fieldString, fieldLen, false);
|
253 |
|
254 |
} else if (DataTypes.DECIMAL == type) { |
255 |
BigDecimal n = feature.getDecimal(attr.getIndex());
|
256 |
fieldString = formatter.format(n, fieldLen); |
257 |
safeEncode(fieldString, fieldLen, false);
|
258 |
|
259 |
} else if (DataTypes.DOUBLE == type) { |
260 |
double n = feature.getDouble(attr.getIndex());
|
261 |
fieldString = formatter.format(n, fieldLen, descriptor.getScale()); |
262 |
safeEncode(fieldString, fieldLen, false);
|
263 |
|
264 |
} else if (DataTypes.FLOAT == type) { |
265 |
float n = feature.getFloat(attr.getIndex());
|
266 |
fieldString = formatter.format(n, fieldLen, descriptor.getScale()); |
267 |
safeEncode(fieldString, fieldLen, false);
|
268 |
|
269 |
} else if (DataTypes.LONG == type) { |
270 |
long l = feature.getLong(attr.getIndex());
|
271 |
fieldString = formatter.format(l, fieldLen); |
272 |
safeEncode(fieldString, fieldLen, false);
|
273 |
|
274 |
} else if (DataTypes.INT == type) { |
275 |
int n = feature.getInt(attr.getIndex());
|
276 |
fieldString = formatter.format(n, fieldLen); |
277 |
safeEncode(fieldString, fieldLen, false);
|
278 |
|
279 |
} else if (DataTypes.BYTE == type) { |
280 |
int n = feature.getInt(attr.getIndex());
|
281 |
fieldString = formatter.format(n, fieldLen); |
282 |
safeEncode(fieldString, fieldLen, false);
|
283 |
|
284 |
} else if (DataTypes.STRING == type) { |
285 |
String s = feature.getString(attr.getIndex());
|
286 |
safeEncode(StringUtils.defaultIfEmpty(s, ""), fieldLen, true); |
287 |
|
288 |
} else {
|
289 |
// Si no conocemos el tipo intentamos guardarlo como un string
|
290 |
String s = feature.getString(attr.getIndex());
|
291 |
safeEncode(StringUtils.defaultIfEmpty(s, ""), fieldLen, true); |
292 |
|
293 |
} |
294 |
|
295 |
} |
296 |
|
297 |
/**
|
298 |
* Returns a safely padded (and potentially truncated) string
|
299 |
*
|
300 |
* This may truncate some record, but it is required to ensure that the
|
301 |
* field limit is not overflowed when using variable-length charsets such as
|
302 |
* UTF-8.
|
303 |
*
|
304 |
* @throws UnsupportedEncodingException
|
305 |
*/
|
306 |
private void safeEncode(String in, int limit, boolean rightPadding) throws UnsupportedEncodingException { |
307 |
try {
|
308 |
byte[] encodedString = in.getBytes(this.charset); |
309 |
if (encodedString.length > limit) {
|
310 |
// too long, truncating
|
311 |
/*
|
312 |
* The block code bellow is equivalent to this simple code
|
313 |
* fragment:
|
314 |
|
315 |
if (rightPadding) {
|
316 |
in = in.substring(0, in.length()-1);
|
317 |
encodedString = in.getBytes(charset);
|
318 |
}
|
319 |
else {
|
320 |
in.substring(1, in.length());
|
321 |
encodedString = in.getBytes(charset);
|
322 |
}
|
323 |
|
324 |
However, the implemented algorithm has a much better performance
|
325 |
for the average and worst cases (when the input string has a lot
|
326 |
of multibyte characters), while keeping a good performance
|
327 |
for the best case (when all the characters in the input string
|
328 |
can be represented as single bytes using the selected charset).
|
329 |
|
330 |
The general strategy is to compute the deviation from the
|
331 |
required maximum number of bytes (limit) and the actual number
|
332 |
of bytes of the encoded String.
|
333 |
|
334 |
Then, we use this deviation to estimate the amount of characters
|
335 |
to truncate, based on the average factor of bytes per char in the
|
336 |
input string.
|
337 |
|
338 |
We truncate the string using this approach until the deviation
|
339 |
gets stable.
|
340 |
|
341 |
Finally, as we should be close enough to the right truncation position,
|
342 |
we increment/decrement the truncated string by only 1 character, to
|
343 |
ensure we truncate in the exact position.
|
344 |
*/
|
345 |
String str = in;
|
346 |
int estimatedDiff, deviation;
|
347 |
int deviationPrev;
|
348 |
double ratio;
|
349 |
byte[] encodedChar; |
350 |
int truncatePos = 0; |
351 |
deviation = encodedString.length - limit; |
352 |
deviationPrev = deviation - 1;
|
353 |
while (Math.abs(deviation) > Math.abs(deviationPrev) && str.length() > 0) { |
354 |
ratio = ((double) encodedString.length) / ((double) str.length()); |
355 |
// apply the estimated diff, ensuring it is at least >= 1.0 in absolute value
|
356 |
estimatedDiff = Math.max((int) (((double) deviation) / ratio), (int) (Math.signum(deviation) * 1)); |
357 |
// too long, truncating
|
358 |
if (rightPadding) {
|
359 |
truncatePos = Math.max(str.length() - estimatedDiff, 0); |
360 |
str = in.substring(0, truncatePos);
|
361 |
} else {
|
362 |
truncatePos = Math.max(truncatePos + estimatedDiff, 0); |
363 |
str = in.substring(truncatePos); |
364 |
} |
365 |
encodedString = str.getBytes(charset); |
366 |
deviationPrev = deviation; |
367 |
deviation = encodedString.length - limit; |
368 |
} |
369 |
// now we are close enough, get the exact position for truncating
|
370 |
while (encodedString.length > limit) {
|
371 |
// too long, truncating
|
372 |
// System.out.println("truncating");
|
373 |
if (rightPadding) {
|
374 |
str = in.substring(0, str.length() - 1); |
375 |
} else {
|
376 |
truncatePos = truncatePos + 1;
|
377 |
str = in.substring(truncatePos); |
378 |
} |
379 |
encodedString = str.getBytes(charset); |
380 |
} |
381 |
while (encodedString.length < limit && str.length() < in.length()) {
|
382 |
// Extend if necessary:
|
383 |
// 1 - Get the length in bytes of the next char
|
384 |
// 2 - Add the char to the substring if we are still within the limits
|
385 |
// System.out.println("extending");
|
386 |
if (rightPadding) {
|
387 |
encodedChar = in.substring(str.length(), str.length() + 1).getBytes(charset);
|
388 |
} else {
|
389 |
encodedChar = in.substring(truncatePos - 1, truncatePos).getBytes(charset);
|
390 |
// System.out.println(encodedChar);
|
391 |
// System.out.println(encodedChar.length);
|
392 |
// System.out.println(testStrings[i].substring(truncatePos-1, truncatePos));
|
393 |
} |
394 |
// System.out.println(testStrings[i].substring(in.length(), in.length()+1));
|
395 |
if ((encodedString.length + encodedChar.length) > limit) {
|
396 |
// one more char would overflow the limit
|
397 |
break;
|
398 |
} |
399 |
// too short, extending
|
400 |
if (rightPadding) {
|
401 |
str = in.substring(0, str.length() + 1); |
402 |
} else {
|
403 |
truncatePos = truncatePos - 1;
|
404 |
str = in.substring(truncatePos); |
405 |
} |
406 |
encodedString = str.getBytes(charset); |
407 |
} |
408 |
} |
409 |
if (rightPadding) {
|
410 |
buffer.put(encodedString); |
411 |
} |
412 |
if (encodedString.length < limit) {
|
413 |
// too short, padding
|
414 |
int i = encodedString.length;
|
415 |
while (i < limit) {
|
416 |
((Buffer) blank).position(0); |
417 |
buffer.put(blank); |
418 |
i = i + blankSize; |
419 |
} |
420 |
if (i > limit) {
|
421 |
// Might happen for instance if charset is UTF16 and the
|
422 |
// limit of characters in the field is an odd number
|
423 |
throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset")); |
424 |
} |
425 |
} |
426 |
if (!rightPadding) {
|
427 |
buffer.put(encodedString); |
428 |
} |
429 |
} catch (BufferOverflowException exc) { |
430 |
// Might happen for instance if charset is UTF16 and the
|
431 |
// limit of characters in the field is an odd number
|
432 |
throw new UnsupportedEncodingException(exc); |
433 |
} |
434 |
} |
435 |
|
436 |
|
437 |
/**
|
438 |
* Release resources associated with this writer. <B>Highly recommended</B>
|
439 |
*
|
440 |
* @throws CloseException
|
441 |
*/
|
442 |
public void close() throws CloseException { |
443 |
// IANS - GEOT 193, bogus 0x00 written. According to dbf spec, optional
|
444 |
// eof 0x1a marker is, well, optional. Since the original code wrote a
|
445 |
// 0x00 (which is wrong anyway) lets just do away with this :)
|
446 |
// - produced dbf works in OpenOffice and ArcExplorer java, so it must
|
447 |
// be okay.
|
448 |
// buffer.position(0);
|
449 |
// buffer.put((byte) 0).position(0).limit(1);
|
450 |
// write();
|
451 |
|
452 |
if (headDrity) {
|
453 |
try {
|
454 |
this.writeHeader();
|
455 |
} catch (WriteException e) {
|
456 |
throw new CloseException("DbaseFileWriter", e); |
457 |
} |
458 |
} |
459 |
|
460 |
try {
|
461 |
channel.close(); |
462 |
} catch (IOException e) { |
463 |
throw new CloseException("DBF Writer", e); |
464 |
} |
465 |
if (buffer instanceof MappedByteBuffer) { |
466 |
// NIOUtilities.clean(buffer);
|
467 |
} |
468 |
|
469 |
buffer = null;
|
470 |
channel = null;
|
471 |
formatter = null;
|
472 |
} |
473 |
|
474 |
public void setCharset(Charset charset) { |
475 |
this.charset = charset;
|
476 |
blank = charset.encode(" ");
|
477 |
blankSize = blank.limit(); |
478 |
} |
479 |
|
480 |
} |