Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.dbf / src / main / java / org / gvsig / fmap / dal / store / dbf / utils / DbaseFileWriter.java @ 46904

History | View | Annotate | Download (19 KB)

1
/**
2
 * gvSIG. Desktop Geographic Information System.
3
 *
4
 * Copyright (C) 2007-2013 gvSIG Association.
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 3
9
 * of the License, or (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
 * MA  02110-1301, USA.
20
 *
21
 * For any additional information, do not hesitate to contact us
22
 * at info AT gvsig.com, or visit our website www.gvsig.com.
23
 */
24
package org.gvsig.fmap.dal.store.dbf.utils;
25

    
26
import java.io.IOException;
27
import java.math.BigDecimal;
28
import java.nio.Buffer;
29
import java.nio.BufferOverflowException;
30
import java.nio.ByteBuffer;
31
import java.nio.MappedByteBuffer;
32
import java.nio.channels.FileChannel;
33
import java.nio.charset.Charset;
34
import java.util.Date;
35
import java.util.Iterator;
36
import org.apache.commons.lang3.StringUtils;
37

    
38
import org.gvsig.fmap.dal.DataTypes;
39
import org.gvsig.fmap.dal.exception.CloseException;
40
import org.gvsig.fmap.dal.exception.InitializeException;
41
import org.gvsig.fmap.dal.exception.UnsupportedEncodingException;
42
import org.gvsig.fmap.dal.exception.WriteException;
43
import org.gvsig.fmap.dal.feature.Feature;
44
import org.gvsig.fmap.dal.feature.FeatureAttributeDescriptor;
45
import org.gvsig.fmap.dal.feature.FeatureType;
46
import org.gvsig.tools.logger.FilteredLogger;
47
import org.slf4j.Logger;
48
import org.slf4j.LoggerFactory;
49

    
50
/**
51
 * A DbaseFileWriter is used to write a dbase III format file. The general use of
52
 * this class is: <CODE><PRE>
53
 * DbaseFileHeader header = ...
54
 * FileChannel out = new FileOutputStream("thefile.dbf").getChannel();
55
 * DbaseFileWriter w = new DbaseFileWriter(header, out, true);
56
 * while ( moreRecords ) {
57
 *   w.append( getMyRecord() );
58
 * }
59
 * w.close();
60
 * </PRE></CODE> You must supply the <CODE>moreRecords</CODE> and
61
 * <CODE>getMyRecord()</CODE> logic...
62
 *
63
 * @author Ian Schneider
64
 */
65
public class DbaseFileWriter {
66
    
67
    private static final Logger LOGGER = LoggerFactory.getLogger(DbaseFileWriter.class);
68

    
69
    private final DbaseFileHeader header;
70
    private FieldFormatter formatter = new FieldFormatter();
71
    private FileChannel channel;
72
    private ByteBuffer buffer;
73
    private boolean headDrity = false;
74
    private ByteBuffer blank;
75
    private int blankSize;
76

    
77
    private Charset charset;
78
    
79
    private FilteredLogger logger;
80

    
81
    /**
82
     * Create a DbaseFileWriter using the specified header and writing to the
83
     * given channel.
84
     *
85
     * @param header The DbaseFileHeader to write.
86
     * @param out The Channel to write to.
87
     * @param isNew
88
     * @throws org.gvsig.fmap.dal.exception.InitializeException
89
     *
90
     *
91
     */
92
    public DbaseFileWriter(DbaseFileHeader header, FileChannel out,
93
            boolean isNew) throws InitializeException {
94
        this.header = header;
95
        this.channel = out;
96
        this.headDrity = isNew;
97
        this.setCharset(Charset.forName(header.mappingEncoding(header.getCharsetName())));
98

    
99
        init();
100
    }
101

    
102
    private void init() throws InitializeException {
103
        try {
104
            if (this.channel.size() < this.header.getHeaderLength()) {
105
                this.writeHeader();
106
            }
107
            buffer = ByteBuffer.allocateDirect(header.getRecordLength());
108
        } catch (Exception e) {
109
            throw new InitializeException("DBF Writer", e);
110
        }
111
    }
112

    
113
    private void write() throws WriteException {
114
        ((Buffer) buffer).position(0);
115
        int r = buffer.remaining();
116
        try {
117
            while ((r -= channel.write(buffer)) > 0) {
118
                // do nothing
119
            }
120
        } catch (IOException e) {
121
            throw new WriteException("DBF Writer", e);
122
        }
123
    }
124

    
125
    private void writeHeader() throws WriteException {
126
        try {
127
            channel.position(0);
128
            header.write(channel);
129
        } catch (IOException e) {
130
            throw new WriteException("DBF Writer", e);
131
        }
132
    }
133

    
134
    /**
135
     * Write a single dbase record.
136
     *
137
     * @param feature
138
     * @throws UnsupportedEncodingException
139
     * @throws WriteException
140
     */
141
    public void append(Feature feature) throws WriteException,
142
            UnsupportedEncodingException {
143
        this.fillBuffer(feature);
144
        try {
145
            this.moveToEOF();
146
        } catch (IOException e) {
147
            throw new WriteException("DbaseFileWriter", e);
148
        }
149
        this.header.setNumRecords(this.header.getNumRecords() + 1);
150
        write();
151

    
152
        this.headDrity = true;
153
    }
154

    
155
    private void fillBuffer(Feature feature)
156
            throws UnsupportedEncodingException, WriteException {
157
        FeatureType featureType = feature.getType();
158
        try {
159
            ((Buffer) buffer).position(0);
160

    
161
            // put the 'not-deleted' marker
162
            buffer.put((byte) ' ');
163

    
164
            @SuppressWarnings("unchecked")
165
            Iterator<FeatureAttributeDescriptor> iterator
166
                    = featureType.iterator();
167

    
168
            while (iterator.hasNext()) {
169
                FeatureAttributeDescriptor fad = iterator.next();
170
                if (fad.isComputed()) {
171
                    continue;
172
                }
173

    
174
                if (fad.getName().length() > DbaseFile.MAX_FIELD_NAME_LENGTH) {
175
                    throw new FieldNameTooLongException(
176
                            "DBF file", fad.getName());
177
                }
178

    
179
                int type = fad.getType();
180
                if (type == DataTypes.GEOMETRY) {
181
                    continue;
182
                }
183
                encodeField(fad, feature);
184
            }
185
        } catch (Exception e) {
186
            throw new WriteException("DbaseFileWriter", e);
187
        }
188
    }
189

    
190
    private void moveToEOF() throws IOException {
191
        this.moveTo(this.header.getNumRecords());
192
    }
193

    
194
    private void moveTo(long numReg) throws IOException {
195
        // if (!(channel instanceof FileChannel)) {
196
        // throw new IOException(
197
        // "DbaseFileWriterNIO: channel is not a FileChannel. Cannot position properly");
198
        // }
199

    
200
        long newPos
201
                = header.getHeaderLength() + numReg * header.getRecordLength();
202
        if (this.channel.position() != newPos) {
203
            this.channel.position(newPos);
204
        }
205
    }
206

    
207
    /**
208
     * Write a single dbase record. Useful to update a dbf.
209
     *
210
     * @param feature
211
     * @param numReg
212
     * @throws WriteException
213
     * @throws UnsupportedEncodingException
214
     */
215
    public void update(Feature feature, long numReg) throws WriteException,
216
            UnsupportedEncodingException {
217
        this.fillBuffer(feature);
218

    
219
        try {
220
            this.moveTo(numReg);
221
        } catch (IOException e) {
222
            throw new WriteException("DbaseFileWriter", e);
223
        }
224

    
225
        write();
226
    }
227

    
228
    private void encodeField(FeatureAttributeDescriptor attr, Feature feature) throws java.io.UnsupportedEncodingException, UnsupportedEncodingException {
229
        if (attr == null) {
230
            throw new NullPointerException("attr is NULL");
231
        }
232
        if (feature == null) {
233
            throw new NullPointerException("feature is NULL");
234
        }
235
        int fieldLen = -1;
236
        try {
237
            DbaseFieldDescriptor descriptor = this.header.getFieldDescription(attr.getName());
238

    
239
            int type = attr.getType();
240
            fieldLen = descriptor.getSize();
241
            String fieldString;
242

    
243
//        if( buffer.position()!=descriptor.getOffsetInRecord() ) {
244
//            throw new RuntimeException("Encoding field '"+descriptor.getName()+"' found an incorrect offset.");
245
//        }
246
            String fieldName = attr.getName();
247
            if (feature.isNull(attr.getIndex())) {
248
                safeEncode(fieldName, " ", fieldLen, false);
249
                return;
250
            }
251
            if (DataTypes.BOOLEAN == type) {
252
                boolean b = feature.getBoolean(attr.getIndex());
253
                safeEncode(fieldName, b ? "T" : "F", 1, true);
254

    
255
            } else if (DataTypes.TIME == type) {
256
                Date date = feature.getTime(attr.getIndex());
257
                fieldString = formatter.formatTime(date);
258
                safeEncode(fieldName, fieldString, fieldLen, false);
259

    
260
            } else if (DataTypes.TIMESTAMP == type) {
261
                Date date = feature.getTimestamp(attr.getIndex());
262
                fieldString = formatter.formatTimestamp(date);
263
                safeEncode(fieldName, fieldString, fieldLen, false);
264

    
265
            } else if (DataTypes.DATE == type) {
266
                Date date = feature.getDate(attr.getIndex());
267
                fieldString = formatter.formatDate(date);
268
                safeEncode(fieldName, fieldString, fieldLen, false);
269

    
270
            } else if (DataTypes.DECIMAL == type) {
271
                BigDecimal n = feature.getDecimal(attr.getIndex());
272
                fieldString = formatter.format(n, fieldLen);
273
                safeEncode(fieldName, fieldString, fieldLen, false);
274

    
275
            } else if (DataTypes.DOUBLE == type) {
276
                double n = feature.getDouble(attr.getIndex());
277
                fieldString = formatter.format(n, fieldLen, descriptor.getScale());
278
                safeEncode(fieldName, fieldString, fieldLen, false);
279

    
280
            } else if (DataTypes.FLOAT == type) {
281
                float n = feature.getFloat(attr.getIndex());
282
                fieldString = formatter.format(n, fieldLen, descriptor.getScale());
283
                safeEncode(fieldName, fieldString, fieldLen, false);
284

    
285
            } else if (DataTypes.LONG == type) {
286
                long l = feature.getLong(attr.getIndex());
287
                fieldString = formatter.format(l, fieldLen);
288
                safeEncode(fieldName, fieldString, fieldLen, false);
289

    
290
            } else if (DataTypes.INT == type) {
291
                int n = feature.getInt(attr.getIndex());
292
                fieldString = formatter.format(n, fieldLen);
293
                safeEncode(fieldName, fieldString, fieldLen, false);
294

    
295
            } else if (DataTypes.BYTE == type) {
296
                int n = feature.getInt(attr.getIndex());
297
                fieldString = formatter.format(n, fieldLen);
298
                safeEncode(fieldName, fieldString, fieldLen, false);
299

    
300
            } else if (DataTypes.STRING == type) {
301
                String s = feature.getString(attr.getIndex());
302
                safeEncode(fieldName, StringUtils.defaultIfEmpty(s, ""), fieldLen, true);
303

    
304
            } else {
305
                // Si no conocemos el tipo intentamos guardarlo como un string
306
                String s = feature.getString(attr.getIndex());
307
                safeEncode(fieldName, StringUtils.defaultIfEmpty(s, ""), fieldLen, true);
308

    
309
            }
310
        } catch (Exception ex) {
311
            throw new RuntimeException("Can't encode field '" + attr.getName() + "' with size "+fieldLen, ex);
312
        }
313

    
314
    }
315

    
316
    /**
317
     * Returns a safely padded (and potentially truncated) string
318
     *
319
     * This may truncate some record, but it is required to ensure that the
320
     * field limit is not overflowed when using variable-length charsets such as
321
     * UTF-8.
322
     *
323
     * @throws UnsupportedEncodingException
324
     */
325
    private void safeEncode(String fieldName, String in, int limit, boolean rightPadding) throws UnsupportedEncodingException {
326
        try {
327
            byte[] encodedString = in.getBytes(this.charset);
328
            if (encodedString.length > limit) {
329
                this.getLogger().error("'"+fieldName+"' field size ["+encodedString.length+"] exceeds limit "+limit);
330
                // too long, truncating
331
                /*
332
                             * The block code bellow is equivalent to this simple code
333
                             * fragment:
334

335
                    if (rightPadding) {
336
                            in = in.substring(0, in.length()-1);
337
                            encodedString = in.getBytes(charset);
338
                    }
339
                    else {
340
                            in.substring(1, in.length());
341
                            encodedString = in.getBytes(charset);
342
                    }
343

344
                    However, the implemented algorithm has a much better performance
345
                    for the average and worst cases (when the input string has a lot
346
                    of multibyte characters), while keeping a good performance
347
                    for the best case (when all the characters in the input string
348
                    can be represented as single bytes using the selected charset).
349

350
                    The general strategy is to compute the deviation from the
351
                    required maximum number of bytes (limit) and the actual number
352
                    of bytes of the encoded String.
353

354
                    Then, we use this deviation to estimate the amount of characters
355
                    to truncate, based on the average factor of bytes per char in the
356
                    input string.
357

358
                    We truncate the string using this approach until the deviation
359
                    gets stable.
360

361
                    Finally, as we should be close enough to the right truncation position,
362
                    we increment/decrement the truncated string by only 1 character, to
363
                    ensure we truncate in the exact position. 
364
                 */
365
                String str = in;
366
                int estimatedDiff, deviation;
367
                int deviationPrev;
368
                double ratio;
369
                byte[] encodedChar;
370
                int truncatePos = 0;
371
                deviation = encodedString.length - limit;
372
                deviationPrev = deviation - 1;
373
                while (Math.abs(deviation) > Math.abs(deviationPrev) && str.length() > 0) {
374
                    ratio = ((double) encodedString.length) / ((double) str.length());
375
                    // apply the estimated diff, ensuring it is at least >= 1.0 in absolute value
376
                    estimatedDiff = Math.max((int) (((double) deviation) / ratio), (int) (Math.signum(deviation) * 1));
377
                    // too long, truncating
378
                    if (rightPadding) {
379
                        truncatePos = Math.max(str.length() - estimatedDiff, 0);
380
                        str = in.substring(0, truncatePos);
381
                    } else {
382
                        truncatePos = Math.max(truncatePos + estimatedDiff, 0);
383
                        str = in.substring(truncatePos);
384
                    }
385
                    encodedString = str.getBytes(charset);
386
                    deviationPrev = deviation;
387
                    deviation = encodedString.length - limit;
388
                }
389
                // now we are close enough, get the exact position for truncating
390
                while (encodedString.length > limit) {
391
                    // too long, truncating
392
                    //                                      System.out.println("truncating");
393
                    if (rightPadding) {
394
                        str = in.substring(0, str.length() - 1);
395
                    } else {
396
                        truncatePos = truncatePos + 1;
397
                        str = in.substring(truncatePos);
398
                    }
399
                    encodedString = str.getBytes(charset);
400
                }
401
                while (encodedString.length < limit && str.length() < in.length()) {
402
                    // Extend if necessary:
403
                    // 1 - Get the length in bytes of the next char
404
                    // 2 - Add the char to the substring if we are still within the limits 
405
                    //                                      System.out.println("extending");
406
                    if (rightPadding) {
407
                        encodedChar = in.substring(str.length(), str.length() + 1).getBytes(charset);
408
                    } else {
409
                        encodedChar = in.substring(truncatePos - 1, truncatePos).getBytes(charset);
410
                        //                                              System.out.println(encodedChar);
411
                        //                                              System.out.println(encodedChar.length);
412
                        //                                              System.out.println(testStrings[i].substring(truncatePos-1, truncatePos));
413
                    }
414
                    //                                      System.out.println(testStrings[i].substring(in.length(), in.length()+1));
415
                    if ((encodedString.length + encodedChar.length) > limit) {
416
                        // one more char would overflow the limit
417
                        break;
418
                    }
419
                    // too short, extending
420
                    if (rightPadding) {
421
                        str = in.substring(0, str.length() + 1);
422
                    } else {
423
                        truncatePos = truncatePos - 1;
424
                        str = in.substring(truncatePos);
425
                    }
426
                    encodedString = str.getBytes(charset);
427
                }
428
            }
429
            if (rightPadding) {
430
                buffer.put(encodedString);
431
            }
432
            if (encodedString.length < limit) {
433
                // too short, padding
434
                int i = encodedString.length;
435
                while (i < limit) {
436
                    ((Buffer) blank).position(0);
437
                    buffer.put(blank);
438
                    i = i + blankSize;
439
                }
440
                if (i > limit) {
441
                    // Might happen for instance if charset is UTF16 and the
442
                    // limit of characters in the field is an odd number
443
                    throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset"));
444
                }
445
            }
446
            if (!rightPadding) {
447
                buffer.put(encodedString);
448
            }
449
        } catch (BufferOverflowException exc) {
450
            // Might happen for instance if charset is UTF16 and the
451
            // limit of characters in the field is an odd number
452
            throw new UnsupportedEncodingException(exc);
453
        }
454
    }
455

    
456

    
457
    /**
458
     * Release resources associated with this writer. <B>Highly recommended</B>
459
     *
460
     * @throws CloseException
461
     */
462
    public void close() throws CloseException {
463
        // IANS - GEOT 193, bogus 0x00 written. According to dbf spec, optional
464
        // eof 0x1a marker is, well, optional. Since the original code wrote a
465
        // 0x00 (which is wrong anyway) lets just do away with this :)
466
        // - produced dbf works in OpenOffice and ArcExplorer java, so it must
467
        // be okay.
468
        // buffer.position(0);
469
        // buffer.put((byte) 0).position(0).limit(1);
470
        // write();
471

    
472
        if (headDrity) {
473
            try {
474
                this.writeHeader();
475
            } catch (WriteException e) {
476
                throw new CloseException("DbaseFileWriter", e);
477
            }
478
        }
479

    
480
        try {
481
            channel.close();
482
        } catch (IOException e) {
483
            throw new CloseException("DBF Writer", e);
484
        }
485
        if (buffer instanceof MappedByteBuffer) {
486
            // NIOUtilities.clean(buffer);
487
        }
488

    
489
        buffer = null;
490
        channel = null;
491
        formatter = null;
492
    }
493

    
494
    public void setCharset(Charset charset) {
495
        this.charset = charset;
496
        blank = charset.encode(" ");
497
        blankSize = blank.limit();
498
    }
499
    
500
    protected FilteredLogger getLogger() {
501
        if(this.logger == null){
502
            this.logger = new FilteredLogger(LOGGER, "DbaseFileWriter", 2000L);
503
        }
504
        return this.logger;
505
    }
506

    
507
}